datamule 1.2.2__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
datamule/submission.py CHANGED
@@ -17,6 +17,7 @@ class Submission:
17
17
  self.path = None
18
18
  self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
19
19
 
20
+ # code dupe
20
21
  self.accession = self.metadata['accession-number']
21
22
  self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
22
23
 
@@ -43,6 +44,11 @@ class Submission:
43
44
  with metadata_path.open('r') as f:
44
45
  self.metadata = json.load(f)
45
46
 
47
+ # Code dupe
48
+ self.accession = self.metadata['accession-number']
49
+ self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
50
+
51
+
46
52
 
47
53
 
48
54
  def document_type(self, document_type):
@@ -65,9 +71,12 @@ class Submission:
65
71
  document_path = self.path / filename
66
72
  extension = document_path.suffix
67
73
 
68
- with document_path.open('r') as f:
74
+ with document_path.open('rb') as f:
69
75
  content = f.read()
70
76
 
77
+ if extension in ['.htm','.html','.txt','.xml']:
78
+ content = content.decode('utf-8', errors='replace')
79
+
71
80
  yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
72
81
  # if loaded from sgml_content
73
82
  else:
@@ -89,9 +98,12 @@ class Submission:
89
98
 
90
99
  # check if the file exists
91
100
  if document_path.exists():
92
- with document_path.open('r') as f:
101
+ with document_path.open('rb') as f:
93
102
  content = f.read()
94
103
 
104
+ if extension in ['.htm','.html','.txt','.xml']:
105
+ content = content.decode('utf-8', errors='replace')
106
+
95
107
  yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
96
108
  else:
97
109
  print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")
@@ -100,28 +112,6 @@ class Submission:
100
112
  else:
101
113
  yield self.documents[idx]
102
114
 
103
- # keep documents by document type
104
- def keep(self, document_type):
105
- # Convert single document type to list for consistent handling
106
- if isinstance(document_type, str):
107
- document_types = [document_type]
108
- else:
109
- document_types = document_type
110
-
111
- if self.path is not None:
112
- for doc in self.metadata['documents']:
113
- filename = doc.get('filename')
114
- type = doc.get('type')
115
- if type not in document_types:
116
- # oh we need handling here for sequences case
117
- if filename is None:
118
- filename = doc.sequence + '.txt'
119
-
120
- document_path = self.path / filename
121
- # delete the file
122
- document_path.unlink()
123
- else:
124
- print("Warning: keep() method is only available when loading from path.")
125
115
 
126
116
 
127
117
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.2.2
3
+ Version: 1.2.3
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -5,7 +5,7 @@ datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
5
5
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
6
6
  datamule/portfolio.py,sha256=so6j2KrkcZOToHIqkANAu3CC4QsfgaUN1zk9CrbRe1E,7225
7
7
  datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
8
- datamule/submission.py,sha256=tc4-8houjT2gfSK0P7ekowPduT31rj5_zt0axwZUacc,8483
8
+ datamule/submission.py,sha256=HXuFL6snLevGk7DGlvPbjcBOJuccAIxEPXnkA1TXX8Y,8121
9
9
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  datamule/document/document.py,sha256=BRnHPVt-vIT7EZTF-c-Ulv3N33xX9zE02Q9mKXVDeuY,9474
11
11
  datamule/document/processing.py,sha256=fw-1OWfbmZhG1R8XpJx_vcGwz3_djmk0FrblHAMPmwc,27476
@@ -34,7 +34,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
34
34
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
35
35
  datamule/seclibrary/downloader.py,sha256=fJztJ_sEfv2oHHbDff07DRlXLmztXnzt3Yvv5YaZgGk,13718
36
36
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
37
- datamule-1.2.2.dist-info/METADATA,sha256=QpXbg-4cnRknynj-W4Z2Sc1zKlWan62zEG8OrN2_E-A,512
38
- datamule-1.2.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
39
- datamule-1.2.2.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
40
- datamule-1.2.2.dist-info/RECORD,,
37
+ datamule-1.2.3.dist-info/METADATA,sha256=3gODk6YjozgMTYnjvXRX_pox_Otkq7tSDZY2LEl6MiU,512
38
+ datamule-1.2.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
39
+ datamule-1.2.3.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
40
+ datamule-1.2.3.dist-info/RECORD,,