datamule 1.2.2__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.2.2 → datamule-1.2.3}/PKG-INFO +1 -1
- {datamule-1.2.2 → datamule-1.2.3}/datamule/submission.py +14 -24
- {datamule-1.2.2 → datamule-1.2.3}/datamule.egg-info/PKG-INFO +1 -1
- {datamule-1.2.2 → datamule-1.2.3}/setup.py +1 -1
- {datamule-1.2.2 → datamule-1.2.3}/datamule/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/config.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/document/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/document/document.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/document/processing.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/document/table.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/helper.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/index.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/package_updater.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/portfolio.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/rss/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/rss/monitor.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/utils.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/seclibrary/__init__.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/seclibrary/bq.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/seclibrary/downloader.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/seclibrary/query.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule/sheet.py +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule.egg-info/SOURCES.txt +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule.egg-info/requires.txt +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.2.2 → datamule-1.2.3}/setup.cfg +0 -0
@@ -17,6 +17,7 @@ class Submission:
|
|
17
17
|
self.path = None
|
18
18
|
self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
|
19
19
|
|
20
|
+
# code dupe
|
20
21
|
self.accession = self.metadata['accession-number']
|
21
22
|
self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
|
22
23
|
|
@@ -43,6 +44,11 @@ class Submission:
|
|
43
44
|
with metadata_path.open('r') as f:
|
44
45
|
self.metadata = json.load(f)
|
45
46
|
|
47
|
+
# Code dupe
|
48
|
+
self.accession = self.metadata['accession-number']
|
49
|
+
self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
|
50
|
+
|
51
|
+
|
46
52
|
|
47
53
|
|
48
54
|
def document_type(self, document_type):
|
@@ -65,9 +71,12 @@ class Submission:
|
|
65
71
|
document_path = self.path / filename
|
66
72
|
extension = document_path.suffix
|
67
73
|
|
68
|
-
with document_path.open('r') as f:
|
74
|
+
with document_path.open('rb') as f:
|
69
75
|
content = f.read()
|
70
76
|
|
77
|
+
if extension in ['.htm','.html','.txt','.xml']:
|
78
|
+
content = content.decode('utf-8', errors='replace')
|
79
|
+
|
71
80
|
yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
|
72
81
|
# if loaded from sgml_content
|
73
82
|
else:
|
@@ -89,9 +98,12 @@ class Submission:
|
|
89
98
|
|
90
99
|
# check if the file exists
|
91
100
|
if document_path.exists():
|
92
|
-
with document_path.open('r') as f:
|
101
|
+
with document_path.open('rb') as f:
|
93
102
|
content = f.read()
|
94
103
|
|
104
|
+
if extension in ['.htm','.html','.txt','.xml']:
|
105
|
+
content = content.decode('utf-8', errors='replace')
|
106
|
+
|
95
107
|
yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
|
96
108
|
else:
|
97
109
|
print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")
|
@@ -100,28 +112,6 @@ class Submission:
|
|
100
112
|
else:
|
101
113
|
yield self.documents[idx]
|
102
114
|
|
103
|
-
# keep documents by document type
|
104
|
-
def keep(self, document_type):
|
105
|
-
# Convert single document type to list for consistent handling
|
106
|
-
if isinstance(document_type, str):
|
107
|
-
document_types = [document_type]
|
108
|
-
else:
|
109
|
-
document_types = document_type
|
110
|
-
|
111
|
-
if self.path is not None:
|
112
|
-
for doc in self.metadata['documents']:
|
113
|
-
filename = doc.get('filename')
|
114
|
-
type = doc.get('type')
|
115
|
-
if type not in document_types:
|
116
|
-
# oh we need handling here for sequences case
|
117
|
-
if filename is None:
|
118
|
-
filename = doc.sequence + '.txt'
|
119
|
-
|
120
|
-
document_path = self.path / filename
|
121
|
-
# delete the file
|
122
|
-
document_path.unlink()
|
123
|
-
else:
|
124
|
-
print("Warning: keep() method is only available when loading from path.")
|
125
115
|
|
126
116
|
|
127
117
|
|
@@ -30,7 +30,7 @@ if not file_path.exists():
|
|
30
30
|
setup(
|
31
31
|
name="datamule",
|
32
32
|
author="John Friedman",
|
33
|
-
version="1.2.2",
|
33
|
+
version="1.2.3",
|
34
34
|
description="Making it easier to use SEC filings.",
|
35
35
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
36
36
|
url="https://github.com/john-friedman/datamule-python",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|