datamule 1.2.5__py3-none-any.whl → 1.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/__init__.py +1 -0
- datamule/document/document.py +12 -8
- datamule/document/mappings/d.py +125 -0
- datamule/document/mappings/ex102_abs.py +63 -0
- datamule/document/mappings/information_table.py +1 -0
- datamule/document/mappings/ownership.py +1 -1
- datamule/document/mappings/proxy_voting_record.py +17 -1
- datamule/document/mappings/submission_metadata.py +9 -0
- datamule/document/mappings/thirteenfhr.py +70 -3
- datamule/document/mappings/twentyfivense.py +1 -0
- datamule/document/processing.py +170 -42
- datamule/document/table.py +60 -5
- datamule/helper.py +10 -1
- datamule/index.py +8 -10
- datamule/portfolio.py +16 -11
- datamule/sec/submissions/monitor.py +173 -120
- datamule/sec/submissions/textsearch.py +0 -4
- datamule/sec/xbrl/streamcompanyfacts.py +1 -1
- datamule/seclibrary/downloader.py +2 -2
- datamule/submission.py +80 -14
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/METADATA +1 -2
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/RECORD +24 -23
- datamule/sec/rss/__init__.py +0 -0
- datamule/sec/rss/monitor.py +0 -416
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/WHEEL +0 -0
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/top_level.txt +0 -0
datamule/submission.py
CHANGED
@@ -4,6 +4,70 @@ from .document.document import Document
 from secsgml import parse_sgml_submission_into_memory
 import os
 import aiofiles
+import tempfile
+
+
+# # NEW CODE YAY. probably will remove
+
+# def save_metadata_atomically(metadata_file_path, metadata_content):
+#     """Save metadata to a JSONL file atomically, works on any filesystem"""
+
+#     # Create directory if it doesn't exist
+#     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
+
+#     # Format the JSON with newline
+#     json_str = json.dumps(metadata_content, indent=4) + "\n"
+
+#     # Write complete content to a temporary file first
+#     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
+#     try:
+#         with os.fdopen(fd, 'w') as temp_file:
+#             temp_file.write(json_str)
+#             temp_file.flush()
+#             os.fsync(temp_file.fileno())  # Force write to disk
+
+#         # Append the temporary file to the main file
+#         with open(metadata_file_path, 'a') as target_file:
+#             with open(temp_path, 'r') as temp_read:
+#                 content = temp_read.read()
+#                 target_file.write(content)
+#                 target_file.flush()
+#                 os.fsync(target_file.fileno())  # Force write to disk
+#     finally:
+#         # Clean up the temporary file
+#         if os.path.exists(temp_path):
+#             os.unlink(temp_path)
+
+# async def save_metadata_atomically_async(metadata_file_path, metadata_content):
+#     """Save metadata to a JSONL file atomically in async mode"""
+
+#     # Create directory if it doesn't exist
+#     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
+
+#     # Format the JSON with newline
+#     json_str = json.dumps(metadata_content, indent=4) + "\n"
+
+#     # Write to a temporary file first
+#     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
+#     os.close(fd)  # Close the file descriptor
+
+#     try:
+#         async with aiofiles.open(temp_path, 'w') as temp_file:
+#             await temp_file.write(json_str)
+#             await temp_file.flush()
+
+#         # Append the temporary file to the main file
+#         async with aiofiles.open(metadata_file_path, 'a') as target_file:
+#             async with aiofiles.open(temp_path, 'r') as temp_read:
+#                 content = await temp_read.read()
+#                 await target_file.write(content)
+#                 await target_file.flush()
+#     finally:
+#         # Clean up the temporary file
+#         if os.path.exists(temp_path):
+#             os.unlink(temp_path)
+
+# # END OF NEW CODE


 class Submission:
@@ -15,16 +79,17 @@ class Submission:

         if sgml_content is not None:
             self.path = None
-
+            metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
+            self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=None)

             # code dupe
-            self.accession = self.metadata['accession-number']
-            self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
+            self.accession = self.metadata.content['accession-number']
+            self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"

             self.documents = []
             filtered_metadata_documents = []

-            for idx,doc in enumerate(self.metadata['documents']):
+            for idx,doc in enumerate(self.metadata.content['documents']):
                 type = doc.get('type')

                 # Keep only specified types
@@ -36,17 +101,18 @@ class Submission:

                 filtered_metadata_documents.append(doc)

-            self.metadata['documents'] = filtered_metadata_documents
+            self.metadata.content['documents'] = filtered_metadata_documents

         if path is not None:
             self.path = Path(path)
             metadata_path = self.path / 'metadata.json'
             with metadata_path.open('r') as f:
-
+                metadata = json.load(f)
+            self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)

             # Code dupe
-            self.accession = self.metadata['accession-number']
-            self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
+            self.accession = self.metadata.content['accession-number']
+            self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"



@@ -58,7 +124,7 @@ class Submission:
         else:
             document_types = document_type

-        for idx,doc in enumerate(self.metadata['documents']):
+        for idx,doc in enumerate(self.metadata.content['documents']):
             if doc['type'] in document_types:

                 # if loaded from path
@@ -84,7 +150,7 @@ class Submission:


     def __iter__(self):
-        for idx,doc in enumerate(self.metadata['documents']):
+        for idx,doc in enumerate(self.metadata.content['documents']):
             # if loaded from path
             if self.path is not None:
                 filename = doc.get('filename')
@@ -121,9 +187,9 @@ class Submission:

         metadata_path = file_dir / "metadata.json"
         with open(metadata_path, 'w') as f:
-            json.dump(self.metadata, f, indent=4)
+            json.dump(self.metadata.content, f, indent=4)

-        for idx, doc in enumerate(self.metadata['documents']):
+        for idx, doc in enumerate(self.metadata.content['documents']):
             try:
                 filename = doc.get('filename')
                 if filename is None:
@@ -162,9 +228,9 @@ class Submission:

         metadata_path = file_dir / "metadata.json"
         async with aiofiles.open(metadata_path, 'w') as f:
-            await f.write(json.dumps(self.metadata, indent=4))
+            await f.write(json.dumps(self.metadata.content, indent=4))

-        for idx, doc in enumerate(self.metadata['documents']):
+        for idx, doc in enumerate(self.metadata.content['documents']):
             try:
                 filename = doc.get('filename')
                 if filename is None:
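The practical effect of the submission.py change for callers: in 1.2.5, Submission.metadata was a plain dict, while in 1.2.9 it is wrapped in a Document, so metadata fields are read through .content. A minimal sketch of the new access pattern, assuming Submission is importable from the package root and accepts a local filing directory via path (the path below is hypothetical):

from datamule import Submission  # assumes Submission is exported at the package root

sub = Submission(path="filings/000123456725000001")  # hypothetical local filing directory

# 1.2.5 style (metadata was a plain dict) -- no longer applies:
# accession = sub.metadata['accession-number']

# 1.2.9 style, matching the diff above:
accession = sub.metadata.content['accession-number']
filing_date = sub.filing_date  # constructor normalizes 'filing-date' to 'YYYY-MM-DD'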
{datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.2.5
+Version: 1.2.9
 Summary: Making it easier to use SEC filings.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
@@ -10,7 +10,6 @@ Requires-Dist: tqdm
 Requires-Dist: requests
 Requires-Dist: nest-asyncio
 Requires-Dist: aiofiles
-Requires-Dist: polars
 Requires-Dist: setuptools
 Requires-Dist: selectolax
 Requires-Dist: pytz
{datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/RECORD
CHANGED
@@ -1,35 +1,38 @@
-datamule/__init__.py,sha256=
+datamule/__init__.py,sha256=glzwBeGJEE6-TG7mRule9GH6L59XaIRR9T7ALcdpMus,1067
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
-datamule/helper.py,sha256=
-datamule/index.py,sha256=
+datamule/helper.py,sha256=g9Kb1DWbViCoON06PjOkSX5Ucu0uG7zPwhsO2LQ6C1g,3579
+datamule/index.py,sha256=_7Ox5hyF_7RWdblVFr5rNyv_ARwBP7VY4f703pk9qQ8,2074
 datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
-datamule/portfolio.py,sha256=
+datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
 datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
-datamule/submission.py,sha256=
+datamule/submission.py,sha256=Yh5nG3ioumhl6z30wJdIEmKjDDNSuo0r2xycZSIaeIg,11035
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/document.py,sha256=
-datamule/document/processing.py,sha256=
-datamule/document/table.py,sha256=
+datamule/document/document.py,sha256=menUFoeWwiY0rJnBkQiqY4NWnO0J17-qs8jFvO_1jiY,9969
+datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
+datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
 datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/atsn.py,sha256=qkZGNIhyPC3VTTOjQ8-FSCQIhUy4XeSycUGLShxNVCo,17743
 datamule/document/mappings/cfportal.py,sha256=bR9d6DDY0kJ_HGx_hND2y1PNNkZjemYZ2KdyFAcv760,25257
+datamule/document/mappings/d.py,sha256=ayRK-bTzelNH6fspp-n3gz6RXOrHVx6IjX-TmisrFe4,7714
+datamule/document/mappings/ex102_abs.py,sha256=FdGKvteRh_HsYgILF-8o4R6aSsjYwcaLpJxzdru4FTE,3976
 datamule/document/mappings/ex99a_sdr.py,sha256=PNdj9I0ZhNicPObLelNmjp33EgTwzvukqkBDnwxarE0,19
 datamule/document/mappings/ex99c_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/ex99g_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/ex99i_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings/information_table.py,sha256=
+datamule/document/mappings/information_table.py,sha256=6l2Via728I59RS0y9Pit37NoOSAbaT-vclArYxU1vtY,1585
 datamule/document/mappings/nmfp.py,sha256=WuTyM1SkBiiLVAHqFF4DTZ_8AvsIuonT2w7pwYDPTDw,17767
 datamule/document/mappings/npx.py,sha256=xwruBueC09kfWhXV3fNUnQWYwCWrdrhQoVO3cKfPTO4,6556
 datamule/document/mappings/onefourtyfour.py,sha256=_-w9h6wGINGH5pQqQvPrd0cgB5QfCtPG5M40ewf_w8Q,2604
-datamule/document/mappings/ownership.py,sha256=
-datamule/document/mappings/proxy_voting_record.py,sha256=
+datamule/document/mappings/ownership.py,sha256=piD9vs4WFrB4yvp6c0pT5bibLKXgsM7hpnBUzaY0Xxs,10155
+datamule/document/mappings/proxy_voting_record.py,sha256=tSqLH065EOUq7U80P5GP1JBqipmAiqniPpP3E4adA1A,721
 datamule/document/mappings/sbs.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/sbsef.py,sha256=Zw58rbYcnitynk1mh9g1jDrCfqmFlY60OEjPM6p9iF0,534
 datamule/document/mappings/schedule13.py,sha256=lh9sukpEte514Gid77Nz9zh3uBEFZEemrZ2Uau0qsgk,6295
 datamule/document/mappings/sdr.py,sha256=UekqZId5PFMMWRAJSaPvCpN4c1Hx-SLAQPEN8GW_Gbg,4829
+datamule/document/mappings/submission_metadata.py,sha256=pi1eW-tnoAQ6y3laRI29Op80E9BPqqmcfe45owKYStw,271
 datamule/document/mappings/ta.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings/thirteenfhr.py,sha256=
-datamule/document/mappings/twentyfivense.py,sha256=
+datamule/document/mappings/thirteenfhr.py,sha256=XpYRIMPZnGLfEE4TqBI0BPXbyuq0xf3hut1fePOF6kU,4250
+datamule/document/mappings/twentyfivense.py,sha256=lKyj0ZBhkHX9gQJMTUPrQlxYFg3k-aBnWqtoS5bujZM,905
 datamule/document/mappings/twentyfourf2nt.py,sha256=Q7RPT3JgJHjYdjMuaSyAxclt6QPT_LgCQloxp-ByDuI,4118
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
@@ -38,24 +41,22 @@ datamule/sec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
 datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
-datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
 datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/submissions/downloader.py,sha256=60wX2Yml1UCuxOtU0xMxqqeyHhrypCmlDQ0jZF-StJo,2665
 datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
-datamule/sec/submissions/monitor.py,sha256=
+datamule/sec/submissions/monitor.py,sha256=s6uknn1dF1EemiI3Hl4nEq3txwK7nYl6wmayuUPYpRs,7844
 datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
-datamule/sec/submissions/textsearch.py,sha256
+datamule/sec/submissions/textsearch.py,sha256=zEr3NXdhVFL8eMh2jruVXIt7taUZTMdNy2hOAyRM2pA,5706
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
 datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
-datamule/sec/xbrl/streamcompanyfacts.py,sha256=
+datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H5N2tbvKUzM,3307
 datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
-datamule/seclibrary/downloader.py,sha256=
+datamule/seclibrary/downloader.py,sha256=PIgz_7ASUTZOHcUZGcD1SmLaGSbq7xe7EiJT0Z7HU4M,13653
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.2.
-datamule-1.2.
-datamule-1.2.
-datamule-1.2.
+datamule-1.2.9.dist-info/METADATA,sha256=5bMwIRcARNqP6S1cdPzoIBuu1miiUJUUdWnTXvwtPNk,490
+datamule-1.2.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-1.2.9.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.2.9.dist-info/RECORD,,
datamule/sec/rss/__init__.py
DELETED
File without changes