datamule 1.2.5__py3-none-any.whl → 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/submission.py CHANGED
@@ -4,6 +4,70 @@ from .document.document import Document
4
4
  from secsgml import parse_sgml_submission_into_memory
5
5
  import os
6
6
  import aiofiles
7
+ import tempfile
8
+
9
+
10
+ # # NEW CODE YAY. probably will remove
11
+
12
+ # def save_metadata_atomically(metadata_file_path, metadata_content):
13
+ # """Save metadata to a JSONL file atomically, works on any filesystem"""
14
+
15
+ # # Create directory if it doesn't exist
16
+ # os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
17
+
18
+ # # Format the JSON with newline
19
+ # json_str = json.dumps(metadata_content, indent=4) + "\n"
20
+
21
+ # # Write complete content to a temporary file first
22
+ # fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
23
+ # try:
24
+ # with os.fdopen(fd, 'w') as temp_file:
25
+ # temp_file.write(json_str)
26
+ # temp_file.flush()
27
+ # os.fsync(temp_file.fileno()) # Force write to disk
28
+
29
+ # # Append the temporary file to the main file
30
+ # with open(metadata_file_path, 'a') as target_file:
31
+ # with open(temp_path, 'r') as temp_read:
32
+ # content = temp_read.read()
33
+ # target_file.write(content)
34
+ # target_file.flush()
35
+ # os.fsync(target_file.fileno()) # Force write to disk
36
+ # finally:
37
+ # # Clean up the temporary file
38
+ # if os.path.exists(temp_path):
39
+ # os.unlink(temp_path)
40
+
41
+ # async def save_metadata_atomically_async(metadata_file_path, metadata_content):
42
+ # """Save metadata to a JSONL file atomically in async mode"""
43
+
44
+ # # Create directory if it doesn't exist
45
+ # os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
46
+
47
+ # # Format the JSON with newline
48
+ # json_str = json.dumps(metadata_content, indent=4) + "\n"
49
+
50
+ # # Write to a temporary file first
51
+ # fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
52
+ # os.close(fd) # Close the file descriptor
53
+
54
+ # try:
55
+ # async with aiofiles.open(temp_path, 'w') as temp_file:
56
+ # await temp_file.write(json_str)
57
+ # await temp_file.flush()
58
+
59
+ # # Append the temporary file to the main file
60
+ # async with aiofiles.open(metadata_file_path, 'a') as target_file:
61
+ # async with aiofiles.open(temp_path, 'r') as temp_read:
62
+ # content = await temp_read.read()
63
+ # await target_file.write(content)
64
+ # await target_file.flush()
65
+ # finally:
66
+ # # Clean up the temporary file
67
+ # if os.path.exists(temp_path):
68
+ # os.unlink(temp_path)
69
+
70
+ # # END OF NEW CODE
7
71
 
8
72
 
9
73
  class Submission:
@@ -15,16 +79,17 @@ class Submission:
15
79
 
16
80
  if sgml_content is not None:
17
81
  self.path = None
18
- self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
82
+ metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
83
+ self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=None)
19
84
 
20
85
  # code dupe
21
- self.accession = self.metadata['accession-number']
22
- self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
86
+ self.accession = self.metadata.content['accession-number']
87
+ self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
23
88
 
24
89
  self.documents = []
25
90
  filtered_metadata_documents = []
26
91
 
27
- for idx,doc in enumerate(self.metadata['documents']):
92
+ for idx,doc in enumerate(self.metadata.content['documents']):
28
93
  type = doc.get('type')
29
94
 
30
95
  # Keep only specified types
@@ -36,17 +101,18 @@ class Submission:
36
101
 
37
102
  filtered_metadata_documents.append(doc)
38
103
 
39
- self.metadata['documents'] = filtered_metadata_documents
104
+ self.metadata.content['documents'] = filtered_metadata_documents
40
105
 
41
106
  if path is not None:
42
107
  self.path = Path(path)
43
108
  metadata_path = self.path / 'metadata.json'
44
109
  with metadata_path.open('r') as f:
45
- self.metadata = json.load(f)
110
+ metadata = json.load(f)
111
+ self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
46
112
 
47
113
  # Code dupe
48
- self.accession = self.metadata['accession-number']
49
- self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
114
+ self.accession = self.metadata.content['accession-number']
115
+ self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
50
116
 
51
117
 
52
118
 
@@ -58,7 +124,7 @@ class Submission:
58
124
  else:
59
125
  document_types = document_type
60
126
 
61
- for idx,doc in enumerate(self.metadata['documents']):
127
+ for idx,doc in enumerate(self.metadata.content['documents']):
62
128
  if doc['type'] in document_types:
63
129
 
64
130
  # if loaded from path
@@ -84,7 +150,7 @@ class Submission:
84
150
 
85
151
 
86
152
  def __iter__(self):
87
- for idx,doc in enumerate(self.metadata['documents']):
153
+ for idx,doc in enumerate(self.metadata.content['documents']):
88
154
  # if loaded from path
89
155
  if self.path is not None:
90
156
  filename = doc.get('filename')
@@ -121,9 +187,9 @@ class Submission:
121
187
 
122
188
  metadata_path = file_dir / "metadata.json"
123
189
  with open(metadata_path, 'w') as f:
124
- json.dump(self.metadata, f, indent=4)
190
+ json.dump(self.metadata.content, f, indent=4)
125
191
 
126
- for idx, doc in enumerate(self.metadata['documents']):
192
+ for idx, doc in enumerate(self.metadata.content['documents']):
127
193
  try:
128
194
  filename = doc.get('filename')
129
195
  if filename is None:
@@ -162,9 +228,9 @@ class Submission:
162
228
 
163
229
  metadata_path = file_dir / "metadata.json"
164
230
  async with aiofiles.open(metadata_path, 'w') as f:
165
- await f.write(json.dumps(self.metadata, indent=4))
231
+ await f.write(json.dumps(self.metadata.content, indent=4))
166
232
 
167
- for idx, doc in enumerate(self.metadata['documents']):
233
+ for idx, doc in enumerate(self.metadata.content['documents']):
168
234
  try:
169
235
  filename = doc.get('filename')
170
236
  if filename is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.2.5
3
+ Version: 1.2.9
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -10,7 +10,6 @@ Requires-Dist: tqdm
10
10
  Requires-Dist: requests
11
11
  Requires-Dist: nest-asyncio
12
12
  Requires-Dist: aiofiles
13
- Requires-Dist: polars
14
13
  Requires-Dist: setuptools
15
14
  Requires-Dist: selectolax
16
15
  Requires-Dist: pytz
@@ -1,35 +1,38 @@
1
- datamule/__init__.py,sha256=8KioESb9y0Xwy72WuTfsYZnnMFdCrRhSv8DW-kZ4-To,1066
1
+ datamule/__init__.py,sha256=glzwBeGJEE6-TG7mRule9GH6L59XaIRR9T7ALcdpMus,1067
2
2
  datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
3
- datamule/helper.py,sha256=xgOVnea-lUlQ5I-U0vYUp0VeKPNZehNhqjJvegA3lYE,3342
4
- datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
3
+ datamule/helper.py,sha256=g9Kb1DWbViCoON06PjOkSX5Ucu0uG7zPwhsO2LQ6C1g,3579
4
+ datamule/index.py,sha256=_7Ox5hyF_7RWdblVFr5rNyv_ARwBP7VY4f703pk9qQ8,2074
5
5
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
6
- datamule/portfolio.py,sha256=zn7Ib29UU_HD4JxPiIRT3v5QgWxmPh4Q1l0VH0aJzUU,7104
6
+ datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
7
7
  datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
8
- datamule/submission.py,sha256=HXuFL6snLevGk7DGlvPbjcBOJuccAIxEPXnkA1TXX8Y,8121
8
+ datamule/submission.py,sha256=Yh5nG3ioumhl6z30wJdIEmKjDDNSuo0r2xycZSIaeIg,11035
9
9
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- datamule/document/document.py,sha256=eckqSvORnYp9KbZLTQAAjvsftATcIGtYIsFxQHI7dtQ,9859
11
- datamule/document/processing.py,sha256=fw-1OWfbmZhG1R8XpJx_vcGwz3_djmk0FrblHAMPmwc,27476
12
- datamule/document/table.py,sha256=Sv9jTGiVhnWIY9nHaynUUixwbCrvbLsf0fdOnFR-NCY,10791
10
+ datamule/document/document.py,sha256=menUFoeWwiY0rJnBkQiqY4NWnO0J17-qs8jFvO_1jiY,9969
11
+ datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
12
+ datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
13
13
  datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  datamule/document/mappings/atsn.py,sha256=qkZGNIhyPC3VTTOjQ8-FSCQIhUy4XeSycUGLShxNVCo,17743
15
15
  datamule/document/mappings/cfportal.py,sha256=bR9d6DDY0kJ_HGx_hND2y1PNNkZjemYZ2KdyFAcv760,25257
16
+ datamule/document/mappings/d.py,sha256=ayRK-bTzelNH6fspp-n3gz6RXOrHVx6IjX-TmisrFe4,7714
17
+ datamule/document/mappings/ex102_abs.py,sha256=FdGKvteRh_HsYgILF-8o4R6aSsjYwcaLpJxzdru4FTE,3976
16
18
  datamule/document/mappings/ex99a_sdr.py,sha256=PNdj9I0ZhNicPObLelNmjp33EgTwzvukqkBDnwxarE0,19
17
19
  datamule/document/mappings/ex99c_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  datamule/document/mappings/ex99g_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
21
  datamule/document/mappings/ex99i_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- datamule/document/mappings/information_table.py,sha256=t8xshuUPvPFbT3pGUiXcSIw2gnfw7rCvbh_aBV9NSQM,1560
22
+ datamule/document/mappings/information_table.py,sha256=6l2Via728I59RS0y9Pit37NoOSAbaT-vclArYxU1vtY,1585
21
23
  datamule/document/mappings/nmfp.py,sha256=WuTyM1SkBiiLVAHqFF4DTZ_8AvsIuonT2w7pwYDPTDw,17767
22
24
  datamule/document/mappings/npx.py,sha256=xwruBueC09kfWhXV3fNUnQWYwCWrdrhQoVO3cKfPTO4,6556
23
25
  datamule/document/mappings/onefourtyfour.py,sha256=_-w9h6wGINGH5pQqQvPrd0cgB5QfCtPG5M40ewf_w8Q,2604
24
- datamule/document/mappings/ownership.py,sha256=DcOxZW2cPiVD3ra2ojoZX47CYcR-_86O-6Ts8T4oaF4,10214
25
- datamule/document/mappings/proxy_voting_record.py,sha256=gTjx72HM3ge9dKwOFyADjQjlD55pLdqnGjik6dHehA4,29
26
+ datamule/document/mappings/ownership.py,sha256=piD9vs4WFrB4yvp6c0pT5bibLKXgsM7hpnBUzaY0Xxs,10155
27
+ datamule/document/mappings/proxy_voting_record.py,sha256=tSqLH065EOUq7U80P5GP1JBqipmAiqniPpP3E4adA1A,721
26
28
  datamule/document/mappings/sbs.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
29
  datamule/document/mappings/sbsef.py,sha256=Zw58rbYcnitynk1mh9g1jDrCfqmFlY60OEjPM6p9iF0,534
28
30
  datamule/document/mappings/schedule13.py,sha256=lh9sukpEte514Gid77Nz9zh3uBEFZEemrZ2Uau0qsgk,6295
29
31
  datamule/document/mappings/sdr.py,sha256=UekqZId5PFMMWRAJSaPvCpN4c1Hx-SLAQPEN8GW_Gbg,4829
32
+ datamule/document/mappings/submission_metadata.py,sha256=pi1eW-tnoAQ6y3laRI29Op80E9BPqqmcfe45owKYStw,271
30
33
  datamule/document/mappings/ta.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- datamule/document/mappings/thirteenfhr.py,sha256=QD4WztCebi6CwVPsgH9vQti0KpP257mYdFhNBQTbfwg,86
32
- datamule/document/mappings/twentyfivense.py,sha256=1IkOk3qODAXqUqO84ZrD-hSqgOJyhSKcncX9aARzl9Y,880
34
+ datamule/document/mappings/thirteenfhr.py,sha256=XpYRIMPZnGLfEE4TqBI0BPXbyuq0xf3hut1fePOF6kU,4250
35
+ datamule/document/mappings/twentyfivense.py,sha256=lKyj0ZBhkHX9gQJMTUPrQlxYFg3k-aBnWqtoS5bujZM,905
33
36
  datamule/document/mappings/twentyfourf2nt.py,sha256=Q7RPT3JgJHjYdjMuaSyAxclt6QPT_LgCQloxp-ByDuI,4118
34
37
  datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
38
  datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
@@ -38,24 +41,22 @@ datamule/sec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
41
  datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
39
42
  datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
43
  datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
41
- datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
43
44
  datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
45
  datamule/sec/submissions/downloader.py,sha256=60wX2Yml1UCuxOtU0xMxqqeyHhrypCmlDQ0jZF-StJo,2665
45
46
  datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
46
- datamule/sec/submissions/monitor.py,sha256=Im2kgnUehhTgyY2Vq3uk07n4Vkj4PjII_SsRDi8ehAE,5384
47
+ datamule/sec/submissions/monitor.py,sha256=s6uknn1dF1EemiI3Hl4nEq3txwK7nYl6wmayuUPYpRs,7844
47
48
  datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
48
- datamule/sec/submissions/textsearch.py,sha256=-a5yIrrxxtaK10IJeywFmXuJmSndYL9VKm4SC4I9JAs,5808
49
+ datamule/sec/submissions/textsearch.py,sha256=zEr3NXdhVFL8eMh2jruVXIt7taUZTMdNy2hOAyRM2pA,5706
49
50
  datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
51
  datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
51
52
  datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
52
- datamule/sec/xbrl/streamcompanyfacts.py,sha256=WyJIwuy5mNMXWpx_IkhFzDMe9MOfQ-vNkWl_JzBzFmc,3323
53
+ datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H5N2tbvKUzM,3307
53
54
  datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
54
55
  datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
56
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
56
- datamule/seclibrary/downloader.py,sha256=fJztJ_sEfv2oHHbDff07DRlXLmztXnzt3Yvv5YaZgGk,13718
57
+ datamule/seclibrary/downloader.py,sha256=PIgz_7ASUTZOHcUZGcD1SmLaGSbq7xe7EiJT0Z7HU4M,13653
57
58
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
58
- datamule-1.2.5.dist-info/METADATA,sha256=7sZeCfLgGsMTOqhKrRzmCVrjn2oKnWXHcirDjoXZ-zI,512
59
- datamule-1.2.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
60
- datamule-1.2.5.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
61
- datamule-1.2.5.dist-info/RECORD,,
59
+ datamule-1.2.9.dist-info/METADATA,sha256=5bMwIRcARNqP6S1cdPzoIBuu1miiUJUUdWnTXvwtPNk,490
60
+ datamule-1.2.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
61
+ datamule-1.2.9.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
62
+ datamule-1.2.9.dist-info/RECORD,,
File without changes