datamule 2.0.3__tar.gz → 2.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-2.0.3 → datamule-2.0.5}/PKG-INFO +1 -1
- {datamule-2.0.3 → datamule-2.0.5}/datamule/datamule/datamule_mysql_rds.py +22 -2
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/document.py +2 -68
- {datamule-2.0.3 → datamule-2.0.5}/datamule/submission.py +80 -12
- {datamule-2.0.3 → datamule-2.0.5}/datamule.egg-info/PKG-INFO +1 -1
- {datamule-2.0.3 → datamule-2.0.5}/setup.py +1 -1
- {datamule-2.0.3 → datamule-2.0.5}/datamule/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/config.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/datamule/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/datamule/datamule_lookup.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/datamule/downloader.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/datamule/sec_connector.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/atsn.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/cfportal.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/d.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ex102_abs.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ex99a_sdr.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ex99c_sdr.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ex99g_sdr.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ex99i_sdr.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/information_table.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/nmfp.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/npx.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/onefourtyfour.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ownership.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/proxy_voting_record.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/sbs.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/sbsef.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/schedule13.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/sdr.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/submission_metadata.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/ta.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/thirteenfhr.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/twentyfivense.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/mappings/twentyfourf2nt.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/processing.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/document/table.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/helper.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/index.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/package_updater.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/portfolio.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/portfolio_compression_utils.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/utils.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/seclibrary/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/seclibrary/bq.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/sheet.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/utils/__init__.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/utils/construct_submissions_data.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule/utils/format_accession.py +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule.egg-info/SOURCES.txt +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule.egg-info/requires.txt +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/datamule.egg-info/top_level.txt +0 -0
- {datamule-2.0.3 → datamule-2.0.5}/setup.cfg +0 -0
@@ -77,8 +77,14 @@ class DatamuleMySQL:
|
|
77
77
|
if value is None:
|
78
78
|
continue
|
79
79
|
|
80
|
+
# *** HIGHLIGHTED CHANGE: Special logic for members in simple_xbrl table ***
|
81
|
+
if table == 'simple_xbrl' and key == 'members':
|
82
|
+
if isinstance(value, list):
|
83
|
+
filters[key] = {"type": "find_in_set", "values": value}
|
84
|
+
else:
|
85
|
+
filters[key] = {"type": "find_in_set", "values": [value]}
|
80
86
|
# Special logic for cik
|
81
|
-
|
87
|
+
elif key == 'cik':
|
82
88
|
if isinstance(value, list):
|
83
89
|
value = [int(val) for val in value]
|
84
90
|
else:
|
@@ -100,6 +106,12 @@ class DatamuleMySQL:
|
|
100
106
|
for key, filter_obj in filters.items():
|
101
107
|
if filter_obj["type"] == "range":
|
102
108
|
query_desc.append(f"{key}={filter_obj['values'][0]} to {filter_obj['values'][1]}")
|
109
|
+
# *** HIGHLIGHTED CHANGE: Display logic for find_in_set type ***
|
110
|
+
elif filter_obj["type"] == "find_in_set":
|
111
|
+
if len(filter_obj["values"]) == 1:
|
112
|
+
query_desc.append(f"{key} contains {filter_obj['values'][0]}")
|
113
|
+
else:
|
114
|
+
query_desc.append(f"{key} contains any of {filter_obj['values']}")
|
103
115
|
elif len(filter_obj["values"]) == 1:
|
104
116
|
query_desc.append(f"{key}={filter_obj['values'][0]}")
|
105
117
|
else:
|
@@ -177,6 +189,7 @@ def query_mysql_rds(table, api_key=None, **kwargs):
|
|
177
189
|
Parameters:
|
178
190
|
- table: Table name (e.g., 'simple_xbrl')
|
179
191
|
- cik: Company CIK number(s), can be int, string, or list
|
192
|
+
- members: For simple_xbrl table, search within comma-separated member strings
|
180
193
|
- Any other filter parameters as keyword arguments
|
181
194
|
- page_size: Number of records per page (max 25000, default 25000)
|
182
195
|
- quiet: Boolean, whether to suppress progress output and summary (default False)
|
@@ -186,6 +199,7 @@ def query_mysql_rds(table, api_key=None, **kwargs):
|
|
186
199
|
- Single value: Exact match
|
187
200
|
- List: OR condition (any of the values)
|
188
201
|
- Tuple: Range condition (between first and second values)
|
202
|
+
- members (simple_xbrl only): Searches within comma-separated strings using FIND_IN_SET
|
189
203
|
|
190
204
|
Returns:
|
191
205
|
- List of dictionaries containing the requested data (ready for pandas DataFrame)
|
@@ -224,8 +238,14 @@ def _query_mysql_rds_single(table, api_key=None, **kwargs):
|
|
224
238
|
if value is None:
|
225
239
|
continue
|
226
240
|
|
241
|
+
# *** HIGHLIGHTED CHANGE: Special logic for members in simple_xbrl table ***
|
242
|
+
if table == 'simple_xbrl' and key == 'members':
|
243
|
+
if isinstance(value, list):
|
244
|
+
filters[key] = {"type": "find_in_set", "values": value}
|
245
|
+
else:
|
246
|
+
filters[key] = {"type": "find_in_set", "values": [value]}
|
227
247
|
# special logic for cik
|
228
|
-
|
248
|
+
elif key == 'cik':
|
229
249
|
if isinstance(value, list):
|
230
250
|
value = [int(val) for val in value]
|
231
251
|
else:
|
@@ -12,8 +12,7 @@ from .processing import process_tabular_data
|
|
12
12
|
from pathlib import Path
|
13
13
|
import webbrowser
|
14
14
|
from secsgml.utils import bytes_to_str
|
15
|
-
|
16
|
-
from company_fundamentals import construct_fundamentals
|
15
|
+
|
17
16
|
|
18
17
|
class Document:
|
19
18
|
def __init__(self, type, content, extension,accession,filing_date,path=None):
|
@@ -35,8 +34,7 @@ class Document:
|
|
35
34
|
self.extension = extension
|
36
35
|
# this will be filled by parsed
|
37
36
|
self.data = None
|
38
|
-
|
39
|
-
self.fundamentals = None
|
37
|
+
|
40
38
|
|
41
39
|
#_load_text_content
|
42
40
|
def _preprocess_txt_content(self):
|
@@ -106,70 +104,6 @@ class Document:
|
|
106
104
|
return bool(re.search(pattern, self.content))
|
107
105
|
return False
|
108
106
|
|
109
|
-
# slated for removal
|
110
|
-
def parse_xbrl(self,type='inline'):
|
111
|
-
if self.xbrl:
|
112
|
-
return
|
113
|
-
if type =='inline':
|
114
|
-
if self.extension not in ['.htm','.html']:
|
115
|
-
return
|
116
|
-
self.xbrl = parse_inline_xbrl(self.content)
|
117
|
-
else:
|
118
|
-
raise ValueError("Only inline has been implemented so far.")
|
119
|
-
|
120
|
-
def parse_fundamentals(self,categories=None):
|
121
|
-
self.parse_xbrl()
|
122
|
-
# Transform XBRL records into the format needed by construct_fundamentals
|
123
|
-
xbrl = []
|
124
|
-
|
125
|
-
for xbrl_record in self.xbrl:
|
126
|
-
try:
|
127
|
-
# Extract basic fields
|
128
|
-
value = xbrl_record.get('_val', None)
|
129
|
-
taxonomy, name = xbrl_record['_attributes']['name'].split(':')
|
130
|
-
|
131
|
-
# Handle scaling if present
|
132
|
-
if xbrl_record.get('_attributes', {}).get('scale') is not None:
|
133
|
-
scale = int(xbrl_record['_attributes']['scale'])
|
134
|
-
try:
|
135
|
-
value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
|
136
|
-
except:
|
137
|
-
pass
|
138
|
-
|
139
|
-
# Extract period dates
|
140
|
-
period_start_date = None
|
141
|
-
period_end_date = None
|
142
|
-
|
143
|
-
if xbrl_record.get('_context'):
|
144
|
-
context = xbrl_record['_context']
|
145
|
-
period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
|
146
|
-
period_end_date = context.get('context_period_enddate')
|
147
|
-
|
148
|
-
# Create record in the format expected by construct_fundamentals
|
149
|
-
record = {
|
150
|
-
'taxonomy': taxonomy,
|
151
|
-
'name': name,
|
152
|
-
'value': value,
|
153
|
-
'period_start_date': period_start_date,
|
154
|
-
'period_end_date': period_end_date
|
155
|
-
}
|
156
|
-
|
157
|
-
xbrl.append(record)
|
158
|
-
|
159
|
-
except Exception as e:
|
160
|
-
# Skip malformed records
|
161
|
-
continue
|
162
|
-
|
163
|
-
# Call construct_fundamentals with the transformed data
|
164
|
-
fundamentals = construct_fundamentals(xbrl,
|
165
|
-
taxonomy_key='taxonomy',
|
166
|
-
concept_key='name',
|
167
|
-
start_date_key='period_start_date',
|
168
|
-
end_date_key='period_end_date',
|
169
|
-
categories=categories)
|
170
|
-
|
171
|
-
self.fundamentals = fundamentals
|
172
|
-
|
173
107
|
# Note: this method will be heavily modified in the future
|
174
108
|
def parse(self):
|
175
109
|
# check if we have already parsed the content
|
@@ -9,6 +9,10 @@ import tarfile
|
|
9
9
|
import zstandard as zstd
|
10
10
|
import gzip
|
11
11
|
import urllib.request
|
12
|
+
from secxbrl import parse_inline_xbrl
|
13
|
+
from company_fundamentals import construct_fundamentals
|
14
|
+
from decimal import Decimal
|
15
|
+
|
12
16
|
|
13
17
|
class Submission:
|
14
18
|
def __init__(self, path=None, sgml_content=None, keep_document_types=None,
|
@@ -17,6 +21,7 @@ class Submission:
|
|
17
21
|
|
18
22
|
# declare vars to be filled later
|
19
23
|
self.xbrl = None
|
24
|
+
self.fundamentals = None
|
20
25
|
|
21
26
|
# Validate parameters
|
22
27
|
param_count = sum(x is not None for x in [path, sgml_content, batch_tar_path,url])
|
@@ -242,18 +247,81 @@ class Submission:
|
|
242
247
|
if doc['type'] in document_types:
|
243
248
|
yield self._load_document_by_index(idx)
|
244
249
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
+
def parse_xbrl(self):
|
251
|
+
if self.xbrl:
|
252
|
+
return
|
253
|
+
|
254
|
+
for idx, doc in enumerate(self.metadata.content['documents']):
|
255
|
+
if doc['type'] in ['EX-100.INS','EX-101.INS']:
|
256
|
+
document = self._load_document_by_index(idx)
|
257
|
+
self.xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
|
258
|
+
return
|
259
|
+
|
260
|
+
if doc['filename'].endswith('_htm.xml'):
|
261
|
+
document = self._load_document_by_index(idx)
|
262
|
+
self.xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
|
263
|
+
return
|
264
|
+
|
265
|
+
|
266
|
+
def parse_fundamentals(self,categories=None):
|
267
|
+
self.parse_xbrl()
|
268
|
+
|
269
|
+
# if no xbrl return
|
270
|
+
if not self.xbrl:
|
271
|
+
return
|
272
|
+
# Transform XBRL records into the format needed by construct_fundamentals
|
273
|
+
xbrl = []
|
274
|
+
|
275
|
+
for xbrl_record in self.xbrl:
|
276
|
+
try:
|
277
|
+
# Extract basic fields
|
278
|
+
value = xbrl_record.get('_val', None)
|
279
|
+
|
280
|
+
taxonomy, name = xbrl_record['_attributes']['name'].split(':')
|
281
|
+
|
282
|
+
|
283
|
+
# Handle scaling if present
|
284
|
+
if xbrl_record.get('_attributes', {}).get('scale') is not None:
|
285
|
+
scale = int(xbrl_record['_attributes']['scale'])
|
286
|
+
try:
|
287
|
+
value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
|
288
|
+
except:
|
289
|
+
pass
|
290
|
+
|
250
291
|
|
251
|
-
|
252
|
-
|
253
|
-
|
292
|
+
# Extract period dates
|
293
|
+
period_start_date = None
|
294
|
+
period_end_date = None
|
295
|
+
|
296
|
+
if xbrl_record.get('_context'):
|
297
|
+
context = xbrl_record['_context']
|
298
|
+
period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
|
299
|
+
period_end_date = context.get('context_period_enddate')
|
300
|
+
|
301
|
+
# Create record in the format expected by construct_fundamentals
|
302
|
+
record = {
|
303
|
+
'taxonomy': taxonomy,
|
304
|
+
'name': name,
|
305
|
+
'value': value,
|
306
|
+
'period_start_date': period_start_date,
|
307
|
+
'period_end_date': period_end_date
|
308
|
+
}
|
309
|
+
|
310
|
+
xbrl.append(record)
|
311
|
+
|
312
|
+
except Exception as e:
|
313
|
+
# Skip malformed records
|
314
|
+
continue
|
315
|
+
|
316
|
+
|
317
|
+
# Call construct_fundamentals with the transformed data
|
318
|
+
fundamentals = construct_fundamentals(xbrl,
|
319
|
+
taxonomy_key='taxonomy',
|
320
|
+
concept_key='name',
|
321
|
+
start_date_key='period_start_date',
|
322
|
+
end_date_key='period_end_date',
|
323
|
+
categories=categories)
|
324
|
+
|
325
|
+
self.fundamentals = fundamentals
|
254
326
|
|
255
|
-
# print(doc['type'])
|
256
|
-
# if not document:
|
257
|
-
# return
|
258
327
|
|
259
|
-
# self.xbrl = document.parse_xbrl()
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="2.0.3",
|
35
|
+
version="2.0.5",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|