datamule 2.0.3__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -77,8 +77,14 @@ class DatamuleMySQL:
77
77
  if value is None:
78
78
  continue
79
79
 
80
+ # *** HIGHLIGHTED CHANGE: Special logic for members in simple_xbrl table ***
81
+ if table == 'simple_xbrl' and key == 'members':
82
+ if isinstance(value, list):
83
+ filters[key] = {"type": "find_in_set", "values": value}
84
+ else:
85
+ filters[key] = {"type": "find_in_set", "values": [value]}
80
86
  # Special logic for cik
81
- if key == 'cik':
87
+ elif key == 'cik':
82
88
  if isinstance(value, list):
83
89
  value = [int(val) for val in value]
84
90
  else:
@@ -100,6 +106,12 @@ class DatamuleMySQL:
100
106
  for key, filter_obj in filters.items():
101
107
  if filter_obj["type"] == "range":
102
108
  query_desc.append(f"{key}={filter_obj['values'][0]} to {filter_obj['values'][1]}")
109
+ # *** HIGHLIGHTED CHANGE: Display logic for find_in_set type ***
110
+ elif filter_obj["type"] == "find_in_set":
111
+ if len(filter_obj["values"]) == 1:
112
+ query_desc.append(f"{key} contains {filter_obj['values'][0]}")
113
+ else:
114
+ query_desc.append(f"{key} contains any of {filter_obj['values']}")
103
115
  elif len(filter_obj["values"]) == 1:
104
116
  query_desc.append(f"{key}={filter_obj['values'][0]}")
105
117
  else:
@@ -177,6 +189,7 @@ def query_mysql_rds(table, api_key=None, **kwargs):
177
189
  Parameters:
178
190
  - table: Table name (e.g., 'simple_xbrl')
179
191
  - cik: Company CIK number(s), can be int, string, or list
192
+ - members: For simple_xbrl table, search within comma-separated member strings
180
193
  - Any other filter parameters as keyword arguments
181
194
  - page_size: Number of records per page (max 25000, default 25000)
182
195
  - quiet: Boolean, whether to suppress progress output and summary (default False)
@@ -186,6 +199,7 @@ def query_mysql_rds(table, api_key=None, **kwargs):
186
199
  - Single value: Exact match
187
200
  - List: OR condition (any of the values)
188
201
  - Tuple: Range condition (between first and second values)
202
+ - members (simple_xbrl only): Searches within comma-separated strings using FIND_IN_SET
189
203
 
190
204
  Returns:
191
205
  - List of dictionaries containing the requested data (ready for pandas DataFrame)
@@ -224,8 +238,14 @@ def _query_mysql_rds_single(table, api_key=None, **kwargs):
224
238
  if value is None:
225
239
  continue
226
240
 
241
+ # *** HIGHLIGHTED CHANGE: Special logic for members in simple_xbrl table ***
242
+ if table == 'simple_xbrl' and key == 'members':
243
+ if isinstance(value, list):
244
+ filters[key] = {"type": "find_in_set", "values": value}
245
+ else:
246
+ filters[key] = {"type": "find_in_set", "values": [value]}
227
247
  # special logic for cik
228
- if key == 'cik':
248
+ elif key == 'cik':
229
249
  if isinstance(value, list):
230
250
  value = [int(val) for val in value]
231
251
  else:
@@ -12,8 +12,7 @@ from .processing import process_tabular_data
12
12
  from pathlib import Path
13
13
  import webbrowser
14
14
  from secsgml.utils import bytes_to_str
15
- from secxbrl import parse_inline_xbrl
16
- from company_fundamentals import construct_fundamentals
15
+
17
16
 
18
17
  class Document:
19
18
  def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -35,8 +34,7 @@ class Document:
35
34
  self.extension = extension
36
35
  # this will be filled by parsed
37
36
  self.data = None
38
- self.xbrl = None
39
- self.fundamentals = None
37
+
40
38
 
41
39
  #_load_text_content
42
40
  def _preprocess_txt_content(self):
@@ -106,70 +104,6 @@ class Document:
106
104
  return bool(re.search(pattern, self.content))
107
105
  return False
108
106
 
109
- # slated for removal
110
- def parse_xbrl(self,type='inline'):
111
- if self.xbrl:
112
- return
113
- if type =='inline':
114
- if self.extension not in ['.htm','.html']:
115
- return
116
- self.xbrl = parse_inline_xbrl(self.content)
117
- else:
118
- raise ValueError("Only inline has been implemented so far.")
119
-
120
- def parse_fundamentals(self,categories=None):
121
- self.parse_xbrl()
122
- # Transform XBRL records into the format needed by construct_fundamentals
123
- xbrl = []
124
-
125
- for xbrl_record in self.xbrl:
126
- try:
127
- # Extract basic fields
128
- value = xbrl_record.get('_val', None)
129
- taxonomy, name = xbrl_record['_attributes']['name'].split(':')
130
-
131
- # Handle scaling if present
132
- if xbrl_record.get('_attributes', {}).get('scale') is not None:
133
- scale = int(xbrl_record['_attributes']['scale'])
134
- try:
135
- value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
136
- except:
137
- pass
138
-
139
- # Extract period dates
140
- period_start_date = None
141
- period_end_date = None
142
-
143
- if xbrl_record.get('_context'):
144
- context = xbrl_record['_context']
145
- period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
146
- period_end_date = context.get('context_period_enddate')
147
-
148
- # Create record in the format expected by construct_fundamentals
149
- record = {
150
- 'taxonomy': taxonomy,
151
- 'name': name,
152
- 'value': value,
153
- 'period_start_date': period_start_date,
154
- 'period_end_date': period_end_date
155
- }
156
-
157
- xbrl.append(record)
158
-
159
- except Exception as e:
160
- # Skip malformed records
161
- continue
162
-
163
- # Call construct_fundamentals with the transformed data
164
- fundamentals = construct_fundamentals(xbrl,
165
- taxonomy_key='taxonomy',
166
- concept_key='name',
167
- start_date_key='period_start_date',
168
- end_date_key='period_end_date',
169
- categories=categories)
170
-
171
- self.fundamentals = fundamentals
172
-
173
107
  # Note: this method will be heavily modified in the future
174
108
  def parse(self):
175
109
  # check if we have already parsed the content
datamule/submission.py CHANGED
@@ -9,6 +9,10 @@ import tarfile
9
9
  import zstandard as zstd
10
10
  import gzip
11
11
  import urllib.request
12
+ from secxbrl import parse_inline_xbrl
13
+ from company_fundamentals import construct_fundamentals
14
+ from decimal import Decimal
15
+
12
16
 
13
17
  class Submission:
14
18
  def __init__(self, path=None, sgml_content=None, keep_document_types=None,
@@ -17,6 +21,7 @@ class Submission:
17
21
 
18
22
  # declare vars to be filled later
19
23
  self.xbrl = None
24
+ self.fundamentals = None
20
25
 
21
26
  # Validate parameters
22
27
  param_count = sum(x is not None for x in [path, sgml_content, batch_tar_path,url])
@@ -242,18 +247,81 @@ class Submission:
242
247
  if doc['type'] in document_types:
243
248
  yield self._load_document_by_index(idx)
244
249
 
245
- # def parse_xbrl(self):
246
- # for idx, doc in enumerate(self.metadata.content['documents']):
247
- # if doc['type'] in ['EX-100.INS','EX-101.INS']:
248
- # document = self._load_document_by_index(idx)
249
- # break
250
+ def parse_xbrl(self):
251
+ if self.xbrl:
252
+ return
253
+
254
+ for idx, doc in enumerate(self.metadata.content['documents']):
255
+ if doc['type'] in ['EX-100.INS','EX-101.INS']:
256
+ document = self._load_document_by_index(idx)
257
+ self.xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
258
+ return
259
+
260
+ if doc['filename'].endswith('_htm.xml'):
261
+ document = self._load_document_by_index(idx)
262
+ self.xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
263
+ return
264
+
265
+
266
+ def parse_fundamentals(self,categories=None):
267
+ self.parse_xbrl()
268
+
269
+ # if no xbrl return
270
+ if not self.xbrl:
271
+ return
272
+ # Transform XBRL records into the format needed by construct_fundamentals
273
+ xbrl = []
274
+
275
+ for xbrl_record in self.xbrl:
276
+ try:
277
+ # Extract basic fields
278
+ value = xbrl_record.get('_val', None)
279
+
280
+ taxonomy, name = xbrl_record['_attributes']['name'].split(':')
281
+
282
+
283
+ # Handle scaling if present
284
+ if xbrl_record.get('_attributes', {}).get('scale') is not None:
285
+ scale = int(xbrl_record['_attributes']['scale'])
286
+ try:
287
+ value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
288
+ except:
289
+ pass
290
+
250
291
 
251
- # if doc['filename'].endswith('_htm.xml'):
252
- # document = self._load_document_by_index(idx)
253
- # break
292
+ # Extract period dates
293
+ period_start_date = None
294
+ period_end_date = None
295
+
296
+ if xbrl_record.get('_context'):
297
+ context = xbrl_record['_context']
298
+ period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
299
+ period_end_date = context.get('context_period_enddate')
300
+
301
+ # Create record in the format expected by construct_fundamentals
302
+ record = {
303
+ 'taxonomy': taxonomy,
304
+ 'name': name,
305
+ 'value': value,
306
+ 'period_start_date': period_start_date,
307
+ 'period_end_date': period_end_date
308
+ }
309
+
310
+ xbrl.append(record)
311
+
312
+ except Exception as e:
313
+ # Skip malformed records
314
+ continue
315
+
316
+
317
+ # Call construct_fundamentals with the transformed data
318
+ fundamentals = construct_fundamentals(xbrl,
319
+ taxonomy_key='taxonomy',
320
+ concept_key='name',
321
+ start_date_key='period_start_date',
322
+ end_date_key='period_end_date',
323
+ categories=categories)
324
+
325
+ self.fundamentals = fundamentals
254
326
 
255
- # print(doc['type'])
256
- # if not document:
257
- # return
258
327
 
259
- # self.xbrl = document.parse_xbrl()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.0.3
3
+ Version: 2.0.5
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -6,15 +6,15 @@ datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,9
6
6
  datamule/portfolio.py,sha256=YViG1JgJ9SFhg8N3tOOhBI8oc6Pmi2vwnHeHmlkC_5U,12119
7
7
  datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
8
8
  datamule/sheet.py,sha256=Ws_YRtpvewLVioarngVMe8cgG_sp11MP9_goGbRaiWE,23952
9
- datamule/submission.py,sha256=DtLoiwRE7JJW2R0NvJNyQfwstWIlU2N9Z6yOgpnH1LU,11812
9
+ datamule/submission.py,sha256=qcb5TogrB2q6x4zcGPKFf4dkrAy0bAPzY71Ops_xW44,14437
10
10
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
11
11
  datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
13
- datamule/datamule/datamule_mysql_rds.py,sha256=Oj_xPTBKkzWsuRlb_tphjJrBW1eua1cOuxjGwJx581k,10591
13
+ datamule/datamule/datamule_mysql_rds.py,sha256=P5vL3RJnOwLz25hPKuoYmxSX7XeDe83YETnwT6miRMo,11858
14
14
  datamule/datamule/downloader.py,sha256=aTyVUuIwynPtHB0Z9BvCasy9Ao5wfHptNAsjN-7yDTk,18525
15
15
  datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
16
16
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- datamule/document/document.py,sha256=8UC5QfiMDufkA7v3o76mlfftqsUjNUFWKB3j894tsKw,16795
17
+ datamule/document/document.py,sha256=U9hSXT2Y06prM6sPcUU6uziV1f4_BhaaGz3QXE5zveg,14034
18
18
  datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
19
19
  datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
20
20
  datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -65,7 +65,7 @@ datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,180
65
65
  datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
66
  datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
67
67
  datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
68
- datamule-2.0.3.dist-info/METADATA,sha256=xILAQeqGiaZbF19rqWWRoFqBLv1to0a3RdRFKdhlu0Q,560
69
- datamule-2.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
70
- datamule-2.0.3.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
71
- datamule-2.0.3.dist-info/RECORD,,
68
+ datamule-2.0.5.dist-info/METADATA,sha256=UzOW91CX56mST6_QMXKrYeewjnylQc8_B0gyK7fcpc8,560
69
+ datamule-2.0.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
70
+ datamule-2.0.5.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
71
+ datamule-2.0.5.dist-info/RECORD,,