datamule 1.1.7__tar.gz → 1.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {datamule-1.1.7 → datamule-1.1.8}/PKG-INFO +1 -1
  2. datamule-1.1.8/datamule/document.py +472 -0
  3. {datamule-1.1.7 → datamule-1.1.8}/datamule/portfolio.py +2 -4
  4. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/submissions/monitor.py +5 -1
  5. datamule-1.1.8/datamule/seclibrary/bq.py +191 -0
  6. datamule-1.1.8/datamule/sheet.py +248 -0
  7. {datamule-1.1.7 → datamule-1.1.8}/datamule/submission.py +2 -1
  8. {datamule-1.1.7 → datamule-1.1.8}/datamule.egg-info/PKG-INFO +1 -1
  9. {datamule-1.1.7 → datamule-1.1.8}/datamule.egg-info/SOURCES.txt +1 -0
  10. {datamule-1.1.7 → datamule-1.1.8}/setup.py +1 -1
  11. datamule-1.1.7/datamule/document.py +0 -263
  12. datamule-1.1.7/datamule/sheet.py +0 -41
  13. {datamule-1.1.7 → datamule-1.1.8}/datamule/__init__.py +0 -0
  14. {datamule-1.1.7 → datamule-1.1.8}/datamule/config.py +0 -0
  15. {datamule-1.1.7 → datamule-1.1.8}/datamule/helper.py +0 -0
  16. {datamule-1.1.7 → datamule-1.1.8}/datamule/index.py +0 -0
  17. {datamule-1.1.7 → datamule-1.1.8}/datamule/mapping_dicts/__init__.py +0 -0
  18. {datamule-1.1.7 → datamule-1.1.8}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  19. {datamule-1.1.7 → datamule-1.1.8}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  20. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/__init__.py +0 -0
  21. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/infrastructure/__init__.py +0 -0
  22. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  23. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/rss/__init__.py +0 -0
  24. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/rss/monitor.py +0 -0
  25. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/submissions/__init__.py +0 -0
  26. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/submissions/downloader.py +0 -0
  27. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/submissions/eftsquery.py +0 -0
  28. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/submissions/streamer.py +0 -0
  29. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/submissions/textsearch.py +0 -0
  30. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/utils.py +0 -0
  31. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/xbrl/__init__.py +0 -0
  32. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  33. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  34. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  35. {datamule-1.1.7 → datamule-1.1.8}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  36. {datamule-1.1.7 → datamule-1.1.8}/datamule/seclibrary/__init__.py +0 -0
  37. {datamule-1.1.7 → datamule-1.1.8}/datamule/seclibrary/downloader.py +0 -0
  38. {datamule-1.1.7 → datamule-1.1.8}/datamule/seclibrary/query.py +0 -0
  39. {datamule-1.1.7 → datamule-1.1.8}/datamule.egg-info/dependency_links.txt +0 -0
  40. {datamule-1.1.7 → datamule-1.1.8}/datamule.egg-info/requires.txt +0 -0
  41. {datamule-1.1.7 → datamule-1.1.8}/datamule.egg-info/top_level.txt +0 -0
  42. {datamule-1.1.7 → datamule-1.1.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datamule
- Version: 1.1.7
+ Version: 1.1.8
  Summary: Making it easier to use SEC filings.
  Home-page: https://github.com/john-friedman/datamule-python
  Author: John Friedman
@@ -0,0 +1,472 @@
+ import json
+ import csv
+ import re
+ from doc2dict import xml2dict, txt2dict, dict2dict
+ from doc2dict.mapping import flatten_hierarchy
+ from .mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_13d, dict_13g
+ from .mapping_dicts.xml_mapping_dicts import dict_345
+ from selectolax.parser import HTMLParser
+
+ class Document:
+     def __init__(self, type, content, extension):
+
+         self.type = type
+         extension = extension.lower()
+         self.content = content
+         if extension == '.txt':
+             self.content = self._preprocess_txt_content()
+         elif extension in ['.htm', '.html']:
+             self.content = self._preprocess_html_content()
+
+         self.extension = extension
+         # this will be filled by parsed
+         self.data = None
+
+     #_load_text_content
+     def _preprocess_txt_content(self):
+         return self.content.read().translate(str.maketrans({
+             '\xa0': ' ', '\u2003': ' ',
+             '\u2018': "'", '\u2019': "'",
+             '\u201c': '"', '\u201d': '"'
+         }))
+
+     # will deprecate this when we add html2dict
+     def _preprocess_html_content(self):
+         parser = HTMLParser(self.content,detect_encoding=True,decode_errors='ignore')
+
+         # Remove hidden elements first
+         hidden_nodes = parser.css('[style*="display: none"], [style*="display:none"], .hidden, .hide, .d-none')
+         for node in hidden_nodes:
+             node.decompose()
+
+         blocks = {'p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'article', 'section', 'li', 'td'}
+         lines = []
+         current_line = []
+
+         def flush_line():
+             if current_line:
+                 # Don't add spaces between adjacent spans
+                 lines.append(''.join(current_line))
+                 current_line.clear()
+
+         for node in parser.root.traverse(include_text=True):
+             if node.tag in ('script', 'style', 'css'):
+                 continue
+
+             if node.tag in blocks:
+                 flush_line()
+                 lines.append('')
+
+             if node.text_content:
+                 text = node.text_content.strip()
+                 if text:
+                     if node.tag in blocks:
+                         flush_line()
+                         lines.append(text)
+                         lines.append('')
+                     else:
+                         # Only add space if nodes aren't directly adjacent
+                         if current_line and not current_line[-1].endswith(' '):
+                             if node.prev and node.prev.text_content:
+                                 if node.parent != node.prev.parent or node.prev.next != node:
+                                     current_line.append(' ')
+                         current_line.append(text)
+
+         flush_line()
+
+         text = '\n'.join(lines)
+         while '\n\n\n' in text:
+             text = text.replace('\n\n\n', '\n\n')
+
+         return text.translate(str.maketrans({
+             '\xa0': ' ', '\u2003': ' ',
+             '\u2018': "'", '\u2019': "'",
+             '\u201c': '"', '\u201d': '"'
+         }))
+
+     def contains_string(self, pattern):
+         """Works for select files"""
+         if self.extension in ['.htm', '.html', '.txt','.xml']:
+             return bool(re.search(pattern, self.content))
+         return False
+
+     # Note: this method will be heavily modified in the future
+     def parse(self):
+         # check if we have already parsed the content
+         if self.data:
+             return self.data
+         mapping_dict = None
+
+         if self.extension == '.xml':
+             if self.type in ['3', '4', '5']:
+                 mapping_dict = dict_345
+
+             self.data = xml2dict(content=self.content, mapping_dict=mapping_dict)
+
+         # will deprecate this when we add html2dict
+         elif self.extension in ['.htm', '.html','.txt']:
+
+             if self.type == '10-K':
+                 mapping_dict = dict_10k
+             elif self.type == '10-Q':
+                 mapping_dict = dict_10q
+             elif self.type == '8-K':
+                 mapping_dict = dict_8k
+             elif self.type == 'SC 13D':
+                 mapping_dict = dict_13d
+             elif self.type == 'SC 13G':
+                 mapping_dict = dict_13g
+
+             self.data = {}
+             self.data['document'] = dict2dict(txt2dict(content=self.content, mapping_dict=mapping_dict))
+         return self.data
+
+     def write_json(self, output_filename=None):
+         if not self.data:
+             self.parse()
+
+         with open(output_filename, 'w',encoding='utf-8') as f:
+             json.dump(self.data, f, indent=2)
+
+     def to_tabular(self, accession_number=None):
+         self.parse()
+
+         if self.type == "INFORMATION TABLE":
+             info_table = self.data['informationTable']['infoTable']
+             if isinstance(info_table, dict):
+                 info_table = [info_table]
+
+             flattened = self._flatten_dict(info_table)
+
+             # Original field names
+             original_columns = [
+                 "nameOfIssuer", "titleOfClass", "cusip", "value",
+                 "shrsOrPrnAmt_sshPrnamt", "shrsOrPrnAmt_sshPrnamtType",
+                 "investmentDiscretion", "votingAuthority_Sole",
+                 "votingAuthority_Shared", "votingAuthority_None",
+                 "reportingOwnerCIK", "putCall", "otherManager", 'figi'
+             ]
+
+             # Define mapping from original to camelCase field names
+             field_mapping = {
+                 "shrsOrPrnAmt_sshPrnamt": "sshPrnamt",
+                 "shrsOrPrnAmt_sshPrnamtType": "sshPrnamtType",
+                 "votingAuthority_Sole": "votingAuthoritySole",
+                 "votingAuthority_Shared": "votingAuthorityShared",
+                 "votingAuthority_None": "votingAuthorityNone"
+             }
+
+             # Create the new expected columns list with mapped field names
+             expected_columns = []
+             for column in original_columns:
+                 if column in field_mapping:
+                     expected_columns.append(field_mapping[column])
+                 else:
+                     expected_columns.append(column)
+
+             # Process each item in the flattened data
+             for item in flattened:
+                 # Remove newlines from items
+                 for key in item:
+                     if isinstance(item[key], str):
+                         item[key] = re.sub(r'\s+', ' ', item[key])
+
+                 new_item = {}
+                 for key, value in item.items():
+                     # Apply the mapping if the key is in our mapping dictionary
+                     if key in field_mapping:
+                         new_item[field_mapping[key]] = value
+                     else:
+                         new_item[key] = value
+
+                 # Update the original item with the new keys
+                 item.clear()
+                 item.update(new_item)
+
+                 # Ensure all expected columns exist
+                 for column in expected_columns:
+                     if column not in item:
+                         item[column] = None
+
+                 item['accession'] = accession_number
+
+             # Add this block to reorder the items to match the expected order
+             ordered_columns = ["nameOfIssuer", "titleOfClass", "cusip", "value", "sshPrnamt", "sshPrnamtType",
+                 "investmentDiscretion", "votingAuthoritySole", "votingAuthorityShared", "votingAuthorityNone",
+                 "reportingOwnerCIK", "putCall", "otherManager", "figi"]
+             if accession_number is not None:
+                 ordered_columns.append("accession")
+
+             ordered_data = []
+             for item in flattened:
+                 ordered_item = {column: item.get(column, None) for column in ordered_columns}
+                 ordered_data.append(ordered_item)
+
+             return ordered_data
+
+         elif self.type in ["3", "4", "5"]:
+             # Master mapping dictionary - includes all possible fields
+             # The order of this dictionary will determine the output column order
+             master_mapping_dict = {
+                 # Flag fields (will be set programmatically)
+                 "isDerivative": "isDerivative",
+                 "isNonDerivative": "isNonDerivative",
+
+                 # Common fields across all types
+                 "securityTitle_value": "securityTitle",
+                 "transactionDate_value": "transactionDate",
+                 "documentType": "documentType",
+                 "transactionCoding_transactionFormType": "documentType",
+                 "transactionCoding_transactionCode": "transactionCode",
+                 "transactionAmounts_transactionAcquiredDisposedCode_value": "transactionCode",
+                 "transactionCoding_equitySwapInvolved": "equitySwapInvolved",
+                 "transactionTimeliness_value": "transactionTimeliness",
+                 "transactionAmounts_transactionShares_value": "transactionShares",
+                 "transactionAmounts_transactionPricePerShare_value": "transactionPricePerShare",
+                 "postTransactionAmounts_sharesOwnedFollowingTransaction_value": "sharesOwnedFollowingTransaction",
+                 "heldFollowingReport": "sharesOwnedFollowingTransaction", # Form 3
+                 "ownershipNature_directOrIndirectOwnership_value": "ownershipType",
+                 "ownershipNature_natureOfOwnership_value": "ownershipType",
+                 "deemedExecutionDate": "deemedExecutionDate",
+                 "deemedExecutionDate_value": "deemedExecutionDate",
+
+                 # Derivative-specific fields
+                 "conversionOrExercisePrice_value": "conversionOrExercisePrice",
+                 "exerciseDate_value": "exerciseDate",
+                 "expirationDate_value": "expirationDate",
+                 "underlyingSecurity_underlyingSecurityTitle_value": "underlyingSecurityTitle",
+                 "underlyingSecurity_underlyingSecurityShares_value": "underlyingSecurityShares",
+                 "underlyingSecurity_underlyingSecurityValue_value": "underlyingSecurityValue",
+
+                 # Footnote fields
+                 "transactionPricePerShareFootnote": "transactionPricePerShareFootnote",
+                 "transactionAmounts_transactionPricePerShare_footnote": "transactionPricePerShareFootnote",
+                 "transactionCodeFootnote": "transactionCodeFootnote",
+                 "transactionAmounts_transactionAcquiredDisposedCode_footnote": "transactionCodeFootnote",
+                 "transactionCoding_footnote": "transactionCodeFootnote",
+                 "natureOfOwnershipFootnote": "natureOfOwnershipFootnote",
+                 "ownershipNature_natureOfOwnership_footnote": "natureOfOwnershipFootnote",
+                 "sharesOwnedFollowingTransactionFootnote": "sharesOwnedFollowingTransactionFootnote",
+                 "postTransactionAmounts_sharesOwnedFollowingTransaction_footnote": "sharesOwnedFollowingTransactionFootnote",
+                 "ownershipTypeFootnote": "ownershipTypeFootnote",
+                 "ownershipNature_directOrIndirectOwnership_footnote": "ownershipTypeFootnote",
+                 "securityTitleFootnote": "securityTitleFootnote",
+                 "securityTitle_footnote": "securityTitleFootnote",
+                 "transactionSharesFootnote": "transactionSharesFootnote",
+                 "transactionAmounts_transactionShares_footnote": "transactionSharesFootnote",
+                 "transactionDateFootnote": "transactionDateFootnote",
+                 "transactionDate_footnote": "transactionDateFootnote",
+                 "conversionOrExercisePriceFootnote": "conversionOrExercisePriceFootnote",
+                 "conversionOrExercisePrice_footnote": "conversionOrExercisePriceFootnote",
+                 "exerciseDateFootnote": "exerciseDateFootnote",
+                 "exerciseDate_footnote": "exerciseDateFootnote",
+                 "expirationDateFootnote": "expirationDateFootnote",
+                 "expirationDate_footnote": "expirationDateFootnote",
+                 "underlyingSecurityTitleFootnote": "underlyingSecurityTitleFootnote",
+                 "underlyingSecurity_underlyingSecurityTitle_footnote": "underlyingSecurityTitleFootnote",
+                 "underlyingSecuritySharesFootnote": "underlyingSecuritySharesFootnote",
+                 "underlyingSecurity_underlyingSecurityShares_footnote": "underlyingSecuritySharesFootnote",
+                 "underlyingSecurityValueFootnote": "underlyingSecurityValueFootnote",
+                 "underlyingSecurity_underlyingSecurityValue_footnote": "underlyingSecurityValueFootnote"
+             }
+
+             # Get the unique target column names in order from the mapping dictionary
+             output_columns = []
+             for _, target_key in master_mapping_dict.items():
+                 if target_key not in output_columns:
+                     output_columns.append(target_key)
+
+             # Process function that handles any table type
+             def process_table(table_data, is_derivative):
+                 if isinstance(table_data, dict):
+                     table_data = [table_data]
+
+                 flattened = self._flatten_dict(table_data)
+
+                 # Apply mapping to the flattened data and ensure all expected columns are present
+                 mapped_data = []
+                 for item in flattened:
+                     mapped_item = {}
+                     # First, apply the mapping
+                     for old_key, value in item.items():
+                         target_key = master_mapping_dict.get(old_key, old_key)
+                         mapped_item[target_key] = value
+
+                     # Set the derivative/non-derivative flags
+                     mapped_item["isDerivative"] = 1 if is_derivative else 0
+                     mapped_item["isNonDerivative"] = 0 if is_derivative else 1
+
+                     # Create a new ordered dictionary with all columns
+                     ordered_item = {}
+                     for column in output_columns:
+                         ordered_item[column] = mapped_item.get(column, None)
+
+                     # Add accession_number if available
+                     if accession_number is not None:
+                         ordered_item['accession_number'] = accession_number
+
+                     mapped_data.append(ordered_item)
+
+                 return mapped_data
+
+             # Results container
+             all_results = []
+
+             # Process non-derivative transactions if they exist
+             if 'nonDerivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['nonDerivativeTable'] is not None:
+                 if 'nonDerivativeTransaction' in self.data['ownershipDocument']['nonDerivativeTable']:
+                     non_deriv_trans = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeTransaction']
+                     non_deriv_results = process_table(non_deriv_trans, is_derivative=False)
+                     all_results.extend(non_deriv_results)
+
+                 # Process non-derivative holdings (for Form 3)
+                 if 'nonDerivativeHolding' in self.data['ownershipDocument']['nonDerivativeTable']:
+                     non_deriv_hold = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeHolding']
+                     non_deriv_hold_results = process_table(non_deriv_hold, is_derivative=False)
+                     all_results.extend(non_deriv_hold_results)
+
+             # Process derivative transactions if they exist
+             if 'derivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['derivativeTable'] is not None:
+                 if 'derivativeTransaction' in self.data['ownershipDocument']['derivativeTable']:
+                     deriv_trans = self.data['ownershipDocument']['derivativeTable']['derivativeTransaction']
+                     deriv_results = process_table(deriv_trans, is_derivative=True)
+                     all_results.extend(deriv_results)
+
+                 # Process derivative holdings (for Form 3)
+                 if 'derivativeHolding' in self.data['ownershipDocument']['derivativeTable']:
+                     deriv_hold = self.data['ownershipDocument']['derivativeTable']['derivativeHolding']
+                     deriv_hold_results = process_table(deriv_hold, is_derivative=True)
+                     all_results.extend(deriv_hold_results)
+
+             # check if any rows not in the mapping dict, raise error if so
+             for item in all_results:
+                 for key in item.keys():
+                     if key not in master_mapping_dict.values() and key != 'accession_number':
+                         raise ValueError(f"Key '{key}' not found in mapping dictionary")
+
+
+             return all_results
+         else:
+             raise ValueError("sorry, rejigging conversion to tabular format")
+
+     def write_csv(self, output_filename, accession_number=None):
+
+         data = self.to_tabular(accession_number)
+
+         if not data:
+
+             return
+
+         fieldnames = data[0].keys()
+
+         with open(output_filename, 'w', newline='') as csvfile:
+             writer = csv.DictWriter(csvfile,fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
+             writer.writeheader()
+             writer.writerows(data)
+
+
+     def _document_to_section_text(self, document_data, parent_key=''):
+         items = []
+
+         if isinstance(document_data, dict):
+             for key, value in document_data.items():
+                 # Build the section name
+                 section = f"{parent_key}_{key}" if parent_key else key
+
+                 # If the value is a dict, recurse
+                 if isinstance(value, dict):
+                     items.extend(self._document_to_section_text(value, section))
+                 # If it's a list, handle each item
+                 elif isinstance(value, list):
+                     for i, item in enumerate(value):
+                         if isinstance(item, dict):
+                             items.extend(self._document_to_section_text(item, f"{section}_{i+1}"))
+                         else:
+                             items.append({
+                                 'section': f"{section}_{i+1}",
+                                 'text': str(item)
+                             })
+                 # Base case - add the item
+                 else:
+                     items.append({
+                         'section': section,
+                         'text': str(value)
+                     })
+
+         return items
+
+     # we'll modify this for every dict
+     def _flatten_dict(self, d, parent_key=''):
+         items = {}
+
+         if isinstance(d, list):
+             return [self._flatten_dict(item) for item in d]
+
+         for k, v in d.items():
+             new_key = f"{parent_key}_{k}" if parent_key else k
+
+             if isinstance(v, dict):
+                 items.update(self._flatten_dict(v, new_key))
+             else:
+                 items[new_key] = str(v)
+
+         return items
+
+     # this will all have to be changed. default will be to flatten everything
+     def __iter__(self):
+         self.parse()
+
+         # Let's remove XML iterable for now
+
+         # Handle text-based documents
+         if self.extension in ['.txt', '.htm', '.html']:
+             document_data = self.data
+             if not document_data:
+                 return iter([])
+
+             # Find highest hierarchy level from mapping dict
+             highest_hierarchy = float('inf')
+             section_type = None
+
+             if self.type in ['10-K', '10-Q']:
+                 mapping_dict = dict_10k if self.type == '10-K' else dict_10q
+             elif self.type == '8-K':
+                 mapping_dict = dict_8k
+             elif self.type == 'SC 13D':
+                 mapping_dict = dict_13d
+             elif self.type == 'SC 13G':
+                 mapping_dict = dict_13g
+             else:
+                 return iter([])
+
+             # Find section type with highest hierarchy number
+             highest_hierarchy = -1 # Start at -1 to find highest
+             for mapping in mapping_dict['rules']['mappings']:
+                 if mapping.get('hierarchy') is not None:
+                     if mapping['hierarchy'] > highest_hierarchy:
+                         highest_hierarchy = mapping['hierarchy']
+                         section_type = mapping['name']
+
+             if not section_type:
+                 return iter([])
+
+             # Extract sections of the identified type
+             def find_sections(data, target_type):
+                 sections = []
+                 if isinstance(data, dict):
+                     if data.get('type') == target_type:
+                         sections.append({
+                             'item': data.get('text', ''),
+                             'text': flatten_hierarchy(data.get('content', []))
+                         })
+                     for value in data.values():
+                         if isinstance(value, (dict, list)):
+                             sections.extend(find_sections(value, target_type))
+                 elif isinstance(data, list):
+                     for item in data:
+                         sections.extend(find_sections(item, target_type))
+                 return sections
+
+             return iter(find_sections(document_data, section_type))
+
+         return iter([])
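For orientation, a minimal sketch of how the new Document class might be used on its own. Documents are normally constructed by Submission/Portfolio; the file path, accession number, and raw-string content handling below are assumptions.

# Hypothetical standalone use of the Document class added in 1.1.8.
from datamule.document import Document

with open("form4.xml") as f:        # assumed local copy of a Form 4 filing
    raw = f.read()

doc = Document(type="4", content=raw, extension=".xml")
parsed = doc.parse()                # xml2dict output, cached on doc.data
doc.write_csv("form4.csv", accession_number="0000000000-25-000000")  # placeholder accession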
@@ -119,7 +119,7 @@ class Portfolio:
  # First query, just set the accession numbers
  self.accession_numbers = new_accession_numbers

- def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None, **kwargs):
+ def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,requests_per_second=5, **kwargs):
  if provider is None:
  config = Config()
  provider = config.get_default_source()
@@ -142,7 +142,7 @@ class Portfolio:
  cik=cik,
  submission_type=submission_type,
  filing_date=filing_date,
- requests_per_second=5,
+ requests_per_second=requests_per_second,
  accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None
  )

@@ -164,8 +164,6 @@ class Portfolio:
  )


-
-
  def __iter__(self):
  if not self.submissions_loaded:
  self._load_submissions()
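As an illustration of the widened download_submissions signature, a short sketch; the Portfolio constructor argument and the CIK value are assumptions.

# Hypothetical use of the new requests_per_second parameter (previously hardcoded to 5).
from datamule import Portfolio

portfolio = Portfolio("filings")    # assumed output directory
portfolio.download_submissions(
    cik="320193",                   # assumed example CIK
    submission_type="10-K",
    requests_per_second=3,          # new in 1.1.8
)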
@@ -20,12 +20,16 @@ async def _process_efts_hits(hits, collected_accession_numbers, data_callback=No
  submission_type = source.get('form')
  ciks = source.get('ciks', [])
  ciks = [str(int(cik)) for cik in ciks]
+
+ filing_date = source.get('file_date')

  # Create standardized filing record
  filing = {
  'accession_number': accession_number,
  'submission_type': submission_type,
- 'ciks': ciks
+ 'ciks': ciks,
+ 'filing_date': filing_date,
+
  }

  processed_hits.append(filing)
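For reference, a processed EFTS hit from the monitor now carries the filing date; the values below are illustrative.

# Illustrative shape of a filing record after this change (values made up).
filing = {
    'accession_number': '0000000000-25-000000',
    'submission_type': '8-K',
    'ciks': ['320193'],
    'filing_date': '2025-01-02',   # new in 1.1.8, taken from the EFTS 'file_date' field
}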