datamule 2.2.8__tar.gz → 2.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-2.2.8 → datamule-2.2.9}/PKG-INFO +1 -1
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/document.py +12 -78
- {datamule-2.2.8 → datamule-2.2.9}/datamule.egg-info/PKG-INFO +1 -1
- {datamule-2.2.8 → datamule-2.2.9}/setup.py +1 -1
- {datamule-2.2.8 → datamule-2.2.9}/datamule/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/config.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/datamule/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/datamule/datamule_lookup.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/datamule/datamule_mysql_rds.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/datamule/downloader.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/datamule/sec_connector.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/datasets.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_13fhr.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_25nse.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_informationtable.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_npx.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_ownership.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_proxyvotingrecord.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_sbsef.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/tables_sdr.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/document/tables/utils.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/helper.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/index.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/package_updater.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/portfolio.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/portfolio_compression_utils.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/utils.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/seclibrary/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/seclibrary/bq.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sentiment/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/sheet.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/submission.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/tags/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/tags/config.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/tags/dictionaries.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/tags/regex.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/tags/utils.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/utils/__init__.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/utils/construct_submissions_data.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/utils/format_accession.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule/utils/pdf.py +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule.egg-info/SOURCES.txt +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule.egg-info/requires.txt +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/datamule.egg-info/top_level.txt +0 -0
- {datamule-2.2.8 → datamule-2.2.9}/setup.cfg +0 -0
@@ -301,23 +301,7 @@ class Document:
|
|
301
301
|
return
|
302
302
|
|
303
303
|
mapping_dict = None
|
304
|
-
|
305
|
-
if self.extension == '.txt':
|
306
|
-
content = self.text
|
307
|
-
if self.type in ['10-Q', '10-Q/A']:
|
308
|
-
mapping_dict = dict_10q
|
309
|
-
elif self.type in ['10-K','10-K/A']:
|
310
|
-
mapping_dict = dict_10k
|
311
|
-
elif self.type in ['8-K', '8-K/A']:
|
312
|
-
mapping_dict = dict_8k
|
313
|
-
elif self.type in ['SC 13D', 'SC 13D/A']:
|
314
|
-
mapping_dict = dict_13d
|
315
|
-
elif self.type in ['SC 13G', 'SC 13G/A']:
|
316
|
-
mapping_dict = dict_13g
|
317
|
-
|
318
|
-
self._data = {}
|
319
|
-
self._data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
|
320
|
-
elif self.extension in ['.htm', '.html']:
|
304
|
+
if self._data_bool:
|
321
305
|
|
322
306
|
if self.type in ['1-K', '1-K/A']:
|
323
307
|
mapping_dict = dict_1kpartii_html
|
@@ -391,8 +375,18 @@ class Document:
|
|
391
375
|
mapping_dict = dict_t3_html
|
392
376
|
elif self.type in ['NT 10-K', 'NT 10-K/A', 'NT 10-Q', 'NT 10-Q/A', 'NT 20-F', 'NT 20-F/A']:
|
393
377
|
mapping_dict = dict_nt10k_html
|
378
|
+
elif self.type in ['SC 13G', 'SC 13G/A']:
|
379
|
+
mapping_dict = dict_13g
|
380
|
+
elif self.type in ['SC 13D', 'SC 13D/A']:
|
381
|
+
mapping_dict = dict_13d
|
382
|
+
|
383
|
+
if self.extension in ['.htm','.html']:
|
384
|
+
dct = html2dict(content=self.content, mapping_dict=mapping_dict)
|
385
|
+
elif self.extension in ['.txt']:
|
386
|
+
dct = txt2dict(content=self.content, mapping_dict=mapping_dict)
|
387
|
+
else:
|
388
|
+
dct = {}
|
394
389
|
|
395
|
-
dct = html2dict(content=self.content, mapping_dict=mapping_dict)
|
396
390
|
self._data = dct
|
397
391
|
elif self.extension == '.xml':
|
398
392
|
if self.type in ['3', '4', '5', '3/A', '4/A', '5/A']:
|
@@ -563,63 +557,3 @@ class Document:
|
|
563
557
|
return [item[1] for item in result]
|
564
558
|
else:
|
565
559
|
return [flatten_dict(item[1],format) for item in result]
|
566
|
-
|
567
|
-
|
568
|
-
# TODO CHANGE THIS
|
569
|
-
def __iter__(self):
|
570
|
-
# Use the property to trigger parsing if needed
|
571
|
-
document_data = self.data
|
572
|
-
|
573
|
-
# Let's remove XML iterable for now
|
574
|
-
|
575
|
-
# Handle text-based documents
|
576
|
-
if self.extension in ['.txt', '.htm', '.html']:
|
577
|
-
if not document_data:
|
578
|
-
return iter([])
|
579
|
-
|
580
|
-
# Find highest hierarchy level from mapping dict
|
581
|
-
highest_hierarchy = float('inf')
|
582
|
-
section_type = None
|
583
|
-
|
584
|
-
if self.type in ['10-K', '10-Q']:
|
585
|
-
mapping_dict = dict_10k if self.type == '10-K' else dict_10q
|
586
|
-
elif self.type == '8-K':
|
587
|
-
mapping_dict = dict_8k
|
588
|
-
elif self.type == 'SC 13D':
|
589
|
-
mapping_dict = dict_13d
|
590
|
-
elif self.type == 'SC 13G':
|
591
|
-
mapping_dict = dict_13g
|
592
|
-
else:
|
593
|
-
return iter([])
|
594
|
-
|
595
|
-
# Find section type with highest hierarchy number
|
596
|
-
highest_hierarchy = -1 # Start at -1 to find highest
|
597
|
-
for mapping in mapping_dict['rules']['mappings']:
|
598
|
-
if mapping.get('hierarchy') is not None:
|
599
|
-
if mapping['hierarchy'] > highest_hierarchy:
|
600
|
-
highest_hierarchy = mapping['hierarchy']
|
601
|
-
section_type = mapping['name']
|
602
|
-
|
603
|
-
if not section_type:
|
604
|
-
return iter([])
|
605
|
-
|
606
|
-
# Extract sections of the identified type
|
607
|
-
def find_sections(data, target_type):
|
608
|
-
sections = []
|
609
|
-
if isinstance(data, dict):
|
610
|
-
if data.get('type') == target_type:
|
611
|
-
sections.append({
|
612
|
-
'item': data.get('text', ''),
|
613
|
-
'text': flatten_hierarchy(data.get('content', []))
|
614
|
-
})
|
615
|
-
for value in data.values():
|
616
|
-
if isinstance(value, (dict, list)):
|
617
|
-
sections.extend(find_sections(value, target_type))
|
618
|
-
elif isinstance(data, list):
|
619
|
-
for item in data:
|
620
|
-
sections.extend(find_sections(item, target_type))
|
621
|
-
return sections
|
622
|
-
|
623
|
-
return iter(find_sections(document_data, section_type))
|
624
|
-
|
625
|
-
return iter([])
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="2.2.8",
|
35
|
+
version="2.2.9",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|