datamule 1.4.2__tar.gz → 1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.4.2 → datamule-1.4.4}/PKG-INFO +1 -1
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/document.py +37 -2
- {datamule-1.4.2 → datamule-1.4.4}/datamule/helper.py +3 -0
- datamule-1.4.4/datamule/mapping_dicts/html_mapping_dicts.py +75 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/PKG-INFO +1 -1
- {datamule-1.4.2 → datamule-1.4.4}/setup.py +1 -1
- datamule-1.4.2/datamule/mapping_dicts/html_mapping_dicts.py +0 -11
- {datamule-1.4.2 → datamule-1.4.4}/datamule/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/config.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/atsn.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/cfportal.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/d.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex102_abs.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99a_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99c_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99g_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99i_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/information_table.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/nmfp.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/npx.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/onefourtyfour.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ownership.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/proxy_voting_record.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/sbs.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/sbsef.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/schedule13.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/submission_metadata.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ta.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/thirteenfhr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/twentyfivense.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/twentyfourf2nt.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/processing.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/document/table.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/index.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/package_updater.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/portfolio.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/utils.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/bq.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/downloader.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/query.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/sheet.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule/submission.py +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/SOURCES.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/requires.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.4}/setup.cfg +0 -0
@@ -6,7 +6,7 @@ from doc2dict.mapping import flatten_hierarchy
|
|
6
6
|
from doc2dict import html2dict, visualize_dict, get_title, unnest_dict, pdf2dict
|
7
7
|
from ..mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_13d, dict_13g
|
8
8
|
from ..mapping_dicts.xml_mapping_dicts import dict_345
|
9
|
-
from ..mapping_dicts.html_mapping_dicts import dict_10k_html, dict_10q_html, dict_8k_html
|
9
|
+
from ..mapping_dicts.html_mapping_dicts import *
|
10
10
|
from selectolax.parser import HTMLParser
|
11
11
|
from .processing import process_tabular_data
|
12
12
|
from pathlib import Path
|
@@ -120,12 +120,47 @@ class Document:
|
|
120
120
|
self.data = {}
|
121
121
|
self.data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
|
122
122
|
elif self.extension in ['.htm', '.html']:
|
123
|
-
|
123
|
+
|
124
|
+
if self.type == '1-K':
|
125
|
+
mapping_dict = dict_1kpartii_html
|
126
|
+
elif self.type == '1-SA':
|
127
|
+
mapping_dict = dict_1sa_html
|
128
|
+
elif self.type == '1-U':
|
129
|
+
mapping_dict = dict_1u_html
|
130
|
+
elif self.type == '10-12B':
|
131
|
+
mapping_dict = dict_1012b_html
|
132
|
+
elif self.type == '10-D':
|
133
|
+
mapping_dict = dict_10d_html
|
134
|
+
elif self.type == '10-K':
|
124
135
|
mapping_dict = dict_10k_html
|
125
136
|
elif self.type == '10-Q':
|
126
137
|
mapping_dict = dict_10q_html
|
138
|
+
elif self.type == '20-F':
|
139
|
+
mapping_dict = dict_20f_html
|
140
|
+
elif self.type == '8-A12B':
|
141
|
+
mapping_dict = dict_8a12b_html
|
142
|
+
elif self.type == '8-A12G':
|
143
|
+
mapping_dict = dict_8a12g_html
|
127
144
|
elif self.type == '8-K':
|
128
145
|
mapping_dict = dict_8k_html
|
146
|
+
elif self.type == '8-K12B':
|
147
|
+
mapping_dict = dict_8k12b_html
|
148
|
+
elif self.type == '8-K12G3':
|
149
|
+
mapping_dict = dict_8k12g3_html
|
150
|
+
elif self.type == '8-K15D5':
|
151
|
+
mapping_dict = dict_8k15d5_html
|
152
|
+
elif self.type == 'ABS-15G':
|
153
|
+
mapping_dict = dict_abs15g_html
|
154
|
+
elif self.type == 'ABS-EE':
|
155
|
+
mapping_dict = dict_absee_html
|
156
|
+
elif self.type == 'APP NTC':
|
157
|
+
dict_appntc_html
|
158
|
+
elif self.type == 'CB':
|
159
|
+
mapping_dict = dict_cb_html
|
160
|
+
elif self.type == 'SD':
|
161
|
+
mapping_dict = dict_sd_html
|
162
|
+
elif self.type in ['NT 10-K', 'NT 10-Q','NT 20-F']:
|
163
|
+
mapping_dict = dict_nt10k_html
|
129
164
|
|
130
165
|
dct = html2dict(content=self.content, mapping_dict=mapping_dict)
|
131
166
|
self.data = dct
|
@@ -89,6 +89,9 @@ def _process_cik_and_metadata_filters(cik=None, ticker=None, **kwargs):
|
|
89
89
|
if ticker_ciks:
|
90
90
|
cik.extend(ticker_ciks)
|
91
91
|
|
92
|
+
if len(cik) == 0:
|
93
|
+
raise ValueError(f"No CIKs found for ticker: {ticker}")
|
94
|
+
|
92
95
|
# Normalize CIK format
|
93
96
|
if cik is not None:
|
94
97
|
if isinstance(cik, str):
|
@@ -0,0 +1,75 @@
|
|
1
|
+
dict_10k_html = {
|
2
|
+
('part',r'^part\s*([ivx]+)$') : 0,
|
3
|
+
('signatures',r'^signatures?\.*$') : 0,
|
4
|
+
('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
|
5
|
+
}
|
6
|
+
dict_10q_html = dict_10k_html
|
7
|
+
|
8
|
+
dict_8k_html = {
|
9
|
+
('signatures',r'^signatures?\.*$') : 0,
|
10
|
+
('item',r'^item\s*(\d+\.\d+)') : 0,
|
11
|
+
}
|
12
|
+
|
13
|
+
dict_sd_html = {
|
14
|
+
('signatures',r'^signatures?\.*$') : 0,
|
15
|
+
('item',r'^item\s*(\d+\.\d+)') : 0,
|
16
|
+
}
|
17
|
+
|
18
|
+
dict_abs15g_html = {
|
19
|
+
('part',r'^part\s*([ivx]+)') : 0,
|
20
|
+
('signatures',r'^signatures?\.*$') : 0,
|
21
|
+
('item',r'^item\s*(\d+\.\d+)') : 1,
|
22
|
+
}
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
dict_nt10k_html = {
|
27
|
+
('part',r'^part\s*([ivx]+)') : 0,
|
28
|
+
}
|
29
|
+
|
30
|
+
dict_1kpartii_html = {
|
31
|
+
('item',r'^item\s*(\d+)') : 0,
|
32
|
+
}
|
33
|
+
|
34
|
+
dict_1sa_html = dict_1kpartii_html
|
35
|
+
|
36
|
+
dict_1u_html = {('item',r'^item\s*(\d+)') : 0,
|
37
|
+
('signatures',r'^signatures?\.*$') : 0,}
|
38
|
+
|
39
|
+
dict_1012b_html = dict_1u_html
|
40
|
+
|
41
|
+
dict_10d_html = dict_10k_html
|
42
|
+
|
43
|
+
dict_20f_html = {
|
44
|
+
('part',r'^part\s*([ivx]+)') : 0,
|
45
|
+
('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
|
46
|
+
('letter',r'\d*\.?([a-z])') : 2,
|
47
|
+
('signatures',r'^signatures?\.*$') : 0,
|
48
|
+
}
|
49
|
+
|
50
|
+
dict_8a12b_html = dict_1kpartii_html
|
51
|
+
dict_8a12g_html = dict_1kpartii_html
|
52
|
+
|
53
|
+
dict_8k12b_html = dict_8k_html
|
54
|
+
|
55
|
+
dict_8k12g3_html = dict_8k_html
|
56
|
+
dict_8k15d5_html = dict_8k_html
|
57
|
+
|
58
|
+
dict_absee_html = {('item',r'^item\s*(\d+)') : 0,
|
59
|
+
('signatures',r'^signatures?\.*$') : 0,}
|
60
|
+
|
61
|
+
dict_appntc_html = {('agency',r'^agency') : 0,
|
62
|
+
('action',r'^action') : 0,
|
63
|
+
('summary',r'^summary of application') : 0,
|
64
|
+
('applicants',r'^applicants') : 0,
|
65
|
+
('filing',r'^filing dates') : 0,
|
66
|
+
('hearing',r'^hearing or notification of hearing') : 0,
|
67
|
+
('addresses',r'^addresses') : 0,
|
68
|
+
('further contact',r'^for further information contact') : 0,
|
69
|
+
('supplementary information',r'^supplementary information') : 0,
|
70
|
+
}
|
71
|
+
|
72
|
+
dict_cb_html = {
|
73
|
+
('part', r'^part\s*([ivx]+)') : 0,
|
74
|
+
('item', r'^item\s*(\d+)') : 1,
|
75
|
+
}
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="1.4.2",
|
35
|
+
version="1.4.4",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
@@ -1,11 +0,0 @@
|
|
1
|
-
dict_10k_html = {
|
2
|
-
('part',r'^part\s*([ivx]+)$') : 0,
|
3
|
-
('signatures',r'^signatures?\.*$') : 0,
|
4
|
-
('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
|
5
|
-
}
|
6
|
-
dict_10q_html = dict_10k_html
|
7
|
-
|
8
|
-
dict_8k_html = {
|
9
|
-
('signatures',r'^signatures?\.*$') : 0,
|
10
|
-
('item',r'^item\s*(\d+\.\d+)') : 0,
|
11
|
-
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|