datamule 1.4.2__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.4.2 → datamule-1.4.3}/PKG-INFO +1 -1
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/document.py +21 -2
- datamule-1.4.3/datamule/mapping_dicts/html_mapping_dicts.py +48 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule.egg-info/PKG-INFO +1 -1
- {datamule-1.4.2 → datamule-1.4.3}/setup.py +1 -1
- datamule-1.4.2/datamule/mapping_dicts/html_mapping_dicts.py +0 -11
- {datamule-1.4.2 → datamule-1.4.3}/datamule/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/config.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/atsn.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/cfportal.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/d.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ex102_abs.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ex99a_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ex99c_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ex99g_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ex99i_sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/information_table.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/nmfp.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/npx.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/onefourtyfour.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ownership.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/proxy_voting_record.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/sbs.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/sbsef.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/schedule13.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/sdr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/submission_metadata.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/ta.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/thirteenfhr.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/twentyfivense.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/mappings/twentyfourf2nt.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/processing.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/document/table.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/helper.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/index.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/package_updater.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/portfolio.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/utils.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/seclibrary/__init__.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/seclibrary/bq.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/seclibrary/downloader.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/seclibrary/query.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/sheet.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule/submission.py +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule.egg-info/SOURCES.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule.egg-info/requires.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.4.2 → datamule-1.4.3}/setup.cfg +0 -0
@@ -6,7 +6,7 @@ from doc2dict.mapping import flatten_hierarchy
|
|
6
6
|
from doc2dict import html2dict, visualize_dict, get_title, unnest_dict, pdf2dict
|
7
7
|
from ..mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_13d, dict_13g
|
8
8
|
from ..mapping_dicts.xml_mapping_dicts import dict_345
|
9
|
-
from ..mapping_dicts.html_mapping_dicts import
|
9
|
+
from ..mapping_dicts.html_mapping_dicts import *
|
10
10
|
from selectolax.parser import HTMLParser
|
11
11
|
from .processing import process_tabular_data
|
12
12
|
from pathlib import Path
|
@@ -120,12 +120,31 @@ class Document:
|
|
120
120
|
self.data = {}
|
121
121
|
self.data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
|
122
122
|
elif self.extension in ['.htm', '.html']:
|
123
|
-
|
123
|
+
|
124
|
+
if self.type == '1-K':
|
125
|
+
mapping_dict = dict_1kpartii_html
|
126
|
+
elif self.type == '1-SA':
|
127
|
+
mapping_dict = dict_1sa_html
|
128
|
+
elif self.type == '1-U':
|
129
|
+
mapping_dict = dict_1u_html
|
130
|
+
elif self.type == '10-12B':
|
131
|
+
mapping_dict = dict_1012b_html
|
132
|
+
elif self.type == '10-D':
|
133
|
+
mapping_dict = dict_10d_html
|
134
|
+
elif self.type == '10-K':
|
124
135
|
mapping_dict = dict_10k_html
|
125
136
|
elif self.type == '10-Q':
|
126
137
|
mapping_dict = dict_10q_html
|
138
|
+
elif self.type == '20-F':
|
139
|
+
mapping_dict = dict_20f_html
|
127
140
|
elif self.type == '8-K':
|
128
141
|
mapping_dict = dict_8k_html
|
142
|
+
elif self.type == 'ABS-15G':
|
143
|
+
mapping_dict = dict_abs15g_html
|
144
|
+
elif self.type == 'SD':
|
145
|
+
mapping_dict = dict_sd_html
|
146
|
+
elif self.type in ['NT 10-K', 'NT 10-Q','NT 20-F']:
|
147
|
+
mapping_dict = dict_nt10k_html
|
129
148
|
|
130
149
|
dct = html2dict(content=self.content, mapping_dict=mapping_dict)
|
131
150
|
self.data = dct
|
@@ -0,0 +1,48 @@
|
|
1
|
+
dict_10k_html = {
|
2
|
+
('part',r'^part\s*([ivx]+)$') : 0,
|
3
|
+
('signatures',r'^signatures?\.*$') : 0,
|
4
|
+
('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
|
5
|
+
}
|
6
|
+
dict_10q_html = dict_10k_html
|
7
|
+
|
8
|
+
dict_8k_html = {
|
9
|
+
('signatures',r'^signatures?\.*$') : 0,
|
10
|
+
('item',r'^item\s*(\d+\.\d+)') : 0,
|
11
|
+
}
|
12
|
+
|
13
|
+
dict_sd_html = {
|
14
|
+
('signatures',r'^signatures?\.*$') : 0,
|
15
|
+
('item',r'^item\s*(\d+\.\d+)') : 0,
|
16
|
+
}
|
17
|
+
|
18
|
+
dict_abs15g_html = {
|
19
|
+
('part',r'^part\s*([ivx]+)') : 0,
|
20
|
+
('signatures',r'^signatures?\.*$') : 0,
|
21
|
+
('item',r'^item\s*(\d+\.\d+)') : 1,
|
22
|
+
}
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
dict_nt10k_html = {
|
27
|
+
('part',r'^part\s*([ivx]+)') : 0,
|
28
|
+
}
|
29
|
+
|
30
|
+
dict_1kpartii_html = {
|
31
|
+
('item',r'^item\s*(\d+)') : 1,
|
32
|
+
}
|
33
|
+
|
34
|
+
dict_1sa_html = dict_1kpartii_html
|
35
|
+
|
36
|
+
dict_1u_html = {('item',r'^item\s*(\d+)') : 1,
|
37
|
+
('signatures',r'^signatures?\.*$') : 1,}
|
38
|
+
|
39
|
+
dict_1012b_html = dict_1u_html
|
40
|
+
|
41
|
+
dict_10d_html = dict_10k_html
|
42
|
+
|
43
|
+
dict_20f_html = {
|
44
|
+
('part',r'^part\s*([ivx]+)') : 0,
|
45
|
+
('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
|
46
|
+
('letter',r'\d*\.?([a-z])') : 2,
|
47
|
+
('signatures',r'^signatures?\.*$') : 0,
|
48
|
+
}
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="1.4.2",
|
35
|
+
version="1.4.3",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
@@ -1,11 +0,0 @@
|
|
1
|
-
dict_10k_html = {
|
2
|
-
('part',r'^part\s*([ivx]+)$') : 0,
|
3
|
-
('signatures',r'^signatures?\.*$') : 0,
|
4
|
-
('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
|
5
|
-
}
|
6
|
-
dict_10q_html = dict_10k_html
|
7
|
-
|
8
|
-
dict_8k_html = {
|
9
|
-
('signatures',r'^signatures?\.*$') : 0,
|
10
|
-
('item',r'^item\s*(\d+\.\d+)') : 0,
|
11
|
-
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|