datamule 1.4.2__tar.gz → 1.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {datamule-1.4.2 → datamule-1.4.4}/PKG-INFO +1 -1
  2. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/document.py +37 -2
  3. {datamule-1.4.2 → datamule-1.4.4}/datamule/helper.py +3 -0
  4. datamule-1.4.4/datamule/mapping_dicts/html_mapping_dicts.py +75 -0
  5. {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/PKG-INFO +1 -1
  6. {datamule-1.4.2 → datamule-1.4.4}/setup.py +1 -1
  7. datamule-1.4.2/datamule/mapping_dicts/html_mapping_dicts.py +0 -11
  8. {datamule-1.4.2 → datamule-1.4.4}/datamule/__init__.py +0 -0
  9. {datamule-1.4.2 → datamule-1.4.4}/datamule/config.py +0 -0
  10. {datamule-1.4.2 → datamule-1.4.4}/datamule/data/listed_filer_metadata.csv +0 -0
  11. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/__init__.py +0 -0
  12. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/__init__.py +0 -0
  13. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/atsn.py +0 -0
  14. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/cfportal.py +0 -0
  15. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/d.py +0 -0
  16. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex102_abs.py +0 -0
  17. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99a_sdr.py +0 -0
  18. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99c_sdr.py +0 -0
  19. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99g_sdr.py +0 -0
  20. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ex99i_sdr.py +0 -0
  21. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/information_table.py +0 -0
  22. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/nmfp.py +0 -0
  23. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/npx.py +0 -0
  24. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/onefourtyfour.py +0 -0
  25. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ownership.py +0 -0
  26. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/proxy_voting_record.py +0 -0
  27. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/sbs.py +0 -0
  28. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/sbsef.py +0 -0
  29. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/schedule13.py +0 -0
  30. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/sdr.py +0 -0
  31. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/submission_metadata.py +0 -0
  32. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/ta.py +0 -0
  33. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/thirteenfhr.py +0 -0
  34. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/twentyfivense.py +0 -0
  35. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/mappings/twentyfourf2nt.py +0 -0
  36. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/processing.py +0 -0
  37. {datamule-1.4.2 → datamule-1.4.4}/datamule/document/table.py +0 -0
  38. {datamule-1.4.2 → datamule-1.4.4}/datamule/index.py +0 -0
  39. {datamule-1.4.2 → datamule-1.4.4}/datamule/mapping_dicts/__init__.py +0 -0
  40. {datamule-1.4.2 → datamule-1.4.4}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  41. {datamule-1.4.2 → datamule-1.4.4}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  42. {datamule-1.4.2 → datamule-1.4.4}/datamule/package_updater.py +0 -0
  43. {datamule-1.4.2 → datamule-1.4.4}/datamule/portfolio.py +0 -0
  44. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/__init__.py +0 -0
  45. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/infrastructure/__init__.py +0 -0
  46. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  47. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/__init__.py +0 -0
  48. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/downloader.py +0 -0
  49. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/eftsquery.py +0 -0
  50. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/monitor.py +0 -0
  51. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/streamer.py +0 -0
  52. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/submissions/textsearch.py +0 -0
  53. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/utils.py +0 -0
  54. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/__init__.py +0 -0
  55. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  56. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  57. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  58. {datamule-1.4.2 → datamule-1.4.4}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  59. {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/__init__.py +0 -0
  60. {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/bq.py +0 -0
  61. {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/downloader.py +0 -0
  62. {datamule-1.4.2 → datamule-1.4.4}/datamule/seclibrary/query.py +0 -0
  63. {datamule-1.4.2 → datamule-1.4.4}/datamule/sheet.py +0 -0
  64. {datamule-1.4.2 → datamule-1.4.4}/datamule/submission.py +0 -0
  65. {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/SOURCES.txt +0 -0
  66. {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/dependency_links.txt +0 -0
  67. {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/requires.txt +0 -0
  68. {datamule-1.4.2 → datamule-1.4.4}/datamule.egg-info/top_level.txt +0 -0
  69. {datamule-1.4.2 → datamule-1.4.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -6,7 +6,7 @@ from doc2dict.mapping import flatten_hierarchy
6
6
  from doc2dict import html2dict, visualize_dict, get_title, unnest_dict, pdf2dict
7
7
  from ..mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_13d, dict_13g
8
8
  from ..mapping_dicts.xml_mapping_dicts import dict_345
9
- from ..mapping_dicts.html_mapping_dicts import dict_10k_html, dict_10q_html, dict_8k_html
9
+ from ..mapping_dicts.html_mapping_dicts import *
10
10
  from selectolax.parser import HTMLParser
11
11
  from .processing import process_tabular_data
12
12
  from pathlib import Path
@@ -120,12 +120,47 @@ class Document:
120
120
  self.data = {}
121
121
  self.data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
122
122
  elif self.extension in ['.htm', '.html']:
123
- if self.type == '10-K':
123
+
124
+ if self.type == '1-K':
125
+ mapping_dict = dict_1kpartii_html
126
+ elif self.type == '1-SA':
127
+ mapping_dict = dict_1sa_html
128
+ elif self.type == '1-U':
129
+ mapping_dict = dict_1u_html
130
+ elif self.type == '10-12B':
131
+ mapping_dict = dict_1012b_html
132
+ elif self.type == '10-D':
133
+ mapping_dict = dict_10d_html
134
+ elif self.type == '10-K':
124
135
  mapping_dict = dict_10k_html
125
136
  elif self.type == '10-Q':
126
137
  mapping_dict = dict_10q_html
138
+ elif self.type == '20-F':
139
+ mapping_dict = dict_20f_html
140
+ elif self.type == '8-A12B':
141
+ mapping_dict = dict_8a12b_html
142
+ elif self.type == '8-A12G':
143
+ mapping_dict = dict_8a12g_html
127
144
  elif self.type == '8-K':
128
145
  mapping_dict = dict_8k_html
146
+ elif self.type == '8-K12B':
147
+ mapping_dict = dict_8k12b_html
148
+ elif self.type == '8-K12G3':
149
+ mapping_dict = dict_8k12g3_html
150
+ elif self.type == '8-K15D5':
151
+ mapping_dict = dict_8k15d5_html
152
+ elif self.type == 'ABS-15G':
153
+ mapping_dict = dict_abs15g_html
154
+ elif self.type == 'ABS-EE':
155
+ mapping_dict = dict_absee_html
156
+ elif self.type == 'APP NTC':
157
+ dict_appntc_html
158
+ elif self.type == 'CB':
159
+ mapping_dict = dict_cb_html
160
+ elif self.type == 'SD':
161
+ mapping_dict = dict_sd_html
162
+ elif self.type in ['NT 10-K', 'NT 10-Q','NT 20-F']:
163
+ mapping_dict = dict_nt10k_html
129
164
 
130
165
  dct = html2dict(content=self.content, mapping_dict=mapping_dict)
131
166
  self.data = dct
@@ -89,6 +89,9 @@ def _process_cik_and_metadata_filters(cik=None, ticker=None, **kwargs):
89
89
  if ticker_ciks:
90
90
  cik.extend(ticker_ciks)
91
91
 
92
+ if len(cik) == 0:
93
+ raise ValueError(f"No CIKs found for ticker: {ticker}")
94
+
92
95
  # Normalize CIK format
93
96
  if cik is not None:
94
97
  if isinstance(cik, str):
@@ -0,0 +1,75 @@
1
+ dict_10k_html = {
2
+ ('part',r'^part\s*([ivx]+)$') : 0,
3
+ ('signatures',r'^signatures?\.*$') : 0,
4
+ ('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
5
+ }
6
+ dict_10q_html = dict_10k_html
7
+
8
+ dict_8k_html = {
9
+ ('signatures',r'^signatures?\.*$') : 0,
10
+ ('item',r'^item\s*(\d+\.\d+)') : 0,
11
+ }
12
+
13
+ dict_sd_html = {
14
+ ('signatures',r'^signatures?\.*$') : 0,
15
+ ('item',r'^item\s*(\d+\.\d+)') : 0,
16
+ }
17
+
18
+ dict_abs15g_html = {
19
+ ('part',r'^part\s*([ivx]+)') : 0,
20
+ ('signatures',r'^signatures?\.*$') : 0,
21
+ ('item',r'^item\s*(\d+\.\d+)') : 1,
22
+ }
23
+
24
+
25
+
26
+ dict_nt10k_html = {
27
+ ('part',r'^part\s*([ivx]+)') : 0,
28
+ }
29
+
30
+ dict_1kpartii_html = {
31
+ ('item',r'^item\s*(\d+)') : 0,
32
+ }
33
+
34
+ dict_1sa_html = dict_1kpartii_html
35
+
36
+ dict_1u_html = {('item',r'^item\s*(\d+)') : 0,
37
+ ('signatures',r'^signatures?\.*$') : 0,}
38
+
39
+ dict_1012b_html = dict_1u_html
40
+
41
+ dict_10d_html = dict_10k_html
42
+
43
+ dict_20f_html = {
44
+ ('part',r'^part\s*([ivx]+)') : 0,
45
+ ('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
46
+ ('letter',r'\d*\.?([a-z])') : 2,
47
+ ('signatures',r'^signatures?\.*$') : 0,
48
+ }
49
+
50
+ dict_8a12b_html = dict_1kpartii_html
51
+ dict_8a12g_html = dict_1kpartii_html
52
+
53
+ dict_8k12b_html = dict_8k_html
54
+
55
+ dict_8k12g3_html = dict_8k_html
56
+ dict_8k15d5_html = dict_8k_html
57
+
58
+ dict_absee_html = {('item',r'^item\s*(\d+)') : 0,
59
+ ('signatures',r'^signatures?\.*$') : 0,}
60
+
61
+ dict_appntc_html = {('agency',r'^agency') : 0,
62
+ ('action',r'^action') : 0,
63
+ ('summary',r'^summary of application') : 0,
64
+ ('applicants',r'^applicants') : 0,
65
+ ('filing',r'^filing dates') : 0,
66
+ ('hearing',r'^hearing or notification of hearing') : 0,
67
+ ('addresses',r'^addresses') : 0,
68
+ ('further contact',r'^for further information contact') : 0,
69
+ ('supplementary information',r'^supplementary information') : 0,
70
+ }
71
+
72
+ dict_cb_html = {
73
+ ('part', r'^part\s*([ivx]+)') : 0,
74
+ ('item', r'^item\s*(\d+)') : 1,
75
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
32
32
  setup(
33
33
  name="datamule",
34
34
  author="John Friedman",
35
- version="1.4.2",
35
+ version="1.4.4",
36
36
  description="Work with SEC submissions at scale.",
37
37
  packages=find_packages(include=['datamule', 'datamule.*']),
38
38
  url="https://github.com/john-friedman/datamule-python",
@@ -1,11 +0,0 @@
1
- dict_10k_html = {
2
- ('part',r'^part\s*([ivx]+)$') : 0,
3
- ('signatures',r'^signatures?\.*$') : 0,
4
- ('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
5
- }
6
- dict_10q_html = dict_10k_html
7
-
8
- dict_8k_html = {
9
- ('signatures',r'^signatures?\.*$') : 0,
10
- ('item',r'^item\s*(\d+\.\d+)') : 0,
11
- }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes