datamule 2.1.5__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-2.1.5 → datamule-2.2.0}/PKG-INFO +2 -1
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/document.py +171 -45
- datamule-2.2.0/datamule/sheet.py +306 -0
- datamule-2.2.0/datamule/tags/config.py +16 -0
- datamule-2.2.0/datamule/tags/regex.py +105 -0
- datamule-2.2.0/datamule/tags/utils.py +149 -0
- datamule-2.2.0/datamule/utils/__init__.py +0 -0
- datamule-2.2.0/datamule/utils/dictionaries.py +76 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/PKG-INFO +2 -1
- {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/SOURCES.txt +5 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/requires.txt +1 -0
- {datamule-2.1.5 → datamule-2.2.0}/setup.py +3 -2
- datamule-2.1.5/datamule/sheet.py +0 -706
- {datamule-2.1.5 → datamule-2.2.0}/datamule/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/config.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/datamule_lookup.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/datamule_mysql_rds.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/downloader.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/sec_connector.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/datasets.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_13fhr.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_25nse.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_informationtable.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_npx.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_ownership.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_proxyvotingrecord.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_sbsef.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_sdr.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/utils.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/helper.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/index.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/package_updater.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/portfolio.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/portfolio_compression_utils.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/utils.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/seclibrary/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/seclibrary/bq.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/submission.py +0 -0
- {datamule-2.1.5/datamule/utils → datamule-2.2.0/datamule/tags}/__init__.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/utils/construct_submissions_data.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule/utils/format_accession.py +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/top_level.txt +0 -0
- {datamule-2.1.5 → datamule-2.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.2.0
|
4
4
|
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -19,3 +19,4 @@ Requires-Dist: secxbrl
|
|
19
19
|
Requires-Dist: secsgml
|
20
20
|
Requires-Dist: websocket-client
|
21
21
|
Requires-Dist: company_fundamentals
|
22
|
+
Requires-Dist: flashtext
|
@@ -13,9 +13,133 @@ from pathlib import Path
|
|
13
13
|
import webbrowser
|
14
14
|
from secsgml.utils import bytes_to_str
|
15
15
|
import tempfile
|
16
|
-
|
16
|
+
import warnings
|
17
17
|
from .tables.tables import Tables
|
18
18
|
|
19
|
+
from ..tags.utils import get_cusip_using_regex, get_isin_using_regex, get_figi_using_regex,get_all_tickers, get_full_names,get_full_names_dictionary_lookup
|
20
|
+
|
21
|
+
|
22
|
+
class Tickers:
|
23
|
+
def __init__(self, document):
|
24
|
+
self.document = document
|
25
|
+
self._tickers_data = None
|
26
|
+
|
27
|
+
def _get_tickers_data(self):
|
28
|
+
"""Get all tickers data once and cache it"""
|
29
|
+
if self._tickers_data is None:
|
30
|
+
# Check if document extension is supported
|
31
|
+
if self.document.extension not in ['.htm', '.html', '.txt']:
|
32
|
+
self._tickers_data = {}
|
33
|
+
else:
|
34
|
+
self._tickers_data = get_all_tickers(self.document.text)
|
35
|
+
return self._tickers_data
|
36
|
+
|
37
|
+
def __getattr__(self, exchange_name):
|
38
|
+
data = self._get_tickers_data()
|
39
|
+
|
40
|
+
if exchange_name in data:
|
41
|
+
return data[exchange_name]
|
42
|
+
|
43
|
+
return []
|
44
|
+
|
45
|
+
def __bool__(self):
|
46
|
+
"""Return True if any tickers were found"""
|
47
|
+
data = self._get_tickers_data()
|
48
|
+
return bool(data.get('all', []))
|
49
|
+
|
50
|
+
def __repr__(self):
|
51
|
+
"""Show the full ticker data when printed or accessed directly"""
|
52
|
+
data = self._get_tickers_data()
|
53
|
+
return str(data)
|
54
|
+
|
55
|
+
def __str__(self):
|
56
|
+
"""Show the full ticker data when printed"""
|
57
|
+
data = self._get_tickers_data()
|
58
|
+
return str(data)
|
59
|
+
|
60
|
+
class Tags:
|
61
|
+
def __init__(self, document):
|
62
|
+
from ..tags.config import _active_dictionaries,_loaded_dictionaries
|
63
|
+
self.not_supported = document.extension not in ['.htm', '.html', '.txt']
|
64
|
+
self.document = document
|
65
|
+
self._tickers = None
|
66
|
+
self.dictionaries = {}
|
67
|
+
|
68
|
+
# Load global dictionaries with their data
|
69
|
+
active_dicts = _active_dictionaries
|
70
|
+
for dict_name in active_dicts:
|
71
|
+
self.dictionaries[dict_name] = _loaded_dictionaries[dict_name]
|
72
|
+
|
73
|
+
|
74
|
+
def _check_support(self):
|
75
|
+
if self.not_supported:
|
76
|
+
warnings.warn(f"Document extension '{self.document.extension}' is not supported. Supported formats: .htm, .html, .txt")
|
77
|
+
return False
|
78
|
+
return True
|
79
|
+
|
80
|
+
@property
|
81
|
+
def cusips(self):
|
82
|
+
if not self._check_support():
|
83
|
+
return None
|
84
|
+
|
85
|
+
if not hasattr(self, '_cusip'):
|
86
|
+
if 'sc13dg_cusips' in self.dictionaries:
|
87
|
+
keywords = self.dictionaries['sc13dg_cusips']
|
88
|
+
self._cusip = get_cusip_using_regex(self.document.text, keywords)
|
89
|
+
else:
|
90
|
+
self._cusip = get_cusip_using_regex(self.document.text)
|
91
|
+
return self._cusip
|
92
|
+
|
93
|
+
@property
|
94
|
+
def isins(self):
|
95
|
+
if not self._check_support():
|
96
|
+
return None
|
97
|
+
|
98
|
+
if not hasattr(self, '_isin'):
|
99
|
+
if 'npx_isins' in self.dictionaries:
|
100
|
+
keywords = self.dictionaries['npx_isins']
|
101
|
+
self._isin = get_isin_using_regex(self.document.text, keywords)
|
102
|
+
else:
|
103
|
+
self._isin = get_isin_using_regex(self.document.text)
|
104
|
+
return self._isin
|
105
|
+
|
106
|
+
@property
|
107
|
+
def figis(self):
|
108
|
+
if not self._check_support():
|
109
|
+
return None
|
110
|
+
|
111
|
+
if not hasattr(self, '_figi'):
|
112
|
+
if 'npx_figis' in self.dictionaries:
|
113
|
+
keywords = self.dictionaries['npx_figis']
|
114
|
+
self._figi = get_figi_using_regex(self.document.text, keywords)
|
115
|
+
else:
|
116
|
+
self._figi = get_figi_using_regex(self.document.text)
|
117
|
+
return self._figi
|
118
|
+
|
119
|
+
@property
|
120
|
+
def tickers(self):
|
121
|
+
if self._tickers is None:
|
122
|
+
self._tickers = Tickers(self.document)
|
123
|
+
return self._tickers
|
124
|
+
|
125
|
+
@property
|
126
|
+
def persons(self):
|
127
|
+
if not self._check_support():
|
128
|
+
return None
|
129
|
+
|
130
|
+
if not hasattr(self, '_persons'):
|
131
|
+
if '8k_2024_persons' in self.dictionaries:
|
132
|
+
# Use FlashText dictionary lookup for 8K persons
|
133
|
+
self._persons = get_full_names_dictionary_lookup(self.document.text, self.dictionaries['8k_2024_persons'])
|
134
|
+
elif 'ssa_baby_first_names' in self.dictionaries:
|
135
|
+
# Use regex with SSA names for validation
|
136
|
+
self._persons = get_full_names(self.document.text, self.dictionaries['ssa_baby_first_names'])
|
137
|
+
else:
|
138
|
+
# Fallback to regex without validation
|
139
|
+
self._persons = get_full_names(self.document.text)
|
140
|
+
return self._persons
|
141
|
+
|
142
|
+
|
19
143
|
class Document:
|
20
144
|
def __init__(self, type, content, extension,accession,filing_date,path=None):
|
21
145
|
|
@@ -34,10 +158,13 @@ class Document:
|
|
34
158
|
self.path = path
|
35
159
|
|
36
160
|
self.extension = extension
|
161
|
+
|
37
162
|
# this will be filled by parsed
|
38
163
|
self._data = None
|
39
164
|
self._tables = None
|
40
165
|
self._text = None
|
166
|
+
|
167
|
+
self.tags = Tags(self)
|
41
168
|
|
42
169
|
|
43
170
|
|
@@ -119,93 +246,92 @@ class Document:
|
|
119
246
|
|
120
247
|
if self.extension == '.txt':
|
121
248
|
content = self.text
|
122
|
-
if self.type
|
249
|
+
if self.type in ['10-Q', '10-Q/A']:
|
123
250
|
mapping_dict = dict_10q
|
124
|
-
elif self.type
|
251
|
+
elif self.type in ['10-K','10-K/A']:
|
125
252
|
mapping_dict = dict_10k
|
126
|
-
elif self.type
|
253
|
+
elif self.type in ['8-K', '8-K/A']:
|
127
254
|
mapping_dict = dict_8k
|
128
|
-
elif self.type
|
255
|
+
elif self.type in ['SC 13D', 'SC 13D/A']:
|
129
256
|
mapping_dict = dict_13d
|
130
|
-
elif self.type
|
257
|
+
elif self.type in ['SC 13G', 'SC 13G/A']:
|
131
258
|
mapping_dict = dict_13g
|
132
259
|
|
133
260
|
self._data = {}
|
134
261
|
self._data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
|
135
262
|
elif self.extension in ['.htm', '.html']:
|
136
263
|
|
137
|
-
if self.type
|
264
|
+
if self.type in ['1-K', '1-K/A']:
|
138
265
|
mapping_dict = dict_1kpartii_html
|
139
|
-
elif self.type
|
266
|
+
elif self.type in ['1-SA', '1-SA/A']:
|
140
267
|
mapping_dict = dict_1sa_html
|
141
|
-
elif self.type
|
268
|
+
elif self.type in ['1-U', '1-U/A']:
|
142
269
|
mapping_dict = dict_1u_html
|
143
|
-
elif self.type
|
270
|
+
elif self.type in ['10-12B', '10-12B/A']:
|
144
271
|
mapping_dict = dict_1012b_html
|
145
|
-
elif self.type
|
272
|
+
elif self.type in ['10-D', '10-D/A']:
|
146
273
|
mapping_dict = dict_10d_html
|
147
|
-
elif self.type
|
274
|
+
elif self.type in ['10-K', '10-K/A']:
|
148
275
|
mapping_dict = dict_10k_html
|
149
|
-
elif self.type
|
276
|
+
elif self.type in ['10-Q', '10-Q/A']:
|
150
277
|
mapping_dict = dict_10q_html
|
151
|
-
elif self.type
|
278
|
+
elif self.type in ['20-F', '20-F/A']:
|
152
279
|
mapping_dict = dict_20f_html
|
153
|
-
elif self.type
|
280
|
+
elif self.type in ['8-A12B', '8-A12B/A']:
|
154
281
|
mapping_dict = dict_8a12b_html
|
155
|
-
elif self.type
|
282
|
+
elif self.type in ['8-A12G', '8-A12G/A']:
|
156
283
|
mapping_dict = dict_8a12g_html
|
157
|
-
elif self.type
|
284
|
+
elif self.type in ['8-K', '8-K/A']:
|
158
285
|
mapping_dict = dict_8k_html
|
159
|
-
elif self.type
|
286
|
+
elif self.type in ['8-K12B', '8-K12B/A']:
|
160
287
|
mapping_dict = dict_8k12b_html
|
161
|
-
elif self.type
|
288
|
+
elif self.type in ['8-K12G3', '8-K12G3/A']:
|
162
289
|
mapping_dict = dict_8k12g3_html
|
163
|
-
elif self.type
|
290
|
+
elif self.type in ['8-K15D5', '8-K15D5/A']:
|
164
291
|
mapping_dict = dict_8k15d5_html
|
165
|
-
elif self.type
|
292
|
+
elif self.type in ['ABS-15G', 'ABS-15G/A']:
|
166
293
|
mapping_dict = dict_abs15g_html
|
167
|
-
elif self.type
|
294
|
+
elif self.type in ['ABS-EE', 'ABS-EE/A']:
|
168
295
|
mapping_dict = dict_absee_html
|
169
|
-
elif self.type
|
170
|
-
dict_appntc_html
|
171
|
-
elif self.type
|
296
|
+
elif self.type in ['APP NTC', 'APP NTC/A']:
|
297
|
+
mapping_dict = dict_appntc_html
|
298
|
+
elif self.type in ['CB', 'CB/A']:
|
172
299
|
mapping_dict = dict_cb_html
|
173
|
-
elif self.type
|
300
|
+
elif self.type in ['DSTRBRPT', 'DSTRBRPT/A']:
|
174
301
|
mapping_dict = dict_dstrbrpt_html
|
175
|
-
elif self.type
|
302
|
+
elif self.type in ['N-18F1', 'N-18F1/A']:
|
176
303
|
mapping_dict = dict_n18f1_html
|
177
|
-
elif self.type
|
304
|
+
elif self.type in ['N-CSRS', 'N-CSRS/A']:
|
178
305
|
mapping_dict = dict_ncsrs_html
|
179
|
-
elif self.type
|
306
|
+
elif self.type in ['NT-10K', 'NT-10K/A']:
|
180
307
|
mapping_dict = dict_nt10k_html
|
181
|
-
elif self.type
|
308
|
+
elif self.type in ['NT-10Q', 'NT-10Q/A']:
|
182
309
|
mapping_dict = dict_nt10q_html
|
183
|
-
elif self.type
|
310
|
+
elif self.type in ['NT 20-F', 'NT 20-F/A']:
|
184
311
|
mapping_dict = dict_nt20f_html
|
185
|
-
elif self.type
|
312
|
+
elif self.type in ['NT-NCEN', 'NT-NCEN/A']:
|
186
313
|
mapping_dict = dict_ntncen_html
|
187
|
-
elif self.type
|
314
|
+
elif self.type in ['NT-NCSR', 'NT-NCSR/A']:
|
188
315
|
mapping_dict = dict_ntncsr_html
|
189
|
-
elif self.type
|
316
|
+
elif self.type in ['NTFNCEN', 'NTFNCEN/A']:
|
190
317
|
mapping_dict = dict_ntfcen_html
|
191
|
-
elif self.type
|
318
|
+
elif self.type in ['NTFNCSR', 'NTFNCSR/A']:
|
192
319
|
mapping_dict = dict_ntfncsr_html
|
193
|
-
elif self.type
|
320
|
+
elif self.type in ['EX-99.CERT', 'EX-99.CERT/A']:
|
194
321
|
mapping_dict = dict_ex99cert_html
|
195
|
-
elif self.type
|
322
|
+
elif self.type in ['SC 13E3', 'SC 13E3/A']:
|
196
323
|
mapping_dict = dict_sc13e3_html
|
197
|
-
elif self.type
|
324
|
+
elif self.type in ['SC 14D9', 'SC 14D9/A']:
|
198
325
|
mapping_dict = dict_sc14d9_html
|
199
|
-
elif self.type
|
326
|
+
elif self.type in ['SP 15D2', 'SP 15D2/A']:
|
200
327
|
mapping_dict = dict_sp15d2_html
|
201
|
-
|
202
|
-
elif self.type == 'SD':
|
328
|
+
elif self.type in ['SD', 'SD/A']:
|
203
329
|
mapping_dict = dict_sd_html
|
204
|
-
elif self.type
|
330
|
+
elif self.type in ['S-1', 'S-1/A']:
|
205
331
|
mapping_dict = dict_s1_html
|
206
|
-
elif self.type
|
332
|
+
elif self.type in ['T-3', 'T-3/A']:
|
207
333
|
mapping_dict = dict_t3_html
|
208
|
-
elif self.type in ['NT 10-K', 'NT 10-Q','NT 20-F']:
|
334
|
+
elif self.type in ['NT 10-K', 'NT 10-K/A', 'NT 10-Q', 'NT 10-Q/A', 'NT 20-F', 'NT 20-F/A']:
|
209
335
|
mapping_dict = dict_nt10k_html
|
210
336
|
|
211
337
|
dct = html2dict(content=self.content, mapping_dict=mapping_dict)
|
@@ -233,7 +359,7 @@ class Document:
|
|
233
359
|
self._preprocess_html_content()
|
234
360
|
elif self.extension == '.txt':
|
235
361
|
self._preprocess_txt_content()
|
236
|
-
|
362
|
+
return self._text
|
237
363
|
|
238
364
|
def write_json(self, output_filename=None):
|
239
365
|
if not self.data:
|
@@ -0,0 +1,306 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
import csv
|
3
|
+
import os
|
4
|
+
from .helper import _process_cik_and_metadata_filters, load_package_dataset
|
5
|
+
from .sec.xbrl.downloadcompanyfacts import download_company_facts
|
6
|
+
from .datamule.datamule_lookup import datamule_lookup
|
7
|
+
from .datamule.datamule_mysql_rds import query_mysql_rds
|
8
|
+
from company_fundamentals.utils import get_fundamental_mappings
|
9
|
+
from company_fundamentals import construct_fundamentals
|
10
|
+
class Sheet:
|
11
|
+
def __init__(self, path):
|
12
|
+
self.path = Path(path)
|
13
|
+
|
14
|
+
# Keep
|
15
|
+
def get_submissions(self,cik=None, accession_number=None, submission_type=None, filing_date=None,
|
16
|
+
columns=None, distinct=False, page_size=25000, quiet=False, api_key=None):
|
17
|
+
|
18
|
+
return datamule_lookup(cik, accession_number, submission_type, filing_date,
|
19
|
+
columns, distinct, page_size, quiet, api_key)
|
20
|
+
|
21
|
+
def get_table(self,table,cik=None,ticker=None,**kwargs):
|
22
|
+
cik = _process_cik_and_metadata_filters(cik, ticker)
|
23
|
+
|
24
|
+
if table == 'fundamentals':
|
25
|
+
fundamentals = kwargs.pop('fundamentals', None)
|
26
|
+
if fundamentals is None:
|
27
|
+
raise ValueError("fundamentals parameter required for fundamentals table")
|
28
|
+
|
29
|
+
categories = kwargs.pop('categories',None)
|
30
|
+
|
31
|
+
mappings = get_fundamental_mappings(fundamentals=fundamentals)
|
32
|
+
#print(mappings)
|
33
|
+
taxonomies = [item[0] for item in mappings]
|
34
|
+
names = [item[1] for item in mappings]
|
35
|
+
xbrl = query_mysql_rds(table='simple_xbrl',cik=cik,taxonomy=taxonomies,name=names,**kwargs)
|
36
|
+
#print(xbrl)
|
37
|
+
|
38
|
+
return construct_fundamentals(xbrl, 'taxonomy', 'name', 'period_start_date', 'period_end_date', categories=categories,fundamentals=fundamentals)
|
39
|
+
|
40
|
+
else:
|
41
|
+
return query_mysql_rds(table=table,cik=cik,**kwargs)
|
42
|
+
|
43
|
+
def download_xbrl(
|
44
|
+
self,
|
45
|
+
cik=None,
|
46
|
+
ticker=None,
|
47
|
+
**kwargs
|
48
|
+
):
|
49
|
+
# If no CIK or ticker specified, get all companies with tickers
|
50
|
+
if cik is None and ticker is None:
|
51
|
+
cik = [row['cik'] for row in load_package_dataset('company_tickers')]
|
52
|
+
|
53
|
+
# Normalize cik to list format
|
54
|
+
if isinstance(cik, (str, int)):
|
55
|
+
cik = [cik]
|
56
|
+
|
57
|
+
# Process CIK and metadata filters
|
58
|
+
cik_list = _process_cik_and_metadata_filters(cik, ticker, **kwargs)
|
59
|
+
|
60
|
+
# Download facts for all CIKs in parallel
|
61
|
+
download_company_facts(cik=cik_list, output_dir=self.path)
|
62
|
+
|
63
|
+
def get_information_table(
|
64
|
+
self,
|
65
|
+
# Optional filtering parameters
|
66
|
+
columns=None,
|
67
|
+
name_of_issuer=None,
|
68
|
+
title_of_class=None,
|
69
|
+
cusip=None,
|
70
|
+
value=None,
|
71
|
+
ssh_prnamt=None,
|
72
|
+
ssh_prnamt_type=None,
|
73
|
+
investment_discretion=None,
|
74
|
+
voting_authority_sole=None,
|
75
|
+
voting_authority_shared=None,
|
76
|
+
voting_authority_none=None,
|
77
|
+
reporting_owner_cik=None,
|
78
|
+
put_call=None,
|
79
|
+
other_manager=None,
|
80
|
+
figi=None,
|
81
|
+
accession=None,
|
82
|
+
filing_date=None,
|
83
|
+
|
84
|
+
# API key handling
|
85
|
+
api_key=None,
|
86
|
+
|
87
|
+
# Additional options
|
88
|
+
print_cost=True,
|
89
|
+
verbose=False
|
90
|
+
):
|
91
|
+
"""
|
92
|
+
Query the SEC BigQuery API for 13F-HR information table data.
|
93
|
+
|
94
|
+
Parameters:
|
95
|
+
-----------
|
96
|
+
columns : List[str], optional
|
97
|
+
Specific columns to return. If None, all columns are returned.
|
98
|
+
|
99
|
+
# Filter parameters
|
100
|
+
name_of_issuer, title_of_class, etc. : Various filters that can be:
|
101
|
+
- str: Exact match
|
102
|
+
- List[str]: Match any in list
|
103
|
+
- tuple: (min, max) range for numeric/date fields
|
104
|
+
|
105
|
+
api_key : str, optional
|
106
|
+
SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
|
107
|
+
print_cost : bool
|
108
|
+
Whether to print the query cost information
|
109
|
+
verbose : bool
|
110
|
+
Whether to print additional information about the query
|
111
|
+
|
112
|
+
Returns:
|
113
|
+
--------
|
114
|
+
List[Dict]
|
115
|
+
A list of dictionaries containing the query results
|
116
|
+
|
117
|
+
Raises:
|
118
|
+
-------
|
119
|
+
ValueError
|
120
|
+
If API key is missing or invalid
|
121
|
+
Exception
|
122
|
+
For API errors or other issues
|
123
|
+
"""
|
124
|
+
|
125
|
+
return get_information_table(
|
126
|
+
columns=columns,
|
127
|
+
name_of_issuer=name_of_issuer,
|
128
|
+
title_of_class=title_of_class,
|
129
|
+
cusip=cusip,
|
130
|
+
value=value,
|
131
|
+
ssh_prnamt=ssh_prnamt,
|
132
|
+
ssh_prnamt_type=ssh_prnamt_type,
|
133
|
+
investment_discretion=investment_discretion,
|
134
|
+
voting_authority_sole=voting_authority_sole,
|
135
|
+
voting_authority_shared=voting_authority_shared,
|
136
|
+
voting_authority_none=voting_authority_none,
|
137
|
+
reporting_owner_cik=reporting_owner_cik,
|
138
|
+
put_call=put_call,
|
139
|
+
other_manager=other_manager,
|
140
|
+
figi=figi,
|
141
|
+
accession=accession,
|
142
|
+
filing_date=filing_date,
|
143
|
+
|
144
|
+
# API key handling
|
145
|
+
api_key=api_key,
|
146
|
+
|
147
|
+
# Additional options
|
148
|
+
print_cost=print_cost,
|
149
|
+
verbose=verbose
|
150
|
+
)
|
151
|
+
|
152
|
+
def get_345(
|
153
|
+
self,
|
154
|
+
# Optional filtering parameters
|
155
|
+
columns=None,
|
156
|
+
is_derivative=None,
|
157
|
+
is_non_derivative=None,
|
158
|
+
security_title=None,
|
159
|
+
transaction_date=None,
|
160
|
+
document_type=None,
|
161
|
+
transaction_code=None,
|
162
|
+
equity_swap_involved=None,
|
163
|
+
transaction_timeliness=None,
|
164
|
+
transaction_shares=None,
|
165
|
+
transaction_price_per_share=None,
|
166
|
+
shares_owned_following_transaction=None,
|
167
|
+
ownership_type=None,
|
168
|
+
deemed_execution_date=None,
|
169
|
+
conversion_or_exercise_price=None,
|
170
|
+
exercise_date=None,
|
171
|
+
expiration_date=None,
|
172
|
+
underlying_security_title=None,
|
173
|
+
underlying_security_shares=None,
|
174
|
+
underlying_security_value=None,
|
175
|
+
accession=None,
|
176
|
+
reporting_owner_cik=None,
|
177
|
+
issuer_cik=None,
|
178
|
+
filing_date=None,
|
179
|
+
|
180
|
+
# API key handling
|
181
|
+
api_key=None,
|
182
|
+
|
183
|
+
# Additional options
|
184
|
+
print_cost=True,
|
185
|
+
verbose=False
|
186
|
+
):
|
187
|
+
"""
|
188
|
+
Query the SEC BigQuery API for Form 345 insider transaction data.
|
189
|
+
|
190
|
+
Parameters:
|
191
|
+
-----------
|
192
|
+
columns : List[str], optional
|
193
|
+
Specific columns to return. If None, all columns are returned.
|
194
|
+
|
195
|
+
# Filter parameters
|
196
|
+
is_derivative, security_title, etc. : Various filters that can be:
|
197
|
+
- str/bool: Exact match
|
198
|
+
- List[str]: Match any in list
|
199
|
+
- tuple: (min, max) range for numeric/date fields
|
200
|
+
|
201
|
+
reporting_owner_cik : str or List[str]
|
202
|
+
CIK(s) of the reporting insider(s). This is matched against an array in BigQuery.
|
203
|
+
Any match within the array will return the record.
|
204
|
+
|
205
|
+
issuer_cik : str or List[str]
|
206
|
+
CIK(s) of the company/companies
|
207
|
+
|
208
|
+
api_key : str, optional
|
209
|
+
SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
|
210
|
+
print_cost : bool
|
211
|
+
Whether to print the query cost information
|
212
|
+
verbose : bool
|
213
|
+
Whether to print additional information about the query
|
214
|
+
|
215
|
+
Returns:
|
216
|
+
--------
|
217
|
+
List[Dict]
|
218
|
+
A list of dictionaries containing the query results
|
219
|
+
|
220
|
+
Raises:
|
221
|
+
-------
|
222
|
+
ValueError
|
223
|
+
If API key is missing or invalid
|
224
|
+
Exception
|
225
|
+
For API errors or other issues
|
226
|
+
"""
|
227
|
+
|
228
|
+
return get_345(
|
229
|
+
columns=columns,
|
230
|
+
is_derivative=is_derivative,
|
231
|
+
is_non_derivative=is_non_derivative,
|
232
|
+
security_title=security_title,
|
233
|
+
transaction_date=transaction_date,
|
234
|
+
document_type=document_type,
|
235
|
+
transaction_code=transaction_code,
|
236
|
+
equity_swap_involved=equity_swap_involved,
|
237
|
+
transaction_timeliness=transaction_timeliness,
|
238
|
+
transaction_shares=transaction_shares,
|
239
|
+
transaction_price_per_share=transaction_price_per_share,
|
240
|
+
shares_owned_following_transaction=shares_owned_following_transaction,
|
241
|
+
ownership_type=ownership_type,
|
242
|
+
deemed_execution_date=deemed_execution_date,
|
243
|
+
conversion_or_exercise_price=conversion_or_exercise_price,
|
244
|
+
exercise_date=exercise_date,
|
245
|
+
expiration_date=expiration_date,
|
246
|
+
underlying_security_title=underlying_security_title,
|
247
|
+
underlying_security_shares=underlying_security_shares,
|
248
|
+
underlying_security_value=underlying_security_value,
|
249
|
+
accession=accession,
|
250
|
+
reporting_owner_cik=reporting_owner_cik,
|
251
|
+
issuer_cik=issuer_cik,
|
252
|
+
filing_date=filing_date,
|
253
|
+
|
254
|
+
# API key handling
|
255
|
+
api_key=api_key,
|
256
|
+
|
257
|
+
# Additional options
|
258
|
+
print_cost=print_cost,
|
259
|
+
verbose=verbose
|
260
|
+
)
|
261
|
+
|
262
|
+
def _download_to_csv(self, data, filepath, verbose=False):
|
263
|
+
"""
|
264
|
+
Helper method to download data to a CSV file.
|
265
|
+
|
266
|
+
Parameters:
|
267
|
+
-----------
|
268
|
+
data : List[Dict]
|
269
|
+
The data to save
|
270
|
+
filepath : str or Path
|
271
|
+
Path where to save the CSV file. If relative, it will be relative to the Sheet's path.
|
272
|
+
verbose : bool
|
273
|
+
Whether to print additional information
|
274
|
+
|
275
|
+
Returns:
|
276
|
+
--------
|
277
|
+
List[Dict]
|
278
|
+
The input data (for method chaining)
|
279
|
+
"""
|
280
|
+
# If no data returned, nothing to save
|
281
|
+
if not data:
|
282
|
+
if verbose:
|
283
|
+
print("No data returned from API. No file was created.")
|
284
|
+
return data
|
285
|
+
|
286
|
+
# Resolve filepath - if it's not absolute, make it relative to self.path
|
287
|
+
filepath_obj = Path(filepath)
|
288
|
+
if not filepath_obj.is_absolute():
|
289
|
+
filepath_obj = self.path / filepath_obj
|
290
|
+
|
291
|
+
# Create directory if it doesn't exist
|
292
|
+
os.makedirs(filepath_obj.parent, exist_ok=True)
|
293
|
+
|
294
|
+
# Get fieldnames from the first record
|
295
|
+
fieldnames = data[0].keys()
|
296
|
+
|
297
|
+
# Write to CSV
|
298
|
+
with open(filepath_obj, 'w', newline='') as csvfile:
|
299
|
+
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
300
|
+
writer.writeheader()
|
301
|
+
writer.writerows(data)
|
302
|
+
|
303
|
+
if verbose:
|
304
|
+
print(f"Saved {len(data)} records to {filepath_obj}")
|
305
|
+
|
306
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from ..utils.dictionaries import download_dictionary, load_dictionary
|
2
|
+
|
3
|
+
_active_dictionaries = []
|
4
|
+
_loaded_dictionaries = {}
|
5
|
+
|
6
|
+
def set_dictionaries(dictionaries, overwrite=False):
|
7
|
+
"""Set active dictionaries and load them into memory"""
|
8
|
+
global _active_dictionaries, _loaded_dictionaries
|
9
|
+
_active_dictionaries = dictionaries
|
10
|
+
_loaded_dictionaries = {}
|
11
|
+
|
12
|
+
for dict_name in dictionaries:
|
13
|
+
# Download if needed
|
14
|
+
download_dictionary(dict_name, overwrite=overwrite)
|
15
|
+
# Load into memory
|
16
|
+
_loaded_dictionaries[dict_name] = load_dictionary(dict_name)
|