datamule 2.1.5__tar.gz → 2.2.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (67)
  1. {datamule-2.1.5 → datamule-2.2.0}/PKG-INFO +2 -1
  2. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/document.py +171 -45
  3. datamule-2.2.0/datamule/sheet.py +306 -0
  4. datamule-2.2.0/datamule/tags/config.py +16 -0
  5. datamule-2.2.0/datamule/tags/regex.py +105 -0
  6. datamule-2.2.0/datamule/tags/utils.py +149 -0
  7. datamule-2.2.0/datamule/utils/__init__.py +0 -0
  8. datamule-2.2.0/datamule/utils/dictionaries.py +76 -0
  9. {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/PKG-INFO +2 -1
  10. {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/SOURCES.txt +5 -0
  11. {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/requires.txt +1 -0
  12. {datamule-2.1.5 → datamule-2.2.0}/setup.py +3 -2
  13. datamule-2.1.5/datamule/sheet.py +0 -706
  14. {datamule-2.1.5 → datamule-2.2.0}/datamule/__init__.py +0 -0
  15. {datamule-2.1.5 → datamule-2.2.0}/datamule/config.py +0 -0
  16. {datamule-2.1.5 → datamule-2.2.0}/datamule/data/listed_filer_metadata.csv +0 -0
  17. {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/__init__.py +0 -0
  18. {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/datamule_lookup.py +0 -0
  19. {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/datamule_mysql_rds.py +0 -0
  20. {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/downloader.py +0 -0
  21. {datamule-2.1.5 → datamule-2.2.0}/datamule/datamule/sec_connector.py +0 -0
  22. {datamule-2.1.5 → datamule-2.2.0}/datamule/datasets.py +0 -0
  23. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/__init__.py +0 -0
  24. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/__init__.py +0 -0
  25. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables.py +0 -0
  26. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_13fhr.py +0 -0
  27. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_25nse.py +0 -0
  28. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_informationtable.py +0 -0
  29. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_npx.py +0 -0
  30. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_ownership.py +0 -0
  31. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_proxyvotingrecord.py +0 -0
  32. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_sbsef.py +0 -0
  33. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/tables_sdr.py +0 -0
  34. {datamule-2.1.5 → datamule-2.2.0}/datamule/document/tables/utils.py +0 -0
  35. {datamule-2.1.5 → datamule-2.2.0}/datamule/helper.py +0 -0
  36. {datamule-2.1.5 → datamule-2.2.0}/datamule/index.py +0 -0
  37. {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/__init__.py +0 -0
  38. {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
  39. {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  40. {datamule-2.1.5 → datamule-2.2.0}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  41. {datamule-2.1.5 → datamule-2.2.0}/datamule/package_updater.py +0 -0
  42. {datamule-2.1.5 → datamule-2.2.0}/datamule/portfolio.py +0 -0
  43. {datamule-2.1.5 → datamule-2.2.0}/datamule/portfolio_compression_utils.py +0 -0
  44. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/__init__.py +0 -0
  45. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/infrastructure/__init__.py +0 -0
  46. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  47. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/__init__.py +0 -0
  48. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/downloader.py +0 -0
  49. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/eftsquery.py +0 -0
  50. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/monitor.py +0 -0
  51. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/streamer.py +0 -0
  52. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/submissions/textsearch.py +0 -0
  53. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/utils.py +0 -0
  54. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/__init__.py +0 -0
  55. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  56. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  57. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  58. {datamule-2.1.5 → datamule-2.2.0}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  59. {datamule-2.1.5 → datamule-2.2.0}/datamule/seclibrary/__init__.py +0 -0
  60. {datamule-2.1.5 → datamule-2.2.0}/datamule/seclibrary/bq.py +0 -0
  61. {datamule-2.1.5 → datamule-2.2.0}/datamule/submission.py +0 -0
  62. {datamule-2.1.5/datamule/utils → datamule-2.2.0/datamule/tags}/__init__.py +0 -0
  63. {datamule-2.1.5 → datamule-2.2.0}/datamule/utils/construct_submissions_data.py +0 -0
  64. {datamule-2.1.5 → datamule-2.2.0}/datamule/utils/format_accession.py +0 -0
  65. {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/dependency_links.txt +0 -0
  66. {datamule-2.1.5 → datamule-2.2.0}/datamule.egg-info/top_level.txt +0 -0
  67. {datamule-2.1.5 → datamule-2.2.0}/setup.cfg +0 -0
--- datamule-2.1.5/PKG-INFO
+++ datamule-2.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 2.1.5
+Version: 2.2.0
 Summary: Work with SEC submissions at scale.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
@@ -19,3 +19,4 @@ Requires-Dist: secxbrl
 Requires-Dist: secsgml
 Requires-Dist: websocket-client
 Requires-Dist: company_fundamentals
+Requires-Dist: flashtext
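
The lone dependency addition, flashtext, backs the new dictionary-based tag lookups in document.py and datamule/tags/ below. For orientation, the core flashtext pattern (upstream library API; the keywords here are illustrative):

from flashtext import KeywordProcessor

# Build a keyword trie once; extraction then runs in time linear in the
# text length, independent of how many keywords are loaded.
kp = KeywordProcessor(case_sensitive=True)
kp.add_keyword('Tim Cook')
kp.add_keyword('John Friedman')
kp.extract_keywords('Apple CEO Tim Cook said ...')  # -> ['Tim Cook']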
--- datamule-2.1.5/datamule/document/document.py
+++ datamule-2.2.0/datamule/document/document.py
@@ -13,9 +13,133 @@ from pathlib import Path
 import webbrowser
 from secsgml.utils import bytes_to_str
 import tempfile
-
+import warnings
 from .tables.tables import Tables
 
+from ..tags.utils import get_cusip_using_regex, get_isin_using_regex, get_figi_using_regex,get_all_tickers, get_full_names,get_full_names_dictionary_lookup
+
+
+class Tickers:
+    def __init__(self, document):
+        self.document = document
+        self._tickers_data = None
+
+    def _get_tickers_data(self):
+        """Get all tickers data once and cache it"""
+        if self._tickers_data is None:
+            # Check if document extension is supported
+            if self.document.extension not in ['.htm', '.html', '.txt']:
+                self._tickers_data = {}
+            else:
+                self._tickers_data = get_all_tickers(self.document.text)
+        return self._tickers_data
+
+    def __getattr__(self, exchange_name):
+        data = self._get_tickers_data()
+
+        if exchange_name in data:
+            return data[exchange_name]
+
+        return []
+
+    def __bool__(self):
+        """Return True if any tickers were found"""
+        data = self._get_tickers_data()
+        return bool(data.get('all', []))
+
+    def __repr__(self):
+        """Show the full ticker data when printed or accessed directly"""
+        data = self._get_tickers_data()
+        return str(data)
+
+    def __str__(self):
+        """Show the full ticker data when printed"""
+        data = self._get_tickers_data()
+        return str(data)
+
+class Tags:
+    def __init__(self, document):
+        from ..tags.config import _active_dictionaries,_loaded_dictionaries
+        self.not_supported = document.extension not in ['.htm', '.html', '.txt']
+        self.document = document
+        self._tickers = None
+        self.dictionaries = {}
+
+        # Load global dictionaries with their data
+        active_dicts = _active_dictionaries
+        for dict_name in active_dicts:
+            self.dictionaries[dict_name] = _loaded_dictionaries[dict_name]
+
+
+    def _check_support(self):
+        if self.not_supported:
+            warnings.warn(f"Document extension '{self.document.extension}' is not supported. Supported formats: .htm, .html, .txt")
+            return False
+        return True
+
+    @property
+    def cusips(self):
+        if not self._check_support():
+            return None
+
+        if not hasattr(self, '_cusip'):
+            if 'sc13dg_cusips' in self.dictionaries:
+                keywords = self.dictionaries['sc13dg_cusips']
+                self._cusip = get_cusip_using_regex(self.document.text, keywords)
+            else:
+                self._cusip = get_cusip_using_regex(self.document.text)
+        return self._cusip
+
+    @property
+    def isins(self):
+        if not self._check_support():
+            return None
+
+        if not hasattr(self, '_isin'):
+            if 'npx_isins' in self.dictionaries:
+                keywords = self.dictionaries['npx_isins']
+                self._isin = get_isin_using_regex(self.document.text, keywords)
+            else:
+                self._isin = get_isin_using_regex(self.document.text)
+        return self._isin
+
+    @property
+    def figis(self):
+        if not self._check_support():
+            return None
+
+        if not hasattr(self, '_figi'):
+            if 'npx_figis' in self.dictionaries:
+                keywords = self.dictionaries['npx_figis']
+                self._figi = get_figi_using_regex(self.document.text, keywords)
+            else:
+                self._figi = get_figi_using_regex(self.document.text)
+        return self._figi
+
+    @property
+    def tickers(self):
+        if self._tickers is None:
+            self._tickers = Tickers(self.document)
+        return self._tickers
+
+    @property
+    def persons(self):
+        if not self._check_support():
+            return None
+
+        if not hasattr(self, '_persons'):
+            if '8k_2024_persons' in self.dictionaries:
+                # Use FlashText dictionary lookup for 8-K persons
+                self._persons = get_full_names_dictionary_lookup(self.document.text, self.dictionaries['8k_2024_persons'])
+            elif 'ssa_baby_first_names' in self.dictionaries:
+                # Use regex with SSA names for validation
+                self._persons = get_full_names(self.document.text, self.dictionaries['ssa_baby_first_names'])
+            else:
+                # Fallback to regex without validation
+                self._persons = get_full_names(self.document.text)
+        return self._persons
+
+
 class Document:
     def __init__(self, type, content, extension,accession,filing_date,path=None):
 
@@ -34,10 +158,13 @@ class Document:
         self.path = path
 
         self.extension = extension
+
        # this will be filled by parsed
        self._data = None
        self._tables = None
        self._text = None
+
+        self.tags = Tags(self)
 
 
 
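Together, the two hunks above attach a lazily evaluated tags object to every Document. A minimal usage sketch (the constructor arguments and HTML content are illustrative, the top-level re-export of Document is an assumption, and exact matches depend on the new tags/regex.py, which this diff adds but does not display):

from datamule import Document  # assumed re-export; the class lives in datamule/document/document.py

doc = Document(
    type='SC 13D',
    content='<html><body>CUSIP No. 037833100 ...</body></html>',
    extension='.htm',
    accession='0000000000-24-000000',
    filing_date='2024-01-02',
)

doc.tags.cusips    # regex scan over doc.text, cached on first access
doc.tags.tickers   # Tickers object; exchange lists via __getattr__, e.g. doc.tags.tickers.all
doc.tags.persons   # plain-regex fallback unless person dictionaries are active
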
@@ -119,93 +246,92 @@ class Document:
 
         if self.extension == '.txt':
             content = self.text
-            if self.type == '10-Q':
+            if self.type in ['10-Q', '10-Q/A']:
                 mapping_dict = dict_10q
-            elif self.type == '10-K':
+            elif self.type in ['10-K','10-K/A']:
                 mapping_dict = dict_10k
-            elif self.type == '8-K':
+            elif self.type in ['8-K', '8-K/A']:
                 mapping_dict = dict_8k
-            elif self.type == 'SC 13D':
+            elif self.type in ['SC 13D', 'SC 13D/A']:
                 mapping_dict = dict_13d
-            elif self.type == 'SC 13G':
+            elif self.type in ['SC 13G', 'SC 13G/A']:
                 mapping_dict = dict_13g
 
             self._data = {}
             self._data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
         elif self.extension in ['.htm', '.html']:
 
-            if self.type == '1-K':
+            if self.type in ['1-K', '1-K/A']:
                 mapping_dict = dict_1kpartii_html
-            elif self.type == '1-SA':
+            elif self.type in ['1-SA', '1-SA/A']:
                 mapping_dict = dict_1sa_html
-            elif self.type == '1-U':
+            elif self.type in ['1-U', '1-U/A']:
                 mapping_dict = dict_1u_html
-            elif self.type == '10-12B':
+            elif self.type in ['10-12B', '10-12B/A']:
                 mapping_dict = dict_1012b_html
-            elif self.type == '10-D':
+            elif self.type in ['10-D', '10-D/A']:
                 mapping_dict = dict_10d_html
-            elif self.type == '10-K':
+            elif self.type in ['10-K', '10-K/A']:
                 mapping_dict = dict_10k_html
-            elif self.type == '10-Q':
+            elif self.type in ['10-Q', '10-Q/A']:
                 mapping_dict = dict_10q_html
-            elif self.type == '20-F':
+            elif self.type in ['20-F', '20-F/A']:
                 mapping_dict = dict_20f_html
-            elif self.type == '8-A12B':
+            elif self.type in ['8-A12B', '8-A12B/A']:
                 mapping_dict = dict_8a12b_html
-            elif self.type == '8-A12G':
+            elif self.type in ['8-A12G', '8-A12G/A']:
                 mapping_dict = dict_8a12g_html
-            elif self.type == '8-K':
+            elif self.type in ['8-K', '8-K/A']:
                 mapping_dict = dict_8k_html
-            elif self.type == '8-K12B':
+            elif self.type in ['8-K12B', '8-K12B/A']:
                 mapping_dict = dict_8k12b_html
-            elif self.type == '8-K12G3':
+            elif self.type in ['8-K12G3', '8-K12G3/A']:
                 mapping_dict = dict_8k12g3_html
-            elif self.type == '8-K15D5':
+            elif self.type in ['8-K15D5', '8-K15D5/A']:
                 mapping_dict = dict_8k15d5_html
-            elif self.type == 'ABS-15G':
+            elif self.type in ['ABS-15G', 'ABS-15G/A']:
                 mapping_dict = dict_abs15g_html
-            elif self.type == 'ABS-EE':
+            elif self.type in ['ABS-EE', 'ABS-EE/A']:
                 mapping_dict = dict_absee_html
-            elif self.type == 'APP NTC':
-                dict_appntc_html
-            elif self.type == 'CB':
+            elif self.type in ['APP NTC', 'APP NTC/A']:
+                mapping_dict = dict_appntc_html
+            elif self.type in ['CB', 'CB/A']:
                 mapping_dict = dict_cb_html
-            elif self.type == 'DSTRBRPT':
+            elif self.type in ['DSTRBRPT', 'DSTRBRPT/A']:
                 mapping_dict = dict_dstrbrpt_html
-            elif self.type == 'N-18F1':
+            elif self.type in ['N-18F1', 'N-18F1/A']:
                 mapping_dict = dict_n18f1_html
-            elif self.type == 'N-CSRS':
+            elif self.type in ['N-CSRS', 'N-CSRS/A']:
                 mapping_dict = dict_ncsrs_html
-            elif self.type == 'NT-10K':
+            elif self.type in ['NT-10K', 'NT-10K/A']:
                 mapping_dict = dict_nt10k_html
-            elif self.type == 'NT-10Q':
+            elif self.type in ['NT-10Q', 'NT-10Q/A']:
                 mapping_dict = dict_nt10q_html
-            elif self.type == 'NT 20-F':
+            elif self.type in ['NT 20-F', 'NT 20-F/A']:
                 mapping_dict = dict_nt20f_html
-            elif self.type == 'NT-NCEN':
+            elif self.type in ['NT-NCEN', 'NT-NCEN/A']:
                 mapping_dict = dict_ntncen_html
-            elif self.type == 'NT-NCSR':
+            elif self.type in ['NT-NCSR', 'NT-NCSR/A']:
                 mapping_dict = dict_ntncsr_html
-            elif self.type == 'NTFNCEN':
+            elif self.type in ['NTFNCEN', 'NTFNCEN/A']:
                 mapping_dict = dict_ntfcen_html
-            elif self.type == 'NTFNCSR':
+            elif self.type in ['NTFNCSR', 'NTFNCSR/A']:
                 mapping_dict = dict_ntfncsr_html
-            elif self.type == 'EX-99.CERT':
+            elif self.type in ['EX-99.CERT', 'EX-99.CERT/A']:
                 mapping_dict = dict_ex99cert_html
-            elif self.type == 'SC 13E3':
+            elif self.type in ['SC 13E3', 'SC 13E3/A']:
                 mapping_dict = dict_sc13e3_html
-            elif self.type == 'SC 14D9':
+            elif self.type in ['SC 14D9', 'SC 14D9/A']:
                 mapping_dict = dict_sc14d9_html
-            elif self.type == 'SP 15D2':
+            elif self.type in ['SP 15D2', 'SP 15D2/A']:
                 mapping_dict = dict_sp15d2_html
-
-            elif self.type == 'SD':
+            elif self.type in ['SD', 'SD/A']:
                 mapping_dict = dict_sd_html
-            elif self.type == 'S-1':
+            elif self.type in ['S-1', 'S-1/A']:
                 mapping_dict = dict_s1_html
-            elif self.type == 'T-3':
+            elif self.type in ['T-3', 'T-3/A']:
                 mapping_dict = dict_t3_html
-            elif self.type in ['NT 10-K', 'NT 10-Q','NT 20-F']:
+            elif self.type in ['NT 10-K', 'NT 10-K/A', 'NT 10-Q', 'NT 10-Q/A', 'NT 20-F', 'NT 20-F/A']:
                 mapping_dict = dict_nt10k_html
 
            dct = html2dict(content=self.content, mapping_dict=mapping_dict)
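
The long dispatch rewrite above is mechanical: every base form now also accepts its '/A' amendment, and the APP NTC branch gains the mapping_dict assignment it lacked in 2.1.5, where the bare dict_appntc_html expression left mapping_dict unassigned. Sketch of the user-visible effect (form type and content are illustrative; that the surrounding method is parse() is inferred from the 2.1.5 API):

doc = Document(type='10-K/A', content='<html>...</html>', extension='.htm',
               accession='0000000000-24-000001', filing_date='2024-02-03')

# 2.1.5: no branch matched '10-K/A', so the html2dict call hit an unbound
# mapping_dict and raised UnboundLocalError.
# 2.2.0: the amendment resolves to dict_10k_html, same as a plain 10-K.
doc.parse()
doc.data['document']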
@@ -233,7 +359,7 @@ class Document:
             self._preprocess_html_content()
         elif self.extension == '.txt':
             self._preprocess_txt_content()
-        return self._text
+        return self._text
 
     def write_json(self, output_filename=None):
        if not self.data:
--- /dev/null
+++ datamule-2.2.0/datamule/sheet.py
@@ -0,0 +1,306 @@
+from pathlib import Path
+import csv
+import os
+from .helper import _process_cik_and_metadata_filters, load_package_dataset
+from .sec.xbrl.downloadcompanyfacts import download_company_facts
+from .datamule.datamule_lookup import datamule_lookup
+from .datamule.datamule_mysql_rds import query_mysql_rds
+from company_fundamentals.utils import get_fundamental_mappings
+from company_fundamentals import construct_fundamentals
+class Sheet:
+    def __init__(self, path):
+        self.path = Path(path)
+
+    # Keep
+    def get_submissions(self,cik=None, accession_number=None, submission_type=None, filing_date=None,
+                        columns=None, distinct=False, page_size=25000, quiet=False, api_key=None):
+
+        return datamule_lookup(cik, accession_number, submission_type, filing_date,
+                               columns, distinct, page_size, quiet, api_key)
+
+    def get_table(self,table,cik=None,ticker=None,**kwargs):
+        cik = _process_cik_and_metadata_filters(cik, ticker)
+
+        if table == 'fundamentals':
+            fundamentals = kwargs.pop('fundamentals', None)
+            if fundamentals is None:
+                raise ValueError("fundamentals parameter required for fundamentals table")
+
+            categories = kwargs.pop('categories',None)
+
+            mappings = get_fundamental_mappings(fundamentals=fundamentals)
+            #print(mappings)
+            taxonomies = [item[0] for item in mappings]
+            names = [item[1] for item in mappings]
+            xbrl = query_mysql_rds(table='simple_xbrl',cik=cik,taxonomy=taxonomies,name=names,**kwargs)
+            #print(xbrl)
+
+            return construct_fundamentals(xbrl, 'taxonomy', 'name', 'period_start_date', 'period_end_date', categories=categories,fundamentals=fundamentals)
+
+        else:
+            return query_mysql_rds(table=table,cik=cik,**kwargs)
+
+    def download_xbrl(
+        self,
+        cik=None,
+        ticker=None,
+        **kwargs
+    ):
+        # If no CIK or ticker specified, get all companies with tickers
+        if cik is None and ticker is None:
+            cik = [row['cik'] for row in load_package_dataset('company_tickers')]
+
+        # Normalize cik to list format
+        if isinstance(cik, (str, int)):
+            cik = [cik]
+
+        # Process CIK and metadata filters
+        cik_list = _process_cik_and_metadata_filters(cik, ticker, **kwargs)
+
+        # Download facts for all CIKs in parallel
+        download_company_facts(cik=cik_list, output_dir=self.path)
+
+    def get_information_table(
+        self,
+        # Optional filtering parameters
+        columns=None,
+        name_of_issuer=None,
+        title_of_class=None,
+        cusip=None,
+        value=None,
+        ssh_prnamt=None,
+        ssh_prnamt_type=None,
+        investment_discretion=None,
+        voting_authority_sole=None,
+        voting_authority_shared=None,
+        voting_authority_none=None,
+        reporting_owner_cik=None,
+        put_call=None,
+        other_manager=None,
+        figi=None,
+        accession=None,
+        filing_date=None,
+
+        # API key handling
+        api_key=None,
+
+        # Additional options
+        print_cost=True,
+        verbose=False
+    ):
+        """
+        Query the SEC BigQuery API for 13F-HR information table data.
+
+        Parameters:
+        -----------
+        columns : List[str], optional
+            Specific columns to return. If None, all columns are returned.
+
+        # Filter parameters
+        name_of_issuer, title_of_class, etc. : Various filters that can be:
+            - str: Exact match
+            - List[str]: Match any in list
+            - tuple: (min, max) range for numeric/date fields
+
+        api_key : str, optional
+            SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+        print_cost : bool
+            Whether to print the query cost information
+        verbose : bool
+            Whether to print additional information about the query
+
+        Returns:
+        --------
+        List[Dict]
+            A list of dictionaries containing the query results
+
+        Raises:
+        -------
+        ValueError
+            If API key is missing or invalid
+        Exception
+            For API errors or other issues
+        """
+
+        return get_information_table(
+            columns=columns,
+            name_of_issuer=name_of_issuer,
+            title_of_class=title_of_class,
+            cusip=cusip,
+            value=value,
+            ssh_prnamt=ssh_prnamt,
+            ssh_prnamt_type=ssh_prnamt_type,
+            investment_discretion=investment_discretion,
+            voting_authority_sole=voting_authority_sole,
+            voting_authority_shared=voting_authority_shared,
+            voting_authority_none=voting_authority_none,
+            reporting_owner_cik=reporting_owner_cik,
+            put_call=put_call,
+            other_manager=other_manager,
+            figi=figi,
+            accession=accession,
+            filing_date=filing_date,
+
+            # API key handling
+            api_key=api_key,
+
+            # Additional options
+            print_cost=print_cost,
+            verbose=verbose
+        )
+
+    def get_345(
+        self,
+        # Optional filtering parameters
+        columns=None,
+        is_derivative=None,
+        is_non_derivative=None,
+        security_title=None,
+        transaction_date=None,
+        document_type=None,
+        transaction_code=None,
+        equity_swap_involved=None,
+        transaction_timeliness=None,
+        transaction_shares=None,
+        transaction_price_per_share=None,
+        shares_owned_following_transaction=None,
+        ownership_type=None,
+        deemed_execution_date=None,
+        conversion_or_exercise_price=None,
+        exercise_date=None,
+        expiration_date=None,
+        underlying_security_title=None,
+        underlying_security_shares=None,
+        underlying_security_value=None,
+        accession=None,
+        reporting_owner_cik=None,
+        issuer_cik=None,
+        filing_date=None,
+
+        # API key handling
+        api_key=None,
+
+        # Additional options
+        print_cost=True,
+        verbose=False
+    ):
+        """
+        Query the SEC BigQuery API for Form 345 insider transaction data.
+
+        Parameters:
+        -----------
+        columns : List[str], optional
+            Specific columns to return. If None, all columns are returned.
+
+        # Filter parameters
+        is_derivative, security_title, etc. : Various filters that can be:
+            - str/bool: Exact match
+            - List[str]: Match any in list
+            - tuple: (min, max) range for numeric/date fields
+
+        reporting_owner_cik : str or List[str]
+            CIK(s) of the reporting insider(s). This is matched against an array in BigQuery.
+            Any match within the array will return the record.
+
+        issuer_cik : str or List[str]
+            CIK(s) of the company/companies
+
+        api_key : str, optional
+            SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+        print_cost : bool
+            Whether to print the query cost information
+        verbose : bool
+            Whether to print additional information about the query
+
+        Returns:
+        --------
+        List[Dict]
+            A list of dictionaries containing the query results
+
+        Raises:
+        -------
+        ValueError
+            If API key is missing or invalid
+        Exception
+            For API errors or other issues
+        """
+
+        return get_345(
+            columns=columns,
+            is_derivative=is_derivative,
+            is_non_derivative=is_non_derivative,
+            security_title=security_title,
+            transaction_date=transaction_date,
+            document_type=document_type,
+            transaction_code=transaction_code,
+            equity_swap_involved=equity_swap_involved,
+            transaction_timeliness=transaction_timeliness,
+            transaction_shares=transaction_shares,
+            transaction_price_per_share=transaction_price_per_share,
+            shares_owned_following_transaction=shares_owned_following_transaction,
+            ownership_type=ownership_type,
+            deemed_execution_date=deemed_execution_date,
+            conversion_or_exercise_price=conversion_or_exercise_price,
+            exercise_date=exercise_date,
+            expiration_date=expiration_date,
+            underlying_security_title=underlying_security_title,
+            underlying_security_shares=underlying_security_shares,
+            underlying_security_value=underlying_security_value,
+            accession=accession,
+            reporting_owner_cik=reporting_owner_cik,
+            issuer_cik=issuer_cik,
+            filing_date=filing_date,
+
+            # API key handling
+            api_key=api_key,
+
+            # Additional options
+            print_cost=print_cost,
+            verbose=verbose
+        )
+
+    def _download_to_csv(self, data, filepath, verbose=False):
+        """
+        Helper method to download data to a CSV file.
+
+        Parameters:
+        -----------
+        data : List[Dict]
+            The data to save
+        filepath : str or Path
+            Path where to save the CSV file. If relative, it will be relative to the Sheet's path.
+        verbose : bool
+            Whether to print additional information
+
+        Returns:
+        --------
+        List[Dict]
+            The input data (for method chaining)
+        """
+        # If no data returned, nothing to save
+        if not data:
+            if verbose:
+                print("No data returned from API. No file was created.")
+            return data
+
+        # Resolve filepath - if it's not absolute, make it relative to self.path
+        filepath_obj = Path(filepath)
+        if not filepath_obj.is_absolute():
+            filepath_obj = self.path / filepath_obj
+
+        # Create directory if it doesn't exist
+        os.makedirs(filepath_obj.parent, exist_ok=True)
+
+        # Get fieldnames from the first record
+        fieldnames = data[0].keys()
+
+        # Write to CSV
+        with open(filepath_obj, 'w', newline='') as csvfile:
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(data)
+
+        if verbose:
+            print(f"Saved {len(data)} records to {filepath_obj}")
+
--- /dev/null
+++ datamule-2.2.0/datamule/tags/config.py
@@ -0,0 +1,16 @@
+from ..utils.dictionaries import download_dictionary, load_dictionary
+
+_active_dictionaries = []
+_loaded_dictionaries = {}
+
+def set_dictionaries(dictionaries, overwrite=False):
+    """Set active dictionaries and load them into memory"""
+    global _active_dictionaries, _loaded_dictionaries
+    _active_dictionaries = dictionaries
+    _loaded_dictionaries = {}
+
+    for dict_name in dictionaries:
+        # Download if needed
+        download_dictionary(dict_name, overwrite=overwrite)
+        # Load into memory
+        _loaded_dictionaries[dict_name] = load_dictionary(dict_name)
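
set_dictionaries holds module-level state that Tags reads at construction time, so it must run before documents are created. A sketch connecting the pieces (dictionary names taken from the branches in document.py above; the download/cache behavior lives in the new utils/dictionaries.py, which this diff does not display):

from datamule.tags.config import set_dictionaries

# Download (if missing) and load the dictionaries the Tags properties check for.
set_dictionaries(['ssa_baby_first_names', 'sc13dg_cusips'])

# Any Document created afterwards copies these into self.dictionaries, so
# .tags.persons validates names against the SSA first-name list and
# .tags.cusips passes the sc13dg keyword set to get_cusip_using_regex.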