datamule 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/document.py +262 -68
- datamule/portfolio.py +7 -5
- datamule/sec/submissions/downloader.py +19 -2
- datamule/sec/submissions/eftsquery.py +129 -8
- datamule/sec/submissions/monitor.py +5 -1
- datamule/sec/submissions/streamer.py +59 -23
- datamule/sec/submissions/textsearch.py +33 -6
- datamule/seclibrary/bq.py +191 -0
- datamule/sheet.py +220 -6
- datamule/submission.py +94 -19
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/METADATA +1 -1
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/RECORD +14 -13
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/WHEEL +0 -0
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/top_level.txt +0 -0
@@ -42,6 +42,67 @@ class EFTSQuery:
|
|
42
42
|
await self.session.close()
|
43
43
|
self.session = None
|
44
44
|
|
45
|
+
async def search_name(self, name):
    """
    Search for companies by name using the EFTS name search endpoint.

    The name is percent-encoded before being placed in the query string, so
    names containing URL metacharacters (for example "AT&T") are sent as a
    single ``keysTyped`` value instead of being split into extra parameters.

    Parameters:
        name (str): Company name to search for

    Returns:
        list: List of dictionaries with the keys 'entity', 'id' and 'tickers'
              (one per hit). An empty list is returned when the response has
              no hits or when the request fails.

    Raises:
        RuntimeError: If there is no active session.
    """
    if not self.session:
        raise RuntimeError("No active session. This method must be called within an async context.")

    # Local import keeps this method self-contained (no file-level import change needed).
    from urllib.parse import quote

    # safe='' so that '/', '&', '=', '#', '+' and '%' inside the name are all escaped.
    url = f"{self.base_url}?keysTyped={quote(str(name), safe='')}"

    if not self.quiet:
        print(f"Searching for company: {name}")

    async with self.limiter:
        try:
            async with self.session.get(url) as response:
                # NOTE(review): if RetryException derives from Exception (its
                # definition is not visible here), the generic handler at the
                # bottom turns this into an empty result rather than a retry.
                if response.status == 429:
                    raise RetryException(url)
                response.raise_for_status()

                # Read the body first so its size can be reported to the rate monitor.
                content = await response.read()
                await self.rate_monitor.add_request(len(content))
                data = await response.json()

                if 'hits' in data and 'hits' in data['hits']:
                    results = []
                    for hit in data['hits']['hits']:
                        source = hit.get('_source', {})
                        results.append({
                            'entity': source.get('entity', ''),
                            'id': hit.get('_id', ''),
                            'tickers': source.get('tickers', '')
                        })

                    if not self.quiet and results:
                        # Create a compact display of results (first five only)
                        display_results = [f"{r['entity']} [{r['id']}]" for r in results]
                        print(f"Name matches: {', '.join(display_results[:5])}")
                        if len(results) > 5:
                            print(f"...and {len(results) - 5} more matches")

                    return results
                return []
        except aiohttp.ClientResponseError as e:
            if e.status == 429:
                raise RetryException(url)
            if not self.quiet:
                print(f"Error searching for company: {str(e)}")
            return []
        except Exception as e:
            # Best-effort lookup: any other failure is reported and treated as "no matches".
            if not self.quiet:
                print(f"Error searching for company: {str(e)}")
            return []
|
105
|
+
|
45
106
|
def _get_form_exclusions(self, form):
|
46
107
|
"""Dynamically generate form exclusions based on patterns"""
|
47
108
|
# Skip already negated forms
|
@@ -55,7 +116,7 @@ class EFTSQuery:
|
|
55
116
|
# No exclusions for amendment forms
|
56
117
|
return []
|
57
118
|
|
58
|
-
def _prepare_params(self, cik=None, submission_type=None, filing_date=None):
|
119
|
+
def _prepare_params(self, cik=None, submission_type=None, filing_date=None, location=None):
|
59
120
|
params = {}
|
60
121
|
|
61
122
|
# Handle CIK
|
@@ -111,6 +172,10 @@ class EFTSQuery:
|
|
111
172
|
params['startdt'] = "2001-01-01"
|
112
173
|
params['enddt'] = datetime.now().strftime('%Y-%m-%d')
|
113
174
|
|
175
|
+
# Handle location filtering
|
176
|
+
if location:
|
177
|
+
params['filter_location'] = location
|
178
|
+
|
114
179
|
return params
|
115
180
|
|
116
181
|
def _get_query_description(self, params):
|
@@ -125,6 +190,9 @@ class EFTSQuery:
|
|
125
190
|
if 'startdt' in params and 'enddt' in params:
|
126
191
|
parts.append(f"dates={params['startdt']} to {params['enddt']}")
|
127
192
|
|
193
|
+
if 'filter_location' in params:
|
194
|
+
parts.append(f"location={params['filter_location']}")
|
195
|
+
|
128
196
|
return ", ".join(parts)
|
129
197
|
|
130
198
|
async def _fetch_json(self, url):
|
@@ -413,12 +481,26 @@ class EFTSQuery:
|
|
413
481
|
for params, from_val, size_val, callback in self.pending_page_requests:
|
414
482
|
await self.fetch_queue.put((params, from_val, size_val, callback))
|
415
483
|
|
416
|
-
async def query(self, cik=None, submission_type=None, filing_date=None, callback=None):
|
417
|
-
|
418
|
-
|
484
|
+
async def query(self, cik=None, submission_type=None, filing_date=None, location=None, callback=None, name=None):
|
485
|
+
"""
|
486
|
+
Query SEC filings using the EFTS API.
|
487
|
+
|
488
|
+
Parameters:
|
489
|
+
cik (str or list): Central Index Key(s) for the company
|
490
|
+
submission_type (str or list): Filing form type(s) to filter by
|
491
|
+
filing_date (str, tuple, or list): Date or date range to filter by
|
492
|
+
location (str): Location code to filter by (e.g., 'CA' for California)
|
493
|
+
callback (function): Async callback function to process results as they arrive
|
494
|
+
name (str): Company name to search for (alternative to providing CIK)
|
419
495
|
|
420
|
-
|
421
|
-
|
496
|
+
Returns:
|
497
|
+
list: List of filing documents matching the query criteria
|
498
|
+
"""
|
499
|
+
# If both CIK and name are provided, raise an error
|
500
|
+
if cik is not None and name is not None:
|
501
|
+
raise ValueError("Please provide either 'name' or 'cik', not both")
|
502
|
+
|
503
|
+
all_hits = []
|
422
504
|
|
423
505
|
# Collector callback to gather all hits
|
424
506
|
async def collect_hits(hits):
|
@@ -427,6 +509,25 @@ class EFTSQuery:
|
|
427
509
|
await callback(hits)
|
428
510
|
|
429
511
|
async with self as client:
|
512
|
+
# If name is provided, search for matching companies inside the context manager
|
513
|
+
if name is not None:
|
514
|
+
company_results = await self.search_name(name)
|
515
|
+
if not company_results:
|
516
|
+
if not self.quiet:
|
517
|
+
print(f"No companies found matching: {name}")
|
518
|
+
return []
|
519
|
+
|
520
|
+
# Use the first (best) match's CIK
|
521
|
+
cik = company_results[0]['id']
|
522
|
+
if not self.quiet:
|
523
|
+
print(f"Using CIK {cik} for {company_results[0]['entity']}")
|
524
|
+
|
525
|
+
# Now prepare parameters with the CIK (either provided directly or from name search)
|
526
|
+
params = self._prepare_params(cik, submission_type, filing_date, location)
|
527
|
+
|
528
|
+
# Check if this is a primary documents query
|
529
|
+
self.was_primary_docs_query = '-0' in params.get('forms', '').split(',')
|
530
|
+
|
430
531
|
# Reset state for new query
|
431
532
|
self.total_results_to_fetch = 0
|
432
533
|
self.pending_page_requests = []
|
@@ -506,12 +607,32 @@ class EFTSQuery:
|
|
506
607
|
print(f"\n--- Query complete: {len(all_hits):,} submissions retrieved ---")
|
507
608
|
return all_hits
|
508
609
|
|
509
|
-
def query_efts(cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, callback=None, quiet=False):
|
610
|
+
def query_efts(cik=None, submission_type=None, filing_date=None, location=None, requests_per_second=5.0, callback=None, quiet=False, name=None):
    """
    Synchronous wrapper that runs one EFTS query to completion.

    A fresh EFTSQuery is created for the call and driven with asyncio.run(),
    so the caller does not have to manage an event loop or async context.

    Parameters:
        cik (str or list): Central Index Key(s) for the company
        submission_type (str or list): Filing form type(s) to filter by
        filing_date (str, tuple, or list): Date or date range to filter by
        location (str): Location code to filter by (e.g., 'CA' for California)
        requests_per_second (float): Maximum requests per second to make to the SEC API
        callback (function): Async callback function to process results as they arrive
        quiet (bool): Whether to suppress progress output
        name (str): Company name to search for (alternative to providing CIK)

    Returns:
        list: List of filing documents matching the query criteria

    Example:
        To search by company name:
            results = query_efts(name="Tesla", submission_type="10-K")

        To search by CIK:
            results = query_efts(cik="1318605", submission_type="10-K")
    """
    async def _execute():
        client = EFTSQuery(requests_per_second=requests_per_second, quiet=quiet)
        hits = await client.query(
            cik=cik,
            submission_type=submission_type,
            filing_date=filing_date,
            location=location,
            callback=callback,
            name=name,
        )
        return hits

    return asyncio.run(_execute())
|
@@ -20,12 +20,16 @@ async def _process_efts_hits(hits, collected_accession_numbers, data_callback=No
|
|
20
20
|
submission_type = source.get('form')
|
21
21
|
ciks = source.get('ciks', [])
|
22
22
|
ciks = [str(int(cik)) for cik in ciks]
|
23
|
+
|
24
|
+
filing_date = source.get('file_date')
|
23
25
|
|
24
26
|
# Create standardized filing record
|
25
27
|
filing = {
|
26
28
|
'accession_number': accession_number,
|
27
29
|
'submission_type': submission_type,
|
28
|
-
'ciks': ciks
|
30
|
+
'ciks': ciks,
|
31
|
+
'filing_date': filing_date,
|
32
|
+
|
29
33
|
}
|
30
34
|
|
31
35
|
processed_hits.append(filing)
|
@@ -21,8 +21,8 @@ def fix_filing_url(url):
|
|
21
21
|
return url
|
22
22
|
|
23
23
|
class Streamer(EFTSQuery):
|
24
|
-
def __init__(self, requests_per_second=5.0, document_callback=None, accession_numbers=None):
|
25
|
-
super().__init__(requests_per_second=requests_per_second)
|
24
|
+
def __init__(self, requests_per_second=5.0, document_callback=None, accession_numbers=None, quiet=False):
|
25
|
+
super().__init__(requests_per_second=requests_per_second, quiet=quiet)
|
26
26
|
self.document_callback = document_callback
|
27
27
|
self.document_queue = asyncio.Queue()
|
28
28
|
self.download_in_progress = asyncio.Event()
|
@@ -57,12 +57,14 @@ class Streamer(EFTSQuery):
|
|
57
57
|
await callback(hits)
|
58
58
|
self.fetch_queue.task_done()
|
59
59
|
except Exception as e:
|
60
|
-
|
60
|
+
if not self.quiet:
|
61
|
+
print(f"\nError fetching {url}: {str(e)}")
|
61
62
|
self.fetch_queue.task_done()
|
62
63
|
except asyncio.CancelledError:
|
63
64
|
break
|
64
65
|
except Exception as e:
|
65
|
-
|
66
|
+
if not self.quiet:
|
67
|
+
print(f"\nWorker error: {str(e)}")
|
66
68
|
self.fetch_queue.task_done()
|
67
69
|
|
68
70
|
def _construct_submission_url(self, hit):
|
@@ -85,7 +87,8 @@ class Streamer(EFTSQuery):
|
|
85
87
|
|
86
88
|
return url, cik, accno_w_dash
|
87
89
|
except (KeyError, IndexError) as e:
|
88
|
-
|
90
|
+
if not self.quiet:
|
91
|
+
print(f"Error constructing URL for hit: {hit}. Error: {str(e)}")
|
89
92
|
return None, None, None
|
90
93
|
|
91
94
|
async def _document_download_worker(self):
|
@@ -115,13 +118,15 @@ class Streamer(EFTSQuery):
|
|
115
118
|
|
116
119
|
self.document_queue.task_done()
|
117
120
|
except Exception as e:
|
118
|
-
|
121
|
+
if not self.quiet:
|
122
|
+
print(f"\nError streaming document {doc_url}: {str(e)}")
|
119
123
|
self.document_queue.task_done()
|
120
124
|
|
121
125
|
except asyncio.CancelledError:
|
122
126
|
break
|
123
127
|
except Exception as e:
|
124
|
-
|
128
|
+
if not self.quiet:
|
129
|
+
print(f"\nDocument worker error: {str(e)}")
|
125
130
|
self.document_queue.task_done()
|
126
131
|
|
127
132
|
async def document_download_callback(self, hits):
|
@@ -133,7 +138,7 @@ class Streamer(EFTSQuery):
|
|
133
138
|
self.download_in_progress.set()
|
134
139
|
|
135
140
|
# Create progress bar for documents if not exists
|
136
|
-
if not self.document_pbar:
|
141
|
+
if not self.document_pbar and not self.quiet:
|
137
142
|
self.document_pbar = tqdm(total=0, desc="Streaming submissions")
|
138
143
|
|
139
144
|
# Queue up the documents for download
|
@@ -141,7 +146,8 @@ class Streamer(EFTSQuery):
|
|
141
146
|
doc_url, cik, accno = self._construct_submission_url(hit)
|
142
147
|
if doc_url:
|
143
148
|
# Update document progress bar total
|
144
|
-
self.document_pbar
|
149
|
+
if self.document_pbar:
|
150
|
+
self.document_pbar.total += 1
|
145
151
|
self.total_documents += 1
|
146
152
|
|
147
153
|
# Add to download queue
|
@@ -159,8 +165,20 @@ class Streamer(EFTSQuery):
|
|
159
165
|
# Signal that document download is complete
|
160
166
|
self.download_in_progress.clear()
|
161
167
|
|
162
|
-
async def stream(self, cik=None, submission_type=None, filing_date=None):
|
163
|
-
"""
|
168
|
+
async def stream(self, cik=None, submission_type=None, filing_date=None, location=None, name=None):
|
169
|
+
"""
|
170
|
+
Main method to stream EFTS results and download documents
|
171
|
+
|
172
|
+
Parameters:
|
173
|
+
cik (str or list): Central Index Key(s) for the company
|
174
|
+
submission_type (str or list): Filing form type(s) to filter by
|
175
|
+
filing_date (str, tuple, or list): Date or date range to filter by
|
176
|
+
location (str): Location code to filter by (e.g., 'CA' for California)
|
177
|
+
name (str): Company name to search for (alternative to providing CIK)
|
178
|
+
|
179
|
+
Returns:
|
180
|
+
list: List of all EFTS hits processed
|
181
|
+
"""
|
164
182
|
# Create document worker tasks
|
165
183
|
self.document_workers = [
|
166
184
|
asyncio.create_task(self._document_download_worker())
|
@@ -173,11 +191,12 @@ class Streamer(EFTSQuery):
|
|
173
191
|
self.skipped_documents = 0
|
174
192
|
|
175
193
|
# Run the main query with our document download callback
|
176
|
-
results = await self.query(cik, submission_type, filing_date, self.document_download_callback)
|
194
|
+
results = await self.query(cik, submission_type, filing_date, location, self.document_download_callback, name)
|
177
195
|
|
178
196
|
# Make sure all document downloads are complete
|
179
197
|
if self.download_in_progress.is_set():
|
180
|
-
|
198
|
+
if not self.quiet:
|
199
|
+
print("Waiting for remaining document downloads to complete...")
|
181
200
|
await self.document_queue.join()
|
182
201
|
|
183
202
|
# Clean up document workers
|
@@ -190,14 +209,17 @@ class Streamer(EFTSQuery):
|
|
190
209
|
if self.document_pbar:
|
191
210
|
self.document_pbar.close()
|
192
211
|
self.document_pbar = None # Set to None to prevent reuse
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
212
|
+
|
213
|
+
if not self.quiet:
|
214
|
+
print(f"\n--- Streaming complete: {len(results)} EFTS results processed ---")
|
215
|
+
if self.accession_numbers is not None:
|
216
|
+
print(f"--- {self.documents_processed} documents downloaded, {self.skipped_documents} skipped due to accession number filter ---")
|
217
|
+
|
197
218
|
return results
|
198
219
|
|
199
|
-
def stream(cik=None, submission_type=None, filing_date=None,
|
200
|
-
|
220
|
+
def stream(cik=None, submission_type=None, filing_date=None, location=None,
|
221
|
+
requests_per_second=5.0, document_callback=None, accession_numbers=None,
|
222
|
+
quiet=False, name=None):
|
201
223
|
"""
|
202
224
|
Stream EFTS results and download documents into memory.
|
203
225
|
|
@@ -205,15 +227,28 @@ def stream(cik=None, submission_type=None, filing_date=None,
|
|
205
227
|
- cik: CIK number(s) to query for
|
206
228
|
- submission_type: Filing type(s) to query for
|
207
229
|
- filing_date: Date or date range to query for
|
230
|
+
- location: Location code to filter by (e.g., 'CA' for California)
|
208
231
|
- requests_per_second: Rate limit for SEC requests (combined EFTS and document downloads)
|
209
232
|
- document_callback: Callback function that receives (hit, content, cik, accno, url)
|
210
233
|
- accession_numbers: Optional list of accession numbers to filter by
|
234
|
+
- quiet: Whether to suppress progress output
|
235
|
+
- name: Company name to search for (alternative to providing CIK)
|
211
236
|
|
212
237
|
Returns:
|
213
238
|
- List of all EFTS hits processed
|
239
|
+
|
240
|
+
Example:
|
241
|
+
To search by company name:
|
242
|
+
results = stream(name="Tesla", submission_type="10-K")
|
243
|
+
|
244
|
+
To search by CIK:
|
245
|
+
results = stream(cik="1318605", submission_type="10-K")
|
246
|
+
|
247
|
+
To search with location filter:
|
248
|
+
results = stream(name="Tesla", location="CA", submission_type="10-K")
|
214
249
|
"""
|
215
|
-
|
216
|
-
#
|
250
|
+
|
251
|
+
# Check if acc no is empty list
|
217
252
|
if accession_numbers == []:
|
218
253
|
raise ValueError("Applied filter resulted in empty accession numbers list")
|
219
254
|
|
@@ -221,8 +256,9 @@ def stream(cik=None, submission_type=None, filing_date=None,
|
|
221
256
|
streamer = Streamer(
|
222
257
|
requests_per_second=requests_per_second,
|
223
258
|
document_callback=document_callback,
|
224
|
-
accession_numbers=accession_numbers
|
259
|
+
accession_numbers=accession_numbers,
|
260
|
+
quiet=quiet
|
225
261
|
)
|
226
|
-
return await streamer.stream(cik, submission_type, filing_date)
|
262
|
+
return await streamer.stream(cik, submission_type, filing_date, location, name)
|
227
263
|
|
228
264
|
return asyncio.run(run_stream())
|
@@ -13,9 +13,9 @@ class TextSearchEFTSQuery(EFTSQuery):
|
|
13
13
|
super().__init__(requests_per_second=requests_per_second, quiet=quiet)
|
14
14
|
self.text_query = text_query
|
15
15
|
|
16
|
-
def _prepare_params(self, cik=None, submission_type=None, filing_date=None):
|
16
|
+
def _prepare_params(self, cik=None, submission_type=None, filing_date=None, location=None):
|
17
17
|
# Get base parameters from parent class
|
18
|
-
params = super()._prepare_params(cik, submission_type, filing_date)
|
18
|
+
params = super()._prepare_params(cik, submission_type, filing_date, location)
|
19
19
|
|
20
20
|
# Add text query parameter
|
21
21
|
params['q'] = self.text_query
|
@@ -46,7 +46,8 @@ async def extract_accession_numbers(hits):
|
|
46
46
|
accession_numbers.append(acc_no)
|
47
47
|
return accession_numbers
|
48
48
|
|
49
|
-
def query(text_query, cik=None, submission_type=None, filing_date=None,
|
49
|
+
def query(text_query, cik=None, submission_type=None, filing_date=None, location=None,
|
50
|
+
name=None, requests_per_second=5.0, quiet=False):
|
50
51
|
"""
|
51
52
|
Search SEC filings for text and return the full search results.
|
52
53
|
|
@@ -63,6 +64,10 @@ def query(text_query, cik=None, submission_type=None, filing_date=None, requests
|
|
63
64
|
filing_date : str, tuple, list, optional
|
64
65
|
Date or date range to filter by. Can be a single date string ('YYYY-MM-DD'),
|
65
66
|
a tuple of (start_date, end_date), or a list of dates.
|
67
|
+
location : str, optional
|
68
|
+
Location code to filter by (e.g., 'CA' for California).
|
69
|
+
name : str, optional
|
70
|
+
Company name to search for (alternative to providing CIK).
|
66
71
|
requests_per_second : float, optional
|
67
72
|
Maximum number of requests per second to make to the SEC API.
|
68
73
|
Default is 5.0.
|
@@ -73,14 +78,23 @@ def query(text_query, cik=None, submission_type=None, filing_date=None, requests
|
|
73
78
|
--------
|
74
79
|
list
|
75
80
|
Complete search results with all hit data.
|
81
|
+
|
82
|
+
Examples:
|
83
|
+
---------
|
84
|
+
# Search for 'climate risk' in Tesla's 10-K filings using company name
|
85
|
+
results = query('"climate risk"', name='Tesla', submission_type='10-K')
|
86
|
+
|
87
|
+
# Search for 'pandemic' in California companies' filings
|
88
|
+
results = query('pandemic', location='CA', submission_type='8-K')
|
76
89
|
"""
|
77
90
|
async def run_query():
|
78
91
|
query = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second, quiet=quiet)
|
79
|
-
return await query.query(cik, submission_type, filing_date)
|
92
|
+
return await query.query(cik, submission_type, filing_date, location, None, name)
|
80
93
|
|
81
94
|
return asyncio.run(run_query())
|
82
95
|
|
83
|
-
def filter_text(text_query, cik=None, submission_type=None, filing_date=None,
|
96
|
+
def filter_text(text_query, cik=None, submission_type=None, filing_date=None, location=None,
|
97
|
+
name=None, requests_per_second=5.0, quiet=False):
|
84
98
|
"""
|
85
99
|
Search SEC filings for text and return matching accession numbers.
|
86
100
|
|
@@ -97,6 +111,10 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, re
|
|
97
111
|
filing_date : str, tuple, list, optional
|
98
112
|
Date or date range to filter by. Can be a single date string ('YYYY-MM-DD'),
|
99
113
|
a tuple of (start_date, end_date), or a list of dates.
|
114
|
+
location : str, optional
|
115
|
+
Location code to filter by (e.g., 'CA' for California).
|
116
|
+
name : str, optional
|
117
|
+
Company name to search for (alternative to providing CIK).
|
100
118
|
requests_per_second : float, optional
|
101
119
|
Maximum number of requests per second to make to the SEC API.
|
102
120
|
Default is 5.0.
|
@@ -107,6 +125,15 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, re
|
|
107
125
|
--------
|
108
126
|
list
|
109
127
|
List of accession numbers (as strings) for filings that match the text query.
|
128
|
+
|
129
|
+
Examples:
|
130
|
+
---------
|
131
|
+
# Get accession numbers of Apple filings mentioning 'supply chain'
|
132
|
+
acc_numbers = filter_text('"supply chain"', name='Apple')
|
133
|
+
|
134
|
+
# Use the accession numbers as a filter in another API
|
135
|
+
from .downloader import download
|
136
|
+
download(name='Apple', accession_numbers=acc_numbers)
|
110
137
|
"""
|
111
138
|
async def run_query():
|
112
139
|
query_obj = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second, quiet=quiet)
|
@@ -119,7 +146,7 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, re
|
|
119
146
|
all_acc_nos.extend(acc_nos)
|
120
147
|
|
121
148
|
# Run the query with our callback
|
122
|
-
await query_obj.query(cik, submission_type, filing_date, collect_acc_nos)
|
149
|
+
await query_obj.query(cik, submission_type, filing_date, location, collect_acc_nos, name)
|
123
150
|
|
124
151
|
return all_acc_nos
|
125
152
|
|
@@ -0,0 +1,191 @@
|
|
1
|
+
import os
|
2
|
+
import requests
|
3
|
+
import json
|
4
|
+
|
5
|
+
def get_information_table(
    # Required parameters
    table_type="INFORMATION_TABLE",

    # Optional filtering parameters
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,

    # API key handling
    api_key=None,

    # Additional options
    print_cost=True,
    verbose=False
):
    """
    Query the SEC BigQuery API for 13F-HR information table data.

    Parameters:
    -----------
    table_type : str
        The table to query (default is "INFORMATION_TABLE")
    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    name_of_issuer, title_of_class, etc. : Various filters that can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields. For filing_date,
          a None bound is sent as an empty (open) bound.

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If the API key is missing or rejected (HTTP 401), or if a tuple
        filter does not have exactly two elements
    Exception
        For API errors or other request failures
    """

    # 1. Handle API key
    if api_key is None:
        api_key = os.getenv('DATAMULE_API_KEY')

    if not api_key:
        raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key parameter")

    # 2. Build query parameters
    params = {'table_type': table_type}

    # Add columns parameter if provided
    if columns:
        if isinstance(columns, list):
            params['columns'] = ','.join(columns)
        else:
            params['columns'] = columns

    # (python name, API name, supplied value). The values are captured here,
    # explicitly, instead of being looked up through locals() inside the loop:
    # a loop variable named `value` would overwrite the `value` filter argument
    # before it is read, silently dropping or replacing that filter.
    filters = (
        ('name_of_issuer', 'nameOfIssuer', name_of_issuer),
        ('title_of_class', 'titleOfClass', title_of_class),
        ('cusip', 'cusip', cusip),
        ('value', 'value', value),
        ('ssh_prnamt', 'sshPrnamt', ssh_prnamt),
        ('ssh_prnamt_type', 'sshPrnamtType', ssh_prnamt_type),
        ('investment_discretion', 'investmentDiscretion', investment_discretion),
        ('voting_authority_sole', 'votingAuthoritySole', voting_authority_sole),
        ('voting_authority_shared', 'votingAuthorityShared', voting_authority_shared),
        ('voting_authority_none', 'votingAuthorityNone', voting_authority_none),
        ('reporting_owner_cik', 'reportingOwnerCIK', reporting_owner_cik),
        ('put_call', 'putCall', put_call),
        ('other_manager', 'otherManager', other_manager),
        ('figi', 'figi', figi),
        ('accession', 'accession', accession),
        ('filing_date', 'filingDate', filing_date),
    )

    for param_name, api_param_name, filter_value in filters:
        if filter_value is None:
            continue

        if isinstance(filter_value, list):
            # List filter: "[a,b,c]"
            params[api_param_name] = f"[{','.join(str(v) for v in filter_value)}]"
        elif isinstance(filter_value, tuple):
            # Range filter: "(min,max)"
            if len(filter_value) != 2:
                raise ValueError(f"Range filter for {param_name} must be a tuple of (min, max)")

            min_val, max_val = filter_value
            if param_name == 'filing_date':
                # Dates need to be in quotes within the parentheses;
                # a None bound is sent as an empty string.
                min_val = '' if min_val is None else f"'{min_val}'"
                max_val = '' if max_val is None else f"'{max_val}'"
            # NOTE(review): for non-date ranges a None bound is still rendered
            # as the text "None"; confirm what the API expects for open bounds.
            params[api_param_name] = f"({min_val},{max_val})"
        else:
            # Exact match
            params[api_param_name] = filter_value

    # 3. Make the API request
    BASE_URL = "https://sec-bq.jgfriedman99.workers.dev/"

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Accept': 'application/json'
    }

    if verbose:
        print(f"Making request to {BASE_URL} with params: {params}")

    # Pre-bind so the error handler can tell "no response at all" (connection
    # error, DNS failure, ...) apart from "got a response with an error status".
    response = None
    try:
        response = requests.get(BASE_URL, params=params, headers=headers)

        # Check for HTTP errors
        response.raise_for_status()

        # Parse response
        result = response.json()

        # Check for API-level errors
        if not result.get('success', False):
            error_msg = result.get('error', 'Unknown API error')
            raise Exception(f"API Error: {error_msg}")

        # Extract metadata for cost reporting
        metadata = result.get('metadata', {})

        # 4. Print cost information if requested
        if print_cost and 'billing' in metadata:
            billing = metadata['billing']
            query_info = metadata.get('query_info', {})

            print("\n=== Query Cost Information ===")
            print(f"Bytes Processed: {query_info.get('bytes_processed', 0):,} bytes")
            print(f"Data Processed: {billing.get('tb_processed', 0):.10f} TB")
            print(f"Cost Rate: ${billing.get('cost_per_tb', 0):.2f}/TB")
            print(f"Query Cost: ${billing.get('total_charge', 0):.6f}")
            print(f"Remaining Balance: ${billing.get('remaining_balance', 0):.2f}")
            print(f"Execution Time: {query_info.get('execution_time_ms', 0)} ms")
            print(f"Cache Hit: {query_info.get('cache_hit', False)}")
            print("==============================\n")

        # 5. Return data
        return result.get('data', [])

    except requests.exceptions.RequestException as e:
        if response is not None and response.status_code == 401:
            raise ValueError("Authentication failed: Invalid API key") from e
        raise Exception(f"Request failed: {str(e)}") from e
|