datamule 1.1.7__py3-none-any.whl → 1.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/document.py +237 -28
- datamule/portfolio.py +2 -4
- datamule/sec/submissions/monitor.py +5 -1
- datamule/seclibrary/bq.py +191 -0
- datamule/sheet.py +219 -12
- datamule/submission.py +2 -1
- {datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/METADATA +1 -1
- {datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/RECORD +10 -9
- {datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/WHEEL +0 -0
- {datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/top_level.txt +0 -0
datamule/document.py
CHANGED
@@ -11,8 +11,6 @@ class Document:
     def __init__(self, type, content, extension):
 
         self.type = type
-        # we will remove this later #
-        # make sure extension is in lower case
         extension = extension.lower()
         self.content = content
         if extension == '.txt':
@@ -94,6 +92,9 @@ class Document:
 
     # Note: this method will be heavily modified in the future
     def parse(self):
+        # check if we have already parsed the content
+        if self.data:
+            return self.data
         mapping_dict = None
 
         if self.extension == '.xml':
@@ -127,34 +128,243 @@ class Document:
         with open(output_filename, 'w',encoding='utf-8') as f:
             json.dump(self.data, f, indent=2)
 
-    def
+    def to_tabular(self, accession_number=None):
         self.parse()
 
-
-
-
+        if self.type == "INFORMATION TABLE":
+            info_table = self.data['informationTable']['infoTable']
+            if isinstance(info_table, dict):
+                info_table = [info_table]
+
+            flattened = self._flatten_dict(info_table)
 
-
+            # Original field names
+            original_columns = [
+                "nameOfIssuer", "titleOfClass", "cusip", "value",
+                "shrsOrPrnAmt_sshPrnamt", "shrsOrPrnAmt_sshPrnamtType",
+                "investmentDiscretion", "votingAuthority_Sole",
+                "votingAuthority_Shared", "votingAuthority_None",
+                "reportingOwnerCIK", "putCall", "otherManager", 'figi'
+            ]
+
+            # Define mapping from original to camelCase field names
+            field_mapping = {
+                "shrsOrPrnAmt_sshPrnamt": "sshPrnamt",
+                "shrsOrPrnAmt_sshPrnamtType": "sshPrnamtType",
+                "votingAuthority_Sole": "votingAuthoritySole",
+                "votingAuthority_Shared": "votingAuthorityShared",
+                "votingAuthority_None": "votingAuthorityNone"
+            }
+
+            # Create the new expected columns list with mapped field names
+            expected_columns = []
+            for column in original_columns:
+                if column in field_mapping:
+                    expected_columns.append(field_mapping[column])
+                else:
+                    expected_columns.append(column)
+
+            # Process each item in the flattened data
+            for item in flattened:
+                # Remove newlines from items
+                for key in item:
+                    if isinstance(item[key], str):
+                        item[key] = re.sub(r'\s+', ' ', item[key])
+
+                new_item = {}
+                for key, value in item.items():
+                    # Apply the mapping if the key is in our mapping dictionary
+                    if key in field_mapping:
+                        new_item[field_mapping[key]] = value
+                    else:
+                        new_item[key] = value
+
+                # Update the original item with the new keys
+                item.clear()
+                item.update(new_item)
+
+                # Ensure all expected columns exist
+                for column in expected_columns:
+                    if column not in item:
+                        item[column] = None
+
+                item['accession'] = accession_number
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Add this block to reorder the items to match the expected order
+            ordered_columns = ["nameOfIssuer", "titleOfClass", "cusip", "value", "sshPrnamt", "sshPrnamtType",
+                               "investmentDiscretion", "votingAuthoritySole", "votingAuthorityShared", "votingAuthorityNone",
+                               "reportingOwnerCIK", "putCall", "otherManager", "figi"]
+            if accession_number is not None:
+                ordered_columns.append("accession")
+
+            ordered_data = []
+            for item in flattened:
+                ordered_item = {column: item.get(column, None) for column in ordered_columns}
+                ordered_data.append(ordered_item)
+
+            return ordered_data
+
+        elif self.type in ["3", "4", "5"]:
+            # Master mapping dictionary - includes all possible fields
+            # The order of this dictionary will determine the output column order
+            master_mapping_dict = {
+                # Flag fields (will be set programmatically)
+                "isDerivative": "isDerivative",
+                "isNonDerivative": "isNonDerivative",
+
+                # Common fields across all types
+                "securityTitle_value": "securityTitle",
+                "transactionDate_value": "transactionDate",
+                "documentType": "documentType",
+                "transactionCoding_transactionFormType": "documentType",
+                "transactionCoding_transactionCode": "transactionCode",
+                "transactionAmounts_transactionAcquiredDisposedCode_value": "transactionCode",
+                "transactionCoding_equitySwapInvolved": "equitySwapInvolved",
+                "transactionTimeliness_value": "transactionTimeliness",
+                "transactionAmounts_transactionShares_value": "transactionShares",
+                "transactionAmounts_transactionPricePerShare_value": "transactionPricePerShare",
+                "postTransactionAmounts_sharesOwnedFollowingTransaction_value": "sharesOwnedFollowingTransaction",
+                "heldFollowingReport": "sharesOwnedFollowingTransaction",  # Form 3
+                "ownershipNature_directOrIndirectOwnership_value": "ownershipType",
+                "ownershipNature_natureOfOwnership_value": "ownershipType",
+                "deemedExecutionDate": "deemedExecutionDate",
+                "deemedExecutionDate_value": "deemedExecutionDate",
+
+                # Derivative-specific fields
+                "conversionOrExercisePrice_value": "conversionOrExercisePrice",
+                "exerciseDate_value": "exerciseDate",
+                "expirationDate_value": "expirationDate",
+                "underlyingSecurity_underlyingSecurityTitle_value": "underlyingSecurityTitle",
+                "underlyingSecurity_underlyingSecurityShares_value": "underlyingSecurityShares",
+                "underlyingSecurity_underlyingSecurityValue_value": "underlyingSecurityValue",
+
+                # Footnote fields
+                "transactionPricePerShareFootnote": "transactionPricePerShareFootnote",
+                "transactionAmounts_transactionPricePerShare_footnote": "transactionPricePerShareFootnote",
+                "transactionCodeFootnote": "transactionCodeFootnote",
+                "transactionAmounts_transactionAcquiredDisposedCode_footnote": "transactionCodeFootnote",
+                "transactionCoding_footnote": "transactionCodeFootnote",
+                "natureOfOwnershipFootnote": "natureOfOwnershipFootnote",
+                "ownershipNature_natureOfOwnership_footnote": "natureOfOwnershipFootnote",
+                "sharesOwnedFollowingTransactionFootnote": "sharesOwnedFollowingTransactionFootnote",
+                "postTransactionAmounts_sharesOwnedFollowingTransaction_footnote": "sharesOwnedFollowingTransactionFootnote",
+                "ownershipTypeFootnote": "ownershipTypeFootnote",
+                "ownershipNature_directOrIndirectOwnership_footnote": "ownershipTypeFootnote",
+                "securityTitleFootnote": "securityTitleFootnote",
+                "securityTitle_footnote": "securityTitleFootnote",
+                "transactionSharesFootnote": "transactionSharesFootnote",
+                "transactionAmounts_transactionShares_footnote": "transactionSharesFootnote",
+                "transactionDateFootnote": "transactionDateFootnote",
+                "transactionDate_footnote": "transactionDateFootnote",
+                "conversionOrExercisePriceFootnote": "conversionOrExercisePriceFootnote",
+                "conversionOrExercisePrice_footnote": "conversionOrExercisePriceFootnote",
+                "exerciseDateFootnote": "exerciseDateFootnote",
+                "exerciseDate_footnote": "exerciseDateFootnote",
+                "expirationDateFootnote": "expirationDateFootnote",
+                "expirationDate_footnote": "expirationDateFootnote",
+                "underlyingSecurityTitleFootnote": "underlyingSecurityTitleFootnote",
+                "underlyingSecurity_underlyingSecurityTitle_footnote": "underlyingSecurityTitleFootnote",
+                "underlyingSecuritySharesFootnote": "underlyingSecuritySharesFootnote",
+                "underlyingSecurity_underlyingSecurityShares_footnote": "underlyingSecuritySharesFootnote",
+                "underlyingSecurityValueFootnote": "underlyingSecurityValueFootnote",
+                "underlyingSecurity_underlyingSecurityValue_footnote": "underlyingSecurityValueFootnote"
+            }
+
+            # Get the unique target column names in order from the mapping dictionary
+            output_columns = []
+            for _, target_key in master_mapping_dict.items():
+                if target_key not in output_columns:
+                    output_columns.append(target_key)
+
+            # Process function that handles any table type
+            def process_table(table_data, is_derivative):
+                if isinstance(table_data, dict):
+                    table_data = [table_data]
+
+                flattened = self._flatten_dict(table_data)
+
+                # Apply mapping to the flattened data and ensure all expected columns are present
+                mapped_data = []
+                for item in flattened:
+                    mapped_item = {}
+                    # First, apply the mapping
+                    for old_key, value in item.items():
+                        target_key = master_mapping_dict.get(old_key, old_key)
+                        mapped_item[target_key] = value
+
+                    # Set the derivative/non-derivative flags
+                    mapped_item["isDerivative"] = 1 if is_derivative else 0
+                    mapped_item["isNonDerivative"] = 0 if is_derivative else 1
+
+                    # Create a new ordered dictionary with all columns
+                    ordered_item = {}
+                    for column in output_columns:
+                        ordered_item[column] = mapped_item.get(column, None)
+
+                    # Add accession_number if available
+                    if accession_number is not None:
+                        ordered_item['accession_number'] = accession_number
+
+                    mapped_data.append(ordered_item)
+
+                return mapped_data
+
+            # Results container
+            all_results = []
+
+            # Process non-derivative transactions if they exist
+            if 'nonDerivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['nonDerivativeTable'] is not None:
+                if 'nonDerivativeTransaction' in self.data['ownershipDocument']['nonDerivativeTable']:
+                    non_deriv_trans = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeTransaction']
+                    non_deriv_results = process_table(non_deriv_trans, is_derivative=False)
+                    all_results.extend(non_deriv_results)
+
+                # Process non-derivative holdings (for Form 3)
+                if 'nonDerivativeHolding' in self.data['ownershipDocument']['nonDerivativeTable']:
+                    non_deriv_hold = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeHolding']
+                    non_deriv_hold_results = process_table(non_deriv_hold, is_derivative=False)
+                    all_results.extend(non_deriv_hold_results)
+
+            # Process derivative transactions if they exist
+            if 'derivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['derivativeTable'] is not None:
+                if 'derivativeTransaction' in self.data['ownershipDocument']['derivativeTable']:
+                    deriv_trans = self.data['ownershipDocument']['derivativeTable']['derivativeTransaction']
+                    deriv_results = process_table(deriv_trans, is_derivative=True)
+                    all_results.extend(deriv_results)
+
+                # Process derivative holdings (for Form 3)
+                if 'derivativeHolding' in self.data['ownershipDocument']['derivativeTable']:
+                    deriv_hold = self.data['ownershipDocument']['derivativeTable']['derivativeHolding']
+                    deriv_hold_results = process_table(deriv_hold, is_derivative=True)
+                    all_results.extend(deriv_hold_results)
 
-
-
+            # check if any rows not in the mapping dict, raise error if so
+            for item in all_results:
+                for key in item.keys():
+                    if key not in master_mapping_dict.values() and key != 'accession_number':
+                        raise ValueError(f"Key '{key}' not found in mapping dictionary")
+
+
+            return all_results
+        else:
+            raise ValueError("sorry, rejigging conversion to tabular format")
+
+    def write_csv(self, output_filename, accession_number=None):
+
+        data = self.to_tabular(accession_number)
+
+        if not data:
+
+            return
+
+        fieldnames = data[0].keys()
+
+        with open(output_filename, 'w', newline='') as csvfile:
+            writer = csv.DictWriter(csvfile,fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
+            writer.writeheader()
+            writer.writerows(data)
+
+
     def _document_to_section_text(self, document_data, parent_key=''):
         items = []
 
@@ -188,7 +398,7 @@ class Document:
     # we'll modify this for every dict
     def _flatten_dict(self, d, parent_key=''):
         items = {}
-
+
         if isinstance(d, list):
             return [self._flatten_dict(item) for item in d]
 
@@ -204,8 +414,7 @@ class Document:
 
     # this will all have to be changed. default will be to flatten everything
     def __iter__(self):
-
-        self.parse()
+        self.parse()
 
         # Let's remove XML iterable for now
 
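Taken together, the document.py changes cache parse() results and add two new methods, to_tabular() and write_csv(), covering 13F information tables and Forms 3/4/5. A minimal usage sketch, not part of the diff; the package-level import, the file name, and the accession number are illustrative assumptions:

    from datamule import Document  # assumes Document is re-exported at package level

    # Hypothetical Form 4 primary document obtained separately
    with open('form4.xml') as f:
        doc = Document(type='4', content=f.read(), extension='.xml')

    # One dict per transaction/holding row, columns ordered per master_mapping_dict
    rows = doc.to_tabular(accession_number='0000000000-25-000000')

    # Same data written as quoted CSV
    doc.write_csv('form4.csv', accession_number='0000000000-25-000000')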
datamule/portfolio.py
CHANGED
@@ -119,7 +119,7 @@ class Portfolio:
         # First query, just set the accession numbers
         self.accession_numbers = new_accession_numbers
 
-    def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None, **kwargs):
+    def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None, requests_per_second=5, **kwargs):
         if provider is None:
             config = Config()
             provider = config.get_default_source()
@@ -142,7 +142,7 @@ class Portfolio:
             cik=cik,
             submission_type=submission_type,
             filing_date=filing_date,
-            requests_per_second=
+            requests_per_second=requests_per_second,
             accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None
         )
 
@@ -164,8 +164,6 @@ class Portfolio:
         )
 
 
-
-
     def __iter__(self):
         if not self.submissions_loaded:
             self._load_submissions()
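The portfolio.py change replaces a fixed rate limit with a requests_per_second parameter (default 5) threaded through download_submissions(). A sketch of how a caller might throttle SEC traffic; the Portfolio constructor argument is an assumption beyond this diff:

    from datamule import Portfolio

    portfolio = Portfolio('filings')  # hypothetical output directory
    portfolio.download_submissions(
        ticker='AAPL',
        submission_type='10-K',
        requests_per_second=2,  # new in 1.1.8; previously not configurable here
    )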
datamule/sec/submissions/monitor.py
CHANGED
@@ -20,12 +20,16 @@ async def _process_efts_hits(hits, collected_accession_numbers, data_callback=No
        submission_type = source.get('form')
        ciks = source.get('ciks', [])
        ciks = [str(int(cik)) for cik in ciks]
+
+        filing_date = source.get('file_date')
 
        # Create standardized filing record
        filing = {
            'accession_number': accession_number,
            'submission_type': submission_type,
-            'ciks': ciks
+            'ciks': ciks,
+            'filing_date': filing_date,
+
        }
 
        processed_hits.append(filing)
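With this change, each standardized filing record carries the EFTS file_date, so data_callback consumers receive records shaped roughly like this (values illustrative, not from a real filing):

    filing = {
        'accession_number': '0000320193-25-000001',
        'submission_type': '10-K',
        'ciks': ['320193'],
        'filing_date': '2025-01-15',
    }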
datamule/seclibrary/bq.py
ADDED
@@ -0,0 +1,191 @@
+import os
+import requests
+import json
+
+def get_information_table(
+    # Required parameters
+    table_type="INFORMATION_TABLE",
+
+    # Optional filtering parameters
+    columns=None,
+    name_of_issuer=None,
+    title_of_class=None,
+    cusip=None,
+    value=None,
+    ssh_prnamt=None,
+    ssh_prnamt_type=None,
+    investment_discretion=None,
+    voting_authority_sole=None,
+    voting_authority_shared=None,
+    voting_authority_none=None,
+    reporting_owner_cik=None,
+    put_call=None,
+    other_manager=None,
+    figi=None,
+    accession=None,
+    filing_date=None,
+
+    # API key handling
+    api_key=None,
+
+    # Additional options
+    print_cost=True,
+    verbose=False
+):
+    """
+    Query the SEC BigQuery API for 13F-HR information table data.
+
+    Parameters:
+    -----------
+    table_type : str
+        The table to query (default is "INFORMATION_TABLE")
+    columns : List[str], optional
+        Specific columns to return. If None, all columns are returned.
+
+    # Filter parameters
+    name_of_issuer, title_of_class, etc. : Various filters that can be:
+        - str: Exact match
+        - List[str]: Match any in list
+        - tuple: (min, max) range for numeric/date fields
+
+    api_key : str, optional
+        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+    print_cost : bool
+        Whether to print the query cost information
+    verbose : bool
+        Whether to print additional information about the query
+
+    Returns:
+    --------
+    List[Dict]
+        A list of dictionaries containing the query results
+
+    Raises:
+    -------
+    ValueError
+        If API key is missing or invalid
+    Exception
+        For API errors or other issues
+    """
+
+    # 1. Handle API key
+    if api_key is None:
+        api_key = os.getenv('DATAMULE_API_KEY')
+
+    if not api_key:
+        raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key parameter")
+
+    # 2. Build query parameters
+    params = {'table_type': table_type}
+
+    # Add columns parameter if provided
+    if columns:
+        if isinstance(columns, list):
+            params['columns'] = ','.join(columns)
+        else:
+            params['columns'] = columns
+
+    # Map Python parameter names to API parameter names
+    param_mapping = {
+        'name_of_issuer': 'nameOfIssuer',
+        'title_of_class': 'titleOfClass',
+        'cusip': 'cusip',
+        'value': 'value',
+        'ssh_prnamt': 'sshPrnamt',
+        'ssh_prnamt_type': 'sshPrnamtType',
+        'investment_discretion': 'investmentDiscretion',
+        'voting_authority_sole': 'votingAuthoritySole',
+        'voting_authority_shared': 'votingAuthorityShared',
+        'voting_authority_none': 'votingAuthorityNone',
+        'reporting_owner_cik': 'reportingOwnerCIK',
+        'put_call': 'putCall',
+        'other_manager': 'otherManager',
+        'figi': 'figi',
+        'accession': 'accession',
+        'filing_date': 'filingDate'
+    }
+
+    # Process all possible filter parameters
+    for param_name, api_param_name in param_mapping.items():
+        value = locals()[param_name]
+        if value is not None:
+            # Handle different filter types
+            if isinstance(value, list):
+                # List filter
+                params[api_param_name] = f"[{','.join(str(v) for v in value)}]"
+            elif isinstance(value, tuple):
+                # Range filter
+                if len(value) == 2:
+                    min_val, max_val = value
+                    # Handle date range specially
+                    if param_name == 'filing_date':
+                        # Dates need to be in quotes within the parentheses
+                        if min_val is None:
+                            min_val = ''
+                        else:
+                            min_val = f"'{min_val}'"
+
+                        if max_val is None:
+                            max_val = ''
+                        else:
+                            max_val = f"'{max_val}'"
+
+                    range_str = f"({min_val},{max_val})"
+                    params[api_param_name] = range_str
+                else:
+                    raise ValueError(f"Range filter for {param_name} must be a tuple of (min, max)")
+            else:
+                # Exact match
+                params[api_param_name] = value
+
+    # 3. Make the API request
+    BASE_URL = "https://sec-bq.jgfriedman99.workers.dev/"
+
+    headers = {
+        'Authorization': f'Bearer {api_key}',
+        'Accept': 'application/json'
+    }
+
+    if verbose:
+        print(f"Making request to {BASE_URL} with params: {params}")
+
+    try:
+        response = requests.get(BASE_URL, params=params, headers=headers)
+
+        # Check for HTTP errors
+        response.raise_for_status()
+
+        # Parse response
+        result = response.json()
+
+        # Check for API-level errors
+        if not result.get('success', False):
+            error_msg = result.get('error', 'Unknown API error')
+            raise Exception(f"API Error: {error_msg}")
+
+        # Extract metadata for cost reporting
+        metadata = result.get('metadata', {})
+
+        # 5. Print cost information if requested
+        if print_cost and 'billing' in metadata:
+            billing = metadata['billing']
+            query_info = metadata.get('query_info', {})
+
+            print("\n=== Query Cost Information ===")
+            print(f"Bytes Processed: {query_info.get('bytes_processed', 0):,} bytes")
+            print(f"Data Processed: {billing.get('tb_processed', 0):.10f} TB")
+            print(f"Cost Rate: ${billing.get('cost_per_tb', 0):.2f}/TB")
+            print(f"Query Cost: ${billing.get('total_charge', 0):.6f}")
+            print(f"Remaining Balance: ${billing.get('remaining_balance', 0):.2f}")
+            print(f"Execution Time: {query_info.get('execution_time_ms', 0)} ms")
+            print(f"Cache Hit: {query_info.get('cache_hit', False)}")
+            print("==============================\n")
+
+        # 6. Return data
+        return result.get('data', [])
+
+    except requests.exceptions.RequestException as e:
+        if response.status_code == 401:
+            raise ValueError("Authentication failed: Invalid API key")
+        else:
+            raise Exception(f"Request failed: {str(e)}")
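The new bq.py module accepts three filter styles per field: exact match (str), any-of (list), and (min, max) ranges, with date ranges quoted client-side before being sent. A usage sketch; the CUSIPs, values, and dates are placeholders:

    from datamule.seclibrary.bq import get_information_table

    rows = get_information_table(
        cusip=['037833100', '594918104'],           # list: match any
        value=(1000000, 50000000),                  # tuple: numeric range
        filing_date=('2024-01-01', '2024-12-31'),   # tuple: date range, quoted by the client
        columns=['nameOfIssuer', 'cusip', 'value'],
        api_key=None,  # falls back to the DATAMULE_API_KEY environment variable
    )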
datamule/sheet.py
CHANGED
@@ -1,6 +1,9 @@
 from pathlib import Path
+import csv
+import os
 from .helper import _process_cik_and_metadata_filters, load_package_dataset
 from .sec.xbrl.downloadcompanyfacts import download_company_facts
+from .seclibrary.bq import get_information_table
 
 class Sheet:
     def __init__(self, path):
@@ -26,16 +29,220 @@ class Sheet:
         # Download facts for all CIKs in parallel
         download_company_facts(cik=cik_list, output_dir=self.path)
 
-    def
-
-
-
+    def get_information_table(
+        self,
+        # Required parameters
+        table_type="INFORMATION_TABLE",
+
+        # Optional filtering parameters
+        columns=None,
+        name_of_issuer=None,
+        title_of_class=None,
+        cusip=None,
+        value=None,
+        ssh_prnamt=None,
+        ssh_prnamt_type=None,
+        investment_discretion=None,
+        voting_authority_sole=None,
+        voting_authority_shared=None,
+        voting_authority_none=None,
+        reporting_owner_cik=None,
+        put_call=None,
+        other_manager=None,
+        figi=None,
+        accession=None,
+        filing_date=None,
+
+        # API key handling
+        api_key=None,
+
+        # Additional options
+        print_cost=True,
+        verbose=False
+    ):
+        """
+        Query the SEC BigQuery API for 13F-HR information table data.
+
+        Parameters:
+        -----------
+        table_type : str
+            The table to query (default is "INFORMATION_TABLE")
+        columns : List[str], optional
+            Specific columns to return. If None, all columns are returned.
+
+        # Filter parameters
+        name_of_issuer, title_of_class, etc. : Various filters that can be:
+            - str: Exact match
+            - List[str]: Match any in list
+            - tuple: (min, max) range for numeric/date fields
+
+        api_key : str, optional
+            SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+        print_cost : bool
+            Whether to print the query cost information
+        verbose : bool
+            Whether to print additional information about the query
+
+        Returns:
+        --------
+        List[Dict]
+            A list of dictionaries containing the query results
+
+        Raises:
+        -------
+        ValueError
+            If API key is missing or invalid
+        Exception
+            For API errors or other issues
+        """
+
+        return get_information_table(
+            table_type=table_type,
+            columns=columns,
+            name_of_issuer=name_of_issuer,
+            title_of_class=title_of_class,
+            cusip=cusip,
+            value=value,
+            ssh_prnamt=ssh_prnamt,
+            ssh_prnamt_type=ssh_prnamt_type,
+            investment_discretion=investment_discretion,
+            voting_authority_sole=voting_authority_sole,
+            voting_authority_shared=voting_authority_shared,
+            voting_authority_none=voting_authority_none,
+            reporting_owner_cik=reporting_owner_cik,
+            put_call=put_call,
+            other_manager=other_manager,
+            figi=figi,
+            accession=accession,
+            filing_date=filing_date,
+
+            # API key handling
+            api_key=api_key,
+
+            # Additional options
+            print_cost=print_cost,
+            verbose=verbose
+        )
 
-
-
-
-
-
-
-
-
+    def download_information_table(
+        self,
+        filepath,
+        # Required parameters
+        table_type="INFORMATION_TABLE",
+
+        # Optional filtering parameters
+        columns=None,
+        name_of_issuer=None,
+        title_of_class=None,
+        cusip=None,
+        value=None,
+        ssh_prnamt=None,
+        ssh_prnamt_type=None,
+        investment_discretion=None,
+        voting_authority_sole=None,
+        voting_authority_shared=None,
+        voting_authority_none=None,
+        reporting_owner_cik=None,
+        put_call=None,
+        other_manager=None,
+        figi=None,
+        accession=None,
+        filing_date=None,
+
+        # API key handling
+        api_key=None,
+
+        # Additional options
+        print_cost=True,
+        verbose=False
+    ):
+        """
+        Query the SEC BigQuery API for 13F-HR information table data and save to CSV.
+
+        Parameters:
+        -----------
+        filepath : str
+            Path where to save the CSV file. If relative, it will be relative to the Sheet's path.
+
+        table_type : str
+            The table to query (default is "INFORMATION_TABLE")
+        columns : List[str], optional
+            Specific columns to return. If None, all columns are returned.
+
+        # Filter parameters
+        name_of_issuer, title_of_class, etc. : Various filters that can be:
+            - str: Exact match
+            - List[str]: Match any in list
+            - tuple: (min, max) range for numeric/date fields
+
+        api_key : str, optional
+            SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+        print_cost : bool
+            Whether to print the query cost information
+        verbose : bool
+            Whether to print additional information about the query
+
+        Returns:
+        --------
+        List[Dict]
+            A list of dictionaries containing the query results
+
+        Raises:
+        -------
+        ValueError
+            If API key is missing or invalid
+        Exception
+            For API errors or other issues
+        """
+        # Get the data from the API
+        data = self.get_information_table(
+            table_type=table_type,
+            columns=columns,
+            name_of_issuer=name_of_issuer,
+            title_of_class=title_of_class,
+            cusip=cusip,
+            value=value,
+            ssh_prnamt=ssh_prnamt,
+            ssh_prnamt_type=ssh_prnamt_type,
+            investment_discretion=investment_discretion,
+            voting_authority_sole=voting_authority_sole,
+            voting_authority_shared=voting_authority_shared,
+            voting_authority_none=voting_authority_none,
+            reporting_owner_cik=reporting_owner_cik,
+            put_call=put_call,
+            other_manager=other_manager,
+            figi=figi,
+            accession=accession,
+            filing_date=filing_date,
+            api_key=api_key,
+            print_cost=print_cost,
+            verbose=verbose
+        )
+
+        # If no data returned, nothing to save
+        if not data:
+            if verbose:
+                print("No data returned from API. No file was created.")
+            return data
+
+        # Resolve filepath - if it's not absolute, make it relative to self.path
+        filepath_obj = Path(filepath)
+        if not filepath_obj.is_absolute():
+            filepath_obj = self.path / filepath_obj
+
+        # Create directory if it doesn't exist
+        os.makedirs(filepath_obj.parent, exist_ok=True)
+
+        # Get fieldnames from the first record
+        fieldnames = data[0].keys()
+
+        # Write to CSV
+        with open(filepath_obj, 'w', newline='') as csvfile:
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(data)
+
+        if verbose:
+            print(f"Saved {len(data)} records to {filepath_obj}")
+
+        return data
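Sheet.download_information_table() wraps the same query and writes the result to CSV, resolving relative paths against the Sheet's root. A sketch; the package-level import, CIK, and paths are illustrative:

    from datamule import Sheet  # assumes Sheet is re-exported at package level

    sheet = Sheet('data')
    sheet.download_information_table(
        'holdings/berkshire_13f.csv',              # created under data/holdings/
        reporting_owner_cik='1067983',             # illustrative CIK
        filing_date=('2024-01-01', '2024-12-31'),
    )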
datamule/submission.py
CHANGED
@@ -14,6 +14,7 @@ class Submission:
         if sgml_content is not None:
             self.path = None
             self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
+            self.documents = []
 
         for idx,doc in enumerate(self.metadata['documents']):
             type = doc.get('type')
@@ -23,7 +24,7 @@ class Submission:
                 continue
             filename = doc.get('filename')
             extension = Path(filename).suffix
-            self.documents
+            self.documents.append(Document(type=type, content=raw_documents[idx], extension=extension))
 
 
         if path is not None:
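The submission.py fix means the in-memory SGML path now actually populates self.documents; the 1.1.7 code referenced self.documents without initializing or appending to it. A sketch of the repaired flow; the SGML source file is a placeholder and the package-level import is an assumption:

    from datamule import Submission  # assumes Submission is re-exported at package level

    with open('submission.sgml') as f:
        sub = Submission(sgml_content=f.read())

    for document in sub.documents:  # empty (and broken) before this fix
        print(document.type, document.extension)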
{datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
 datamule/__init__.py,sha256=l6YlwT5EeRxPlCtO5Jd4I8l266rSRUJyfFe97cRtSCM,991
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
-datamule/document.py,sha256=
+datamule/document.py,sha256=qShyVKHQ1nSCNvSfrhAOMVXprOd1br1rFKLy52S9WnE,22007
 datamule/helper.py,sha256=xgOVnea-lUlQ5I-U0vYUp0VeKPNZehNhqjJvegA3lYE,3342
 datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
-datamule/portfolio.py,sha256=
-datamule/sheet.py,sha256=
-datamule/submission.py,sha256=
+datamule/portfolio.py,sha256=yWt5gYTjV7rJsLiPUmhc6Vmr3lfvfCR5MSpLQ_6Gdp4,7104
+datamule/sheet.py,sha256=QaArtx7LpT7bwyteelJV67C-lK0RjQbGS3ka7ftdi8w,7978
+datamule/submission.py,sha256=LI7Zr60YbE_tU-v2N09k2dGjfztSgplKZACT3eRUkFE,4463
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
 datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
@@ -18,7 +18,7 @@ datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,1
 datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/submissions/downloader.py,sha256=IB08W8-lQD5Bb0LgzrTN4Xi4HsCw24DybRLHqE1AUrU,3290
 datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
-datamule/sec/submissions/monitor.py,sha256=
+datamule/sec/submissions/monitor.py,sha256=Im2kgnUehhTgyY2Vq3uk07n4Vkj4PjII_SsRDi8ehAE,5384
 datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
 datamule/sec/submissions/textsearch.py,sha256=-a5yIrrxxtaK10IJeywFmXuJmSndYL9VKm4SC4I9JAs,5808
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,9 +27,10 @@ datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr
 datamule/sec/xbrl/streamcompanyfacts.py,sha256=WyJIwuy5mNMXWpx_IkhFzDMe9MOfQ-vNkWl_JzBzFmc,3323
 datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamule/seclibrary/bq.py,sha256=C6kafFXWtm-MUjf70H1wTtpwv1Rxpcbk-Kfy8fkBPfo,6469
 datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.1.
-datamule-1.1.
-datamule-1.1.
-datamule-1.1.
+datamule-1.1.8.dist-info/METADATA,sha256=8HRRMz6l928E5tuHXkPi1_Kf-8nfPSjWQnnfReSxdPM,512
+datamule-1.1.8.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+datamule-1.1.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.1.8.dist-info/RECORD,,
{datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/WHEEL
File without changes
{datamule-1.1.7.dist-info → datamule-1.1.8.dist-info}/top_level.txt
File without changes