datamule 1.1.7__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/document.py +231 -29
- datamule/portfolio.py +2 -4
- datamule/sec/submissions/monitor.py +5 -1
- datamule/seclibrary/bq.py +528 -0
- datamule/sheet.py +644 -13
- datamule/submission.py +2 -1
- {datamule-1.1.7.dist-info → datamule-1.2.0.dist-info}/METADATA +1 -1
- {datamule-1.1.7.dist-info → datamule-1.2.0.dist-info}/RECORD +10 -9
- {datamule-1.1.7.dist-info → datamule-1.2.0.dist-info}/WHEEL +1 -1
- {datamule-1.1.7.dist-info → datamule-1.2.0.dist-info}/top_level.txt +0 -0
datamule/sheet.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
from pathlib import Path
|
2
|
+
import csv
|
3
|
+
import os
|
2
4
|
from .helper import _process_cik_and_metadata_filters, load_package_dataset
|
3
5
|
from .sec.xbrl.downloadcompanyfacts import download_company_facts
|
6
|
+
from .seclibrary.bq import get_information_table, get_345, get_proxy_voting_record
|
4
7
|
|
5
8
|
class Sheet:
|
6
9
|
def __init__(self, path):
|
@@ -26,16 +29,644 @@ class Sheet:
|
|
26
29
|
# Download facts for all CIKs in parallel
|
27
30
|
download_company_facts(cik=cik_list, output_dir=self.path)
|
28
31
|
|
29
|
-
def
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
def get_information_table(
    self,
    # Filters (all optional)
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,

    # Credentials
    api_key=None,

    # Output controls
    print_cost=True,
    verbose=False
):
    """
    Fetch 13F-HR information table rows from the SEC BigQuery API.

    This method is a pass-through: every argument is forwarded unchanged,
    by keyword, to the ``get_information_table`` function imported from
    ``.seclibrary.bq``.

    Parameters:
    -----------
    columns : List[str], optional
        Columns to return. If None, all columns are returned.

    name_of_issuer, title_of_class, cusip, value, ssh_prnamt, ssh_prnamt_type,
    investment_discretion, voting_authority_sole, voting_authority_shared,
    voting_authority_none, reporting_owner_cik, put_call, other_manager, figi,
    accession, filing_date : optional filters, each of which can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Collect the arguments once, in signature order, then forward them all.
    query_args = {
        "columns": columns,
        "name_of_issuer": name_of_issuer,
        "title_of_class": title_of_class,
        "cusip": cusip,
        "value": value,
        "ssh_prnamt": ssh_prnamt,
        "ssh_prnamt_type": ssh_prnamt_type,
        "investment_discretion": investment_discretion,
        "voting_authority_sole": voting_authority_sole,
        "voting_authority_shared": voting_authority_shared,
        "voting_authority_none": voting_authority_none,
        "reporting_owner_cik": reporting_owner_cik,
        "put_call": put_call,
        "other_manager": other_manager,
        "figi": figi,
        "accession": accession,
        "filing_date": filing_date,
        "api_key": api_key,
        "print_cost": print_cost,
        "verbose": verbose,
    }

    # The bare name below is the function imported from .seclibrary.bq.
    return get_information_table(**query_args)
|
120
|
+
|
121
|
+
def get_345(
    self,
    # Filters (all optional)
    columns=None,
    is_derivative=None,
    is_non_derivative=None,
    security_title=None,
    transaction_date=None,
    document_type=None,
    transaction_code=None,
    equity_swap_involved=None,
    transaction_timeliness=None,
    transaction_shares=None,
    transaction_price_per_share=None,
    shares_owned_following_transaction=None,
    ownership_type=None,
    deemed_execution_date=None,
    conversion_or_exercise_price=None,
    exercise_date=None,
    expiration_date=None,
    underlying_security_title=None,
    underlying_security_shares=None,
    underlying_security_value=None,
    accession=None,
    reporting_owner_cik=None,
    issuer_cik=None,
    filing_date=None,

    # Credentials
    api_key=None,

    # Output controls
    print_cost=True,
    verbose=False
):
    """
    Fetch Form 345 insider transaction rows from the SEC BigQuery API.

    This method is a pass-through: every argument is forwarded unchanged,
    by keyword, to the ``get_345`` function imported from ``.seclibrary.bq``.

    Parameters:
    -----------
    columns : List[str], optional
        Columns to return. If None, all columns are returned.

    is_derivative, is_non_derivative, security_title, transaction_date, etc. :
    optional filters, each of which can be:
        - str/bool: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    reporting_owner_cik : str or List[str]
        CIK(s) of the reporting insider(s). This is matched against an array in BigQuery.
        Any match within the array will return the record.

    issuer_cik : str or List[str]
        CIK(s) of the company/companies

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Collect the arguments once, in signature order, then forward them all.
    query_args = {
        "columns": columns,
        "is_derivative": is_derivative,
        "is_non_derivative": is_non_derivative,
        "security_title": security_title,
        "transaction_date": transaction_date,
        "document_type": document_type,
        "transaction_code": transaction_code,
        "equity_swap_involved": equity_swap_involved,
        "transaction_timeliness": transaction_timeliness,
        "transaction_shares": transaction_shares,
        "transaction_price_per_share": transaction_price_per_share,
        "shares_owned_following_transaction": shares_owned_following_transaction,
        "ownership_type": ownership_type,
        "deemed_execution_date": deemed_execution_date,
        "conversion_or_exercise_price": conversion_or_exercise_price,
        "exercise_date": exercise_date,
        "expiration_date": expiration_date,
        "underlying_security_title": underlying_security_title,
        "underlying_security_shares": underlying_security_shares,
        "underlying_security_value": underlying_security_value,
        "accession": accession,
        "reporting_owner_cik": reporting_owner_cik,
        "issuer_cik": issuer_cik,
        "filing_date": filing_date,
        "api_key": api_key,
        "print_cost": print_cost,
        "verbose": verbose,
    }

    # The bare name below is the function imported from .seclibrary.bq.
    return get_345(**query_args)
|
230
|
+
|
231
|
+
def _download_to_csv(self, data, filepath, verbose=False):
    """
    Write query results to a CSV file and hand the same records back.

    Parameters:
    -----------
    data : List[Dict]
        The records to save. If empty (or None), no file is created.
    filepath : str or Path
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.
    verbose : bool
        Whether to print a message about what was (or was not) saved

    Returns:
    --------
    List[Dict]
        The input data, unchanged (for method chaining)

    Raises:
    -------
    OSError
        If the target directory or file cannot be created or written
    """
    # Nothing to write: return the (empty) input as-is without touching disk.
    if not data:
        if verbose:
            print("No data returned from API. No file was created.")
        return data

    # Relative paths are anchored at self.path; absolute paths are used verbatim.
    filepath_obj = Path(filepath)
    if not filepath_obj.is_absolute():
        filepath_obj = self.path / filepath_obj

    # Make sure the destination directory exists before opening the file.
    filepath_obj.parent.mkdir(parents=True, exist_ok=True)

    # Header is the union of keys over all records, in first-seen order.
    # Using only the first record's keys would make DictWriter raise
    # ValueError as soon as a later record carries an extra key; records
    # that lack a column are written with an empty cell instead.
    fieldnames = list(dict.fromkeys(key for record in data for key in record))

    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding; newline='' is what the csv module expects.
    with open(filepath_obj, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

    if verbose:
        print(f"Saved {len(data)} records to {filepath_obj}")

    # The success path used to fall off the end and return None, which made
    # every download_* method return None despite the documented contract.
    return data
|
274
|
+
|
275
|
+
|
276
|
+
def download_information_table(
    self,
    filepath,
    # Filters (all optional)
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,

    # Credentials
    api_key=None,

    # Output controls
    print_cost=True,
    verbose=False
):
    """
    Fetch 13F-HR information table rows and write them to a CSV file.

    The query itself is delegated to ``self.get_information_table`` and the
    file writing to ``self._download_to_csv``; this method only wires the
    two together and returns whatever ``_download_to_csv`` returns.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    columns : List[str], optional
        Columns to return. If None, all columns are returned.

    name_of_issuer, title_of_class, cusip, value, etc. : optional filters,
    each of which can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Step 1: run the query with every filter/option forwarded by keyword.
    query_args = dict(
        columns=columns,
        name_of_issuer=name_of_issuer,
        title_of_class=title_of_class,
        cusip=cusip,
        value=value,
        ssh_prnamt=ssh_prnamt,
        ssh_prnamt_type=ssh_prnamt_type,
        investment_discretion=investment_discretion,
        voting_authority_sole=voting_authority_sole,
        voting_authority_shared=voting_authority_shared,
        voting_authority_none=voting_authority_none,
        reporting_owner_cik=reporting_owner_cik,
        put_call=put_call,
        other_manager=other_manager,
        figi=figi,
        accession=accession,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose,
    )
    rows = self.get_information_table(**query_args)

    # Step 2: persist the rows through the shared CSV helper.
    return self._download_to_csv(rows, filepath, verbose)
|
367
|
+
|
368
|
+
def download_345(
    self,
    filepath,
    # Filters (all optional)
    columns=None,
    is_derivative=None,
    is_non_derivative=None,
    security_title=None,
    transaction_date=None,
    document_type=None,
    transaction_code=None,
    equity_swap_involved=None,
    transaction_timeliness=None,
    transaction_shares=None,
    transaction_price_per_share=None,
    shares_owned_following_transaction=None,
    ownership_type=None,
    deemed_execution_date=None,
    conversion_or_exercise_price=None,
    exercise_date=None,
    expiration_date=None,
    underlying_security_title=None,
    underlying_security_shares=None,
    underlying_security_value=None,
    accession=None,
    reporting_owner_cik=None,
    issuer_cik=None,
    filing_date=None,

    # Credentials
    api_key=None,

    # Output controls
    print_cost=True,
    verbose=False
):
    """
    Fetch Form 345 insider transaction rows and write them to a CSV file.

    The query itself is delegated to ``self.get_345`` and the file writing
    to ``self._download_to_csv``; this method only wires the two together
    and returns whatever ``_download_to_csv`` returns.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    columns : List[str], optional
        Columns to return. If None, all columns are returned.

    is_derivative, is_non_derivative, security_title, transaction_date, etc. :
    optional filters, each of which can be:
        - str/bool: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    reporting_owner_cik : str or List[str]
        CIK(s) of the reporting insider(s). This is matched against an array in BigQuery.
        Any match within the array will return the record.

    issuer_cik : str or List[str]
        CIK(s) of the company/companies

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Step 1: run the query with every filter/option forwarded by keyword.
    query_args = dict(
        columns=columns,
        is_derivative=is_derivative,
        is_non_derivative=is_non_derivative,
        security_title=security_title,
        transaction_date=transaction_date,
        document_type=document_type,
        transaction_code=transaction_code,
        equity_swap_involved=equity_swap_involved,
        transaction_timeliness=transaction_timeliness,
        transaction_shares=transaction_shares,
        transaction_price_per_share=transaction_price_per_share,
        shares_owned_following_transaction=shares_owned_following_transaction,
        ownership_type=ownership_type,
        deemed_execution_date=deemed_execution_date,
        conversion_or_exercise_price=conversion_or_exercise_price,
        exercise_date=exercise_date,
        expiration_date=expiration_date,
        underlying_security_title=underlying_security_title,
        underlying_security_shares=underlying_security_shares,
        underlying_security_value=underlying_security_value,
        accession=accession,
        reporting_owner_cik=reporting_owner_cik,
        issuer_cik=issuer_cik,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose,
    )
    rows = self.get_345(**query_args)

    # Step 2: persist the rows through the shared CSV helper.
    return self._download_to_csv(rows, filepath, verbose)
|
480
|
+
|
481
|
+
def get_proxy_voting_record(
    self,
    # Filters (all optional)
    columns=None,
    meeting_date=None,
    isin=None,
    cusip=None,
    issuer_name=None,
    vote_description=None,
    shares_on_loan=None,
    shares_voted=None,
    vote_category=None,
    vote_record=None,
    vote_source=None,
    how_voted=None,
    figi=None,
    management_recommendation=None,
    accession=None,
    reporting_owner_cik=None,
    filing_date=None,

    # Credentials
    api_key=None,

    # Output controls
    print_cost=True,
    verbose=False
):
    """
    Fetch NPX proxy voting record rows from the SEC BigQuery API.

    This method is a pass-through: every argument is forwarded unchanged,
    by keyword, to the ``get_proxy_voting_record`` function imported from
    ``.seclibrary.bq``.

    Parameters:
    -----------
    columns : List[str], optional
        Columns to return. If None, all columns are returned.

    meeting_date, isin, cusip, issuer_name, etc. : optional filters, each of
    which can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    shares_on_loan, shares_voted : int/float or tuple
        Numeric values or (min, max) range

    filing_date : str or tuple
        Date string in 'YYYY-MM-DD' format or (start_date, end_date) tuple

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Collect the arguments once, in signature order, then forward them all.
    query_args = {
        "columns": columns,
        "meeting_date": meeting_date,
        "isin": isin,
        "cusip": cusip,
        "issuer_name": issuer_name,
        "vote_description": vote_description,
        "shares_on_loan": shares_on_loan,
        "shares_voted": shares_voted,
        "vote_category": vote_category,
        "vote_record": vote_record,
        "vote_source": vote_source,
        "how_voted": how_voted,
        "figi": figi,
        "management_recommendation": management_recommendation,
        "accession": accession,
        "reporting_owner_cik": reporting_owner_cik,
        "filing_date": filing_date,
        "api_key": api_key,
        "print_cost": print_cost,
        "verbose": verbose,
    }

    # The bare name below is the function imported from .seclibrary.bq.
    return get_proxy_voting_record(**query_args)
|
575
|
+
|
576
|
+
def download_proxy_voting_record(
    self,
    filepath,
    # Filters (all optional)
    columns=None,
    meeting_date=None,
    isin=None,
    cusip=None,
    issuer_name=None,
    vote_description=None,
    shares_on_loan=None,
    shares_voted=None,
    vote_category=None,
    vote_record=None,
    vote_source=None,
    how_voted=None,
    figi=None,
    management_recommendation=None,
    accession=None,
    reporting_owner_cik=None,
    filing_date=None,

    # Credentials
    api_key=None,

    # Output controls
    print_cost=True,
    verbose=False
):
    """
    Fetch NPX proxy voting record rows and write them to a CSV file.

    The query itself is delegated to ``self.get_proxy_voting_record`` and
    the file writing to ``self._download_to_csv``; this method only wires
    the two together and returns whatever ``_download_to_csv`` returns.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    columns : List[str], optional
        Columns to return. If None, all columns are returned.

    meeting_date, isin, cusip, issuer_name, etc. : optional filters, each of
    which can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    shares_on_loan, shares_voted : int/float or tuple
        Numeric values or (min, max) range

    filing_date : str or tuple
        Date string in 'YYYY-MM-DD' format or (start_date, end_date) tuple

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Step 1: run the query with every filter/option forwarded by keyword.
    query_args = dict(
        columns=columns,
        meeting_date=meeting_date,
        isin=isin,
        cusip=cusip,
        issuer_name=issuer_name,
        vote_description=vote_description,
        shares_on_loan=shares_on_loan,
        shares_voted=shares_voted,
        vote_category=vote_category,
        vote_record=vote_record,
        vote_source=vote_source,
        how_voted=how_voted,
        figi=figi,
        management_recommendation=management_recommendation,
        accession=accession,
        reporting_owner_cik=reporting_owner_cik,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose,
    )
    rows = self.get_proxy_voting_record(**query_args)

    # Step 2: persist the rows through the shared CSV helper.
    return self._download_to_csv(rows, filepath, verbose)
|
datamule/submission.py
CHANGED
@@ -14,6 +14,7 @@ class Submission:
|
|
14
14
|
if sgml_content is not None:
|
15
15
|
self.path = None
|
16
16
|
self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
|
17
|
+
self.documents = []
|
17
18
|
|
18
19
|
for idx,doc in enumerate(self.metadata['documents']):
|
19
20
|
type = doc.get('type')
|
@@ -23,7 +24,7 @@ class Submission:
|
|
23
24
|
continue
|
24
25
|
filename = doc.get('filename')
|
25
26
|
extension = Path(filename).suffix
|
26
|
-
self.documents
|
27
|
+
self.documents.append(Document(type=type, content=raw_documents[idx], extension=extension))
|
27
28
|
|
28
29
|
|
29
30
|
if path is not None:
|