datamule 1.1.7__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/sheet.py CHANGED
@@ -1,6 +1,9 @@
1
1
  from pathlib import Path
2
+ import csv
3
+ import os
2
4
  from .helper import _process_cik_and_metadata_filters, load_package_dataset
3
5
  from .sec.xbrl.downloadcompanyfacts import download_company_facts
6
+ from .seclibrary.bq import get_information_table, get_345, get_proxy_voting_record
4
7
 
5
8
  class Sheet:
6
9
  def __init__(self, path):
@@ -26,16 +29,644 @@ class Sheet:
26
29
  # Download facts for all CIKs in parallel
27
30
  download_company_facts(cik=cik_list, output_dir=self.path)
28
31
 
29
- def query_345():
30
- pass
31
- def query_xbrl():
32
- pass
33
-
34
- # LIST TUPLE SYNTAX, so e.g. value (0,100) is 0-100, while [0,100] is 0 and 100
35
- def get_13fhr(reportingOwnerCIK,nameOfIssuer,titleOfClass,cusip,value,
36
- shrsOrPrnAmt_sshPrnamt,shrsOrPrnAmt_sshPrnamtType,investmentDiscretion,otherManager,
37
- votingAuthority_Sole,
38
- votingAuthority_Shared,
39
- votingAuthority_None,
40
- filing_date):
41
- pass
32
def get_information_table(
    self,
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,
    api_key=None,
    print_cost=True,
    verbose=False
):
    """
    Fetch 13F-HR information table records via the SEC BigQuery API.

    This method is a pass-through: every argument is forwarded unchanged to
    the `get_information_table` function imported from `.seclibrary.bq`.

    Parameters:
    -----------
    columns : List[str], optional
        Columns to return. None requests all columns.

    name_of_issuer, title_of_class, cusip, value, ssh_prnamt,
    ssh_prnamt_type, investment_discretion, voting_authority_sole,
    voting_authority_shared, voting_authority_none, reporting_owner_cik,
    put_call, other_manager, figi, accession, filing_date : optional
        Filters. Each may be:
        - str: exact match
        - List[str]: match any value in the list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key, forwarded as given. The upstream documentation
        states that the DATAMULE_API_KEY environment variable is used when
        this is None.
    print_cost : bool
        Whether to print the query cost information.
    verbose : bool
        Whether to print additional information about the query.

    Returns:
    --------
    List[Dict]
        The result of the underlying query function, documented as a list
        of dictionaries holding the query results.

    Raises:
    -------
    ValueError
        If the API key is missing or invalid (raised by the underlying call).
    Exception
        For API errors or other issues (raised by the underlying call).
    """
    # Filters are gathered in the same order the keyword arguments were
    # originally passed, so the forwarded call is unchanged.
    query_filters = {
        "name_of_issuer": name_of_issuer,
        "title_of_class": title_of_class,
        "cusip": cusip,
        "value": value,
        "ssh_prnamt": ssh_prnamt,
        "ssh_prnamt_type": ssh_prnamt_type,
        "investment_discretion": investment_discretion,
        "voting_authority_sole": voting_authority_sole,
        "voting_authority_shared": voting_authority_shared,
        "voting_authority_none": voting_authority_none,
        "reporting_owner_cik": reporting_owner_cik,
        "put_call": put_call,
        "other_manager": other_manager,
        "figi": figi,
        "accession": accession,
        "filing_date": filing_date,
    }

    # Within a method body the bare name resolves to the module-level import,
    # not to this method, so this is delegation rather than recursion.
    return get_information_table(
        columns=columns,
        **query_filters,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )
120
+
121
def get_345(
    self,
    columns=None,
    is_derivative=None,
    is_non_derivative=None,
    security_title=None,
    transaction_date=None,
    document_type=None,
    transaction_code=None,
    equity_swap_involved=None,
    transaction_timeliness=None,
    transaction_shares=None,
    transaction_price_per_share=None,
    shares_owned_following_transaction=None,
    ownership_type=None,
    deemed_execution_date=None,
    conversion_or_exercise_price=None,
    exercise_date=None,
    expiration_date=None,
    underlying_security_title=None,
    underlying_security_shares=None,
    underlying_security_value=None,
    accession=None,
    reporting_owner_cik=None,
    issuer_cik=None,
    filing_date=None,
    api_key=None,
    print_cost=True,
    verbose=False
):
    """
    Fetch Form 345 insider transaction records via the SEC BigQuery API.

    This method is a pass-through: every argument is forwarded unchanged to
    the `get_345` function imported from `.seclibrary.bq`.

    Parameters:
    -----------
    columns : List[str], optional
        Columns to return. None requests all columns.

    is_derivative, is_non_derivative, security_title, transaction_date,
    document_type, transaction_code, equity_swap_involved,
    transaction_timeliness, transaction_shares, transaction_price_per_share,
    shares_owned_following_transaction, ownership_type,
    deemed_execution_date, conversion_or_exercise_price, exercise_date,
    expiration_date, underlying_security_title, underlying_security_shares,
    underlying_security_value, accession, filing_date : optional
        Filters. Each may be:
        - str/bool: exact match
        - List[str]: match any value in the list
        - tuple: (min, max) range for numeric/date fields

    reporting_owner_cik : str or List[str]
        CIK(s) of the reporting insider(s). Matched against an array in
        BigQuery; any match within the array returns the record.

    issuer_cik : str or List[str]
        CIK(s) of the company/companies.

    api_key : str, optional
        SEC BigQuery API key, forwarded as given. The upstream documentation
        states that the DATAMULE_API_KEY environment variable is used when
        this is None.
    print_cost : bool
        Whether to print the query cost information.
    verbose : bool
        Whether to print additional information about the query.

    Returns:
    --------
    List[Dict]
        The result of the underlying query function, documented as a list
        of dictionaries holding the query results.

    Raises:
    -------
    ValueError
        If the API key is missing or invalid (raised by the underlying call).
    Exception
        For API errors or other issues (raised by the underlying call).
    """
    # Filters are gathered in the same order the keyword arguments were
    # originally passed, so the forwarded call is unchanged.
    query_filters = {
        "is_derivative": is_derivative,
        "is_non_derivative": is_non_derivative,
        "security_title": security_title,
        "transaction_date": transaction_date,
        "document_type": document_type,
        "transaction_code": transaction_code,
        "equity_swap_involved": equity_swap_involved,
        "transaction_timeliness": transaction_timeliness,
        "transaction_shares": transaction_shares,
        "transaction_price_per_share": transaction_price_per_share,
        "shares_owned_following_transaction": shares_owned_following_transaction,
        "ownership_type": ownership_type,
        "deemed_execution_date": deemed_execution_date,
        "conversion_or_exercise_price": conversion_or_exercise_price,
        "exercise_date": exercise_date,
        "expiration_date": expiration_date,
        "underlying_security_title": underlying_security_title,
        "underlying_security_shares": underlying_security_shares,
        "underlying_security_value": underlying_security_value,
        "accession": accession,
        "reporting_owner_cik": reporting_owner_cik,
        "issuer_cik": issuer_cik,
        "filing_date": filing_date,
    }

    # Within a method body the bare name resolves to the module-level import,
    # not to this method, so this is delegation rather than recursion.
    return get_345(
        columns=columns,
        **query_filters,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )
230
+
231
+ def _download_to_csv(self, data, filepath, verbose=False):
232
+ """
233
+ Helper method to download data to a CSV file.
234
+
235
+ Parameters:
236
+ -----------
237
+ data : List[Dict]
238
+ The data to save
239
+ filepath : str or Path
240
+ Path where to save the CSV file. If relative, it will be relative to the Sheet's path.
241
+ verbose : bool
242
+ Whether to print additional information
243
+
244
+ Returns:
245
+ --------
246
+ List[Dict]
247
+ The input data (for method chaining)
248
+ """
249
+ # If no data returned, nothing to save
250
+ if not data:
251
+ if verbose:
252
+ print("No data returned from API. No file was created.")
253
+ return data
254
+
255
+ # Resolve filepath - if it's not absolute, make it relative to self.path
256
+ filepath_obj = Path(filepath)
257
+ if not filepath_obj.is_absolute():
258
+ filepath_obj = self.path / filepath_obj
259
+
260
+ # Create directory if it doesn't exist
261
+ os.makedirs(filepath_obj.parent, exist_ok=True)
262
+
263
+ # Get fieldnames from the first record
264
+ fieldnames = data[0].keys()
265
+
266
+ # Write to CSV
267
+ with open(filepath_obj, 'w', newline='') as csvfile:
268
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
269
+ writer.writeheader()
270
+ writer.writerows(data)
271
+
272
+ if verbose:
273
+ print(f"Saved {len(data)} records to {filepath_obj}")
274
+
275
+
276
def download_information_table(
    self,
    filepath,
    # Optional filtering parameters
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,

    # API key handling
    api_key=None,

    # Additional options
    print_cost=True,
    verbose=False
):
    """
    Query the SEC BigQuery API for 13F-HR information table data and save to CSV.

    The records are fetched with `self.get_information_table` and written
    with `self._download_to_csv`.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    name_of_issuer, title_of_class, etc. : Various filters that can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results. The same
        records are returned whether or not a file was written.

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Get the data from the API
    data = self.get_information_table(
        columns=columns,
        name_of_issuer=name_of_issuer,
        title_of_class=title_of_class,
        cusip=cusip,
        value=value,
        ssh_prnamt=ssh_prnamt,
        ssh_prnamt_type=ssh_prnamt_type,
        investment_discretion=investment_discretion,
        voting_authority_sole=voting_authority_sole,
        voting_authority_shared=voting_authority_shared,
        voting_authority_none=voting_authority_none,
        reporting_owner_cik=reporting_owner_cik,
        put_call=put_call,
        other_manager=other_manager,
        figi=figi,
        accession=accession,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )

    # Save to CSV using the helper method
    self._download_to_csv(data, filepath, verbose)

    # Return the fetched records explicitly so the documented List[Dict]
    # result does not depend on what the CSV helper returns.
    return data
367
+
368
def download_345(
    self,
    filepath,
    # Optional filtering parameters
    columns=None,
    is_derivative=None,
    is_non_derivative=None,
    security_title=None,
    transaction_date=None,
    document_type=None,
    transaction_code=None,
    equity_swap_involved=None,
    transaction_timeliness=None,
    transaction_shares=None,
    transaction_price_per_share=None,
    shares_owned_following_transaction=None,
    ownership_type=None,
    deemed_execution_date=None,
    conversion_or_exercise_price=None,
    exercise_date=None,
    expiration_date=None,
    underlying_security_title=None,
    underlying_security_shares=None,
    underlying_security_value=None,
    accession=None,
    reporting_owner_cik=None,
    issuer_cik=None,
    filing_date=None,

    # API key handling
    api_key=None,

    # Additional options
    print_cost=True,
    verbose=False
):
    """
    Query the SEC BigQuery API for Form 345 insider transaction data and save to CSV.

    The records are fetched with `self.get_345` and written with
    `self._download_to_csv`.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    is_derivative, security_title, etc. : Various filters that can be:
        - str/bool: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    reporting_owner_cik : str or List[str]
        CIK(s) of the reporting insider(s). This is matched against an array in BigQuery.
        Any match within the array will return the record.

    issuer_cik : str or List[str]
        CIK(s) of the company/companies

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results. The same
        records are returned whether or not a file was written.

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Get the data from the API
    data = self.get_345(
        columns=columns,
        is_derivative=is_derivative,
        is_non_derivative=is_non_derivative,
        security_title=security_title,
        transaction_date=transaction_date,
        document_type=document_type,
        transaction_code=transaction_code,
        equity_swap_involved=equity_swap_involved,
        transaction_timeliness=transaction_timeliness,
        transaction_shares=transaction_shares,
        transaction_price_per_share=transaction_price_per_share,
        shares_owned_following_transaction=shares_owned_following_transaction,
        ownership_type=ownership_type,
        deemed_execution_date=deemed_execution_date,
        conversion_or_exercise_price=conversion_or_exercise_price,
        exercise_date=exercise_date,
        expiration_date=expiration_date,
        underlying_security_title=underlying_security_title,
        underlying_security_shares=underlying_security_shares,
        underlying_security_value=underlying_security_value,
        accession=accession,
        reporting_owner_cik=reporting_owner_cik,
        issuer_cik=issuer_cik,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )

    # Save to CSV using the helper method
    self._download_to_csv(data, filepath, verbose)

    # Return the fetched records explicitly so the documented List[Dict]
    # result does not depend on what the CSV helper returns.
    return data
480
+
481
def get_proxy_voting_record(
    self,
    columns=None,
    meeting_date=None,
    isin=None,
    cusip=None,
    issuer_name=None,
    vote_description=None,
    shares_on_loan=None,
    shares_voted=None,
    vote_category=None,
    vote_record=None,
    vote_source=None,
    how_voted=None,
    figi=None,
    management_recommendation=None,
    accession=None,
    reporting_owner_cik=None,
    filing_date=None,
    api_key=None,
    print_cost=True,
    verbose=False
):
    """
    Fetch NPX proxy voting records via the SEC BigQuery API.

    This method is a pass-through: every argument is forwarded unchanged to
    the `get_proxy_voting_record` function imported from `.seclibrary.bq`.

    Parameters:
    -----------
    columns : List[str], optional
        Columns to return. None requests all columns.

    meeting_date, isin, cusip, issuer_name, vote_description,
    vote_category, vote_record, vote_source, how_voted, figi,
    management_recommendation, accession, reporting_owner_cik : optional
        Filters. Each may be:
        - str: exact match
        - List[str]: match any value in the list
        - tuple: (min, max) range for numeric/date fields

    shares_on_loan, shares_voted : int/float or tuple
        A numeric value or a (min, max) range.

    filing_date : str or tuple
        Date string in 'YYYY-MM-DD' format or a (start_date, end_date) tuple.

    api_key : str, optional
        SEC BigQuery API key, forwarded as given. The upstream documentation
        states that the DATAMULE_API_KEY environment variable is used when
        this is None.
    print_cost : bool
        Whether to print the query cost information.
    verbose : bool
        Whether to print additional information about the query.

    Returns:
    --------
    List[Dict]
        The result of the underlying query function, documented as a list
        of dictionaries holding the query results.

    Raises:
    -------
    ValueError
        If the API key is missing or invalid (raised by the underlying call).
    Exception
        For API errors or other issues (raised by the underlying call).
    """
    # Filters are gathered in the same order the keyword arguments were
    # originally passed, so the forwarded call is unchanged.
    query_filters = {
        "meeting_date": meeting_date,
        "isin": isin,
        "cusip": cusip,
        "issuer_name": issuer_name,
        "vote_description": vote_description,
        "shares_on_loan": shares_on_loan,
        "shares_voted": shares_voted,
        "vote_category": vote_category,
        "vote_record": vote_record,
        "vote_source": vote_source,
        "how_voted": how_voted,
        "figi": figi,
        "management_recommendation": management_recommendation,
        "accession": accession,
        "reporting_owner_cik": reporting_owner_cik,
        "filing_date": filing_date,
    }

    # Within a method body the bare name resolves to the module-level import,
    # not to this method, so this is delegation rather than recursion.
    return get_proxy_voting_record(
        columns=columns,
        **query_filters,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )
575
+
576
def download_proxy_voting_record(
    self,
    filepath,
    # Optional filtering parameters
    columns=None,
    meeting_date=None,
    isin=None,
    cusip=None,
    issuer_name=None,
    vote_description=None,
    shares_on_loan=None,
    shares_voted=None,
    vote_category=None,
    vote_record=None,
    vote_source=None,
    how_voted=None,
    figi=None,
    management_recommendation=None,
    accession=None,
    reporting_owner_cik=None,
    filing_date=None,

    # API key handling
    api_key=None,

    # Additional options
    print_cost=True,
    verbose=False
):
    """
    Query the SEC BigQuery API for NPX proxy voting record data and save to CSV.

    The records are fetched with `self.get_proxy_voting_record` and written
    with `self._download_to_csv`.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    meeting_date, isin, cusip, etc. : Various filters that can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    shares_on_loan, shares_voted : int/float or tuple
        Numeric values or (min, max) range

    filing_date : str or tuple
        Date string in 'YYYY-MM-DD' format or (start_date, end_date) tuple

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results. The same
        records are returned whether or not a file was written.

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Get the data from the API
    data = self.get_proxy_voting_record(
        columns=columns,
        meeting_date=meeting_date,
        isin=isin,
        cusip=cusip,
        issuer_name=issuer_name,
        vote_description=vote_description,
        shares_on_loan=shares_on_loan,
        shares_voted=shares_voted,
        vote_category=vote_category,
        vote_record=vote_record,
        vote_source=vote_source,
        how_voted=how_voted,
        figi=figi,
        management_recommendation=management_recommendation,
        accession=accession,
        reporting_owner_cik=reporting_owner_cik,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )

    # Save to CSV using the helper method
    self._download_to_csv(data, filepath, verbose)

    # Return the fetched records explicitly so the documented List[Dict]
    # result does not depend on what the CSV helper returns.
    return data
datamule/submission.py CHANGED
@@ -14,6 +14,7 @@ class Submission:
14
14
  if sgml_content is not None:
15
15
  self.path = None
16
16
  self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
17
+ self.documents = []
17
18
 
18
19
  for idx,doc in enumerate(self.metadata['documents']):
19
20
  type = doc.get('type')
@@ -23,7 +24,7 @@ class Submission:
23
24
  continue
24
25
  filename = doc.get('filename')
25
26
  extension = Path(filename).suffix
26
- self.documents = [Document(type=type, content=raw_documents[idx], extension=extension)]
27
+ self.documents.append(Document(type=type, content=raw_documents[idx], extension=extension))
27
28
 
28
29
 
29
30
  if path is not None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.1.7
3
+ Version: 1.2.0
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman