datamule 1.1.7__py3-none-any.whl → 1.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/document.py CHANGED
@@ -11,8 +11,6 @@ class Document:
     def __init__(self, type, content, extension):
 
         self.type = type
-        # we will remove this later #
-        # make sure extension is in lower case
         extension = extension.lower()
         self.content = content
         if extension == '.txt':
@@ -94,6 +92,9 @@ class Document:
 
     # Note: this method will be heavily modified in the future
     def parse(self):
+        # check if we have already parsed the content
+        if self.data:
+            return self.data
         mapping_dict = None
 
         if self.extension == '.xml':
@@ -127,34 +128,243 @@ class Document:
         with open(output_filename, 'w',encoding='utf-8') as f:
             json.dump(self.data, f, indent=2)
 
-    def write_csv(self, output_filename=None, accession_number=None):
+    def to_tabular(self, accession_number=None):
         self.parse()
 
-        with open(output_filename, 'w', newline='') as csvfile:
-            if not self.data:
-                return output_filename
+        if self.type == "INFORMATION TABLE":
+            info_table = self.data['informationTable']['infoTable']
+            if isinstance(info_table, dict):
+                info_table = [info_table]
+
+            flattened = self._flatten_dict(info_table)
 
-            has_document = any('document' in item for item in self.data)
+            # Original field names
+            original_columns = [
+                "nameOfIssuer", "titleOfClass", "cusip", "value",
+                "shrsOrPrnAmt_sshPrnamt", "shrsOrPrnAmt_sshPrnamtType",
+                "investmentDiscretion", "votingAuthority_Sole",
+                "votingAuthority_Shared", "votingAuthority_None",
+                "reportingOwnerCIK", "putCall", "otherManager", 'figi'
+            ]
+
+            # Define mapping from original to camelCase field names
+            field_mapping = {
+                "shrsOrPrnAmt_sshPrnamt": "sshPrnamt",
+                "shrsOrPrnAmt_sshPrnamtType": "sshPrnamtType",
+                "votingAuthority_Sole": "votingAuthoritySole",
+                "votingAuthority_Shared": "votingAuthorityShared",
+                "votingAuthority_None": "votingAuthorityNone"
+            }
+
+            # Create the new expected columns list with mapped field names
+            expected_columns = []
+            for column in original_columns:
+                if column in field_mapping:
+                    expected_columns.append(field_mapping[column])
+                else:
+                    expected_columns.append(column)
+
+            # Process each item in the flattened data
+            for item in flattened:
+                # Remove newlines from items
+                for key in item:
+                    if isinstance(item[key], str):
+                        item[key] = re.sub(r'\s+', ' ', item[key])
+
+                new_item = {}
+                for key, value in item.items():
+                    # Apply the mapping if the key is in our mapping dictionary
+                    if key in field_mapping:
+                        new_item[field_mapping[key]] = value
+                    else:
+                        new_item[key] = value
+
+                # Update the original item with the new keys
+                item.clear()
+                item.update(new_item)
+
+                # Ensure all expected columns exist
+                for column in expected_columns:
+                    if column not in item:
+                        item[column] = None
+
+                item['accession'] = accession_number
 
-            if has_document and 'document' in self.data:
-                writer = csv.DictWriter(csvfile, ['section', 'text'], quoting=csv.QUOTE_ALL)
-                writer.writeheader()
-                flattened = self._flatten_dict(self.data['document'])
-                for section, text in flattened.items():
-                    writer.writerow({'section': section, 'text': text})
-            else:
-                fieldnames = list(self.data[0].keys())
-                if accession_number:
-                    fieldnames.append('Accession Number')
-                writer = csv.DictWriter(csvfile, fieldnames, quoting=csv.QUOTE_ALL)
-                writer.writeheader()
-                for row in self.data:
-                    if accession_number:
-                        row['Accession Number'] = accession_number
-                    writer.writerow(row)
+            # Add this block to reorder the items to match the expected order
+            ordered_columns = ["nameOfIssuer", "titleOfClass", "cusip", "value", "sshPrnamt", "sshPrnamtType",
+                               "investmentDiscretion", "votingAuthoritySole", "votingAuthorityShared", "votingAuthorityNone",
+                               "reportingOwnerCIK", "putCall", "otherManager", "figi"]
+            if accession_number is not None:
+                ordered_columns.append("accession")
+
+            ordered_data = []
+            for item in flattened:
+                ordered_item = {column: item.get(column, None) for column in ordered_columns}
+                ordered_data.append(ordered_item)
+
+            return ordered_data
+
+        elif self.type in ["3", "4", "5"]:
+            # Master mapping dictionary - includes all possible fields
+            # The order of this dictionary will determine the output column order
+            master_mapping_dict = {
+                # Flag fields (will be set programmatically)
+                "isDerivative": "isDerivative",
+                "isNonDerivative": "isNonDerivative",
+
+                # Common fields across all types
+                "securityTitle_value": "securityTitle",
+                "transactionDate_value": "transactionDate",
+                "documentType": "documentType",
+                "transactionCoding_transactionFormType": "documentType",
+                "transactionCoding_transactionCode": "transactionCode",
+                "transactionAmounts_transactionAcquiredDisposedCode_value": "transactionCode",
+                "transactionCoding_equitySwapInvolved": "equitySwapInvolved",
+                "transactionTimeliness_value": "transactionTimeliness",
+                "transactionAmounts_transactionShares_value": "transactionShares",
+                "transactionAmounts_transactionPricePerShare_value": "transactionPricePerShare",
+                "postTransactionAmounts_sharesOwnedFollowingTransaction_value": "sharesOwnedFollowingTransaction",
+                "heldFollowingReport": "sharesOwnedFollowingTransaction",  # Form 3
+                "ownershipNature_directOrIndirectOwnership_value": "ownershipType",
+                "ownershipNature_natureOfOwnership_value": "ownershipType",
+                "deemedExecutionDate": "deemedExecutionDate",
+                "deemedExecutionDate_value": "deemedExecutionDate",
+
+                # Derivative-specific fields
+                "conversionOrExercisePrice_value": "conversionOrExercisePrice",
+                "exerciseDate_value": "exerciseDate",
+                "expirationDate_value": "expirationDate",
+                "underlyingSecurity_underlyingSecurityTitle_value": "underlyingSecurityTitle",
+                "underlyingSecurity_underlyingSecurityShares_value": "underlyingSecurityShares",
+                "underlyingSecurity_underlyingSecurityValue_value": "underlyingSecurityValue",
+
+                # Footnote fields
+                "transactionPricePerShareFootnote": "transactionPricePerShareFootnote",
+                "transactionAmounts_transactionPricePerShare_footnote": "transactionPricePerShareFootnote",
+                "transactionCodeFootnote": "transactionCodeFootnote",
+                "transactionAmounts_transactionAcquiredDisposedCode_footnote": "transactionCodeFootnote",
+                "transactionCoding_footnote": "transactionCodeFootnote",
+                "natureOfOwnershipFootnote": "natureOfOwnershipFootnote",
+                "ownershipNature_natureOfOwnership_footnote": "natureOfOwnershipFootnote",
+                "sharesOwnedFollowingTransactionFootnote": "sharesOwnedFollowingTransactionFootnote",
+                "postTransactionAmounts_sharesOwnedFollowingTransaction_footnote": "sharesOwnedFollowingTransactionFootnote",
+                "ownershipTypeFootnote": "ownershipTypeFootnote",
+                "ownershipNature_directOrIndirectOwnership_footnote": "ownershipTypeFootnote",
+                "securityTitleFootnote": "securityTitleFootnote",
+                "securityTitle_footnote": "securityTitleFootnote",
+                "transactionSharesFootnote": "transactionSharesFootnote",
+                "transactionAmounts_transactionShares_footnote": "transactionSharesFootnote",
+                "transactionDateFootnote": "transactionDateFootnote",
+                "transactionDate_footnote": "transactionDateFootnote",
+                "conversionOrExercisePriceFootnote": "conversionOrExercisePriceFootnote",
+                "conversionOrExercisePrice_footnote": "conversionOrExercisePriceFootnote",
+                "exerciseDateFootnote": "exerciseDateFootnote",
+                "exerciseDate_footnote": "exerciseDateFootnote",
+                "expirationDateFootnote": "expirationDateFootnote",
+                "expirationDate_footnote": "expirationDateFootnote",
+                "underlyingSecurityTitleFootnote": "underlyingSecurityTitleFootnote",
+                "underlyingSecurity_underlyingSecurityTitle_footnote": "underlyingSecurityTitleFootnote",
+                "underlyingSecuritySharesFootnote": "underlyingSecuritySharesFootnote",
+                "underlyingSecurity_underlyingSecurityShares_footnote": "underlyingSecuritySharesFootnote",
+                "underlyingSecurityValueFootnote": "underlyingSecurityValueFootnote",
+                "underlyingSecurity_underlyingSecurityValue_footnote": "underlyingSecurityValueFootnote"
+            }
+
+            # Get the unique target column names in order from the mapping dictionary
+            output_columns = []
+            for _, target_key in master_mapping_dict.items():
+                if target_key not in output_columns:
+                    output_columns.append(target_key)
+
+            # Process function that handles any table type
+            def process_table(table_data, is_derivative):
+                if isinstance(table_data, dict):
+                    table_data = [table_data]
+
+                flattened = self._flatten_dict(table_data)
+
+                # Apply mapping to the flattened data and ensure all expected columns are present
+                mapped_data = []
+                for item in flattened:
+                    mapped_item = {}
+                    # First, apply the mapping
+                    for old_key, value in item.items():
+                        target_key = master_mapping_dict.get(old_key, old_key)
+                        mapped_item[target_key] = value
+
+                    # Set the derivative/non-derivative flags
+                    mapped_item["isDerivative"] = 1 if is_derivative else 0
+                    mapped_item["isNonDerivative"] = 0 if is_derivative else 1
+
+                    # Create a new ordered dictionary with all columns
+                    ordered_item = {}
+                    for column in output_columns:
+                        ordered_item[column] = mapped_item.get(column, None)
+
+                    # Add accession_number if available
+                    if accession_number is not None:
+                        ordered_item['accession_number'] = accession_number
+
+                    mapped_data.append(ordered_item)
+
+                return mapped_data
+
+            # Results container
+            all_results = []
+
+            # Process non-derivative transactions if they exist
+            if 'nonDerivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['nonDerivativeTable'] is not None:
+                if 'nonDerivativeTransaction' in self.data['ownershipDocument']['nonDerivativeTable']:
+                    non_deriv_trans = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeTransaction']
+                    non_deriv_results = process_table(non_deriv_trans, is_derivative=False)
+                    all_results.extend(non_deriv_results)
+
+                # Process non-derivative holdings (for Form 3)
+                if 'nonDerivativeHolding' in self.data['ownershipDocument']['nonDerivativeTable']:
+                    non_deriv_hold = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeHolding']
+                    non_deriv_hold_results = process_table(non_deriv_hold, is_derivative=False)
+                    all_results.extend(non_deriv_hold_results)
+
+            # Process derivative transactions if they exist
+            if 'derivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['derivativeTable'] is not None:
+                if 'derivativeTransaction' in self.data['ownershipDocument']['derivativeTable']:
+                    deriv_trans = self.data['ownershipDocument']['derivativeTable']['derivativeTransaction']
+                    deriv_results = process_table(deriv_trans, is_derivative=True)
+                    all_results.extend(deriv_results)
+
+                # Process derivative holdings (for Form 3)
+                if 'derivativeHolding' in self.data['ownershipDocument']['derivativeTable']:
+                    deriv_hold = self.data['ownershipDocument']['derivativeTable']['derivativeHolding']
+                    deriv_hold_results = process_table(deriv_hold, is_derivative=True)
+                    all_results.extend(deriv_hold_results)
 
-        return output_filename
-
+            # check if any rows not in the mapping dict, raise error if so
+            for item in all_results:
+                for key in item.keys():
+                    if key not in master_mapping_dict.values() and key != 'accession_number':
+                        raise ValueError(f"Key '{key}' not found in mapping dictionary")
+
+
+            return all_results
+        else:
+            raise ValueError("sorry, rejigging conversion to tabular format")
+
+    def write_csv(self, output_filename, accession_number=None):
+
+        data = self.to_tabular(accession_number)
+
+        if not data:
+
+            return
+
+        fieldnames = data[0].keys()
+
+        with open(output_filename, 'w', newline='') as csvfile:
+            writer = csv.DictWriter(csvfile,fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
+            writer.writeheader()
+            writer.writerows(data)
+
+
     def _document_to_section_text(self, document_data, parent_key=''):
         items = []
 
@@ -188,7 +398,7 @@ class Document:
     # we'll modify this for every dict
     def _flatten_dict(self, d, parent_key=''):
         items = {}
-
+
         if isinstance(d, list):
            return [self._flatten_dict(item) for item in d]
 
@@ -204,8 +414,7 @@ class Document:
 
     # this will all have to be changed. default will be to flatten everything
     def __iter__(self):
-        if not self.data:
-            self.parse()
+        self.parse()
 
         # Let's remove XML iterable for now
 
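
Taken together, these changes memoize parsing (parse() now returns the cached self.data on repeated calls) and split CSV export into two steps: to_tabular() flattens a 13F-HR information table or a Form 3/4/5 ownership document into a list of uniform dicts, and write_csv() simply serializes that list. A minimal usage sketch, assuming an already-downloaded information table XML (the file name and accession number are illustrative):

    from datamule.document import Document

    with open('infotable.xml', encoding='utf-8') as f:  # illustrative path
        doc = Document(type='INFORMATION TABLE', content=f.read(), extension='.xml')

    rows = doc.to_tabular(accession_number='0001234567-25-000001')  # list of flat dicts
    doc.write_csv('infotable.csv', accession_number='0001234567-25-000001')
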
datamule/portfolio.py CHANGED
@@ -119,7 +119,7 @@ class Portfolio:
         # First query, just set the accession numbers
         self.accession_numbers = new_accession_numbers
 
-    def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None, **kwargs):
+    def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,requests_per_second=5, **kwargs):
         if provider is None:
             config = Config()
             provider = config.get_default_source()
@@ -142,7 +142,7 @@ class Portfolio:
             cik=cik,
             submission_type=submission_type,
             filing_date=filing_date,
-            requests_per_second=5,
+            requests_per_second=requests_per_second,
             accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None
         )
 
@@ -164,8 +164,6 @@
         )
 
 
-
-
     def __iter__(self):
         if not self.submissions_loaded:
             self._load_submissions()
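
The download rate limit is now caller-configurable instead of hard-coded at 5 requests per second. A sketch, assuming a Portfolio rooted at a local directory as in prior releases (the path and form type are illustrative):

    from datamule.portfolio import Portfolio

    portfolio = Portfolio('filings')  # illustrative storage directory
    portfolio.download_submissions(submission_type='13F-HR', requests_per_second=10)
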
datamule/sec/submissions/monitor.py CHANGED
@@ -20,12 +20,16 @@ async def _process_efts_hits(hits, collected_accession_numbers, data_callback=None
         submission_type = source.get('form')
         ciks = source.get('ciks', [])
         ciks = [str(int(cik)) for cik in ciks]
+
+        filing_date = source.get('file_date')
 
         # Create standardized filing record
         filing = {
             'accession_number': accession_number,
             'submission_type': submission_type,
-            'ciks': ciks
+            'ciks': ciks,
+            'filing_date': filing_date,
+
         }
 
         processed_hits.append(filing)
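
Each EFTS hit now also carries its filing date, so the standardized records handed to downstream callbacks look roughly like this (values illustrative):

    filing = {
        'accession_number': '0001234567-25-000001',
        'submission_type': '4',
        'ciks': ['320193'],
        'filing_date': '2025-01-02',
    }
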
datamule/seclibrary/bq.py ADDED
@@ -0,0 +1,191 @@
+import os
+import requests
+import json
+
+def get_information_table(
+    # Required parameters
+    table_type="INFORMATION_TABLE",
+
+    # Optional filtering parameters
+    columns=None,
+    name_of_issuer=None,
+    title_of_class=None,
+    cusip=None,
+    value=None,
+    ssh_prnamt=None,
+    ssh_prnamt_type=None,
+    investment_discretion=None,
+    voting_authority_sole=None,
+    voting_authority_shared=None,
+    voting_authority_none=None,
+    reporting_owner_cik=None,
+    put_call=None,
+    other_manager=None,
+    figi=None,
+    accession=None,
+    filing_date=None,
+
+    # API key handling
+    api_key=None,
+
+    # Additional options
+    print_cost=True,
+    verbose=False
+):
+    """
+    Query the SEC BigQuery API for 13F-HR information table data.
+
+    Parameters:
+    -----------
+    table_type : str
+        The table to query (default is "INFORMATION_TABLE")
+    columns : List[str], optional
+        Specific columns to return. If None, all columns are returned.
+
+    # Filter parameters
+    name_of_issuer, title_of_class, etc. : Various filters that can be:
+        - str: Exact match
+        - List[str]: Match any in list
+        - tuple: (min, max) range for numeric/date fields
+
+    api_key : str, optional
+        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+    print_cost : bool
+        Whether to print the query cost information
+    verbose : bool
+        Whether to print additional information about the query
+
+    Returns:
+    --------
+    List[Dict]
+        A list of dictionaries containing the query results
+
+    Raises:
+    -------
+    ValueError
+        If API key is missing or invalid
+    Exception
+        For API errors or other issues
+    """
+
+    # 1. Handle API key
+    if api_key is None:
+        api_key = os.getenv('DATAMULE_API_KEY')
+
+    if not api_key:
+        raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key parameter")
+
+    # 2. Build query parameters
+    params = {'table_type': table_type}
+
+    # Add columns parameter if provided
+    if columns:
+        if isinstance(columns, list):
+            params['columns'] = ','.join(columns)
+        else:
+            params['columns'] = columns
+
+    # Map Python parameter names to API parameter names
+    param_mapping = {
+        'name_of_issuer': 'nameOfIssuer',
+        'title_of_class': 'titleOfClass',
+        'cusip': 'cusip',
+        'value': 'value',
+        'ssh_prnamt': 'sshPrnamt',
+        'ssh_prnamt_type': 'sshPrnamtType',
+        'investment_discretion': 'investmentDiscretion',
+        'voting_authority_sole': 'votingAuthoritySole',
+        'voting_authority_shared': 'votingAuthorityShared',
+        'voting_authority_none': 'votingAuthorityNone',
+        'reporting_owner_cik': 'reportingOwnerCIK',
+        'put_call': 'putCall',
+        'other_manager': 'otherManager',
+        'figi': 'figi',
+        'accession': 'accession',
+        'filing_date': 'filingDate'
+    }
+
+    # Process all possible filter parameters
+    for param_name, api_param_name in param_mapping.items():
+        value = locals()[param_name]
+        if value is not None:
+            # Handle different filter types
+            if isinstance(value, list):
+                # List filter
+                params[api_param_name] = f"[{','.join(str(v) for v in value)}]"
+            elif isinstance(value, tuple):
+                # Range filter
+                if len(value) == 2:
+                    min_val, max_val = value
+                    # Handle date range specially
+                    if param_name == 'filing_date':
+                        # Dates need to be in quotes within the parentheses
+                        if min_val is None:
+                            min_val = ''
+                        else:
+                            min_val = f"'{min_val}'"
+
+                        if max_val is None:
+                            max_val = ''
+                        else:
+                            max_val = f"'{max_val}'"
+
+                    range_str = f"({min_val},{max_val})"
+                    params[api_param_name] = range_str
+                else:
+                    raise ValueError(f"Range filter for {param_name} must be a tuple of (min, max)")
+            else:
+                # Exact match
+                params[api_param_name] = value
+
+    # 3. Make the API request
+    BASE_URL = "https://sec-bq.jgfriedman99.workers.dev/"
+
+    headers = {
+        'Authorization': f'Bearer {api_key}',
+        'Accept': 'application/json'
+    }
+
+    if verbose:
+        print(f"Making request to {BASE_URL} with params: {params}")
+
+    try:
+        response = requests.get(BASE_URL, params=params, headers=headers)
+
+        # Check for HTTP errors
+        response.raise_for_status()
+
+        # Parse response
+        result = response.json()
+
+        # Check for API-level errors
+        if not result.get('success', False):
+            error_msg = result.get('error', 'Unknown API error')
+            raise Exception(f"API Error: {error_msg}")
+
+        # Extract metadata for cost reporting
+        metadata = result.get('metadata', {})
+
+        # 5. Print cost information if requested
+        if print_cost and 'billing' in metadata:
+            billing = metadata['billing']
+            query_info = metadata.get('query_info', {})
+
+            print("\n=== Query Cost Information ===")
+            print(f"Bytes Processed: {query_info.get('bytes_processed', 0):,} bytes")
+            print(f"Data Processed: {billing.get('tb_processed', 0):.10f} TB")
+            print(f"Cost Rate: ${billing.get('cost_per_tb', 0):.2f}/TB")
+            print(f"Query Cost: ${billing.get('total_charge', 0):.6f}")
+            print(f"Remaining Balance: ${billing.get('remaining_balance', 0):.2f}")
+            print(f"Execution Time: {query_info.get('execution_time_ms', 0)} ms")
+            print(f"Cache Hit: {query_info.get('cache_hit', False)}")
+            print("==============================\n")
+
+        # 6. Return data
+        return result.get('data', [])
+
+    except requests.exceptions.RequestException as e:
+        if response.status_code == 401:
+            raise ValueError("Authentication failed: Invalid API key")
+        else:
+            raise Exception(f"Request failed: {str(e)}")
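
Filter semantics mirror the docstring: a scalar is an exact match, a list matches any member, and a 2-tuple is a (min, max) range, with filing_date bounds quoted by the client (either bound may be None to leave the range open). A hedged sketch, assuming DATAMULE_API_KEY is set in the environment and using illustrative filter values:

    from datamule.seclibrary.bq import get_information_table

    rows = get_information_table(
        name_of_issuer=['APPLE INC', 'MICROSOFT CORP'],  # list filter: match any
        value=(1000000, 5000000),                        # tuple filter: numeric range
        filing_date=('2024-01-01', '2024-12-31'),        # tuple filter: date range
        columns=['nameOfIssuer', 'value', 'accession'],
        verbose=True,
    )
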
datamule/sheet.py CHANGED
@@ -1,6 +1,9 @@
 from pathlib import Path
+import csv
+import os
 from .helper import _process_cik_and_metadata_filters, load_package_dataset
 from .sec.xbrl.downloadcompanyfacts import download_company_facts
+from .seclibrary.bq import get_information_table
 
 class Sheet:
     def __init__(self, path):
@@ -26,16 +29,220 @@ class Sheet:
         # Download facts for all CIKs in parallel
         download_company_facts(cik=cik_list, output_dir=self.path)
 
-    def query_345():
-        pass
-    def query_xbrl():
-        pass
+    def get_information_table(
+        self,
+        # Required parameters
+        table_type="INFORMATION_TABLE",
+
+        # Optional filtering parameters
+        columns=None,
+        name_of_issuer=None,
+        title_of_class=None,
+        cusip=None,
+        value=None,
+        ssh_prnamt=None,
+        ssh_prnamt_type=None,
+        investment_discretion=None,
+        voting_authority_sole=None,
+        voting_authority_shared=None,
+        voting_authority_none=None,
+        reporting_owner_cik=None,
+        put_call=None,
+        other_manager=None,
+        figi=None,
+        accession=None,
+        filing_date=None,
+
+        # API key handling
+        api_key=None,
+
+        # Additional options
+        print_cost=True,
+        verbose=False
+    ):
+        """
+        Query the SEC BigQuery API for 13F-HR information table data.
+
+        Parameters:
+        -----------
+        table_type : str
+            The table to query (default is "INFORMATION_TABLE")
+        columns : List[str], optional
+            Specific columns to return. If None, all columns are returned.
+
+        # Filter parameters
+        name_of_issuer, title_of_class, etc. : Various filters that can be:
+            - str: Exact match
+            - List[str]: Match any in list
+            - tuple: (min, max) range for numeric/date fields
+
+        api_key : str, optional
+            SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+        print_cost : bool
+            Whether to print the query cost information
+        verbose : bool
+            Whether to print additional information about the query
+
+        Returns:
+        --------
+        List[Dict]
+            A list of dictionaries containing the query results
+
+        Raises:
+        -------
+        ValueError
+            If API key is missing or invalid
+        Exception
+            For API errors or other issues
+        """
+
+        return get_information_table(
+            table_type=table_type,
+            columns=columns,
+            name_of_issuer=name_of_issuer,
+            title_of_class=title_of_class,
+            cusip=cusip,
+            value=value,
+            ssh_prnamt=ssh_prnamt,
+            ssh_prnamt_type=ssh_prnamt_type,
+            investment_discretion=investment_discretion,
+            voting_authority_sole=voting_authority_sole,
+            voting_authority_shared=voting_authority_shared,
+            voting_authority_none=voting_authority_none,
+            reporting_owner_cik=reporting_owner_cik,
+            put_call=put_call,
+            other_manager=other_manager,
+            figi=figi,
+            accession=accession,
+            filing_date=filing_date,
+
+            # API key handling
+            api_key=api_key,
+
+            # Additional options
+            print_cost=print_cost,
+            verbose=verbose
+        )
 
-    # LIST TUPLE SYNTAX, so e.g. value (0,100) is 0-100, while [0,100] is 0 and 100
-    def get_13fhr(reportingOwnerCIK,nameOfIssuer,titleOfClass,cusip,value,
-        shrsOrPrnAmt_sshPrnamt,shrsOrPrnAmt_sshPrnamtType,investmentDiscretion,otherManager,
-        votingAuthority_Sole,
-        votingAuthority_Shared,
-        votingAuthority_None,
-        filing_date):
-        pass
+    def download_information_table(
+        self,
+        filepath,
+        # Required parameters
+        table_type="INFORMATION_TABLE",
+
+        # Optional filtering parameters
+        columns=None,
+        name_of_issuer=None,
+        title_of_class=None,
+        cusip=None,
+        value=None,
+        ssh_prnamt=None,
+        ssh_prnamt_type=None,
+        investment_discretion=None,
+        voting_authority_sole=None,
+        voting_authority_shared=None,
+        voting_authority_none=None,
+        reporting_owner_cik=None,
+        put_call=None,
+        other_manager=None,
+        figi=None,
+        accession=None,
+        filing_date=None,
+
+        # API key handling
+        api_key=None,
+
+        # Additional options
+        print_cost=True,
+        verbose=False
+    ):
+        """
+        Query the SEC BigQuery API for 13F-HR information table data and save to CSV.
+
+        Parameters:
+        -----------
+        filepath : str
+            Path where to save the CSV file. If relative, it will be relative to the Sheet's path.
+
+        table_type : str
+            The table to query (default is "INFORMATION_TABLE")
+        columns : List[str], optional
+            Specific columns to return. If None, all columns are returned.
+
+        # Filter parameters
+        name_of_issuer, title_of_class, etc. : Various filters that can be:
+            - str: Exact match
+            - List[str]: Match any in list
+            - tuple: (min, max) range for numeric/date fields
+
+        api_key : str, optional
+            SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
+        print_cost : bool
+            Whether to print the query cost information
+        verbose : bool
+            Whether to print additional information about the query
+
+        Returns:
+        --------
+        List[Dict]
+            A list of dictionaries containing the query results
+
+        Raises:
+        -------
+        ValueError
+            If API key is missing or invalid
+        Exception
+            For API errors or other issues
+        """
+        # Get the data from the API
+        data = self.get_information_table(
+            table_type=table_type,
+            columns=columns,
+            name_of_issuer=name_of_issuer,
+            title_of_class=title_of_class,
+            cusip=cusip,
+            value=value,
+            ssh_prnamt=ssh_prnamt,
+            ssh_prnamt_type=ssh_prnamt_type,
+            investment_discretion=investment_discretion,
+            voting_authority_sole=voting_authority_sole,
+            voting_authority_shared=voting_authority_shared,
+            voting_authority_none=voting_authority_none,
+            reporting_owner_cik=reporting_owner_cik,
+            put_call=put_call,
+            other_manager=other_manager,
+            figi=figi,
+            accession=accession,
+            filing_date=filing_date,
+            api_key=api_key,
+            print_cost=print_cost,
+            verbose=verbose
+        )
+
+        # If no data returned, nothing to save
+        if not data:
+            if verbose:
+                print("No data returned from API. No file was created.")
+            return data
+
+        # Resolve filepath - if it's not absolute, make it relative to self.path
+        filepath_obj = Path(filepath)
+        if not filepath_obj.is_absolute():
+            filepath_obj = self.path / filepath_obj
+
+        # Create directory if it doesn't exist
+        os.makedirs(filepath_obj.parent, exist_ok=True)
+
+        # Get fieldnames from the first record
+        fieldnames = data[0].keys()
+
+        # Write to CSV
+        with open(filepath_obj, 'w', newline='') as csvfile:
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(data)
+
+        if verbose:
+            print(f"Saved {len(data)} records to {filepath_obj}")
+
+        return data
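
Sheet now wraps the same query: get_information_table() forwards to the seclibrary client, and download_information_table() writes the result to CSV, resolving relative paths against the Sheet's directory. A sketch with illustrative arguments:

    from datamule.sheet import Sheet

    sheet = Sheet('data')  # illustrative Sheet directory
    sheet.download_information_table(
        'apple_13f.csv',                   # written under data/
        cusip='037833100',                 # illustrative CUSIP
        filing_date=('2024-01-01', None),  # open-ended date range
    )
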
datamule/submission.py CHANGED
@@ -14,6 +14,7 @@ class Submission:
         if sgml_content is not None:
             self.path = None
             self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
+            self.documents = []
 
             for idx,doc in enumerate(self.metadata['documents']):
                 type = doc.get('type')
@@ -23,7 +24,7 @@ class Submission:
                     continue
                 filename = doc.get('filename')
                 extension = Path(filename).suffix
-                self.documents = [Document(type=type, content=raw_documents[idx], extension=extension)]
+                self.documents.append(Document(type=type, content=raw_documents[idx], extension=extension))
 
 
         if path is not None:
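
This fixes a bug in in-memory SGML parsing: self.documents was reassigned on every loop iteration, so only the last document survived. It is now initialized once and appended to, so a multi-document submission exposes all of its documents, e.g. (sgml_text below stands for a full SGML submission string):

    from datamule.submission import Submission

    submission = Submission(sgml_content=sgml_text)
    for document in submission.documents:
        print(document.type, document.extension)
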
datamule-1.1.7.dist-info/METADATA → datamule-1.1.8.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.1.7
+Version: 1.1.8
 Summary: Making it easier to use SEC filings.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
datamule-1.1.7.dist-info/RECORD → datamule-1.1.8.dist-info/RECORD RENAMED
@@ -1,11 +1,11 @@
 datamule/__init__.py,sha256=l6YlwT5EeRxPlCtO5Jd4I8l266rSRUJyfFe97cRtSCM,991
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
-datamule/document.py,sha256=7FBmjWJJfdKrbQ4UH4J8It7W5GEWTFFEUfQdODUrYlQ,10160
+datamule/document.py,sha256=qShyVKHQ1nSCNvSfrhAOMVXprOd1br1rFKLy52S9WnE,22007
 datamule/helper.py,sha256=xgOVnea-lUlQ5I-U0vYUp0VeKPNZehNhqjJvegA3lYE,3342
 datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
-datamule/portfolio.py,sha256=ECevaiF8P6v4mJ7W9IM4hRKNF0GGdQzc1SzBWLnG2qQ,7082
-datamule/sheet.py,sha256=FF0JL8BuAZ7Sd_LY_-sCGJuYlhm3sKgj2jlHUGMjeUQ,1406
-datamule/submission.py,sha256=zWCnucjmfTYcr1Hm9Us-TjGLjWAHuRPtIyaVpLNvs4c,4427
+datamule/portfolio.py,sha256=yWt5gYTjV7rJsLiPUmhc6Vmr3lfvfCR5MSpLQ_6Gdp4,7104
+datamule/sheet.py,sha256=QaArtx7LpT7bwyteelJV67C-lK0RjQbGS3ka7ftdi8w,7978
+datamule/submission.py,sha256=LI7Zr60YbE_tU-v2N09k2dGjfztSgplKZACT3eRUkFE,4463
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
 datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
@@ -18,7 +18,7 @@ datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,1
 datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/submissions/downloader.py,sha256=IB08W8-lQD5Bb0LgzrTN4Xi4HsCw24DybRLHqE1AUrU,3290
 datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
-datamule/sec/submissions/monitor.py,sha256=F24I9yn1k8ggbCJQ-Vk7go_qJHlpkBzVKFYKDs_CWLs,5287
+datamule/sec/submissions/monitor.py,sha256=Im2kgnUehhTgyY2Vq3uk07n4Vkj4PjII_SsRDi8ehAE,5384
 datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
 datamule/sec/submissions/textsearch.py,sha256=-a5yIrrxxtaK10IJeywFmXuJmSndYL9VKm4SC4I9JAs,5808
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,9 +27,10 @@ datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr
 datamule/sec/xbrl/streamcompanyfacts.py,sha256=WyJIwuy5mNMXWpx_IkhFzDMe9MOfQ-vNkWl_JzBzFmc,3323
 datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamule/seclibrary/bq.py,sha256=C6kafFXWtm-MUjf70H1wTtpwv1Rxpcbk-Kfy8fkBPfo,6469
 datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.1.7.dist-info/METADATA,sha256=gIryya087eiyvgFA5S5vf2s_wKDxaV3ZEAJA7-W4kS8,512
-datamule-1.1.7.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-datamule-1.1.7.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
-datamule-1.1.7.dist-info/RECORD,,
+datamule-1.1.8.dist-info/METADATA,sha256=8HRRMz6l928E5tuHXkPi1_Kf-8nfPSjWQnnfReSxdPM,512
+datamule-1.1.8.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+datamule-1.1.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.1.8.dist-info/RECORD,,