datamule 1.1.8__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. datamule-1.2.0/PKG-INFO +20 -0
  2. {datamule-1.1.8 → datamule-1.2.0}/datamule/document.py +110 -117
  3. datamule-1.2.0/datamule/seclibrary/bq.py +528 -0
  4. datamule-1.2.0/datamule/sheet.py +672 -0
  5. datamule-1.2.0/datamule.egg-info/PKG-INFO +20 -0
  6. {datamule-1.1.8 → datamule-1.2.0}/setup.py +1 -1
  7. datamule-1.1.8/PKG-INFO +0 -6
  8. datamule-1.1.8/datamule/seclibrary/bq.py +0 -191
  9. datamule-1.1.8/datamule/sheet.py +0 -248
  10. datamule-1.1.8/datamule.egg-info/PKG-INFO +0 -6
  11. {datamule-1.1.8 → datamule-1.2.0}/datamule/__init__.py +0 -0
  12. {datamule-1.1.8 → datamule-1.2.0}/datamule/config.py +0 -0
  13. {datamule-1.1.8 → datamule-1.2.0}/datamule/helper.py +0 -0
  14. {datamule-1.1.8 → datamule-1.2.0}/datamule/index.py +0 -0
  15. {datamule-1.1.8 → datamule-1.2.0}/datamule/mapping_dicts/__init__.py +0 -0
  16. {datamule-1.1.8 → datamule-1.2.0}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  17. {datamule-1.1.8 → datamule-1.2.0}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  18. {datamule-1.1.8 → datamule-1.2.0}/datamule/portfolio.py +0 -0
  19. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/__init__.py +0 -0
  20. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/infrastructure/__init__.py +0 -0
  21. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  22. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/rss/__init__.py +0 -0
  23. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/rss/monitor.py +0 -0
  24. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/submissions/__init__.py +0 -0
  25. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/submissions/downloader.py +0 -0
  26. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/submissions/eftsquery.py +0 -0
  27. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/submissions/monitor.py +0 -0
  28. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/submissions/streamer.py +0 -0
  29. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/submissions/textsearch.py +0 -0
  30. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/utils.py +0 -0
  31. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/xbrl/__init__.py +0 -0
  32. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  33. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  34. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  35. {datamule-1.1.8 → datamule-1.2.0}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  36. {datamule-1.1.8 → datamule-1.2.0}/datamule/seclibrary/__init__.py +0 -0
  37. {datamule-1.1.8 → datamule-1.2.0}/datamule/seclibrary/downloader.py +0 -0
  38. {datamule-1.1.8 → datamule-1.2.0}/datamule/seclibrary/query.py +0 -0
  39. {datamule-1.1.8 → datamule-1.2.0}/datamule/submission.py +0 -0
  40. {datamule-1.1.8 → datamule-1.2.0}/datamule.egg-info/SOURCES.txt +0 -0
  41. {datamule-1.1.8 → datamule-1.2.0}/datamule.egg-info/dependency_links.txt +0 -0
  42. {datamule-1.1.8 → datamule-1.2.0}/datamule.egg-info/requires.txt +0 -0
  43. {datamule-1.1.8 → datamule-1.2.0}/datamule.egg-info/top_level.txt +0 -0
  44. {datamule-1.1.8 → datamule-1.2.0}/setup.cfg +0 -0
datamule-1.2.0/PKG-INFO
@@ -0,0 +1,20 @@
+Metadata-Version: 2.1
+Name: datamule
+Version: 1.2.0
+Summary: Making it easier to use SEC filings.
+Home-page: https://github.com/john-friedman/datamule-python
+Author: John Friedman
+Requires-Dist: aiohttp
+Requires-Dist: aiolimiter
+Requires-Dist: tqdm
+Requires-Dist: requests
+Requires-Dist: nest_asyncio
+Requires-Dist: aiofiles
+Requires-Dist: polars
+Requires-Dist: setuptools
+Requires-Dist: selectolax
+Requires-Dist: pytz
+Requires-Dist: zstandard
+Requires-Dist: doc2dict
+Requires-Dist: secsgml
+Requires-Dist: lxml
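The new metadata declares unpinned runtime dependencies. To confirm what an installed copy actually declares, the standard library exposes this metadata directly — a minimal sketch, assuming datamule is installed in the current environment:

    # Minimal sketch: read the installed distribution's metadata shown in the
    # PKG-INFO diff above. Assumes `pip install datamule` has already run.
    from importlib.metadata import metadata, requires

    meta = metadata("datamule")
    print(meta["Name"], meta["Version"])   # datamule 1.2.0 (after upgrading)
    print(requires("datamule"))            # ['aiohttp', 'aiolimiter', 'tqdm', ...]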
{datamule-1.1.8 → datamule-1.2.0}/datamule/document.py
@@ -129,85 +129,123 @@ class Document:
             json.dump(self.data, f, indent=2)
 
     def to_tabular(self, accession_number=None):
+        """
+        Convert the document to a tabular format suitable for CSV output.
+
+        Args:
+            accession_number: Optional accession number to include in the output
+
+        Returns:
+            list: List of dictionaries, each representing a row in the tabular output
+        """
         self.parse()
-
-        if self.type == "INFORMATION TABLE":
-            info_table = self.data['informationTable']['infoTable']
-            if isinstance(info_table, dict):
-                info_table = [info_table]
-
-            flattened = self._flatten_dict(info_table)
-
-            # Original field names
-            original_columns = [
-                "nameOfIssuer", "titleOfClass", "cusip", "value",
-                "shrsOrPrnAmt_sshPrnamt", "shrsOrPrnAmt_sshPrnamtType",
-                "investmentDiscretion", "votingAuthority_Sole",
-                "votingAuthority_Shared", "votingAuthority_None",
-                "reportingOwnerCIK", "putCall", "otherManager", 'figi'
-            ]
-
-            # Define mapping from original to camelCase field names
-            field_mapping = {
-                "shrsOrPrnAmt_sshPrnamt": "sshPrnamt",
-                "shrsOrPrnAmt_sshPrnamtType": "sshPrnamtType",
-                "votingAuthority_Sole": "votingAuthoritySole",
-                "votingAuthority_Shared": "votingAuthorityShared",
-                "votingAuthority_None": "votingAuthorityNone"
-            }
+
+        # Common function to normalize and process dictionaries
+        def process_records(records, mapping_dict, is_derivative=None):
+            """
+            Process records into a standardized tabular format
+
+            Args:
+                records: List or single dictionary of records to process
+                mapping_dict: Dictionary mapping source keys to target keys
+                is_derivative: Boolean flag for derivative securities (or None if not applicable)
 
-            # Create the new expected columns list with mapped field names
-            expected_columns = []
-            for column in original_columns:
-                if column in field_mapping:
-                    expected_columns.append(field_mapping[column])
-                else:
-                    expected_columns.append(column)
+            Returns:
+                list: Processed records in tabular format
+            """
+            # Convert single dict to list for uniform processing
+            if isinstance(records, dict):
+                records = [records]
 
-            # Process each item in the flattened data
+            # Flatten nested dictionaries
+            flattened = self._flatten_dict(records)
+
+            # Process each record
+            result = []
             for item in flattened:
-                # Remove newlines from items
+                # Normalize whitespace in all string values
                 for key in item:
                     if isinstance(item[key], str):
                         item[key] = re.sub(r'\s+', ' ', item[key])
+
+                # Map keys according to the mapping dictionary
+                mapped_item = {}
+                for old_key, value in item.items():
+                    target_key = mapping_dict.get(old_key, old_key)
+                    mapped_item[target_key] = value
 
-                new_item = {}
-                for key, value in item.items():
-                    # Apply the mapping if the key is in our mapping dictionary
-                    if key in field_mapping:
-                        new_item[field_mapping[key]] = value
-                    else:
-                        new_item[key] = value
-
-                # Update the original item with the new keys
-                item.clear()
-                item.update(new_item)
+                # Set derivative flags if applicable
+                if is_derivative is not None:
+                    mapped_item["isDerivative"] = 1 if is_derivative else 0
+                    mapped_item["isNonDerivative"] = 0 if is_derivative else 1
 
                 # Ensure all expected columns exist
-                for column in expected_columns:
-                    if column not in item:
-                        item[column] = None
-
-                item['accession'] = accession_number
-
-            # Add this block to reorder the items to match the expected order
-            ordered_columns = ["nameOfIssuer", "titleOfClass", "cusip", "value", "sshPrnamt", "sshPrnamtType",
-                               "investmentDiscretion", "votingAuthoritySole", "votingAuthorityShared", "votingAuthorityNone",
-                               "reportingOwnerCIK", "putCall", "otherManager", "figi"]
-            if accession_number is not None:
-                ordered_columns.append("accession")
+                output_columns = list(dict.fromkeys(mapping_dict.values()))
+                ordered_item = {column: mapped_item.get(column, None) for column in output_columns}
 
-            ordered_data = []
-            for item in flattened:
-                ordered_item = {column: item.get(column, None) for column in ordered_columns}
-                ordered_data.append(ordered_item)
+                # Add accession number if provided
+                if accession_number is not None:
+                    ordered_item['accession'] = accession_number
+
+                result.append(ordered_item)
 
-            return ordered_data
+            return result
 
+        # Handle different document types
+        if self.type == "INFORMATION TABLE":
+            # Information Table mapping dictionary
+            info_table_mapping = {
+                "nameOfIssuer": "nameOfIssuer",
+                "titleOfClass": "titleOfClass",
+                "cusip": "cusip",
+                "value": "value",
+                "shrsOrPrnAmt_sshPrnamt": "sshPrnamt",
+                "shrsOrPrnAmt_sshPrnamtType": "sshPrnamtType",
+                "investmentDiscretion": "investmentDiscretion",
+                "votingAuthority_Sole": "votingAuthoritySole",
+                "votingAuthority_Shared": "votingAuthorityShared",
+                "votingAuthority_None": "votingAuthorityNone",
+                "reportingOwnerCIK": "reportingOwnerCIK",
+                "putCall": "putCall",
+                "otherManager": "otherManager",
+                "figi": "figi"
+            }
+
+            # Process the information table
+            info_table = self.data['informationTable']['infoTable']
+            return process_records(info_table, info_table_mapping)
+
+        elif self.type == "PROXY VOTING RECORD":
+            # Proxy voting record mapping dictionary
+            proxy_mapping = {
+                'meetingDate': 'meetingDate',
+                'isin': 'isin',
+                'cusip': 'cusip',
+                'issuerName': 'issuerName',
+                'voteDescription': 'voteDescription',
+                'sharesOnLoan': 'sharesOnLoan',
+                'vote_voteRecord_sharesVoted': 'sharesVoted',
+                'voteCategories_voteCategory_categoryType': 'voteCategory',
+                'vote_voteRecord': 'voteRecord',
+                'sharesVoted': 'sharesVoted',
+                'voteSource': 'voteSource',
+                'vote_voteRecord_howVoted': 'howVoted',
+                'figi': 'figi',
+                'vote_voteRecord_managementRecommendation': 'managementRecommendation'
+            }
+
+            # Process proxy voting records if they exist
+            all_results = []
+            if 'proxyVoteTable' in self.data and 'proxyTable' in self.data['proxyVoteTable'] and self.data['proxyVoteTable']['proxyTable'] is not None:
+                proxy_records = self.data['proxyVoteTable']['proxyTable']
+                proxy_results = process_records(proxy_records, proxy_mapping)
+                all_results.extend(proxy_results)
+
+            return all_results
+
         elif self.type in ["3", "4", "5"]:
-            # Master mapping dictionary - includes all possible fields
-            # The order of this dictionary will determine the output column order
-            master_mapping_dict = {
+            # Forms 3, 4, 5 mapping dictionary
+            form_345_mapping = {
                 # Flag fields (will be set programmatically)
                 "isDerivative": "isDerivative",
                 "isNonDerivative": "isNonDerivative",
@@ -270,45 +308,6 @@ class Document:
                 "underlyingSecurity_underlyingSecurityValue_footnote": "underlyingSecurityValueFootnote"
             }
 
-            # Get the unique target column names in order from the mapping dictionary
-            output_columns = []
-            for _, target_key in master_mapping_dict.items():
-                if target_key not in output_columns:
-                    output_columns.append(target_key)
-
-            # Process function that handles any table type
-            def process_table(table_data, is_derivative):
-                if isinstance(table_data, dict):
-                    table_data = [table_data]
-
-                flattened = self._flatten_dict(table_data)
-
-                # Apply mapping to the flattened data and ensure all expected columns are present
-                mapped_data = []
-                for item in flattened:
-                    mapped_item = {}
-                    # First, apply the mapping
-                    for old_key, value in item.items():
-                        target_key = master_mapping_dict.get(old_key, old_key)
-                        mapped_item[target_key] = value
-
-                    # Set the derivative/non-derivative flags
-                    mapped_item["isDerivative"] = 1 if is_derivative else 0
-                    mapped_item["isNonDerivative"] = 0 if is_derivative else 1
-
-                    # Create a new ordered dictionary with all columns
-                    ordered_item = {}
-                    for column in output_columns:
-                        ordered_item[column] = mapped_item.get(column, None)
-
-                    # Add accession_number if available
-                    if accession_number is not None:
-                        ordered_item['accession_number'] = accession_number
-
-                    mapped_data.append(ordered_item)
-
-                return mapped_data
-
             # Results container
             all_results = []
 
@@ -316,39 +315,33 @@ class Document:
             if 'nonDerivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['nonDerivativeTable'] is not None:
                 if 'nonDerivativeTransaction' in self.data['ownershipDocument']['nonDerivativeTable']:
                     non_deriv_trans = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeTransaction']
-                    non_deriv_results = process_table(non_deriv_trans, is_derivative=False)
+                    non_deriv_results = process_records(non_deriv_trans, form_345_mapping, is_derivative=False)
                     all_results.extend(non_deriv_results)
 
                 # Process non-derivative holdings (for Form 3)
                 if 'nonDerivativeHolding' in self.data['ownershipDocument']['nonDerivativeTable']:
                     non_deriv_hold = self.data['ownershipDocument']['nonDerivativeTable']['nonDerivativeHolding']
-                    non_deriv_hold_results = process_table(non_deriv_hold, is_derivative=False)
+                    non_deriv_hold_results = process_records(non_deriv_hold, form_345_mapping, is_derivative=False)
                     all_results.extend(non_deriv_hold_results)
 
             # Process derivative transactions if they exist
             if 'derivativeTable' in self.data['ownershipDocument'] and self.data['ownershipDocument']['derivativeTable'] is not None:
                 if 'derivativeTransaction' in self.data['ownershipDocument']['derivativeTable']:
                     deriv_trans = self.data['ownershipDocument']['derivativeTable']['derivativeTransaction']
-                    deriv_results = process_table(deriv_trans, is_derivative=True)
+                    deriv_results = process_records(deriv_trans, form_345_mapping, is_derivative=True)
                     all_results.extend(deriv_results)
 
                 # Process derivative holdings (for Form 3)
                 if 'derivativeHolding' in self.data['ownershipDocument']['derivativeTable']:
                     deriv_hold = self.data['ownershipDocument']['derivativeTable']['derivativeHolding']
-                    deriv_hold_results = process_table(deriv_hold, is_derivative=True)
+                    deriv_hold_results = process_records(deriv_hold, form_345_mapping, is_derivative=True)
                     all_results.extend(deriv_hold_results)
-
-            # check if any rows not in the mapping dict, raise error if so
-            for item in all_results:
-                for key in item.keys():
-                    if key not in master_mapping_dict.values() and key != 'accession_number':
-                        raise ValueError(f"Key '{key}' not found in mapping dictionary")
-
 
             return all_results
-        else:
-            raise ValueError("sorry, rejigging conversion to tabular format")
 
+        else:
+            raise ValueError(f"Document type '{self.type}' is not supported for tabular conversion")
+
     def write_csv(self, output_filename, accession_number=None):
 
         data = self.to_tabular(accession_number)
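Taken together, the new code paths give to_tabular uniform row output across 13F information tables, N-PX proxy voting records, and Forms 3/4/5, with write_csv layered on top. A hedged usage sketch (how doc is constructed is not shown in this diff and is assumed here; the accession number is a made-up placeholder):

    # Sketch only: assumes `doc` is a datamule Document of a supported type,
    # e.g. an INFORMATION TABLE from a 13F filing.
    rows = doc.to_tabular(accession_number="0001234567-25-000001")
    print(rows[0]["cusip"], rows[0]["sshPrnamt"])

    doc.write_csv("information_table.csv", accession_number="0001234567-25-000001")

    # Unsupported types now fail loudly with a descriptive message:
    #   ValueError: Document type '10-K' is not supported for tabular conversion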