datamule 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. datamule/document/document.py +33 -18
  2. datamule/document/tables/tables.py +129 -0
  3. datamule/document/{mappings/thirteenfhr.py → tables/tables_13fhr.py} +8 -4
  4. datamule/document/{mappings/twentyfivense.py → tables/tables_25nse.py} +7 -2
  5. datamule/document/{mappings/information_table.py → tables/tables_informationtable.py} +7 -3
  6. datamule/document/{mappings/npx.py → tables/tables_npx.py} +7 -0
  7. datamule/document/{mappings/ownership.py → tables/tables_ownership.py} +37 -9
  8. datamule/document/{mappings/proxy_voting_record.py → tables/tables_proxyvotingrecord.py} +7 -0
  9. datamule/document/{mappings/sbsef.py → tables/tables_sbsef.py} +7 -0
  10. datamule/document/{mappings/sdr.py → tables/tables_sdr.py} +7 -0
  11. datamule/document/tables/utils.py +26 -0
  12. datamule/submission.py +50 -15
  13. {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/METADATA +1 -1
  14. {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/RECORD +17 -32
  15. datamule/document/mappings/atsn.py +0 -208
  16. datamule/document/mappings/cfportal.py +0 -346
  17. datamule/document/mappings/d.py +0 -125
  18. datamule/document/mappings/ex102_abs.py +0 -63
  19. datamule/document/mappings/ex99a_sdr.py +0 -1
  20. datamule/document/mappings/ex99c_sdr.py +0 -0
  21. datamule/document/mappings/ex99g_sdr.py +0 -0
  22. datamule/document/mappings/ex99i_sdr.py +0 -0
  23. datamule/document/mappings/nmfp.py +0 -275
  24. datamule/document/mappings/onefourtyfour.py +0 -68
  25. datamule/document/mappings/sbs.py +0 -0
  26. datamule/document/mappings/schedule13.py +0 -117
  27. datamule/document/mappings/submission_metadata.py +0 -9
  28. datamule/document/mappings/ta.py +0 -0
  29. datamule/document/mappings/twentyfourf2nt.py +0 -100
  30. datamule/document/processing.py +0 -732
  31. datamule/document/table.py +0 -315
  32. /datamule/document/{mappings → tables}/__init__.py +0 -0
  33. {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/WHEEL +0 -0
  34. {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/top_level.txt +0 -0
@@ -1,732 +0,0 @@
1
- from .table import Table
2
- from warnings import warn
3
def safe_get(d, keys, default=None):
    """Follow ``keys`` down through nested dicts and return the value reached.

    ``default`` is returned as soon as a step lands on something that is not
    a dict, or on a dict that lacks the next key. An empty ``keys`` sequence
    returns ``d`` unchanged.
    """
    node = d
    for step in keys:
        if not isinstance(node, dict) or step not in node:
            return default
        node = node[step]
    return node
12
-
13
def process_tabular_data(self):
    """Build the tables for this document, selected by ``self.type``.

    The matching ``process_*`` builder is run on ``self.data`` and
    ``self.accession`` (on ``self.content`` for the 'submission_metadata'
    type), then ``map_data()`` is called on every table it returned.

    Returns:
        The builder's table list, or ``[]`` after emitting a warning when no
        builder exists for ``self.type``.
    """
    form_type = self.type
    use_content = False

    # Each builder is named only inside its own branch, so an unsupported
    # type never touches a builder nor reads self.data.
    if form_type in ("3", "4", "5", "3/A", "4/A", "5/A"):
        builder = process_ownership
    elif form_type in ("13F-HR", "13F-HR/A", "13F-NT", "13F-NT/A"):
        builder = process_13fhr
    elif form_type in ("INFORMATION TABLE",):
        builder = process_information_table
    elif form_type in ("25-NSE", "25-NSE/A"):
        builder = process_25nse
    elif form_type in ("EX-102",):
        builder = process_ex102_abs
    elif form_type in ("D", "D/A"):
        builder = process_d
    elif form_type in ("N-PX", "N-PX/A"):
        builder = process_npx
    elif form_type in ("SBSEF", "SBSEF/A", "SBSEF-V", "SBSEF-W"):
        builder = process_sbsef
    elif form_type in ("SDR", "SDR/A", "SDR-W", "SDR-A"):
        builder = process_sdr_header_data
    elif form_type in ("EX-99.C SDR",):
        builder = process_ex_99c_sdr
    elif form_type in ("EX-99.A SDR SUMMARY",):
        builder = process_ex_99a_summary_sdr
    elif form_type in ("EX-99.G SDR",):
        builder = process_ex_99g_summary_sdr
    elif form_type in ("EX-99.I SDR SUMMARY",):
        builder = process_ex_99i_summary_sdr
    elif form_type in ("144", "144/A"):
        builder = process_144
    elif form_type in ("24F-2NT", "24F-2NT/A"):
        builder = process_24f2nt
    elif form_type in ("ATS-N", "ATS-N/A"):
        builder = process_ats
    elif form_type in ("CFPORTAL", "CFPORTAL/A", "CFPORTAL-W"):
        builder = process_cfportal
    elif form_type in ("X-17A-5", "X-17A-5/A"):
        builder = process_x17a5
    elif form_type in ("SCHEDULE 13D", "SCHEDULE 13D/A",
                       "SCHEDULE 13G", "SCHEDULE 13G/A"):
        builder = process_schedule_13
    elif form_type in ("1-A", "1-A/A", "1-A POS", "1-K", "1-K/A", "1-Z", "1-Z/A"):
        builder = process_reg_a
    elif form_type == "PROXY VOTING RECORD":
        builder = process_proxy_voting_record
    elif form_type == 'submission_metadata':
        builder = process_submission_metadata
        use_content = True
    else:
        # NOTE: the C, MA, N-CEN, N-MFP, NPORT-P, TA and SBSE form families
        # have no active builder yet and also end up here.
        warn(f"Processing for {form_type} is not implemented yet.")
        return []

    payload = self.content if use_content else self.data
    tables = builder(payload, self.accession)

    if tables is not None:
        # map_data() is called for its side effect only, so use a plain loop
        # instead of building a throwaway list.
        for table in tables:
            table.map_data()

    return tables
90
-
91
- def _flatten_dict(d, parent_key=''):
92
- items = {}
93
-
94
- if isinstance(d, list):
95
- return [_flatten_dict(item) for item in d]
96
-
97
- for k, v in d.items():
98
- new_key = f"{parent_key}_{k}" if parent_key else k
99
-
100
- if isinstance(v, dict):
101
- items.update(_flatten_dict(v, new_key))
102
- else:
103
- items[new_key] = str(v)
104
-
105
- return items
106
-
107
- # flattens in a different way
108
def flatten_dict_to_rows(d, parent_key='', sep='_'):
    """Expand a nested dict/list structure into a list of flat row dicts.

    Nested dict keys are joined with ``sep``. A non-empty list value fans
    every row built so far out into one row per list element (per flattened
    row for dict elements); an empty list becomes ``''``. Scalar values are
    stored unchanged on every row.

    A list input returns the concatenated rows of its elements. Any other
    non-dict input returns ``[{parent_key: d}]``, or ``[]`` when there is no
    ``parent_key`` to name the column.
    """
    if isinstance(d, list):
        return [
            row
            for element in d
            for row in flatten_dict_to_rows(element, parent_key, sep)
        ]

    if not isinstance(d, dict):
        return [{parent_key: d}] if parent_key else []

    rows = [{}]
    for name, value in d.items():
        column = f"{parent_key}{sep}{name}" if parent_key else name

        if isinstance(value, dict):
            fragments = flatten_dict_to_rows(value, column, sep)
        elif isinstance(value, list) and value:
            fragments = []
            for element in value:
                if isinstance(element, dict):
                    fragments.extend(flatten_dict_to_rows(element, column, sep))
                else:
                    fragments.append({column: element})
        else:
            # Scalar or empty list: same cell on every row, no fan-out.
            cell = '' if isinstance(value, list) else value
            for row in rows:
                row[column] = cell
            continue

        # Cross-product of the rows so far with the new fragments.
        rows = [{**row, **fragment} for row in rows for fragment in fragments]

    return rows
167
-
168
def process_ownership(data, accession):
    """Build the tables for an ownership document (``data['ownershipDocument']``).

    Emits, in order: the holding/transaction tables found under
    ``nonDerivativeTable`` and ``derivativeTable``, a 'metadata_ownership'
    table of top-level scalar fields, then the reporting-owner and
    owner-signature tables when those nodes are present.

    Returns:
        List of Table objects; empty when there is no usable
        ``ownershipDocument`` node.
    """
    tables = []
    ownership_doc = safe_get(data, ['ownershipDocument'])
    if not isinstance(ownership_doc, dict):
        return tables

    # (container key, entry key, table type). safe_get tolerates a container
    # that is present but not a dict, which used to raise TypeError here.
    nested_sections = (
        ('nonDerivativeTable', 'nonDerivativeHolding', 'non_derivative_holding_ownership'),
        ('nonDerivativeTable', 'nonDerivativeTransaction', 'non_derivative_transaction_ownership'),
        ('derivativeTable', 'derivativeHolding', 'derivative_holding_ownership'),
        ('derivativeTable', 'derivativeTransaction', 'derivative_transaction_ownership'),
    )
    for container_key, entry_key, table_type in nested_sections:
        entries = safe_get(ownership_doc, [container_key, entry_key])
        if entries:
            tables.append(Table(_flatten_dict(entries), table_type, accession))

    metadata_fields = (
        'schemaVersion',
        'documentType',
        'periodOfReport',
        'dateOfOriginalSubmission',
        'noSecuritiesOwned',
        'notSubjectToSection16',
        'form3HoldingsReported',
        'form4TransactionsReported',
        'aff10b5One',
        'remarks',
    )
    metadata_table_dict = {field: ownership_doc.get(field, None) for field in metadata_fields}
    tables.append(Table(data=metadata_table_dict, type='metadata_ownership', accession=accession))

    for node_key, table_type in (
        ('reportingOwner', 'reporting_owner_ownership'),
        ('ownerSignature', 'owner_signature_ownership'),
    ):
        node = ownership_doc.get(node_key)
        # A key that is present with a None value cannot be flattened; skip it.
        if node is not None:
            tables.append(Table(_flatten_dict(node), table_type, accession))

    return tables
210
-
211
def process_information_table(data, accession):
    """Return the 'information_table' table built from ``informationTable.infoTable``.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    info_rows = safe_get(data, ['informationTable', 'infoTable'])
    if not info_rows:
        return []
    return [Table(_flatten_dict(info_rows), 'information_table', accession)]
217
-
218
def process_13fhr(data, accession):
    """Return the '13fhr' table built from the ``edgarSubmission`` node.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), '13fhr', accession)]
224
-
225
def process_sbsef(data, accession):
    """Return the 'sbsef' table built from the ``edgarSubmission`` node.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), 'sbsef', accession)]
231
-
232
def process_sdr_header_data(data, accession):
    """Return the 'sdr' table built from the ``edgarSubmission`` node.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), 'sdr', accession)]
238
-
239
def process_ex_99c_sdr(data, accession):
    """Return the 'ex99c_sdr' table built from ``directorGovernors.officer``.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    officers = safe_get(data, ['directorGovernors', 'officer'])
    if not officers:
        return []
    return [Table(_flatten_dict(officers), 'ex99c_sdr', accession)]
245
-
246
def process_ex_99a_summary_sdr(data, accession):
    """Return the 'ex99a_sdr' table built from ``controllingPersons.controlPerson``.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    control_persons = safe_get(data, ['controllingPersons', 'controlPerson'])
    if not control_persons:
        return []
    return [Table(_flatten_dict(control_persons), 'ex99a_sdr', accession)]
252
-
253
def process_ex_99g_summary_sdr(data, accession):
    """Return the 'ex99g_sdr' table built from ``affiliates.affiliate``.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    affiliate_rows = safe_get(data, ['affiliates', 'affiliate'])
    if not affiliate_rows:
        return []
    return [Table(_flatten_dict(affiliate_rows), 'ex99g_sdr', accession)]
259
-
260
def process_ex_99i_summary_sdr(data, accession):
    """Return the 'ex99i_sdr' table built from
    ``serviceProviderContracts.serviceProviderContract``.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    contracts = safe_get(data, ['serviceProviderContracts', 'serviceProviderContract'])
    if not contracts:
        return []
    return [Table(_flatten_dict(contracts), 'ex99i_sdr', accession)]
266
-
267
def process_144(data, accession):
    """Build the Form 144 tables from ``edgarSubmission``.

    One table is emitted per non-empty ``formData`` section (signature,
    securities sold in the past 3 months, securities to be sold, securities
    information, issuer info), followed by a 'metadata_144' table built from
    ``headerData`` with ``formData.remarks`` attached as an extra column
    when present.

    Returns:
        List of Table objects, possibly empty.
    """
    tables = []
    form_data_path = ['edgarSubmission', 'formData']

    sections = (
        ('noticeSignature', 'signatures_144'),
        ('securitiesSoldInPast3Months', 'securities_sold_in_past_3_months_144'),
        ('securitiesToBeSold', 'securities_to_be_sold_144'),
        ('securitiesInformation', 'securities_information_144'),
        ('issuerInfo', 'issuer_information_144'),
    )
    for section_key, table_name in sections:
        section_data = safe_get(data, form_data_path + [section_key])
        if section_data:
            tables.append(Table(_flatten_dict(section_data), table_name, accession))

    header_data = safe_get(data, ['edgarSubmission', 'headerData'])
    # Without this guard a filing lacking headerData crashed inside
    # _flatten_dict(None); skip the metadata table instead.
    if header_data is not None:
        metadata_table = Table(_flatten_dict(header_data), 'metadata_144', accession)
        remarks = safe_get(data, form_data_path + ['remarks'])
        if remarks:
            metadata_table.add_column('remarks', remarks)
        tables.append(metadata_table)

    return tables
298
-
299
def process_24f2nt(data, accession):
    """Build the 24F-2NT tables from ``edgarSubmission``.

    Emits the header metadata table (with ``schemaVersion`` added as a column
    when present), then one table per non-empty ``item1`` .. ``item9`` node
    and finally the signature table, all read from
    ``formData.annualFilings.annualFilingInfo``.
    """
    tables = []
    filing_info_path = ['edgarSubmission', 'formData', 'annualFilings', 'annualFilingInfo']

    header = safe_get(data, ['edgarSubmission', 'headerData'])
    if header:
        header_table = Table(_flatten_dict(header), 'metadata_24f_2nt', accession)
        version = safe_get(data, ['edgarSubmission', 'schemaVersion'])
        if version:
            header_table.add_column('schemaVersion', version)
        tables.append(header_table)

    # item1 follows the same naming scheme as items 2-9, so one loop covers all.
    for number in range(1, 10):
        item_data = safe_get(data, filing_info_path + [f'item{number}'])
        if item_data:
            tables.append(Table(_flatten_dict(item_data), f'item_{number}_24f2nt', accession))

    signature_data = safe_get(data, filing_info_path + ['signature'])
    if signature_data:
        tables.append(Table(_flatten_dict(signature_data), 'signature_24f2nt', accession))

    return tables
324
-
325
def process_25nse(data, accession):
    """Return the '25nse' table built from the ``notificationOfRemoval`` node.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    removal_notice = safe_get(data, ['notificationOfRemoval'])
    if not removal_notice:
        return []
    return [Table(_flatten_dict(removal_notice), '25nse', accession)]
331
-
332
def process_ats(data, accession):
    """Build the ATS-N tables from ``edgarSubmission``.

    Emits, in order and only for nodes that are present and truthy: the
    header metadata table, the cover table, and one table for each of
    ``partOne`` through ``partFour`` under ``formData``.
    """
    form_data_path = ['edgarSubmission', 'formData']
    # (path to the node, table type), in output order.
    layout = [
        (['edgarSubmission', 'headerData'], 'metadata_ats'),
        (form_data_path + ['cover'], 'cover_ats'),
        (form_data_path + ['partOne'], 'part_one_ats'),
        (form_data_path + ['partTwo'], 'part_two_ats'),
        (form_data_path + ['partThree'], 'part_three_ats'),
        (form_data_path + ['partFour'], 'part_four_ats'),
    ]

    tables = []
    for path, table_name in layout:
        node = safe_get(data, path)
        if node:
            tables.append(Table(_flatten_dict(node), table_name, accession))
    return tables
359
-
360
- # def process_c(data, accession):
361
- # tables = []
362
- # header_data = safe_get(data, ['edgarSubmission', 'headerData'])
363
- # if header_data:
364
- # tables.append(Table(_flatten_dict(header_data), 'metadata_c', accession))
365
-
366
- # issuer_info = safe_get(data, ['edgarSubmission', 'formData', 'issuerInformation'])
367
- # if issuer_info:
368
- # tables.append(Table(_flatten_dict(issuer_info), 'issuer_information_c', accession))
369
-
370
- # offering_info = safe_get(data, ['edgarSubmission', 'formData', 'offeringInformation'])
371
- # if offering_info:
372
- # tables.append(Table(_flatten_dict(offering_info), 'offering_information_c', accession))
373
-
374
- # annual_report = safe_get(data, ['edgarSubmission', 'formData', 'annualReportDisclosureRequirements'])
375
- # if annual_report:
376
- # tables.append(Table(_flatten_dict(annual_report), 'annual_report_disclosure_requirements_c', accession))
377
-
378
- # signature_info = safe_get(data, ['edgarSubmission', 'formData', 'signatureInfo'])
379
- # if signature_info:
380
- # tables.append(Table(_flatten_dict(signature_info), 'signature_info_c', accession))
381
-
382
- # return tables
383
-
384
def process_cfportal(data, accession):
    """Build the CFPORTAL tables from ``edgarSubmission``.

    Emits the header metadata table first, then one table per ``formData``
    section listed below, skipping any node that is missing or falsy.
    """
    # formData key -> table type; dict order is the output order.
    section_tables = {
        'identifyingInformation': 'identifying_information_cfportal',
        'formOfOrganization': 'form_of_organization_cfportal',
        'successions': 'successions_cfportal',
        'controlRelationships': 'control_relationships_cfportal',
        'disclosureAnswers': 'disclosure_answers_cfportal',
        'nonSecuritiesRelatedBusiness': 'non_securities_related_business_cfportal',
        'escrowArrangements': 'escrow_arrangements_cfportal',
        'execution': 'execution_cfportal',
        'scheduleA': 'schedule_a_cfportal',
        'scheduleB': 'schedule_b_cfportal',
        'scheduleC': 'schedule_c_cfportal',
        'scheduleD': 'schedule_d_cfportal',
        'criminalDrpInfo': 'criminal_drip_info_cfportal',
        'regulatoryDrpInfo': 'regulatory_drip_info_cfportal',
        'civilJudicialDrpInfo': 'civil_judicial_drip_info_cfportal',
        'bankruptcySipcDrpInfo': 'bankruptcy_sipc_drip_info_cfportal',
        'bondDrpInfo': 'bond_drip_info_cfportal',
        'judgementDrpInfo': 'judgement_drip_info_cfportal',
    }

    tables = []
    header = safe_get(data, ['edgarSubmission', 'headerData'])
    if header:
        tables.append(Table(_flatten_dict(header), 'metadata_cfportal', accession))

    for form_key, table_name in section_tables.items():
        node = safe_get(data, ['edgarSubmission', 'formData', form_key])
        if node:
            tables.append(Table(_flatten_dict(node), table_name, accession))

    return tables
418
-
419
def process_d(data, accession):
    """Build the Form D tables from ``data['edgarSubmission']``.

    Each known group is split off into its own table; whatever is left of
    the submission afterwards becomes the 'metadata_d' table. The
    ``issuerList``, ``relatedPersonsList`` and ``offeringData`` groups are
    expanded with ``flatten_dict_to_rows`` (one row per list entry), the
    other groups with ``_flatten_dict``.

    The caller's ``data`` is left untouched: groups are removed from a
    shallow copy, so processing the same document twice gives the same result.

    Returns:
        List of Table objects; empty when there is no ``edgarSubmission`` dict.
    """
    tables = []
    submission = safe_get(data, ['edgarSubmission'])
    if not isinstance(submission, dict):
        return tables

    groups = [
        ('contactData', 'contact_data_d'),
        ('notificationAddressList', 'notification_address_list_d'),
        ('primaryIssuer', 'primary_issuer_d'),
        ('issuerList', 'issuer_list_d'),
        ('relatedPersonsList', 'related_persons_list_d'),
        ('offeringData', 'offering_data_d'),
    ]
    row_expanded_groups = ('relatedPersonsList', 'issuerList', 'offeringData')

    # Shallow copy: pops below must not strip groups from the caller's dict.
    remaining = dict(submission)

    for group, table_type in groups:
        # Every group is popped the same way. The former special case for
        # 'relatedPersonList' could never match a name in `groups`, and the
        # 'issuerList' case was identical to this default.
        group_data = remaining.pop(group, None)
        if not group_data:
            continue

        if group in row_expanded_groups:
            flattened_rows = flatten_dict_to_rows(group_data)
            if flattened_rows:
                tables.append(Table(flattened_rows, table_type, accession))
        else:
            tables.append(Table(_flatten_dict(group_data), table_type, accession))

    tables.append(Table(_flatten_dict(remaining), 'metadata_d', accession))

    return tables
454
-
455
- # def process_nmfp(data, accession):
456
- # tables = []
457
- # header_data = safe_get(data, ['edgarSubmission', 'headerData'])
458
- # if header_data:
459
- # tables.append(Table(_flatten_dict(header_data), 'metadata_nmfp', accession))
460
-
461
- # general_info = safe_get(data, ['edgarSubmission', 'formData', 'generalInfo'])
462
- # if general_info:
463
- # tables.append(Table(_flatten_dict(general_info), 'general_information_nmfp', accession))
464
-
465
- # series_level_info = safe_get(data, ['edgarSubmission', 'formData', 'seriesLevelInfo'])
466
- # if series_level_info:
467
- # tables.append(Table(_flatten_dict(series_level_info), 'series_level_info_nmfp', accession))
468
-
469
- # class_level_info = safe_get(data, ['edgarSubmission', 'formData', 'classLevelInfo'])
470
- # if class_level_info:
471
- # tables.append(Table(_flatten_dict(class_level_info), 'class_level_info_nmfp', accession))
472
-
473
- # portfolio_securities = safe_get(data, ['edgarSubmission', 'formData', 'scheduleOfPortfolioSecuritiesInfo'])
474
- # if portfolio_securities:
475
- # tables.append(Table(_flatten_dict(portfolio_securities), 'schedule_of_portfolio_securities_info_nmfp', accession))
476
-
477
- # signature = safe_get(data, ['edgarSubmission', 'formData', 'signature'])
478
- # if signature:
479
- # tables.append(Table(_flatten_dict(signature), 'signature_nmfp', accession))
480
-
481
- # return tables
482
-
483
- # def process_nportp(data, accession):
484
- # tables = []
485
- # header_data = safe_get(data, ['edgarSubmission', 'headerData'])
486
- # if header_data:
487
- # tables.append(Table(_flatten_dict(header_data), 'metadata_nportp', accession))
488
-
489
- # gen_info = safe_get(data, ['edgarSubmission', 'formData', 'genInfo'])
490
- # if gen_info:
491
- # tables.append(Table(_flatten_dict(gen_info), 'general_information_nportp', accession))
492
-
493
- # fund_info = safe_get(data, ['edgarSubmission', 'formData', 'fundInfo'])
494
- # if fund_info:
495
- # tables.append(Table(_flatten_dict(fund_info), 'fund_information_nportp', accession))
496
-
497
- # invst_or_secs = safe_get(data, ['edgarSubmission', 'formData', 'invstOrSecs'])
498
- # if invst_or_secs:
499
- # tables.append(Table(_flatten_dict(invst_or_secs), 'investment_or_securities_nportp', accession))
500
-
501
- # signature = safe_get(data, ['edgarSubmission', 'formData', 'signature'])
502
- # if signature:
503
- # tables.append(Table(_flatten_dict(signature), 'signature_nportp', accession))
504
-
505
- # return tables
506
-
507
def process_npx(data, accession):
    """Return the 'npx' table built from the ``edgarSubmission`` node.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), 'npx', accession)]
513
-
514
def process_proxy_voting_record(data, accession):
    """Return the 'proxy_voting_record' table built from ``proxyVoteTable.proxyTable``.

    The result is a one-element list, or an empty list when that node is
    missing or falsy.
    """
    vote_rows = safe_get(data, ['proxyVoteTable', 'proxyTable'])
    if not vote_rows:
        return []
    return [Table(_flatten_dict(vote_rows), 'proxy_voting_record', accession)]
520
-
521
- # SOMETHING IS VERY OFF HERE
522
- # def process_ta(data, accession):
523
- # tables = []
524
- # header_data = safe_get(data, ['edgarSubmission', 'headerData'])
525
- # if header_data:
526
- # metadata_ta = Table(_flatten_dict(header_data), 'metadata_ta', accession)
527
- # schema_version = safe_get(data, ['edgarSubmission', 'schemaVersion'])
528
- # if schema_version:
529
- # metadata_ta.add_column('schemaVersion', schema_version)
530
- # tables.append(metadata_ta)
531
-
532
- # registrant = safe_get(data, ['edgarSubmission', 'registrant'])
533
- # if registrant:
534
- # tables.append(Table(_flatten_dict(registrant), 'registrant_ta', accession))
535
-
536
- # independent_registrant = safe_get(data, ['edgarSubmission', 'formData', 'independentRegistrant'])
537
- # if independent_registrant:
538
- # tables.append(Table(_flatten_dict(independent_registrant), 'independent_registrant_ta', accession))
539
-
540
- # disciplinary_history = safe_get(data, ['edgarSubmission', 'formData', 'disciplinaryHistory'])
541
- # if disciplinary_history:
542
- # tables.append(Table(_flatten_dict(disciplinary_history), 'disciplinary_history_ta', accession))
543
-
544
- # signature = safe_get(data, ['edgarSubmission', 'formData', 'signature'])
545
- # if signature:
546
- # tables.append(Table(_flatten_dict(signature), 'signature_ta', accession))
547
-
548
- # return tables
549
-
550
def process_x17a5(data, accession):
    """Build the X-17A-5 tables from ``edgarSubmission``.

    Emits, in order and only for nodes that are present and truthy: the
    header metadata table, then the submission information, registrant
    identification, accountant identification and oath signature tables
    from ``formData``.
    """
    form_data_path = ['edgarSubmission', 'formData']
    # (path to the node, table type), in output order.
    layout = [
        (['edgarSubmission', 'headerData'], 'metadata_x17a5'),
        (form_data_path + ['submissionInformation'], 'submission_information_x17a5'),
        (form_data_path + ['registrantIdentification'], 'registrant_identification_x17a5'),
        (form_data_path + ['accountantIdentification'], 'accountant_identification_x17a5'),
        (form_data_path + ['oathSignature'], 'oath_signature_x17a5'),
    ]

    tables = []
    for path, table_name in layout:
        node = safe_get(data, path)
        if node:
            tables.append(Table(_flatten_dict(node), table_name, accession))
    return tables
573
-
574
def process_schedule_13(data, accession):
    """Build the Schedule 13D/13G tables from ``edgarSubmission``.

    Emits, in order and only for truthy nodes: header metadata, cover page
    header, reporting person details, one ``<key>_schedule_13`` table per
    entry of ``formData.items`` (when that node is a dict), and the
    signature information table.
    """
    tables = []
    form_data_path = ['edgarSubmission', 'formData']

    def add_table(path, table_name):
        # Append one table for the node at `path` if it holds anything.
        node = safe_get(data, path)
        if node:
            tables.append(Table(_flatten_dict(node), table_name, accession))

    add_table(['edgarSubmission', 'headerData'], 'metadata_schedule_13')
    add_table(form_data_path + ['coverPageHeader'], 'cover_schedule_13')
    add_table(
        form_data_path + ['coverPageHeaderReportingPersonDetails'],
        'reporting_person_details_schedule_13',
    )

    item_map = safe_get(data, form_data_path + ['items'])
    if isinstance(item_map, dict):
        for item_key, item_value in item_map.items():
            if item_value:
                tables.append(Table(_flatten_dict(item_value), f'{item_key}_schedule_13', accession))

    add_table(form_data_path + ['signatureInformation'], 'signature_information_schedule_13')

    return tables
599
-
600
def process_reg_a(data, accession):
    """Build the Regulation A tables from ``edgarSubmission``.

    Emits the header metadata table first, then one table per ``formData``
    section listed below, skipping any node that is missing or falsy.
    """
    # formData key -> table type; dict order is the output order.
    section_tables = {
        'employeesInfo': 'employees_info_reg_a',
        'issuerInfo': 'issuer_info_reg_a',
        'commonEquity': 'common_equity_reg_a',
        'preferredEquity': 'preferred_equity_reg_a',
        'debtSecurities': 'debt_securities_reg_a',
        'issuerEligibility': 'issuer_eligibility_reg_a',
        'applicationRule262': 'application_rule_262_reg_a',
        'summaryInfo': 'summary_info_reg_a',
        'juridictionSecuritiesOffered': 'juridiction_securities_offered_reg_a',
        'unregisteredSecurities': 'unregistered_securities_reg_a',
        'securitiesIssued': 'securities_issued_reg_a',
        'unregisteredSecuritiesAct': 'unregistered_securities_act_reg_a',
    }

    tables = []
    header = safe_get(data, ['edgarSubmission', 'headerData'])
    if header:
        tables.append(Table(_flatten_dict(header), 'metadata_reg_a', accession))

    for form_key, table_name in section_tables.items():
        node = safe_get(data, ['edgarSubmission', 'formData', form_key])
        if node:
            tables.append(Table(_flatten_dict(node), table_name, accession))

    return tables
628
-
629
- # looks good but some extra nested tables we missed
630
- # def process_sbs(data, accession):
631
- # tables = []
632
- # header_data = safe_get(data, ['edgarSubmission', 'headerData'])
633
- # if header_data:
634
- # tables.append(Table(_flatten_dict(header_data), 'metadata_sbse', accession))
635
-
636
- # applicant = safe_get(data, ['edgarSubmission', 'formData', 'applicant'])
637
- # if applicant and isinstance(applicant, dict):
638
- # for k, v in applicant.items():
639
- # if v:
640
- # tables.append(Table(_flatten_dict(v), f'applicant_{k}_sbs', accession))
641
-
642
- # base_path = ['edgarSubmission', 'formData']
643
- # sections = [
644
- # ('scheduleA', 'schedule_a_sbs'),
645
- # ('scheduleB', 'schedule_b_sbs'),
646
- # ('scheduleC', 'schedule_c_sbs'),
647
- # ('scheduleD', 'schedule_d_sbs'),
648
- # ('scheduleE', 'schedule_e_sbs'),
649
- # ('scheduleF', 'schedule_f_sbs'),
650
- # ('criminalDrpInfo', 'criminal_drip_info_sbs'),
651
- # ('regulatoryDrpInfo', 'regulatory_drip_info_sbs'),
652
- # ('civilJudicialDrpInfo', 'civil_judicial_drip_info_sbs'),
653
- # ('bankruptcySipcDrpInfo', 'bankruptcy_sipc_drip_info_sbs'),
654
- # ('execution', 'execution_sbs')
655
- # ]
656
-
657
- # for section_key, table_name in sections:
658
- # section_data = safe_get(data, base_path + [section_key])
659
- # if section_data:
660
- # tables.append(Table(_flatten_dict(section_data), table_name, accession))
661
-
662
- # return tables
663
-
664
def process_ex102_abs(data, accession):
    """Build the EX-102 asset and property tables from ``assetData.assets``.

    Two tables are produced when they have rows:

    * 'assets_ex102_absee': every asset with its ``property`` field removed.
    * 'properties_ex102_absee': every property dict, tagged with the
      ``assetNumber`` of the asset it came from. Properties stored directly
      as a dict come first, followed by those stored in lists.

    Returns:
        List of Table objects; empty when there are no assets.
    """
    tables = []
    asset_items = safe_get(data, ['assetData', 'assets'])

    # A missing node used to raise TypeError when iterated.
    if not asset_items:
        return tables
    # A lone asset may arrive as a dict rather than a list; iterating the
    # dict would yield its keys, so wrap it.
    if isinstance(asset_items, dict):
        asset_items = [asset_items]

    assets = [
        {k: v for k, v in item.items() if k != 'property'}
        for item in asset_items
    ]

    # Properties given directly as a dict.
    properties = [
        item['property'] | {'assetNumber': item['assetNumber']}
        for item in asset_items
        if 'property' in item and isinstance(item['property'], dict)
    ]

    # Properties given as a list: keep only the dict entries.
    properties.extend(
        prop | {'assetNumber': item['assetNumber']}
        for item in asset_items
        if 'property' in item and isinstance(item['property'], list)
        for prop in item['property']
        if isinstance(prop, dict)
    )

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
697
-
698
- # def process_ma(data, accession):
699
- # tables = []
700
- # header_data = safe_get(data, ['edgarSubmission', 'headerData'])
701
- # if header_data:
702
- # header_ma = Table(_flatten_dict(header_data), 'metadata_ma', accession)
703
- # tables.append(header_ma)
704
- # # WE NEED TO COMBINE TABLES
705
- # raise NotImplementedError("Need to implement the rest of the MA processing")
706
-
707
- # def process_ncen(data, accession):
708
- # raise NotImplementedError("Need to implement the N-CEN processing")
709
-
710
- # WIP
711
- # Note: going to pause this for now, as I don't have a great way of putting this in a csv.
712
def process_submission_metadata(data, accession):
    """Build tables from the top-level submission metadata nodes.

    Emits one table for each of ``documents``, ``reporting-owner`` and
    ``issuer`` that is present and truthy, in that order.
    """
    # (top-level key, table type), in output order.
    layout = (
        ('documents', 'document_submission_metadata'),
        ('reporting-owner', 'reporting_owner_submission_metadata'),
        ('issuer', 'issuer_submission_metadata'),
    )

    tables = []
    for key, table_name in layout:
        node = safe_get(data, [key])
        if node:
            tables.append(Table(_flatten_dict(node), table_name, accession))

    # TODO: construct a metadata table (accession-number,
    # date-of-filing-date-change, depositor-cik, effectiveness-date) and the
    # other tables (depositor, securitizer).

    return tables