datamule 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/document/document.py +33 -18
- datamule/document/tables/tables.py +129 -0
- datamule/document/{mappings/thirteenfhr.py → tables/tables_13fhr.py} +8 -4
- datamule/document/{mappings/twentyfivense.py → tables/tables_25nse.py} +7 -2
- datamule/document/{mappings/information_table.py → tables/tables_informationtable.py} +7 -3
- datamule/document/{mappings/npx.py → tables/tables_npx.py} +7 -0
- datamule/document/{mappings/ownership.py → tables/tables_ownership.py} +37 -9
- datamule/document/{mappings/proxy_voting_record.py → tables/tables_proxyvotingrecord.py} +7 -0
- datamule/document/{mappings/sbsef.py → tables/tables_sbsef.py} +7 -0
- datamule/document/{mappings/sdr.py → tables/tables_sdr.py} +7 -0
- datamule/document/tables/utils.py +26 -0
- datamule/submission.py +50 -15
- {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/METADATA +1 -1
- {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/RECORD +17 -32
- datamule/document/mappings/atsn.py +0 -208
- datamule/document/mappings/cfportal.py +0 -346
- datamule/document/mappings/d.py +0 -125
- datamule/document/mappings/ex102_abs.py +0 -63
- datamule/document/mappings/ex99a_sdr.py +0 -1
- datamule/document/mappings/ex99c_sdr.py +0 -0
- datamule/document/mappings/ex99g_sdr.py +0 -0
- datamule/document/mappings/ex99i_sdr.py +0 -0
- datamule/document/mappings/nmfp.py +0 -275
- datamule/document/mappings/onefourtyfour.py +0 -68
- datamule/document/mappings/sbs.py +0 -0
- datamule/document/mappings/schedule13.py +0 -117
- datamule/document/mappings/submission_metadata.py +0 -9
- datamule/document/mappings/ta.py +0 -0
- datamule/document/mappings/twentyfourf2nt.py +0 -100
- datamule/document/processing.py +0 -732
- datamule/document/table.py +0 -315
- /datamule/document/{mappings → tables}/__init__.py +0 -0
- {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/WHEEL +0 -0
- {datamule-2.0.5.dist-info → datamule-2.0.7.dist-info}/top_level.txt +0 -0
datamule/document/processing.py
DELETED
@@ -1,732 +0,0 @@
|
|
1
|
-
from .table import Table
|
2
|
-
from warnings import warn
|
3
|
-
def safe_get(d, keys, default=None):
    """Walk ``keys`` through nested dictionaries and return the value reached.

    Args:
        d: The object to start from (normally a dict).
        keys: Iterable of keys to follow, outermost first.
        default: Value returned when the walk cannot continue.

    Returns:
        The value at the end of the key path, or ``default`` as soon as the
        current node is not a dict or does not contain the next key. With an
        empty ``keys`` the starting object is returned unchanged.
    """
    node = d
    for step in keys:
        # Stop at the first level that cannot be descended into.
        if not isinstance(node, dict) or step not in node:
            return default
        node = node[step]
    return node
|
12
|
-
|
13
|
-
def process_tabular_data(self):
    """Build the tables for this document according to its form type.

    Reads ``self.type`` to pick a ``process_*`` builder, hands it
    ``self.data`` (``self.content`` for ``'submission_metadata'``) together
    with ``self.accession``, then calls ``map_data()`` on every table the
    builder returned.

    Returns:
        The list produced by the selected builder, or an empty list (after
        emitting a warning) when no builder exists for ``self.type``.
    """
    # NOTE: builders for the C*, MA*, N-CEN, N-MFP*, NPORT-P, TA-* and SBSE*
    # form families are currently disabled, so those types fall through to
    # the "not implemented" warning at the end of the chain.
    if self.type in ["3","4","5","3/A","4/A","5/A"]:
        tables = process_ownership(self.data, self.accession)
    elif self.type in ["13F-HR", "13F-HR/A","13F-NT", "13F-NT/A"]:
        tables = process_13fhr(self.data, self.accession)
    elif self.type in ["INFORMATION TABLE"]:
        tables = process_information_table(self.data, self.accession)
    elif self.type in ["25-NSE", "25-NSE/A"]:
        tables = process_25nse(self.data, self.accession)
    elif self.type in ["EX-102"]:
        tables = process_ex102_abs(self.data, self.accession)
    elif self.type in ["D","D/A"]:
        tables = process_d(self.data, self.accession)
    elif self.type in ["N-PX","N-PX/A"]:
        tables = process_npx(self.data, self.accession)
    elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
        tables = process_sbsef(self.data, self.accession)
    elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
        tables = process_sdr_header_data(self.data, self.accession)
    elif self.type in ["EX-99.C SDR"]:
        tables = process_ex_99c_sdr(self.data, self.accession)
    elif self.type in ["EX-99.A SDR SUMMARY"]:
        tables = process_ex_99a_summary_sdr(self.data, self.accession)
    elif self.type in ["EX-99.G SDR"]:
        tables = process_ex_99g_summary_sdr(self.data, self.accession)
    elif self.type in ["EX-99.I SDR SUMMARY"]:
        tables = process_ex_99i_summary_sdr(self.data, self.accession)
    elif self.type in ["144", "144/A"]:
        tables = process_144(self.data, self.accession)
    elif self.type in ["24F-2NT", "24F-2NT/A"]:
        tables = process_24f2nt(self.data, self.accession)
    elif self.type in ["ATS-N", "ATS-N/A"]:
        tables = process_ats(self.data, self.accession)
    elif self.type in ["CFPORTAL","CFPORTAL/A","CFPORTAL-W"]:
        tables = process_cfportal(self.data, self.accession)
    elif self.type in ["X-17A-5","X-17A-5/A"]:
        tables = process_x17a5(self.data, self.accession)
    elif self.type in ["SCHEDULE 13D","SCHEDULE 13D/A",
                       "SCHEDULE 13G","SCHEDULE 13G/A"]:
        tables = process_schedule_13(self.data, self.accession)
    elif self.type in ["1-A","1-A/A","1-A POS","1-K","1-K/A","1-Z","1-Z/A"]:
        tables = process_reg_a(self.data, self.accession)
    elif self.type == "PROXY VOTING RECORD":
        tables = process_proxy_voting_record(self.data, self.accession)
    elif self.type == 'submission_metadata':
        # The only branch that reads self.content instead of self.data.
        tables = process_submission_metadata(self.content, self.accession)
    else:
        warn(f"Processing for {self.type} is not implemented yet.")
        return []

    if tables is not None:
        # map_data() is called for its side effect only, so use a plain loop
        # rather than building a throwaway list.
        for table in tables:
            table.map_data()

    return tables
|
90
|
-
|
91
|
-
def _flatten_dict(d, parent_key=''):
    """Flatten nested dictionaries into a single level with ``_``-joined keys.

    Args:
        d: A dict, or a list of dicts (each item is flattened separately).
        parent_key: Prefix prepended (with ``_``) to every produced key.

    Returns:
        For a dict: a flat dict whose values are all converted with ``str()``.
        Lists found as values are not expanded; they are stringified whole.
        For a list: a list with one flat dict per item.
    """
    if isinstance(d, list):
        # Carry the prefix into each item so list input honours parent_key
        # exactly as dict input does.
        return [_flatten_dict(item, parent_key) for item in d]

    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}_{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(_flatten_dict(v, new_key))
        else:
            items[new_key] = str(v)
    return items
|
106
|
-
|
107
|
-
# flattens in a different way
|
108
|
-
def flatten_dict_to_rows(d, parent_key='', sep='_'):
    """Flatten nested data into a list of flat row dicts.

    Nested dict keys are joined with ``sep``. Every non-empty list multiplies
    the rows built so far (one copy per list element), an empty list becomes
    an empty-string column, and values are kept as-is (not stringified).

    Args:
        d: A dict, a list, or a primitive value.
        parent_key: Prefix for all produced keys.
        sep: Separator placed between nested key parts.

    Returns:
        A list of flat dicts. A primitive input yields ``[{parent_key: d}]``,
        or ``[]`` when ``parent_key`` is empty.
    """
    if isinstance(d, list):
        # Each element contributes its own rows, concatenated in order.
        return [
            row
            for element in d
            for row in flatten_dict_to_rows(element, parent_key, sep)
        ]

    if not isinstance(d, dict):
        return [{parent_key: d}] if parent_key else []

    rows = [{}]
    for name, value in d.items():
        column = f"{parent_key}{sep}{name}" if parent_key else name

        if isinstance(value, dict):
            # Cross-product of the rows so far with the nested dict's rows.
            expansions = flatten_dict_to_rows(value, column, sep)
            rows = [{**base, **extra} for base in rows for extra in expansions]
        elif isinstance(value, list) and value:
            expanded = []
            for base in rows:
                for element in value:
                    if isinstance(element, dict):
                        expanded.extend(
                            {**base, **extra}
                            for extra in flatten_dict_to_rows(element, column, sep)
                        )
                    else:
                        expanded.append({**base, column: element})
            rows = expanded
        else:
            # Primitive value, or an empty list recorded as an empty string.
            fill = '' if isinstance(value, list) else value
            for base in rows:
                base[column] = fill

    return rows
|
167
|
-
|
168
|
-
def process_ownership(data, accession):
    """Build the tables for an ownership document.

    Tables are appended in this order: non-derivative holdings and
    transactions, derivative holdings and transactions, a metadata table of
    top-level scalar fields, the reporting owner, and the owner signature.

    Args:
        data: Parsed document; only the ``'ownershipDocument'`` key is read.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        A list of ``Table`` objects. Empty when ``'ownershipDocument'`` is
        missing or is not a dict.
    """
    tables = []
    if 'ownershipDocument' not in data:
        return tables

    ownership_doc = data['ownershipDocument']
    if not isinstance(ownership_doc, dict):
        # A present-but-empty document node has nothing to tabulate.
        return tables

    table_sections = (
        ('nonDerivativeTable', (
            ('nonDerivativeHolding', 'non_derivative_holding_ownership'),
            ('nonDerivativeTransaction', 'non_derivative_transaction_ownership'),
        )),
        ('derivativeTable', (
            ('derivativeHolding', 'derivative_holding_ownership'),
            ('derivativeTransaction', 'derivative_transaction_ownership'),
        )),
    )
    for section_key, entries in table_sections:
        section = ownership_doc.get(section_key)
        if not isinstance(section, dict):
            # The section key can be present with a None value; a membership
            # test on None would raise TypeError.
            continue
        for entry_key, table_type in entries:
            entry = section.get(entry_key)
            if entry:
                tables.append(Table(_flatten_dict(entry), table_type, accession))

    metadata_fields = (
        'schemaVersion',
        'documentType',
        'periodOfReport',
        'dateOfOriginalSubmission',
        'noSecuritiesOwned',
        'notSubjectToSection16',
        'form3HoldingsReported',
        'form4TransactionsReported',
        'aff10b5One',
        'remarks',
    )
    # Absent fields are recorded as None so the metadata row keeps a fixed shape.
    metadata_table_dict = {field: ownership_doc.get(field) for field in metadata_fields}
    tables.append(Table(data=metadata_table_dict, type='metadata_ownership', accession=accession))

    trailing_sections = (
        ('reportingOwner', 'reporting_owner_ownership'),
        ('ownerSignature', 'owner_signature_ownership'),
    )
    for key, table_type in trailing_sections:
        # Skip None nodes: _flatten_dict cannot flatten them.
        if key in ownership_doc and ownership_doc[key] is not None:
            tables.append(Table(_flatten_dict(ownership_doc[key]), table_type, accession))

    return tables
|
210
|
-
|
211
|
-
def process_information_table(data, accession):
    """Build the ``information_table`` table from ``informationTable.infoTable``.

    Returns a one-element list holding the table, or an empty list when that
    node is missing or empty.
    """
    info_table = safe_get(data, ['informationTable', 'infoTable'])
    if not info_table:
        return []
    return [Table(_flatten_dict(info_table), 'information_table', accession)]
|
217
|
-
|
218
|
-
def process_13fhr(data, accession):
    """Build the ``13fhr`` table from the whole ``edgarSubmission`` node.

    Returns a one-element list holding the table, or an empty list when the
    node is missing or empty.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), '13fhr', accession)]
|
224
|
-
|
225
|
-
def process_sbsef(data, accession):
    """Build the ``sbsef`` table from the whole ``edgarSubmission`` node.

    Returns a one-element list holding the table, or an empty list when the
    node is missing or empty.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), 'sbsef', accession)]
|
231
|
-
|
232
|
-
def process_sdr_header_data(data, accession):
    """Build the ``sdr`` table from the whole ``edgarSubmission`` node.

    Returns a one-element list holding the table, or an empty list when the
    node is missing or empty.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), 'sdr', accession)]
|
238
|
-
|
239
|
-
def process_ex_99c_sdr(data, accession):
    """Build the ``ex99c_sdr`` table from ``directorGovernors.officer``.

    Returns a one-element list holding the table, or an empty list when that
    node is missing or empty.
    """
    officers = safe_get(data, ['directorGovernors', 'officer'])
    if not officers:
        return []
    return [Table(_flatten_dict(officers), 'ex99c_sdr', accession)]
|
245
|
-
|
246
|
-
def process_ex_99a_summary_sdr(data, accession):
    """Build the ``ex99a_sdr`` table from ``controllingPersons.controlPerson``.

    Returns a one-element list holding the table, or an empty list when that
    node is missing or empty.
    """
    control_persons = safe_get(data, ['controllingPersons', 'controlPerson'])
    if not control_persons:
        return []
    return [Table(_flatten_dict(control_persons), 'ex99a_sdr', accession)]
|
252
|
-
|
253
|
-
def process_ex_99g_summary_sdr(data, accession):
    """Build the ``ex99g_sdr`` table from ``affiliates.affiliate``.

    Returns a one-element list holding the table, or an empty list when that
    node is missing or empty.
    """
    affiliate_entries = safe_get(data, ['affiliates', 'affiliate'])
    if not affiliate_entries:
        return []
    return [Table(_flatten_dict(affiliate_entries), 'ex99g_sdr', accession)]
|
259
|
-
|
260
|
-
def process_ex_99i_summary_sdr(data, accession):
    """Build the ``ex99i_sdr`` table from
    ``serviceProviderContracts.serviceProviderContract``.

    Returns a one-element list holding the table, or an empty list when that
    node is missing or empty.
    """
    contracts = safe_get(data, ['serviceProviderContracts', 'serviceProviderContract'])
    if not contracts:
        return []
    return [Table(_flatten_dict(contracts), 'ex99i_sdr', accession)]
|
266
|
-
|
267
|
-
def process_144(data, accession):
    """Build the tables for a Form 144 document.

    One table is produced for each non-empty ``edgarSubmission.formData``
    section listed below, followed by a ``metadata_144`` table built from
    ``edgarSubmission.headerData``. When ``formData.remarks`` is non-empty it
    is added to the metadata table as a ``remarks`` column.

    Args:
        data: Parsed document.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        A list of ``Table`` objects; sections that are missing or empty are
        skipped.
    """
    tables = []
    form_path = ['edgarSubmission', 'formData']
    sections = (
        ('noticeSignature', 'signatures_144'),
        ('securitiesSoldInPast3Months', 'securities_sold_in_past_3_months_144'),
        ('securitiesToBeSold', 'securities_to_be_sold_144'),
        ('securitiesInformation', 'securities_information_144'),
        ('issuerInfo', 'issuer_information_144'),
    )
    for section_key, table_type in sections:
        section = safe_get(data, form_path + [section_key])
        if section:
            tables.append(Table(_flatten_dict(section), table_type, accession))

    header_data = safe_get(data, ['edgarSubmission', 'headerData'])
    if header_data:
        # Guarded: _flatten_dict cannot flatten None, so a document without
        # headerData gets no metadata table (and therefore no remarks column)
        # instead of raising.
        metadata_table = Table(_flatten_dict(header_data), 'metadata_144', accession)
        remarks = safe_get(data, form_path + ['remarks'])
        if remarks:
            metadata_table.add_column('remarks', remarks)
        tables.append(metadata_table)

    return tables
|
298
|
-
|
299
|
-
def process_24f2nt(data, accession):
    """Build the tables for a 24F-2NT document.

    Produces, in order: a ``metadata_24f_2nt`` table from
    ``edgarSubmission.headerData`` (with a ``schemaVersion`` column when
    ``edgarSubmission.schemaVersion`` is non-empty), one ``item_<n>_24f2nt``
    table for each non-empty ``item1`` .. ``item9`` under
    ``formData.annualFilings.annualFilingInfo``, and a ``signature_24f2nt``
    table from the ``signature`` node at the same level.
    """
    tables = []
    filing_path = ['edgarSubmission', 'formData', 'annualFilings', 'annualFilingInfo']

    header = safe_get(data, ['edgarSubmission', 'headerData'])
    if header:
        metadata = Table(_flatten_dict(header), 'metadata_24f_2nt', accession)
        version = safe_get(data, ['edgarSubmission', 'schemaVersion'])
        if version:
            metadata.add_column('schemaVersion', version)
        tables.append(metadata)

    # Items 1 through 9 share one naming scheme, so a single loop covers them.
    for number in range(1, 10):
        item = safe_get(data, filing_path + [f'item{number}'])
        if item:
            tables.append(Table(_flatten_dict(item), f'item_{number}_24f2nt', accession))

    signature = safe_get(data, filing_path + ['signature'])
    if signature:
        tables.append(Table(_flatten_dict(signature), 'signature_24f2nt', accession))

    return tables
|
324
|
-
|
325
|
-
def process_25nse(data, accession):
    """Build the ``25nse`` table from the ``notificationOfRemoval`` node.

    Returns a one-element list holding the table, or an empty list when the
    node is missing or empty.
    """
    removal_notice = safe_get(data, ['notificationOfRemoval'])
    if not removal_notice:
        return []
    return [Table(_flatten_dict(removal_notice), '25nse', accession)]
|
331
|
-
|
332
|
-
def process_ats(data, accession):
    """Build the tables for an ATS-N document.

    One table is produced for each non-empty node, in this order:
    ``headerData``, then ``formData`` sections ``cover``, ``partOne``,
    ``partTwo``, ``partThree`` and ``partFour``.
    """
    form_path = ['edgarSubmission', 'formData']
    sources = (
        (['edgarSubmission', 'headerData'], 'metadata_ats'),
        (form_path + ['cover'], 'cover_ats'),
        (form_path + ['partOne'], 'part_one_ats'),
        (form_path + ['partTwo'], 'part_two_ats'),
        (form_path + ['partThree'], 'part_three_ats'),
        (form_path + ['partFour'], 'part_four_ats'),
    )

    tables = []
    for path, table_type in sources:
        node = safe_get(data, path)
        if not node:
            continue
        tables.append(Table(_flatten_dict(node), table_type, accession))
    return tables
|
359
|
-
|
360
|
-
# def process_c(data, accession):
|
361
|
-
# tables = []
|
362
|
-
# header_data = safe_get(data, ['edgarSubmission', 'headerData'])
|
363
|
-
# if header_data:
|
364
|
-
# tables.append(Table(_flatten_dict(header_data), 'metadata_c', accession))
|
365
|
-
|
366
|
-
# issuer_info = safe_get(data, ['edgarSubmission', 'formData', 'issuerInformation'])
|
367
|
-
# if issuer_info:
|
368
|
-
# tables.append(Table(_flatten_dict(issuer_info), 'issuer_information_c', accession))
|
369
|
-
|
370
|
-
# offering_info = safe_get(data, ['edgarSubmission', 'formData', 'offeringInformation'])
|
371
|
-
# if offering_info:
|
372
|
-
# tables.append(Table(_flatten_dict(offering_info), 'offering_information_c', accession))
|
373
|
-
|
374
|
-
# annual_report = safe_get(data, ['edgarSubmission', 'formData', 'annualReportDisclosureRequirements'])
|
375
|
-
# if annual_report:
|
376
|
-
# tables.append(Table(_flatten_dict(annual_report), 'annual_report_disclosure_requirements_c', accession))
|
377
|
-
|
378
|
-
# signature_info = safe_get(data, ['edgarSubmission', 'formData', 'signatureInfo'])
|
379
|
-
# if signature_info:
|
380
|
-
# tables.append(Table(_flatten_dict(signature_info), 'signature_info_c', accession))
|
381
|
-
|
382
|
-
# return tables
|
383
|
-
|
384
|
-
def process_cfportal(data, accession):
    """Build the tables for a CFPORTAL document.

    Produces a ``metadata_cfportal`` table from ``edgarSubmission.headerData``
    followed by one table per non-empty ``edgarSubmission.formData`` section,
    in the order listed below.
    """
    form_sections = (
        ('identifyingInformation', 'identifying_information_cfportal'),
        ('formOfOrganization', 'form_of_organization_cfportal'),
        ('successions', 'successions_cfportal'),
        ('controlRelationships', 'control_relationships_cfportal'),
        ('disclosureAnswers', 'disclosure_answers_cfportal'),
        ('nonSecuritiesRelatedBusiness', 'non_securities_related_business_cfportal'),
        ('escrowArrangements', 'escrow_arrangements_cfportal'),
        ('execution', 'execution_cfportal'),
        ('scheduleA', 'schedule_a_cfportal'),
        ('scheduleB', 'schedule_b_cfportal'),
        ('scheduleC', 'schedule_c_cfportal'),
        ('scheduleD', 'schedule_d_cfportal'),
        ('criminalDrpInfo', 'criminal_drip_info_cfportal'),
        ('regulatoryDrpInfo', 'regulatory_drip_info_cfportal'),
        ('civilJudicialDrpInfo', 'civil_judicial_drip_info_cfportal'),
        ('bankruptcySipcDrpInfo', 'bankruptcy_sipc_drip_info_cfportal'),
        ('bondDrpInfo', 'bond_drip_info_cfportal'),
        ('judgementDrpInfo', 'judgement_drip_info_cfportal'),
    )

    # The header lives outside formData, so it is listed with its full path.
    sources = [(['edgarSubmission', 'headerData'], 'metadata_cfportal')]
    sources.extend(
        (['edgarSubmission', 'formData', key], table_type)
        for key, table_type in form_sections
    )

    tables = []
    for path, table_type in sources:
        node = safe_get(data, path)
        if not node:
            continue
        tables.append(Table(_flatten_dict(node), table_type, accession))
    return tables
|
418
|
-
|
419
|
-
def process_d(data, accession):
    """Build the tables for a Form D document.

    Each known group under ``edgarSubmission`` becomes its own table. The
    ``relatedPersonsList``, ``issuerList`` and ``offeringData`` groups are
    expanded to one row per list entry with ``flatten_dict_to_rows``; the
    other groups are flattened to a single row with ``_flatten_dict``.
    Whatever is left under ``edgarSubmission`` after the groups are taken out
    becomes the ``metadata_d`` table.

    Args:
        data: Parsed document. It is not modified.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        A list of ``Table`` objects, or an empty list when
        ``edgarSubmission`` is missing or is not a dict.
    """
    tables = []
    submission = data.get('edgarSubmission') if isinstance(data, dict) else None
    if not isinstance(submission, dict):
        return tables

    # Pop from a shallow copy so the caller's data stays intact and calling
    # this function again yields the same tables.
    remaining = dict(submission)

    groups = [('contactData', 'contact_data_d'),
              ('notificationAddressList', 'notification_address_list_d'),
              ('primaryIssuer', 'primary_issuer_d'),
              ('issuerList', 'issuer_list_d'),
              ('relatedPersonsList', 'related_persons_list_d'),
              ('offeringData', 'offering_data_d'),
              ]
    # Groups that may hold repeated entries and need one row per entry.
    row_groups = ('relatedPersonsList', 'issuerList', 'offeringData')

    for group, table_type in groups:
        group_data = remaining.pop(group, None)
        if not group_data:
            continue
        if group in row_groups:
            flattened_rows = flatten_dict_to_rows(group_data)
            if flattened_rows:
                tables.append(Table(flattened_rows, table_type, accession))
        else:
            tables.append(Table(_flatten_dict(group_data), table_type, accession))

    tables.append(Table(_flatten_dict(remaining), 'metadata_d', accession))

    return tables
|
454
|
-
|
455
|
-
# def process_nmfp(data, accession):
|
456
|
-
# tables = []
|
457
|
-
# header_data = safe_get(data, ['edgarSubmission', 'headerData'])
|
458
|
-
# if header_data:
|
459
|
-
# tables.append(Table(_flatten_dict(header_data), 'metadata_nmfp', accession))
|
460
|
-
|
461
|
-
# general_info = safe_get(data, ['edgarSubmission', 'formData', 'generalInfo'])
|
462
|
-
# if general_info:
|
463
|
-
# tables.append(Table(_flatten_dict(general_info), 'general_information_nmfp', accession))
|
464
|
-
|
465
|
-
# series_level_info = safe_get(data, ['edgarSubmission', 'formData', 'seriesLevelInfo'])
|
466
|
-
# if series_level_info:
|
467
|
-
# tables.append(Table(_flatten_dict(series_level_info), 'series_level_info_nmfp', accession))
|
468
|
-
|
469
|
-
# class_level_info = safe_get(data, ['edgarSubmission', 'formData', 'classLevelInfo'])
|
470
|
-
# if class_level_info:
|
471
|
-
# tables.append(Table(_flatten_dict(class_level_info), 'class_level_info_nmfp', accession))
|
472
|
-
|
473
|
-
# portfolio_securities = safe_get(data, ['edgarSubmission', 'formData', 'scheduleOfPortfolioSecuritiesInfo'])
|
474
|
-
# if portfolio_securities:
|
475
|
-
# tables.append(Table(_flatten_dict(portfolio_securities), 'schedule_of_portfolio_securities_info_nmfp', accession))
|
476
|
-
|
477
|
-
# signature = safe_get(data, ['edgarSubmission', 'formData', 'signature'])
|
478
|
-
# if signature:
|
479
|
-
# tables.append(Table(_flatten_dict(signature), 'signature_nmfp', accession))
|
480
|
-
|
481
|
-
# return tables
|
482
|
-
|
483
|
-
# def process_nportp(data, accession):
|
484
|
-
# tables = []
|
485
|
-
# header_data = safe_get(data, ['edgarSubmission', 'headerData'])
|
486
|
-
# if header_data:
|
487
|
-
# tables.append(Table(_flatten_dict(header_data), 'metadata_nportp', accession))
|
488
|
-
|
489
|
-
# gen_info = safe_get(data, ['edgarSubmission', 'formData', 'genInfo'])
|
490
|
-
# if gen_info:
|
491
|
-
# tables.append(Table(_flatten_dict(gen_info), 'general_information_nportp', accession))
|
492
|
-
|
493
|
-
# fund_info = safe_get(data, ['edgarSubmission', 'formData', 'fundInfo'])
|
494
|
-
# if fund_info:
|
495
|
-
# tables.append(Table(_flatten_dict(fund_info), 'fund_information_nportp', accession))
|
496
|
-
|
497
|
-
# invst_or_secs = safe_get(data, ['edgarSubmission', 'formData', 'invstOrSecs'])
|
498
|
-
# if invst_or_secs:
|
499
|
-
# tables.append(Table(_flatten_dict(invst_or_secs), 'investment_or_securities_nportp', accession))
|
500
|
-
|
501
|
-
# signature = safe_get(data, ['edgarSubmission', 'formData', 'signature'])
|
502
|
-
# if signature:
|
503
|
-
# tables.append(Table(_flatten_dict(signature), 'signature_nportp', accession))
|
504
|
-
|
505
|
-
# return tables
|
506
|
-
|
507
|
-
def process_npx(data, accession):
    """Build the ``npx`` table from the whole ``edgarSubmission`` node.

    Returns a one-element list holding the table, or an empty list when the
    node is missing or empty.
    """
    submission = safe_get(data, ['edgarSubmission'])
    if not submission:
        return []
    return [Table(_flatten_dict(submission), 'npx', accession)]
|
513
|
-
|
514
|
-
def process_proxy_voting_record(data, accession):
    """Build the ``proxy_voting_record`` table from ``proxyVoteTable.proxyTable``.

    Returns a one-element list holding the table, or an empty list when that
    node is missing or empty.
    """
    vote_rows = safe_get(data, ['proxyVoteTable', 'proxyTable'])
    if not vote_rows:
        return []
    return [Table(_flatten_dict(vote_rows), 'proxy_voting_record', accession)]
|
520
|
-
|
521
|
-
# SOMETHING IS VERY OFF HERE
|
522
|
-
# def process_ta(data, accession):
|
523
|
-
# tables = []
|
524
|
-
# header_data = safe_get(data, ['edgarSubmission', 'headerData'])
|
525
|
-
# if header_data:
|
526
|
-
# metadata_ta = Table(_flatten_dict(header_data), 'metadata_ta', accession)
|
527
|
-
# schema_version = safe_get(data, ['edgarSubmission', 'schemaVersion'])
|
528
|
-
# if schema_version:
|
529
|
-
# metadata_ta.add_column('schemaVersion', schema_version)
|
530
|
-
# tables.append(metadata_ta)
|
531
|
-
|
532
|
-
# registrant = safe_get(data, ['edgarSubmission', 'registrant'])
|
533
|
-
# if registrant:
|
534
|
-
# tables.append(Table(_flatten_dict(registrant), 'registrant_ta', accession))
|
535
|
-
|
536
|
-
# independent_registrant = safe_get(data, ['edgarSubmission', 'formData', 'independentRegistrant'])
|
537
|
-
# if independent_registrant:
|
538
|
-
# tables.append(Table(_flatten_dict(independent_registrant), 'independent_registrant_ta', accession))
|
539
|
-
|
540
|
-
# disciplinary_history = safe_get(data, ['edgarSubmission', 'formData', 'disciplinaryHistory'])
|
541
|
-
# if disciplinary_history:
|
542
|
-
# tables.append(Table(_flatten_dict(disciplinary_history), 'disciplinary_history_ta', accession))
|
543
|
-
|
544
|
-
# signature = safe_get(data, ['edgarSubmission', 'formData', 'signature'])
|
545
|
-
# if signature:
|
546
|
-
# tables.append(Table(_flatten_dict(signature), 'signature_ta', accession))
|
547
|
-
|
548
|
-
# return tables
|
549
|
-
|
550
|
-
def process_x17a5(data, accession):
    """Build the tables for an X-17A-5 document.

    One table is produced for each non-empty node, in this order:
    ``headerData``, then ``formData`` sections ``submissionInformation``,
    ``registrantIdentification``, ``accountantIdentification`` and
    ``oathSignature``.
    """
    form_path = ['edgarSubmission', 'formData']
    sources = (
        (['edgarSubmission', 'headerData'], 'metadata_x17a5'),
        (form_path + ['submissionInformation'], 'submission_information_x17a5'),
        (form_path + ['registrantIdentification'], 'registrant_identification_x17a5'),
        (form_path + ['accountantIdentification'], 'accountant_identification_x17a5'),
        (form_path + ['oathSignature'], 'oath_signature_x17a5'),
    )

    tables = []
    for path, table_type in sources:
        node = safe_get(data, path)
        if not node:
            continue
        tables.append(Table(_flatten_dict(node), table_type, accession))
    return tables
|
573
|
-
|
574
|
-
def process_schedule_13(data, accession):
    """Build the tables for a Schedule 13D/13G document.

    Produces, in order: tables for ``headerData``, ``coverPageHeader`` and
    ``coverPageHeaderReportingPersonDetails``; one ``<key>_schedule_13``
    table per non-empty entry of ``formData.items`` (only when ``items`` is a
    dict); and a table for ``signatureInformation``.
    """
    form_path = ['edgarSubmission', 'formData']
    tables = []

    def add_table(node, table_type):
        # Missing or empty nodes produce no table.
        if node:
            tables.append(Table(_flatten_dict(node), table_type, accession))

    add_table(safe_get(data, ['edgarSubmission', 'headerData']), 'metadata_schedule_13')
    add_table(safe_get(data, form_path + ['coverPageHeader']), 'cover_schedule_13')
    add_table(
        safe_get(data, form_path + ['coverPageHeaderReportingPersonDetails']),
        'reporting_person_details_schedule_13',
    )

    item_nodes = safe_get(data, form_path + ['items'])
    if item_nodes and isinstance(item_nodes, dict):
        for item_key, item_value in item_nodes.items():
            add_table(item_value, f'{item_key}_schedule_13')

    add_table(
        safe_get(data, form_path + ['signatureInformation']),
        'signature_information_schedule_13',
    )
    return tables
|
599
|
-
|
600
|
-
def process_reg_a(data, accession):
    """Collect the Regulation A sections of ``data`` as ``Table`` objects.

    The header block is looked up first, followed by a fixed list of
    ``formData`` sections. Each lookup goes through ``safe_get`` and a
    falsy result produces no table.

    Args:
        data: Nested structure that ``safe_get`` can navigate by key path.
        accession: Value passed unchanged as the third ``Table`` argument.

    Returns:
        list: One ``Table`` per section that was found, in lookup order.
    """
    # formData key -> table name; dict order is the emission order.
    form_sections = {
        'employeesInfo': 'employees_info_reg_a',
        'issuerInfo': 'issuer_info_reg_a',
        'commonEquity': 'common_equity_reg_a',
        'preferredEquity': 'preferred_equity_reg_a',
        'debtSecurities': 'debt_securities_reg_a',
        'issuerEligibility': 'issuer_eligibility_reg_a',
        'applicationRule262': 'application_rule_262_reg_a',
        'summaryInfo': 'summary_info_reg_a',
        'juridictionSecuritiesOffered': 'juridiction_securities_offered_reg_a',
        'unregisteredSecurities': 'unregistered_securities_reg_a',
        'securitiesIssued': 'securities_issued_reg_a',
        'unregisteredSecuritiesAct': 'unregistered_securities_act_reg_a',
    }

    lookups = [(['edgarSubmission', 'headerData'], 'metadata_reg_a')]
    lookups.extend(
        (['edgarSubmission', 'formData', key], name)
        for key, name in form_sections.items()
    )

    collected = []
    for path, name in lookups:
        payload = safe_get(data, path)
        if not payload:
            continue
        collected.append(Table(_flatten_dict(payload), name, accession))
    return collected
|
628
|
-
|
629
|
-
# Looks good, but we missed some extra nested tables.
|
630
|
-
# def process_sbs(data, accession):
|
631
|
-
# tables = []
|
632
|
-
# header_data = safe_get(data, ['edgarSubmission', 'headerData'])
|
633
|
-
# if header_data:
|
634
|
-
# tables.append(Table(_flatten_dict(header_data), 'metadata_sbse', accession))
|
635
|
-
|
636
|
-
# applicant = safe_get(data, ['edgarSubmission', 'formData', 'applicant'])
|
637
|
-
# if applicant and isinstance(applicant, dict):
|
638
|
-
# for k, v in applicant.items():
|
639
|
-
# if v:
|
640
|
-
# tables.append(Table(_flatten_dict(v), f'applicant_{k}_sbs', accession))
|
641
|
-
|
642
|
-
# base_path = ['edgarSubmission', 'formData']
|
643
|
-
# sections = [
|
644
|
-
# ('scheduleA', 'schedule_a_sbs'),
|
645
|
-
# ('scheduleB', 'schedule_b_sbs'),
|
646
|
-
# ('scheduleC', 'schedule_c_sbs'),
|
647
|
-
# ('scheduleD', 'schedule_d_sbs'),
|
648
|
-
# ('scheduleE', 'schedule_e_sbs'),
|
649
|
-
# ('scheduleF', 'schedule_f_sbs'),
|
650
|
-
# ('criminalDrpInfo', 'criminal_drip_info_sbs'),
|
651
|
-
# ('regulatoryDrpInfo', 'regulatory_drip_info_sbs'),
|
652
|
-
# ('civilJudicialDrpInfo', 'civil_judicial_drip_info_sbs'),
|
653
|
-
# ('bankruptcySipcDrpInfo', 'bankruptcy_sipc_drip_info_sbs'),
|
654
|
-
# ('execution', 'execution_sbs')
|
655
|
-
# ]
|
656
|
-
|
657
|
-
# for section_key, table_name in sections:
|
658
|
-
# section_data = safe_get(data, base_path + [section_key])
|
659
|
-
# if section_data:
|
660
|
-
# tables.append(Table(_flatten_dict(section_data), table_name, accession))
|
661
|
-
|
662
|
-
# return tables
|
663
|
-
|
664
|
-
def process_ex102_abs(data, accession):
    """Split the assets under ``assetData.assets`` into asset and property tables.

    Each asset contributes one row to the assets table (every field except
    ``property``). Each ``property`` entry, whether a single dict or a list
    of dicts, contributes one row to the properties table, tagged with the
    owning asset's ``assetNumber``.

    Args:
        data: Nested structure that ``safe_get`` can navigate by key path.
        accession: Value passed unchanged as the third ``Table`` argument.

    Returns:
        list: Up to two ``Table`` objects (``assets_ex102_absee`` and
        ``properties_ex102_absee``); empty when there are no assets.
    """
    tables = []
    raw_assets = safe_get(data, ['assetData', 'assets'])

    # Nothing to tabulate when the section is absent or empty. Without this
    # guard the loop below would fail trying to iterate a non-iterable.
    if not raw_assets:
        return tables

    # NOTE(review): presumably a filing with exactly one asset can be parsed
    # as a bare dict instead of a one-element list; wrap it so it is treated
    # as a single row rather than iterated key by key. Confirm with the parser.
    if isinstance(raw_assets, dict):
        raw_assets = [raw_assets]

    assets = []
    dict_properties = []
    list_properties = []

    # Single pass over the assets instead of three separate traversals.
    for item in raw_assets:
        if not isinstance(item, dict):
            continue  # malformed entry; skip rather than crash on .items()

        assets.append({k: v for k, v in item.items() if k != 'property'})

        # .get() keeps a property row even if the asset lacks a number.
        asset_number = item.get('assetNumber')
        prop = item.get('property')
        if isinstance(prop, dict):
            dict_properties.append(prop | {'assetNumber': asset_number})
        elif isinstance(prop, list):
            list_properties.extend(
                entry | {'assetNumber': asset_number}
                for entry in prop
                if isinstance(entry, dict)
            )

    # Row order is unchanged from before: all single-dict properties first,
    # then the entries that came from list-valued properties.
    properties = dict_properties + list_properties

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
|
697
|
-
|
698
|
-
# def process_ma(data, accession):
|
699
|
-
# tables = []
|
700
|
-
# header_data = safe_get(data, ['edgarSubmission', 'headerData'])
|
701
|
-
# if header_data:
|
702
|
-
# header_ma = Table(_flatten_dict(header_data), 'metadata_ma', accession)
|
703
|
-
# tables.append(header_ma)
|
704
|
-
# # WE NEED TO COMBINE TABLES
|
705
|
-
# raise NotImplementedError("Need to implement the rest of the MA processing")
|
706
|
-
|
707
|
-
# def process_ncen(data, accession):
|
708
|
-
# raise NotImplementedError("Need to implement the N-CEN processing")
|
709
|
-
|
710
|
-
# WIP
|
711
|
-
# Note: going to pause this for now, as I don't have a great way of putting this in a csv.
|
712
|
-
def process_submission_metadata(data, accession):
    """Collect top-level submission metadata sections as ``Table`` objects.

    Looks up ``documents``, ``reporting-owner`` and ``issuer`` directly on
    ``data`` via ``safe_get``; a falsy result produces no table.

    Args:
        data: Structure that ``safe_get`` can navigate by key path.
        accession: Value passed unchanged as the third ``Table`` argument.

    Returns:
        list: One ``Table`` per section that was found, in the order above.
    """
    section_tables = (
        ('documents', 'document_submission_metadata'),
        ('reporting-owner', 'reporting_owner_submission_metadata'),
        ('issuer', 'issuer_submission_metadata'),
    )

    found = []
    for key, table_type in section_tables:
        section = safe_get(data, [key])
        if section:
            found.append(Table(_flatten_dict(section), table_type, accession))

    # TODO (work in progress): still to be built are
    #   - a metadata table (accession-number, date-of-filing-date-change,
    #     depositor-cik, effectiveness-date)
    #   - other tables: depositor, securitizer
    return found
|