datamule 1.2.6__py3-none-any.whl → 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -141,7 +141,7 @@ class Document:
141
141
  with open(output_filename, 'w',encoding='utf-8') as f:
142
142
  json.dump(self.data, f, indent=2)
143
143
 
144
- def to_tabular(self):
144
+ def tables(self):
145
145
  if self.type == 'submission_metadata':
146
146
  return process_tabular_data(self)
147
147
  elif self.extension != '.xml':
@@ -155,7 +155,7 @@ class Document:
155
155
  output_folder = Path(output_folder)
156
156
  output_folder.mkdir(exist_ok=True)
157
157
 
158
- tables = self.to_tabular()
158
+ tables = self.tables()
159
159
 
160
160
  if not tables:
161
161
  return
@@ -0,0 +1,127 @@
1
# Column-rename mapping for the Form D issuer-list table: each key is a
# flattened source column and each value is the output column name.
# The Table class selects this mapping for table type 'issuerList_d'.
# NOTE(review): the renaming loop iterates this dict in order, so entry order
# presumably fixes the order of mapped columns in each row - confirm before
# reordering entries.
issuer_list_d_dict = {
    'issuer_issuerAddress_street1': 'issuerStreet1',
    'issuer_cik': 'issuerCik',
    'issuer_issuerAddress_stateOrCountryDescription': 'issuerStateOrCountryDescription',
    'issuer_issuerAddress_zipCode': 'issuerZipCode',
    'issuer_issuerPhoneNumber': 'issuerPhoneNumber',
    'issuer_yearOfInc_value': 'yearOfIncValue',
    'issuer_issuerAddress_stateOrCountry': 'issuerStateOrCountry',
    'issuer_jurisdictionOfInc': 'jurisdictionOfInc',
    'issuer_entityType': 'entityType',
    'issuer_issuerAddress_street2': 'issuerStreet2',
    'issuer_entityName': 'entityName',
    'accession': 'accessionNumber',
    'issuer_edgarPreviousNameList_value': 'edgarPreviousNameListValue',
    'issuer_entityTypeOtherDesc': 'entityTypeOtherDesc',
    'issuer_yearOfInc_yetToBeFormed': 'yearOfIncYetToBeFormed',
    'issuer_yearOfInc_withinFiveYears': 'yearOfIncWithinFiveYears',
    'issuer_issuerPreviousNameList_value': 'issuerPreviousNameListValue',
    'issuer_issuerAddress_city': 'issuerCity'
}
21
+
22
# Column-rename mapping for the Form D metadata table (table type
# 'metadata_d'). Every entry maps a column to itself, so this mapping only
# fixes which columns come first; note that 'accession' is kept as
# 'accession' here, unlike the other Form D mappings in this module.
metadata_d_dict = {
    "testOrLive" : "testOrLive",
    "schemaVersion" : "schemaVersion",
    "accession" : "accession",
    "submissionType" : "submissionType",
}
28
# Column-rename mapping for the Form D offering-data table: flattened source
# column -> output column name. The Table class selects this mapping for
# table type 'offeringData_d'.
# NOTE(review): entry order presumably fixes the order of mapped columns in
# each row - confirm before reordering entries.
offering_data_d_dict = {
    'salesCompensationList_recipient': 'salesCompensationRecipient',
    'useOfProceeds_clarificationOfResponse': 'useOfProceedsClarification',
    'industryGroup_investmentFundInfo_investmentFundType': 'investmentFundType',
    'typeOfFiling_newOrAmendment_isAmendment': 'isAmendment',
    'salesCompensationList_recipient_recipientAddress_stateOrCountryDescription': 'recipientStateOrCountryDescription',
    'signatureBlock_signature_signatureName': 'signatureName',
    'federalExemptionsExclusions_item': 'federalExemptionsItem',
    'salesCommissionsFindersFees_findersFees_isEstimate': 'findersFeesIsEstimate',
    'salesCommissionsFindersFees_salesCommissions_dollarAmount': 'salesCommissionsDollarAmount',
    'salesCompensationList_recipient_recipientName': 'recipientName',
    'salesCommissionsFindersFees_salesCommissions_isEstimate': 'salesCommissionsIsEstimate',
    'offeringSalesAmounts_totalRemaining': 'totalRemaining',
    'salesCommissionsFindersFees_clarificationOfResponse': 'salesCommissionsClarification',
    'salesCompensationList_recipient_statesOfSolicitationList_value': 'recipientStatesOfSolicitationValue',
    'investors_numberNonAccreditedInvestors': 'numberNonAccreditedInvestors',
    'typesOfSecuritiesOffered_isTenantInCommonType': 'isTenantInCommonType',
    'businessCombinationTransaction_isBusinessCombinationTransaction': 'isBusinessCombinationTransaction',
    'typesOfSecuritiesOffered_isSecurityToBeAcquiredType': 'isSecurityToBeAcquiredType',
    'issuerSize_aggregateNetAssetValueRange': 'aggregateNetAssetValueRange',
    'typesOfSecuritiesOffered_isPooledInvestmentFundType': 'isPooledInvestmentFundType',
    'offeringSalesAmounts_clarificationOfResponse': 'offeringSalesAmountsClarification',
    'signatureBlock_signature_nameOfSigner': 'nameOfSigner',
    'industryGroup_investmentFundInfo_is40Act': 'is40Act',
    'salesCompensationList_recipient_recipientAddress_city': 'recipientCity',
    'typeOfFiling_dateOfFirstSale_yetToOccur': 'dateOfFirstSaleYetToOccur',
    'signatureBlock_signature': 'signature',
    'salesCompensationList_recipient_foreignSolicitation': 'recipientForeignSolicitation',
    'businessCombinationTransaction_clarificationOfResponse': 'businessCombinationClarification',
    'salesCompensationList_recipient_associatedBDName': 'recipientAssociatedBDName',
    'salesCompensationList_recipient_statesOfSolicitationList_state': 'recipientStatesOfSolicitationState',
    'typeOfFiling_dateOfFirstSale_value': 'dateOfFirstSaleValue',
    'signatureBlock_signature_signatureTitle': 'signatureTitle',
    'signatureBlock_signature_issuerName': 'signatureIssuerName',
    'durationOfOffering_moreThanOneYear': 'durationOfferingMoreThanOneYear',
    'offeringSalesAmounts_totalAmountSold': 'totalAmountSold',
    'signatureBlock_signature_signatureDate': 'signatureDate',
    'issuerSize_revenueRange': 'revenueRange',
    'typesOfSecuritiesOffered_isOptionToAcquireType': 'isOptionToAcquireType',
    'signatureBlock_authorizedRepresentative': 'authorizedRepresentative',
    'salesCompensationList_recipient_recipientAddress_street2': 'recipientStreet2',
    'useOfProceeds_grossProceedsUsed_isEstimate': 'grossProceedsUsedIsEstimate',
    'salesCommissionsFindersFees_findersFees_dollarAmount': 'findersFeesDollarAmount',
    'typesOfSecuritiesOffered_isEquityType': 'isEquityType',
    'typesOfSecuritiesOffered_descriptionOfOtherType': 'descriptionOfOtherType',
    'salesCompensationList_recipient_recipientAddress_street1': 'recipientStreet1',
    'minimumInvestmentAccepted': 'minimumInvestmentAccepted',
    'typesOfSecuritiesOffered_isOtherType': 'isOtherType',
    'salesCompensationList_recipient_recipientCRDNumber': 'recipientCRDNumber',
    'typesOfSecuritiesOffered_isDebtType': 'isDebtType',
    'accession': 'accessionNumber',
    'investors_totalNumberAlreadyInvested': 'totalNumberAlreadyInvested',
    'typesOfSecuritiesOffered_isMineralPropertyType': 'isMineralPropertyType',
    'industryGroup_industryGroupType': 'industryGroupType',
    'salesCompensationList_recipient_statesOfSolicitationList_description': 'recipientStatesOfSolicitationDescription',
    'salesCompensationList_recipient_associatedBDCRDNumber': 'recipientAssociatedBDCRDNumber',
    'useOfProceeds_grossProceedsUsed_dollarAmount': 'grossProceedsUsedDollarAmount',
    'investors_hasNonAccreditedInvestors': 'hasNonAccreditedInvestors',
    'salesCompensationList_recipient_recipientAddress_zipCode': 'recipientZipCode',
    'offeringSalesAmounts_totalOfferingAmount': 'totalOfferingAmount',
    'salesCompensationList_recipient_recipientAddress_stateOrCountry': 'recipientStateOrCountry'
}
90
# Column-rename mapping for the Form D primary-issuer table: flattened source
# column -> output column name. The Table class selects this mapping for
# table type 'primaryIssuer_d'.
# NOTE(review): entry order presumably fixes the order of mapped columns in
# each row - confirm before reordering entries.
primary_issuer_d_dict = {
    'yearOfInc_withinFiveYears': 'yearOfIncWithinFiveYears',
    'entityTypeOtherDesc': 'entityTypeOtherDesc',
    'jurisdictionOfInc': 'jurisdictionOfInc',
    'issuerAddress_street1': 'issuerStreet1',
    'issuerAddress_zipCode': 'issuerZipCode',
    'issuerPreviousNameList_previousName': 'issuerPreviousName',
    'entityType': 'entityType',
    'issuerPreviousNameList_value': 'issuerPreviousNameListValue',
    'issuerPhoneNumber': 'issuerPhoneNumber',
    'yearOfInc_value': 'yearOfIncValue',
    'yearOfInc_yetToBeFormed': 'yearOfIncYetToBeFormed',
    'edgarPreviousNameList_previousName': 'edgarPreviousName',
    'edgarPreviousNameList_value': 'edgarPreviousNameListValue',
    'issuerAddress_stateOrCountry': 'issuerStateOrCountry',
    'entityName': 'entityName',
    'accession': 'accessionNumber',
    'issuerAddress_street2': 'issuerStreet2',
    'issuerAddress_city': 'issuerCity',
    'issuerAddress_stateOrCountryDescription': 'issuerStateOrCountryDescription',
    'cik': 'cik',
    'yearOfInc_overFiveYears': 'yearOfIncOverFiveYears'
}
113
+
114
# Column-rename mapping for the Form D related-persons table: flattened source
# column -> output column name. The Table class selects this mapping for
# table type 'relatedPersonsList_d'. Every source key except 'accession'
# carries the 'relatedPersonInfo_' prefix.
# NOTE(review): entry order presumably fixes the order of mapped columns in
# each row - confirm before reordering entries.
related_persons_d_dict = {
    'relatedPersonInfo_relatedPersonAddress_stateOrCountry': 'relatedPersonStateOrCountry',
    'relatedPersonInfo_relatedPersonRelationshipList_relationship': 'relatedPersonRelationship',
    'relatedPersonInfo_relationshipClarification': 'relationshipClarification',
    'relatedPersonInfo_relatedPersonName_lastName': 'relatedPersonLastName',
    'accession': 'accessionNumber',
    'relatedPersonInfo_relatedPersonName_middleName': 'relatedPersonMiddleName',
    'relatedPersonInfo_relatedPersonAddress_zipCode': 'relatedPersonZipCode',
    'relatedPersonInfo_relatedPersonAddress_city': 'relatedPersonCity',
    'relatedPersonInfo_relatedPersonAddress_street1': 'relatedPersonStreet1',
    'relatedPersonInfo_relatedPersonAddress_stateOrCountryDescription': 'relatedPersonStateOrCountryDescription',
    'relatedPersonInfo_relatedPersonName_firstName': 'relatedPersonFirstName',
    'relatedPersonInfo_relatedPersonAddress_street2': 'relatedPersonStreet2'
}
@@ -0,0 +1,63 @@
1
# Column mapping for the EX-102 assets table (table type
# 'assets_ex102_absee'). Every entry maps a column to itself, so the mapping
# leaves names unchanged and only lists the known asset-level columns.
# NOTE(review): entry order presumably fixes the order of mapped columns in
# each row - confirm before reordering entries.
assets_dict_ex102_abs = {
    'assetNumber': 'assetNumber',
    'DefeasedStatusCode': 'DefeasedStatusCode',
    'defeasanceOptionStartDate': 'defeasanceOptionStartDate',
    'mostRecentDebtServiceCoverageNetOperatingIncomePercentage': 'mostRecentDebtServiceCoverageNetOperatingIncomePercentage',
    'mostRecentDebtServiceAmount': 'mostRecentDebtServiceAmount',
    'debtServiceCoverageSecuritizationCode': 'debtServiceCoverageSecuritizationCode',
    'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage': 'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage',
    'valuationSecuritizationDate': 'valuationSecuritizationDate',
    'physicalOccupancySecuritizationPercentage': 'physicalOccupancySecuritizationPercentage',
    'revenueSecuritizationAmount': 'revenueSecuritizationAmount',
    'valuationSourceSecuritizationCode': 'valuationSourceSecuritizationCode',
    'financialsSecuritizationDate': 'financialsSecuritizationDate',
    'mostRecentNetCashFlowAmount': 'mostRecentNetCashFlowAmount',
    'operatingExpensesAmount': 'operatingExpensesAmount',
    'operatingExpensesSecuritizationAmount': 'operatingExpensesSecuritizationAmount',
    'netOperatingIncomeNetCashFlowSecuritizationCode': 'netOperatingIncomeNetCashFlowSecuritizationCode',
    'mostRecentValuationSourceCode': 'mostRecentValuationSourceCode',
    'mostRecentDebtServiceCoverageNetCashFlowpercentage': 'mostRecentDebtServiceCoverageNetCashFlowpercentage',
    'debtServiceCoverageNetCashFlowSecuritizationPercentage': 'debtServiceCoverageNetCashFlowSecuritizationPercentage',
    'mostRecentAnnualLeaseRolloverReviewDate': 'mostRecentAnnualLeaseRolloverReviewDate',
    'mostRecentRevenueAmount': 'mostRecentRevenueAmount',
    'mostRecentPhysicalOccupancyPercentage': 'mostRecentPhysicalOccupancyPercentage',
    'mostRecentNetOperatingIncomeAmount': 'mostRecentNetOperatingIncomeAmount',
    'netOperatingIncomeSecuritizationAmount': 'netOperatingIncomeSecuritizationAmount',
    'netOperatingIncomeNetCashFlowCode': 'netOperatingIncomeNetCashFlowCode',
    'mostRecentFinancialsStartDate': 'mostRecentFinancialsStartDate',
    'mostRecentFinancialsEndDate': 'mostRecentFinancialsEndDate',
    'accession': 'accession',
    'valuationSecuritizationAmount': 'valuationSecuritizationAmount',
    'mostRecentValuationDate': 'mostRecentValuationDate',
    'mostRecentValuationAmount': 'mostRecentValuationAmount',
    'mostRecentDebtServiceCoverageCode': 'mostRecentDebtServiceCoverageCode',
    'netCashFlowFlowSecuritizationAmount': 'netCashFlowFlowSecuritizationAmount'
}
37
+
38
# Column mapping for the EX-102 properties table (table type
# 'properties_ex102_absee'). Every entry maps a column to itself, so the
# mapping leaves names unchanged and only lists the known property-level
# columns. 'assetNumber' and 'accession' are not listed here.
# NOTE(review): entry order presumably fixes the order of mapped columns in
# each row - confirm before reordering entries.
properties_dict_ex102_abs = {
    'unitsBedsRoomsNumber': 'unitsBedsRoomsNumber',
    'propertyCounty': 'propertyCounty',
    'squareFeetLargestTenantNumber': 'squareFeetLargestTenantNumber',
    'netRentableSquareFeetNumber': 'netRentableSquareFeetNumber',
    'leaseExpirationThirdLargestTenantDate': 'leaseExpirationThirdLargestTenantDate',
    'leaseExpirationLargestTenantDate': 'leaseExpirationLargestTenantDate',
    'propertyZip': 'propertyZip',
    'squareFeetThirdLargestTenantNumber': 'squareFeetThirdLargestTenantNumber',
    'propertyStatusCode': 'propertyStatusCode',
    'propertyState': 'propertyState',
    'yearBuiltNumber': 'yearBuiltNumber',
    'propertyCity': 'propertyCity',
    'propertyName': 'propertyName',
    'propertyAddress': 'propertyAddress',
    'yearLastRenovated': 'yearLastRenovated',
    'leaseExpirationSecondLargestTenantDate': 'leaseExpirationSecondLargestTenantDate',
    'thirdLargestTenant': 'thirdLargestTenant',
    'unitsBedsRoomsSecuritizationNumber': 'unitsBedsRoomsSecuritizationNumber',
    'propertyTypeCode': 'propertyTypeCode',
    'largestTenant': 'largestTenant',
    'squareFeetSecondLargestTenantNumber': 'squareFeetSecondLargestTenantNumber',
    'netRentableSquareFeetSecuritizationNumber': 'netRentableSquareFeetSecuritizationNumber',
    'secondLargestTenant': 'secondLargestTenant'
}
@@ -20,9 +20,14 @@ def process_tabular_data(self):
20
20
  elif self.type in ["25-NSE", "25-NSE/A"]:
21
21
  tables = process_25nse(self.data, self.accession)
22
22
  # complete mark:
23
+ elif self.type in ["EX-102"]:
24
+ tables = process_ex102_abs(self.data, self.accession)
25
+ elif self.type in ["D","D/A"]:
26
+ tables = process_d(self.data, self.accession)
23
27
  elif self.type in ["N-PX","N-PX/A"]:
24
28
  tables = process_npx(self.data, self.accession)
25
29
 
30
+
26
31
  elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
27
32
  tables = process_sbsef(self.data, self.accession)
28
33
  elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
@@ -47,8 +52,7 @@ def process_tabular_data(self):
47
52
  # tables = process_c(self.data, self.accession)
48
53
  elif self.type in ["CFPORTAL","CFPORTAL/A","CFPORTAL-W"]:
49
54
  tables = process_cfportal(self.data, self.accession)
50
- # elif self.type in ["D","D/A"]:
51
- # tables = process_d(self.data, self.accession)
55
+
52
56
  # elif self.type in ["MA","MA-A","MA/A","MA-I","MA-I/A","MA-W"]:
53
57
  # tables = process_ma(self.data, self.accession)
54
58
  # elif self.type in ["N-CEN","N-CEN/A"]:
@@ -70,8 +74,7 @@ def process_tabular_data(self):
70
74
  tables = process_reg_a(self.data, self.accession)
71
75
  # elif self.type in ["SBSE","SBSE/A","SBSE-A","SBSE-A/A","SBSE-BD","SBSE-BD/A","SBSE-C","SBSE-W","SBSE-CCO-RPT","SBSE-CCO-RPT/A"]:
72
76
  # tables = process_sbs(self.data, self.accession)
73
- # elif self.type in ["EX-102"]:
74
- # tables = process_ex102_abs(self.data, self.accession)
77
+
75
78
  elif self.type == "PROXY VOTING RECORD":
76
79
  tables = process_proxy_voting_record(self.data, self.accession)
77
80
  elif self.type == 'submission_metadata':
@@ -101,6 +104,67 @@ def _flatten_dict(d, parent_key=''):
101
104
 
102
105
  return items
103
106
 
107
+ # flattens in a different way
108
def flatten_dict_to_rows(d, parent_key='', sep='_'):
    """Flatten nested dicts/lists into a list of flat row dicts.

    Nested dict keys are joined to their parent key with ``sep``. A non-empty
    list multiplies the rows built so far: one copy per list element (and per
    row produced by a dict element). An empty list becomes the value ``''``.

    Args:
        d: A dict, a list, or a primitive value.
        parent_key: Key prefix accumulated from enclosing dicts.
        sep: Separator placed between key segments.

    Returns:
        A list of flat dicts. A list input yields the concatenated rows of its
        items; a primitive yields ``[{parent_key: d}]`` or ``[]`` when there
        is no ``parent_key``; a dict always yields at least one row.
    """
    if isinstance(d, list):
        return [
            row
            for element in d
            for row in flatten_dict_to_rows(element, parent_key, sep)
        ]

    if not isinstance(d, dict):
        if parent_key:
            return [{parent_key: d}]
        return []

    rows = [{}]
    for name, value in d.items():
        column = f"{parent_key}{sep}{name}" if parent_key else name

        if isinstance(value, dict):
            variants = flatten_dict_to_rows(value, column, sep)
        elif isinstance(value, list) and value:
            # Each element contributes one or more partial rows.
            variants = []
            for element in value:
                if isinstance(element, dict):
                    variants.extend(flatten_dict_to_rows(element, column, sep))
                else:
                    variants.append({column: element})
        else:
            # Scalars, and empty lists (stored as ''), are written in place.
            cell = '' if isinstance(value, list) else value
            for row in rows:
                row[column] = cell
            continue

        # Cross-product of the rows so far with the partial rows.
        rows = [{**row, **variant} for row in rows for variant in variants]

    return rows
167
+
104
168
  def process_ownership(data, accession):
105
169
  tables = []
106
170
  if 'ownershipDocument' not in data:
@@ -352,33 +416,33 @@ def process_cfportal(data, accession):
352
416
 
353
417
  return tables
354
418
 
355
- # def process_d(data, accession):
356
- # tables = []
357
- # primary_issuer = safe_get(data, ['edgarSubmission', 'primaryIssuer'])
358
- # if primary_issuer:
359
- # metadata = Table(_flatten_dict(primary_issuer), 'metadata_d', accession)
360
-
361
- # metadata_columns = ['schemaVersion', 'submissionType', 'testOrLive', 'returnCopy', 'contactData', 'notificationAddressList']
362
- # for col in metadata_columns:
363
- # col_data = safe_get(data, ['edgarSubmission', col])
364
- # if col_data:
365
- # metadata.add_column(col, col_data)
366
-
367
- # tables.append(metadata)
368
-
369
- # issuer_list = safe_get(data, ['edgarSubmission', 'issuerList'])
370
- # if issuer_list:
371
- # tables.append(Table(_flatten_dict(issuer_list), 'primary_issuer_d', accession))
372
-
373
- # offering_data = safe_get(data, ['edgarSubmission', 'offeringData'])
374
- # if offering_data:
375
- # tables.append(Table(_flatten_dict(offering_data), 'offering_data_d', accession))
376
-
377
- # related_persons_list = safe_get(data, ['edgarSubmission', 'relatedPersonsList'])
378
- # if related_persons_list:
379
- # tables.append(Table(_flatten_dict(related_persons_list), 'related_persons_list_d', accession))
419
def process_d(data, accession):
    """Build the tables for a Form D / D/A filing.

    Each listed group found under ``edgarSubmission`` becomes one table named
    ``<group>_d``. Whatever is left of ``edgarSubmission`` once those groups
    are taken out becomes the ``metadata_d`` table.

    Args:
        data: Parsed filing; expected to hold an ``edgarSubmission`` mapping.
        accession: Accession value handed to every ``Table``.

    Returns:
        A list of ``Table`` objects. The list is empty when ``data`` has no
        ``edgarSubmission`` mapping.
    """
    tables = []

    submission = data.get('edgarSubmission') if isinstance(data, dict) else None
    if not isinstance(submission, dict):
        return tables

    # Take groups out of a shallow copy so the caller's parsed document is
    # left intact and processing the same document twice gives the same tables.
    remaining = dict(submission)

    groups = ['contactData', 'notificationAddressList', 'primaryIssuer', 'issuerList', 'relatedPersonsList', 'offeringData']
    # Groups that are expanded into one row per list entry rather than being
    # flattened with _flatten_dict.
    row_expanded_groups = ('relatedPersonsList', 'issuerList')

    for group in groups:
        # Every group is read from the key of the same name. The previous
        # special case compared against 'relatedPersonList', which is not in
        # `groups`, so it never ran and has been dropped.
        group_data = remaining.pop(group, None)
        if not group_data:
            continue

        if group in row_expanded_groups:
            flattened_rows = flatten_dict_to_rows(group_data)
            if flattened_rows:
                tables.append(Table(flattened_rows, f'{group}_d', accession))
        else:
            tables.append(Table(_flatten_dict(group_data), f'{group}_d', accession))

    metadata_table = Table(_flatten_dict(remaining), 'metadata_d', accession)
    tables.append(metadata_table)

    return tables
382
446
 
383
447
  # def process_nmfp(data, accession):
384
448
  # tables = []
@@ -589,13 +653,39 @@ def process_reg_a(data, accession):
589
653
 
590
654
  # return tables
591
655
 
592
- # def process_ex102_abs(data, accession):
593
- # tables = []
594
- # asset_data = safe_get(data, ['assetData'])
595
- # if asset_data:
596
- # tables.append(Table(_flatten_dict(asset_data), 'abs', accession))
597
- # raise NotImplementedError("Need to implement the rest of the ABS processing")
598
- # return tables
656
def process_ex102_abs(data, accession):
    """Build the assets and properties tables for an EX-102 document.

    Reads the asset entries at ``assetData -> assets``. Each entry, minus its
    ``property`` field, becomes a row of the ``assets_ex102_absee`` table.
    Each property (an entry may carry one dict or a list of dicts) becomes a
    row of the ``properties_ex102_absee`` table, tagged with the owning
    entry's ``assetNumber``.

    Args:
        data: Parsed EX-102 document.
        accession: Accession value handed to every ``Table``.

    Returns:
        A list with zero, one or two ``Table`` objects. The list is empty
        when no asset entries are found.
    """
    tables = []
    asset_items = safe_get(data, ['assetData', 'assets'])

    # Nothing to process; this also covers a falsy result from safe_get
    # (presumably None when the path is missing - confirm against safe_get).
    if not asset_items:
        return tables

    # 'property' below can be a dict or a list, so a lone asset may likewise
    # arrive as a dict rather than a one-element list.
    if isinstance(asset_items, dict):
        asset_items = [asset_items]
    asset_items = [item for item in asset_items if isinstance(item, dict)]

    # Assets: every entry without its 'property' field.
    assets = [
        {key: value for key, value in item.items() if key != 'property'}
        for item in asset_items
    ]

    # Properties keep the original row order: all single-dict properties
    # first, then the properties that came in lists. A missing assetNumber is
    # recorded as None instead of raising KeyError.
    properties = [
        item['property'] | {'assetNumber': item.get('assetNumber')}
        for item in asset_items
        if isinstance(item.get('property'), dict)
    ]
    properties.extend(
        prop | {'assetNumber': item.get('assetNumber')}
        for item in asset_items
        if isinstance(item.get('property'), list)
        for prop in item['property']
        if isinstance(prop, dict)
    )

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
599
689
 
600
690
  # def process_ma(data, accession):
601
691
  # tables = []
@@ -19,6 +19,11 @@ from .mappings.twentyfivense import *
19
19
  from .mappings.twentyfourf2nt import *
20
20
  from .mappings.information_table import *
21
21
  from .mappings.submission_metadata import *
22
+ from .mappings.ex102_abs import *
23
+ from .mappings.d import *
24
+
25
+ from pathlib import Path
26
+ import csv
22
27
  # need to check if mappings correctly create new columns
23
28
  class Table():
24
29
  def __init__(self, data, type,accession):
@@ -27,11 +32,18 @@ class Table():
27
32
  self.type = type
28
33
  self.data = data
29
34
  self.accession = accession
30
- self.columns = self.determine_columns()
35
+ self.columns = self.determine_columns_complete()
36
+
37
def determine_columns_complete(self):
    """Return every column name used by any row, in first-seen order.

    Unlike a set union, the result has a stable order: columns appear in the
    order they are first encountered while scanning the rows. This keeps the
    column layout reproducible between runs.

    Returns:
        A list of column names; empty when the table has no rows.
    """
    if not self.data:
        return []
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(key for row in self.data for key in row.keys()))
41
+
31
42
 
32
43
def determine_columns(self):
    """Return the keys of the first row, or an empty list when there are no rows."""
    has_rows = len(self.data) != 0
    return self.data[0].keys() if has_rows else []
36
48
 
37
49
  def add_column(self,column_name,value):
@@ -190,6 +202,17 @@ class Table():
190
202
  elif self.type == 'signature_schedule_13':
191
203
  mapping_dict = signature_schedule_13_dict
192
204
 
205
+ # D
206
+ elif self.type == 'issuerList_d':
207
+ mapping_dict = issuer_list_d_dict
208
+ elif self.type == 'metadata_d':
209
+ mapping_dict = metadata_d_dict
210
+ elif self.type == 'offeringData_d':
211
+ mapping_dict = offering_data_d_dict
212
+ elif self.type == 'primaryIssuer_d':
213
+ mapping_dict = primary_issuer_d_dict
214
+ elif self.type == 'relatedPersonsList_d':
215
+ mapping_dict = related_persons_d_dict
193
216
  # SDR
194
217
  elif self.type == 'sdr':
195
218
  mapping_dict = sdr_dict
@@ -227,7 +250,11 @@ class Table():
227
250
  mapping_dict = item_9_24f2nt_dict
228
251
  elif self.type == 'signature_info_schedule_a':
229
252
  mapping_dict = signature_24f2nt_dict
230
-
253
+ # ABS
254
+ elif self.type == 'assets_ex102_absee':
255
+ mapping_dict = assets_dict_ex102_abs
256
+ elif self.type =='properties_ex102_absee':
257
+ mapping_dict = properties_dict_ex102_abs
231
258
  # submission metadata
232
259
  elif self.type == 'document_submission_metadata':
233
260
  mapping_dict = document_submission_metadata_dict
@@ -250,9 +277,6 @@ class Table():
250
277
  for old_key, new_key in mapping_dict.items():
251
278
  if old_key in row:
252
279
  ordered_row[new_key] = row.pop(old_key)
253
- else:
254
- # if the old key is not present, set the new key to None
255
- ordered_row[new_key] = None
256
280
 
257
281
  # Then add any remaining keys that weren't in the mapping
258
282
  for key, value in row.items():
@@ -262,4 +286,30 @@ class Table():
262
286
  row.clear()
263
287
  row.update(ordered_row)
264
288
 
265
- self.determine_columns()
289
+ # Update the columns after mapping
290
+ columns = set(self.columns)
291
+ # remove the old columns that are now in the mapping
292
+ columns.difference_update(mapping_dict.keys())
293
+ # add the new columns from the mapping
294
+ columns.update(mapping_dict.values())
295
+ # add the accession column to the columns
296
+ columns.add('accession')
297
+
298
+ self.columns = list(columns)
299
+
300
def write_csv(self, output_file):
    """Write the table rows to ``output_file`` as fully quoted CSV.

    Rows are appended when the file already has content. The header row is
    written only when the file is new or empty, so repeated calls build up
    one CSV with a single header.

    Args:
        output_file: Destination path (``str`` or ``Path``).

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when a row holds a key that
            is not in ``self.columns``.
    """
    output_file = Path(output_file)
    fieldnames = self.columns

    # An existing but empty file still needs its header.
    needs_header = not output_file.exists() or output_file.stat().st_size == 0

    # Append mode creates the file when missing. UTF-8 is set explicitly so
    # the output does not depend on the platform's default encoding.
    with open(output_file, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        if needs_header:
            writer.writeheader()
        writer.writerows(self.data)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.2.6
3
+ Version: 1.2.8
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -7,12 +7,14 @@ datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
7
7
  datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
8
8
  datamule/submission.py,sha256=Yh5nG3ioumhl6z30wJdIEmKjDDNSuo0r2xycZSIaeIg,11035
9
9
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- datamule/document/document.py,sha256=CQqlHCVwp0TfOjE173jJSh9O-zpYuo4Ixr85vXCJA1E,9977
11
- datamule/document/processing.py,sha256=Wn2Dpe6vXqVDm1qgIc-nR273wKBQEtstxZbugvuPZdI,28526
12
- datamule/document/table.py,sha256=TiOQyFp8CLWt-hHHX8BCjKuaBgeb-uO23ekD8QoUsqM,10993
10
+ datamule/document/document.py,sha256=menUFoeWwiY0rJnBkQiqY4NWnO0J17-qs8jFvO_1jiY,9969
11
+ datamule/document/processing.py,sha256=eWrLxBXmoCoKyUN1gY57ikl9m0KRdACzahJT9PnWQcA,31668
12
+ datamule/document/table.py,sha256=jC2itnolWG9DpCjw0x6Ma5yVJiHvQT3Gd_5yl53L5a4,12939
13
13
  datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  datamule/document/mappings/atsn.py,sha256=qkZGNIhyPC3VTTOjQ8-FSCQIhUy4XeSycUGLShxNVCo,17743
15
15
  datamule/document/mappings/cfportal.py,sha256=bR9d6DDY0kJ_HGx_hND2y1PNNkZjemYZ2KdyFAcv760,25257
16
+ datamule/document/mappings/d.py,sha256=nRpfxkHIf8wYBMmY84biU1Smci9fF9pFYCG_iUddGtU,7928
17
+ datamule/document/mappings/ex102_abs.py,sha256=FdGKvteRh_HsYgILF-8o4R6aSsjYwcaLpJxzdru4FTE,3976
16
18
  datamule/document/mappings/ex99a_sdr.py,sha256=PNdj9I0ZhNicPObLelNmjp33EgTwzvukqkBDnwxarE0,19
17
19
  datamule/document/mappings/ex99c_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  datamule/document/mappings/ex99g_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -54,7 +56,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
54
56
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
55
57
  datamule/seclibrary/downloader.py,sha256=PIgz_7ASUTZOHcUZGcD1SmLaGSbq7xe7EiJT0Z7HU4M,13653
56
58
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
57
- datamule-1.2.6.dist-info/METADATA,sha256=WwrQVyxcEIzhkzy_0WQhkoXHt_nXbmzarCMa2pYl2mw,490
58
- datamule-1.2.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
59
- datamule-1.2.6.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
60
- datamule-1.2.6.dist-info/RECORD,,
59
+ datamule-1.2.8.dist-info/METADATA,sha256=mQvixJX2sn9NJbH3LkZ1H3IeRIwHwCgBjgWVVXjOt-Q,490
60
+ datamule-1.2.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
61
+ datamule-1.2.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
62
+ datamule-1.2.8.dist-info/RECORD,,