datamule 1.2.6__py3-none-any.whl → 1.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/document/document.py +2 -2
- datamule/document/mappings/d.py +127 -0
- datamule/document/mappings/ex102_abs.py +63 -0
- datamule/document/processing.py +127 -37
- datamule/document/table.py +56 -6
- {datamule-1.2.6.dist-info → datamule-1.2.8.dist-info}/METADATA +1 -1
- {datamule-1.2.6.dist-info → datamule-1.2.8.dist-info}/RECORD +9 -7
- {datamule-1.2.6.dist-info → datamule-1.2.8.dist-info}/WHEEL +0 -0
- {datamule-1.2.6.dist-info → datamule-1.2.8.dist-info}/top_level.txt +0 -0
datamule/document/document.py
CHANGED
@@ -141,7 +141,7 @@ class Document:
|
|
141
141
|
with open(output_filename, 'w',encoding='utf-8') as f:
|
142
142
|
json.dump(self.data, f, indent=2)
|
143
143
|
|
144
|
-
def
|
144
|
+
def tables(self):
|
145
145
|
if self.type == 'submission_metadata':
|
146
146
|
return process_tabular_data(self)
|
147
147
|
elif self.extension != '.xml':
|
@@ -155,7 +155,7 @@ class Document:
|
|
155
155
|
output_folder = Path(output_folder)
|
156
156
|
output_folder.mkdir(exist_ok=True)
|
157
157
|
|
158
|
-
tables = self.
|
158
|
+
tables = self.tables()
|
159
159
|
|
160
160
|
if not tables:
|
161
161
|
return
|
@@ -0,0 +1,127 @@
|
|
1
|
+
issuer_list_d_dict = {
|
2
|
+
'issuer_issuerAddress_street1': 'issuerStreet1',
|
3
|
+
'issuer_cik': 'issuerCik',
|
4
|
+
'issuer_issuerAddress_stateOrCountryDescription': 'issuerStateOrCountryDescription',
|
5
|
+
'issuer_issuerAddress_zipCode': 'issuerZipCode',
|
6
|
+
'issuer_issuerPhoneNumber': 'issuerPhoneNumber',
|
7
|
+
'issuer_yearOfInc_value': 'yearOfIncValue',
|
8
|
+
'issuer_issuerAddress_stateOrCountry': 'issuerStateOrCountry',
|
9
|
+
'issuer_jurisdictionOfInc': 'jurisdictionOfInc',
|
10
|
+
'issuer_entityType': 'entityType',
|
11
|
+
'issuer_issuerAddress_street2': 'issuerStreet2',
|
12
|
+
'issuer_entityName': 'entityName',
|
13
|
+
'accession': 'accessionNumber',
|
14
|
+
'issuer_edgarPreviousNameList_value': 'edgarPreviousNameListValue',
|
15
|
+
'issuer_entityTypeOtherDesc': 'entityTypeOtherDesc',
|
16
|
+
'issuer_yearOfInc_yetToBeFormed': 'yearOfIncYetToBeFormed',
|
17
|
+
'issuer_yearOfInc_withinFiveYears': 'yearOfIncWithinFiveYears',
|
18
|
+
'issuer_issuerPreviousNameList_value': 'issuerPreviousNameListValue',
|
19
|
+
'issuer_issuerAddress_city': 'issuerCity'
|
20
|
+
}
|
21
|
+
|
22
|
+
metadata_d_dict = {
|
23
|
+
"testOrLive" : "testOrLive",
|
24
|
+
"schemaVersion" : "schemaVersion",
|
25
|
+
"accession" : "accession",
|
26
|
+
"submissionType" : "submissionType",
|
27
|
+
}
|
28
|
+
offering_data_d_dict = {
|
29
|
+
'salesCompensationList_recipient': 'salesCompensationRecipient',
|
30
|
+
'useOfProceeds_clarificationOfResponse': 'useOfProceedsClarification',
|
31
|
+
'industryGroup_investmentFundInfo_investmentFundType': 'investmentFundType',
|
32
|
+
'typeOfFiling_newOrAmendment_isAmendment': 'isAmendment',
|
33
|
+
'salesCompensationList_recipient_recipientAddress_stateOrCountryDescription': 'recipientStateOrCountryDescription',
|
34
|
+
'signatureBlock_signature_signatureName': 'signatureName',
|
35
|
+
'federalExemptionsExclusions_item': 'federalExemptionsItem',
|
36
|
+
'salesCommissionsFindersFees_findersFees_isEstimate': 'findersFeesIsEstimate',
|
37
|
+
'salesCommissionsFindersFees_salesCommissions_dollarAmount': 'salesCommissionsDollarAmount',
|
38
|
+
'salesCompensationList_recipient_recipientName': 'recipientName',
|
39
|
+
'salesCommissionsFindersFees_salesCommissions_isEstimate': 'salesCommissionsIsEstimate',
|
40
|
+
'offeringSalesAmounts_totalRemaining': 'totalRemaining',
|
41
|
+
'salesCommissionsFindersFees_clarificationOfResponse': 'salesCommissionsClarification',
|
42
|
+
'salesCompensationList_recipient_statesOfSolicitationList_value': 'recipientStatesOfSolicitationValue',
|
43
|
+
'investors_numberNonAccreditedInvestors': 'numberNonAccreditedInvestors',
|
44
|
+
'typesOfSecuritiesOffered_isTenantInCommonType': 'isTenantInCommonType',
|
45
|
+
'businessCombinationTransaction_isBusinessCombinationTransaction': 'isBusinessCombinationTransaction',
|
46
|
+
'typesOfSecuritiesOffered_isSecurityToBeAcquiredType': 'isSecurityToBeAcquiredType',
|
47
|
+
'issuerSize_aggregateNetAssetValueRange': 'aggregateNetAssetValueRange',
|
48
|
+
'typesOfSecuritiesOffered_isPooledInvestmentFundType': 'isPooledInvestmentFundType',
|
49
|
+
'offeringSalesAmounts_clarificationOfResponse': 'offeringSalesAmountsClarification',
|
50
|
+
'signatureBlock_signature_nameOfSigner': 'nameOfSigner',
|
51
|
+
'industryGroup_investmentFundInfo_is40Act': 'is40Act',
|
52
|
+
'salesCompensationList_recipient_recipientAddress_city': 'recipientCity',
|
53
|
+
'typeOfFiling_dateOfFirstSale_yetToOccur': 'dateOfFirstSaleYetToOccur',
|
54
|
+
'signatureBlock_signature': 'signature',
|
55
|
+
'salesCompensationList_recipient_foreignSolicitation': 'recipientForeignSolicitation',
|
56
|
+
'businessCombinationTransaction_clarificationOfResponse': 'businessCombinationClarification',
|
57
|
+
'salesCompensationList_recipient_associatedBDName': 'recipientAssociatedBDName',
|
58
|
+
'salesCompensationList_recipient_statesOfSolicitationList_state': 'recipientStatesOfSolicitationState',
|
59
|
+
'typeOfFiling_dateOfFirstSale_value': 'dateOfFirstSaleValue',
|
60
|
+
'signatureBlock_signature_signatureTitle': 'signatureTitle',
|
61
|
+
'signatureBlock_signature_issuerName': 'signatureIssuerName',
|
62
|
+
'durationOfOffering_moreThanOneYear': 'durationOfferingMoreThanOneYear',
|
63
|
+
'offeringSalesAmounts_totalAmountSold': 'totalAmountSold',
|
64
|
+
'signatureBlock_signature_signatureDate': 'signatureDate',
|
65
|
+
'issuerSize_revenueRange': 'revenueRange',
|
66
|
+
'typesOfSecuritiesOffered_isOptionToAcquireType': 'isOptionToAcquireType',
|
67
|
+
'signatureBlock_authorizedRepresentative': 'authorizedRepresentative',
|
68
|
+
'salesCompensationList_recipient_recipientAddress_street2': 'recipientStreet2',
|
69
|
+
'useOfProceeds_grossProceedsUsed_isEstimate': 'grossProceedsUsedIsEstimate',
|
70
|
+
'salesCommissionsFindersFees_findersFees_dollarAmount': 'findersFeesDollarAmount',
|
71
|
+
'typesOfSecuritiesOffered_isEquityType': 'isEquityType',
|
72
|
+
'typesOfSecuritiesOffered_descriptionOfOtherType': 'descriptionOfOtherType',
|
73
|
+
'salesCompensationList_recipient_recipientAddress_street1': 'recipientStreet1',
|
74
|
+
'minimumInvestmentAccepted': 'minimumInvestmentAccepted',
|
75
|
+
'typesOfSecuritiesOffered_isOtherType': 'isOtherType',
|
76
|
+
'salesCompensationList_recipient_recipientCRDNumber': 'recipientCRDNumber',
|
77
|
+
'typesOfSecuritiesOffered_isDebtType': 'isDebtType',
|
78
|
+
'accession': 'accessionNumber',
|
79
|
+
'investors_totalNumberAlreadyInvested': 'totalNumberAlreadyInvested',
|
80
|
+
'typesOfSecuritiesOffered_isMineralPropertyType': 'isMineralPropertyType',
|
81
|
+
'industryGroup_industryGroupType': 'industryGroupType',
|
82
|
+
'salesCompensationList_recipient_statesOfSolicitationList_description': 'recipientStatesOfSolicitationDescription',
|
83
|
+
'salesCompensationList_recipient_associatedBDCRDNumber': 'recipientAssociatedBDCRDNumber',
|
84
|
+
'useOfProceeds_grossProceedsUsed_dollarAmount': 'grossProceedsUsedDollarAmount',
|
85
|
+
'investors_hasNonAccreditedInvestors': 'hasNonAccreditedInvestors',
|
86
|
+
'salesCompensationList_recipient_recipientAddress_zipCode': 'recipientZipCode',
|
87
|
+
'offeringSalesAmounts_totalOfferingAmount': 'totalOfferingAmount',
|
88
|
+
'salesCompensationList_recipient_recipientAddress_stateOrCountry': 'recipientStateOrCountry'
|
89
|
+
}
|
90
|
+
primary_issuer_d_dict = {
|
91
|
+
'yearOfInc_withinFiveYears': 'yearOfIncWithinFiveYears',
|
92
|
+
'entityTypeOtherDesc': 'entityTypeOtherDesc',
|
93
|
+
'jurisdictionOfInc': 'jurisdictionOfInc',
|
94
|
+
'issuerAddress_street1': 'issuerStreet1',
|
95
|
+
'issuerAddress_zipCode': 'issuerZipCode',
|
96
|
+
'issuerPreviousNameList_previousName': 'issuerPreviousName',
|
97
|
+
'entityType': 'entityType',
|
98
|
+
'issuerPreviousNameList_value': 'issuerPreviousNameListValue',
|
99
|
+
'issuerPhoneNumber': 'issuerPhoneNumber',
|
100
|
+
'yearOfInc_value': 'yearOfIncValue',
|
101
|
+
'yearOfInc_yetToBeFormed': 'yearOfIncYetToBeFormed',
|
102
|
+
'edgarPreviousNameList_previousName': 'edgarPreviousName',
|
103
|
+
'edgarPreviousNameList_value': 'edgarPreviousNameListValue',
|
104
|
+
'issuerAddress_stateOrCountry': 'issuerStateOrCountry',
|
105
|
+
'entityName': 'entityName',
|
106
|
+
'accession': 'accessionNumber',
|
107
|
+
'issuerAddress_street2': 'issuerStreet2',
|
108
|
+
'issuerAddress_city': 'issuerCity',
|
109
|
+
'issuerAddress_stateOrCountryDescription': 'issuerStateOrCountryDescription',
|
110
|
+
'cik': 'cik',
|
111
|
+
'yearOfInc_overFiveYears': 'yearOfIncOverFiveYears'
|
112
|
+
}
|
113
|
+
|
114
|
+
related_persons_d_dict = {
|
115
|
+
'relatedPersonInfo_relatedPersonAddress_stateOrCountry': 'relatedPersonStateOrCountry',
|
116
|
+
'relatedPersonInfo_relatedPersonRelationshipList_relationship': 'relatedPersonRelationship',
|
117
|
+
'relatedPersonInfo_relationshipClarification': 'relationshipClarification',
|
118
|
+
'relatedPersonInfo_relatedPersonName_lastName': 'relatedPersonLastName',
|
119
|
+
'accession': 'accessionNumber',
|
120
|
+
'relatedPersonInfo_relatedPersonName_middleName': 'relatedPersonMiddleName',
|
121
|
+
'relatedPersonInfo_relatedPersonAddress_zipCode': 'relatedPersonZipCode',
|
122
|
+
'relatedPersonInfo_relatedPersonAddress_city': 'relatedPersonCity',
|
123
|
+
'relatedPersonInfo_relatedPersonAddress_street1': 'relatedPersonStreet1',
|
124
|
+
'relatedPersonInfo_relatedPersonAddress_stateOrCountryDescription': 'relatedPersonStateOrCountryDescription',
|
125
|
+
'relatedPersonInfo_relatedPersonName_firstName': 'relatedPersonFirstName',
|
126
|
+
'relatedPersonInfo_relatedPersonAddress_street2': 'relatedPersonStreet2'
|
127
|
+
}
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Assets dictionary mapping
|
2
|
+
assets_dict_ex102_abs = {
|
3
|
+
'assetNumber': 'assetNumber',
|
4
|
+
'DefeasedStatusCode': 'DefeasedStatusCode',
|
5
|
+
'defeasanceOptionStartDate': 'defeasanceOptionStartDate',
|
6
|
+
'mostRecentDebtServiceCoverageNetOperatingIncomePercentage': 'mostRecentDebtServiceCoverageNetOperatingIncomePercentage',
|
7
|
+
'mostRecentDebtServiceAmount': 'mostRecentDebtServiceAmount',
|
8
|
+
'debtServiceCoverageSecuritizationCode': 'debtServiceCoverageSecuritizationCode',
|
9
|
+
'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage': 'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage',
|
10
|
+
'valuationSecuritizationDate': 'valuationSecuritizationDate',
|
11
|
+
'physicalOccupancySecuritizationPercentage': 'physicalOccupancySecuritizationPercentage',
|
12
|
+
'revenueSecuritizationAmount': 'revenueSecuritizationAmount',
|
13
|
+
'valuationSourceSecuritizationCode': 'valuationSourceSecuritizationCode',
|
14
|
+
'financialsSecuritizationDate': 'financialsSecuritizationDate',
|
15
|
+
'mostRecentNetCashFlowAmount': 'mostRecentNetCashFlowAmount',
|
16
|
+
'operatingExpensesAmount': 'operatingExpensesAmount',
|
17
|
+
'operatingExpensesSecuritizationAmount': 'operatingExpensesSecuritizationAmount',
|
18
|
+
'netOperatingIncomeNetCashFlowSecuritizationCode': 'netOperatingIncomeNetCashFlowSecuritizationCode',
|
19
|
+
'mostRecentValuationSourceCode': 'mostRecentValuationSourceCode',
|
20
|
+
'mostRecentDebtServiceCoverageNetCashFlowpercentage': 'mostRecentDebtServiceCoverageNetCashFlowpercentage',
|
21
|
+
'debtServiceCoverageNetCashFlowSecuritizationPercentage': 'debtServiceCoverageNetCashFlowSecuritizationPercentage',
|
22
|
+
'mostRecentAnnualLeaseRolloverReviewDate': 'mostRecentAnnualLeaseRolloverReviewDate',
|
23
|
+
'mostRecentRevenueAmount': 'mostRecentRevenueAmount',
|
24
|
+
'mostRecentPhysicalOccupancyPercentage': 'mostRecentPhysicalOccupancyPercentage',
|
25
|
+
'mostRecentNetOperatingIncomeAmount': 'mostRecentNetOperatingIncomeAmount',
|
26
|
+
'netOperatingIncomeSecuritizationAmount': 'netOperatingIncomeSecuritizationAmount',
|
27
|
+
'netOperatingIncomeNetCashFlowCode': 'netOperatingIncomeNetCashFlowCode',
|
28
|
+
'mostRecentFinancialsStartDate': 'mostRecentFinancialsStartDate',
|
29
|
+
'mostRecentFinancialsEndDate': 'mostRecentFinancialsEndDate',
|
30
|
+
'accession': 'accession',
|
31
|
+
'valuationSecuritizationAmount': 'valuationSecuritizationAmount',
|
32
|
+
'mostRecentValuationDate': 'mostRecentValuationDate',
|
33
|
+
'mostRecentValuationAmount': 'mostRecentValuationAmount',
|
34
|
+
'mostRecentDebtServiceCoverageCode': 'mostRecentDebtServiceCoverageCode',
|
35
|
+
'netCashFlowFlowSecuritizationAmount': 'netCashFlowFlowSecuritizationAmount'
|
36
|
+
}
|
37
|
+
|
38
|
+
# Properties dictionary mapping
|
39
|
+
properties_dict_ex102_abs = {
|
40
|
+
'unitsBedsRoomsNumber': 'unitsBedsRoomsNumber',
|
41
|
+
'propertyCounty': 'propertyCounty',
|
42
|
+
'squareFeetLargestTenantNumber': 'squareFeetLargestTenantNumber',
|
43
|
+
'netRentableSquareFeetNumber': 'netRentableSquareFeetNumber',
|
44
|
+
'leaseExpirationThirdLargestTenantDate': 'leaseExpirationThirdLargestTenantDate',
|
45
|
+
'leaseExpirationLargestTenantDate': 'leaseExpirationLargestTenantDate',
|
46
|
+
'propertyZip': 'propertyZip',
|
47
|
+
'squareFeetThirdLargestTenantNumber': 'squareFeetThirdLargestTenantNumber',
|
48
|
+
'propertyStatusCode': 'propertyStatusCode',
|
49
|
+
'propertyState': 'propertyState',
|
50
|
+
'yearBuiltNumber': 'yearBuiltNumber',
|
51
|
+
'propertyCity': 'propertyCity',
|
52
|
+
'propertyName': 'propertyName',
|
53
|
+
'propertyAddress': 'propertyAddress',
|
54
|
+
'yearLastRenovated': 'yearLastRenovated',
|
55
|
+
'leaseExpirationSecondLargestTenantDate': 'leaseExpirationSecondLargestTenantDate',
|
56
|
+
'thirdLargestTenant': 'thirdLargestTenant',
|
57
|
+
'unitsBedsRoomsSecuritizationNumber': 'unitsBedsRoomsSecuritizationNumber',
|
58
|
+
'propertyTypeCode': 'propertyTypeCode',
|
59
|
+
'largestTenant': 'largestTenant',
|
60
|
+
'squareFeetSecondLargestTenantNumber': 'squareFeetSecondLargestTenantNumber',
|
61
|
+
'netRentableSquareFeetSecuritizationNumber': 'netRentableSquareFeetSecuritizationNumber',
|
62
|
+
'secondLargestTenant': 'secondLargestTenant'
|
63
|
+
}
|
datamule/document/processing.py
CHANGED
@@ -20,9 +20,14 @@ def process_tabular_data(self):
|
|
20
20
|
elif self.type in ["25-NSE", "25-NSE/A"]:
|
21
21
|
tables = process_25nse(self.data, self.accession)
|
22
22
|
# complete mark:
|
23
|
+
elif self.type in ["EX-102"]:
|
24
|
+
tables = process_ex102_abs(self.data, self.accession)
|
25
|
+
elif self.type in ["D","D/A"]:
|
26
|
+
tables = process_d(self.data, self.accession)
|
23
27
|
elif self.type in ["N-PX","N-PX/A"]:
|
24
28
|
tables = process_npx(self.data, self.accession)
|
25
29
|
|
30
|
+
|
26
31
|
elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
|
27
32
|
tables = process_sbsef(self.data, self.accession)
|
28
33
|
elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
|
@@ -47,8 +52,7 @@ def process_tabular_data(self):
|
|
47
52
|
# tables = process_c(self.data, self.accession)
|
48
53
|
elif self.type in ["CFPORTAL","CFPORTAL/A","CFPORTAL-W"]:
|
49
54
|
tables = process_cfportal(self.data, self.accession)
|
50
|
-
|
51
|
-
# tables = process_d(self.data, self.accession)
|
55
|
+
|
52
56
|
# elif self.type in ["MA","MA-A","MA/A","MA-I","MA-I/A","MA-W"]:
|
53
57
|
# tables = process_ma(self.data, self.accession)
|
54
58
|
# elif self.type in ["N-CEN","N-CEN/A"]:
|
@@ -70,8 +74,7 @@ def process_tabular_data(self):
|
|
70
74
|
tables = process_reg_a(self.data, self.accession)
|
71
75
|
# elif self.type in ["SBSE","SBSE/A","SBSE-A","SBSE-A/A","SBSE-BD","SBSE-BD/A","SBSE-C","SBSE-W","SBSE-CCO-RPT","SBSE-CCO-RPT/A"]:
|
72
76
|
# tables = process_sbs(self.data, self.accession)
|
73
|
-
|
74
|
-
# tables = process_ex102_abs(self.data, self.accession)
|
77
|
+
|
75
78
|
elif self.type == "PROXY VOTING RECORD":
|
76
79
|
tables = process_proxy_voting_record(self.data, self.accession)
|
77
80
|
elif self.type == 'submission_metadata':
|
@@ -101,6 +104,67 @@ def _flatten_dict(d, parent_key=''):
|
|
101
104
|
|
102
105
|
return items
|
103
106
|
|
107
|
+
# flattens in a different way
|
108
|
+
def flatten_dict_to_rows(d, parent_key='', sep='_'):
    """Flatten a nested dict/list structure into a list of flat row dicts.

    Nested dict keys are joined to their parent key with ``sep``.  Unlike a
    plain flatten, a list does not become a single cell: every list element
    produces its own row, and rows are cross-multiplied with the rows built
    so far, so sibling lists yield the cartesian product of their elements.

    Args:
        d: A dict, a list, or a primitive value.
        parent_key: Key prefix accumulated from the enclosing levels.
        sep: Separator placed between the prefix and each nested key.

    Returns:
        A list of flat dicts.  A list input returns the concatenated rows of
        its items; a primitive input returns ``[{parent_key: d}]`` or, when
        there is no ``parent_key`` to file it under, ``[]``.
    """
    if isinstance(d, list):
        # Top-level list: each item contributes its own rows.
        all_rows = []
        for item in d:
            all_rows.extend(flatten_dict_to_rows(item, parent_key, sep))
        return all_rows

    if not isinstance(d, dict):
        # A bare primitive can only be stored if there is a key to store it under.
        return [{parent_key: d}] if parent_key else []

    rows = [{}]
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k

        if isinstance(v, dict):
            rows = _cross_merge(rows, flatten_dict_to_rows(v, new_key, sep))
        elif isinstance(v, list) and v:
            # Flatten each list element once (not once per existing row),
            # then cross-multiply with the rows built so far.
            item_rows = []
            for list_item in v:
                if isinstance(list_item, dict):
                    item_rows.extend(flatten_dict_to_rows(list_item, new_key, sep))
                else:
                    # Primitive (or nested list) element: stored as-is.
                    item_rows.append({new_key: list_item})
            rows = _cross_merge(rows, item_rows)
        else:
            # Primitive value, or an empty list which is recorded as ''.
            value = '' if isinstance(v, list) else v
            for row in rows:
                row[new_key] = value

    return rows


def _cross_merge(rows, extra_rows):
    """Return each dict in ``rows`` merged with each dict in ``extra_rows``.

    Ordering is row-major: all combinations for the first row come first.
    Keys from ``extra_rows`` win on collision.
    """
    return [{**row, **extra} for row in rows for extra in extra_rows]
|
167
|
+
|
104
168
|
def process_ownership(data, accession):
|
105
169
|
tables = []
|
106
170
|
if 'ownershipDocument' not in data:
|
@@ -352,33 +416,33 @@ def process_cfportal(data, accession):
|
|
352
416
|
|
353
417
|
return tables
|
354
418
|
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
#
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
#
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
419
|
+
def process_d(data, accession):
    """Split a parsed Form D / D/A submission into one table per section.

    Each known section found under ``data['edgarSubmission']`` becomes a
    ``Table`` whose type is ``'<section>_d'``.  Whatever is left after the
    known sections are taken out is flattened into the ``'metadata_d'`` table.

    Args:
        data: Parsed submission; must contain the ``'edgarSubmission'`` key.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        A list of ``Table`` objects, always ending with the metadata table.

    Raises:
        KeyError: If ``data`` has no ``'edgarSubmission'`` entry.
    """
    tables = []

    # Work on a shallow copy so the caller's document data is not stripped of
    # its sections; otherwise a second call would return only the metadata.
    submission = dict(data['edgarSubmission'])

    groups = ['contactData', 'notificationAddressList', 'primaryIssuer', 'issuerList', 'relatedPersonsList', 'offeringData']
    # Sections that can hold repeated entries are expanded to one row per
    # entry; all other sections go through the regular flattening.
    row_expanded_groups = ('relatedPersonsList', 'issuerList')

    for group in groups:
        group_data = submission.pop(group, None)
        if not group_data:
            continue

        if group in row_expanded_groups:
            flattened_rows = flatten_dict_to_rows(group_data)
            if flattened_rows:
                tables.append(Table(flattened_rows, f'{group}_d', accession))
        else:
            tables.append(Table(_flatten_dict(group_data), f'{group}_d', accession))

    # Everything that was not one of the known sections is submission metadata.
    metadata_table = Table(_flatten_dict(submission), 'metadata_d', accession)
    tables.append(metadata_table)

    return tables
|
382
446
|
|
383
447
|
# def process_nmfp(data, accession):
|
384
448
|
# tables = []
|
@@ -589,13 +653,39 @@ def process_reg_a(data, accession):
|
|
589
653
|
|
590
654
|
# return tables
|
591
655
|
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
#
|
597
|
-
|
598
|
-
|
656
|
+
def process_ex102_abs(data, accession):
    """Build the asset and property tables for an EX-102 (ABS) exhibit.

    The asset entries are read from ``assetData -> assets``.  Each asset is
    split in two: the asset itself without its ``'property'`` field, and one
    property row per attached property, tagged with the owning asset's
    ``'assetNumber'`` so the two tables can be joined.

    Args:
        data: Parsed exhibit data.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        A list with up to two ``Table`` objects (``'assets_ex102_absee'`` and
        ``'properties_ex102_absee'``); empty when there are no assets.

    Raises:
        KeyError: If an asset carries a ``'property'`` but no ``'assetNumber'``.
    """
    tables = []
    data = safe_get(data, ['assetData', 'assets'])

    # NOTE(review): assumes safe_get returns a falsy value when the path is
    # missing -- confirm against its definition.
    if not data:
        return tables

    # A single asset may arrive as a bare dict instead of a one-element list,
    # the same single-vs-repeated shape handled for 'property' below.
    if isinstance(data, dict):
        data = [data]

    # Assets table: every item without its nested 'property' field.
    assets = [{k: v for k, v in item.items() if k != 'property'} for item in data]

    properties = []

    # Assets whose 'property' is a single dict.
    properties.extend([
        item['property'] | {'assetNumber': item['assetNumber']}
        for item in data
        if 'property' in item and isinstance(item['property'], dict)
    ])

    # Assets whose 'property' is a list of dicts; non-dict entries are skipped.
    properties.extend([
        prop | {'assetNumber': item['assetNumber']}
        for item in data
        if 'property' in item and isinstance(item['property'], list)
        for prop in item['property']
        if isinstance(prop, dict)
    ])

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
|
599
689
|
|
600
690
|
# def process_ma(data, accession):
|
601
691
|
# tables = []
|
datamule/document/table.py
CHANGED
@@ -19,6 +19,11 @@ from .mappings.twentyfivense import *
|
|
19
19
|
from .mappings.twentyfourf2nt import *
|
20
20
|
from .mappings.information_table import *
|
21
21
|
from .mappings.submission_metadata import *
|
22
|
+
from .mappings.ex102_abs import *
|
23
|
+
from .mappings.d import *
|
24
|
+
|
25
|
+
from pathlib import Path
|
26
|
+
import csv
|
22
27
|
# need to check if mappings correctly create new columns
|
23
28
|
class Table():
|
24
29
|
def __init__(self, data, type,accession):
|
@@ -27,11 +32,18 @@ class Table():
|
|
27
32
|
self.type = type
|
28
33
|
self.data = data
|
29
34
|
self.accession = accession
|
30
|
-
self.columns = self.
|
35
|
+
self.columns = self.determine_columns_complete()
|
36
|
+
|
37
|
+
def determine_columns_complete(self):
    """Return every column name that occurs in any row of ``self.data``.

    Rows may have differing keys, so all rows are scanned rather than just
    the first.  Columns are returned in first-seen order, which keeps the
    result identical from run to run (a set would not guarantee that).

    Returns:
        A list of unique column names; ``[]`` when there is no data.
    """
    if not self.data:
        return []
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(key for row in self.data for key in row.keys()))
|
41
|
+
|
31
42
|
|
32
43
|
def determine_columns(self):
    """Return the column names of the first row of ``self.data``.

    Only the first row is inspected, so keys that appear solely in later
    rows are not reported.

    Returns:
        The key view of the first row, or ``[]`` when there is no data.
    """
    # Same emptiness guard as determine_columns_complete; also covers None.
    if not self.data:
        return []

    return self.data[0].keys()
|
36
48
|
|
37
49
|
def add_column(self,column_name,value):
|
@@ -190,6 +202,17 @@ class Table():
|
|
190
202
|
elif self.type == 'signature_schedule_13':
|
191
203
|
mapping_dict = signature_schedule_13_dict
|
192
204
|
|
205
|
+
# D
|
206
|
+
elif self.type == 'issuerList_d':
|
207
|
+
mapping_dict = issuer_list_d_dict
|
208
|
+
elif self.type == 'metadata_d':
|
209
|
+
mapping_dict = metadata_d_dict
|
210
|
+
elif self.type == 'offeringData_d':
|
211
|
+
mapping_dict = offering_data_d_dict
|
212
|
+
elif self.type == 'primaryIssuer_d':
|
213
|
+
mapping_dict = primary_issuer_d_dict
|
214
|
+
elif self.type == 'relatedPersonsList_d':
|
215
|
+
mapping_dict = related_persons_d_dict
|
193
216
|
# SDR
|
194
217
|
elif self.type == 'sdr':
|
195
218
|
mapping_dict = sdr_dict
|
@@ -227,7 +250,11 @@ class Table():
|
|
227
250
|
mapping_dict = item_9_24f2nt_dict
|
228
251
|
elif self.type == 'signature_info_schedule_a':
|
229
252
|
mapping_dict = signature_24f2nt_dict
|
230
|
-
|
253
|
+
# ABS
|
254
|
+
elif self.type == 'assets_ex102_absee':
|
255
|
+
mapping_dict = assets_dict_ex102_abs
|
256
|
+
elif self.type =='properties_ex102_absee':
|
257
|
+
mapping_dict = properties_dict_ex102_abs
|
231
258
|
# submission metadata
|
232
259
|
elif self.type == 'document_submission_metadata':
|
233
260
|
mapping_dict = document_submission_metadata_dict
|
@@ -250,9 +277,6 @@ class Table():
|
|
250
277
|
for old_key, new_key in mapping_dict.items():
|
251
278
|
if old_key in row:
|
252
279
|
ordered_row[new_key] = row.pop(old_key)
|
253
|
-
else:
|
254
|
-
# if the old key is not present, set the new key to None
|
255
|
-
ordered_row[new_key] = None
|
256
280
|
|
257
281
|
# Then add any remaining keys that weren't in the mapping
|
258
282
|
for key, value in row.items():
|
@@ -262,4 +286,30 @@ class Table():
|
|
262
286
|
row.clear()
|
263
287
|
row.update(ordered_row)
|
264
288
|
|
265
|
-
|
289
|
+
# Update the columns after mapping
|
290
|
+
columns = set(self.columns)
|
291
|
+
# remove the old columns that are now in the mapping
|
292
|
+
columns.difference_update(mapping_dict.keys())
|
293
|
+
# add the new columns from the mapping
|
294
|
+
columns.update(mapping_dict.values())
|
295
|
+
# add the accession column to the columns
|
296
|
+
columns.add('accession')
|
297
|
+
|
298
|
+
self.columns = list(columns)
|
299
|
+
|
300
|
+
def write_csv(self, output_file):
    """Write ``self.data`` to ``output_file`` as a fully quoted CSV.

    A new file is created with a header row built from ``self.columns``.
    If the file already exists, the rows are appended without a header.

    Args:
        output_file: Destination path (string or ``Path``).

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when a row contains a key
            that is not listed in ``self.columns``.
    """
    output_file = Path(output_file)

    # Only a brand-new file gets a header; an existing file is appended to.
    # NOTE(review): appended rows follow self.columns, not the header already
    # in the file -- callers must keep the column order stable.
    is_new_file = not output_file.exists()
    mode = 'w' if is_new_file else 'a'

    # Explicit UTF-8 so the output does not depend on the platform default.
    with open(output_file, mode, newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=self.columns, quoting=csv.QUOTE_ALL)
        if is_new_file:
            writer.writeheader()
        writer.writerows(self.data)
|
@@ -7,12 +7,14 @@ datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
|
|
7
7
|
datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
|
8
8
|
datamule/submission.py,sha256=Yh5nG3ioumhl6z30wJdIEmKjDDNSuo0r2xycZSIaeIg,11035
|
9
9
|
datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
datamule/document/document.py,sha256=
|
11
|
-
datamule/document/processing.py,sha256=
|
12
|
-
datamule/document/table.py,sha256=
|
10
|
+
datamule/document/document.py,sha256=menUFoeWwiY0rJnBkQiqY4NWnO0J17-qs8jFvO_1jiY,9969
|
11
|
+
datamule/document/processing.py,sha256=eWrLxBXmoCoKyUN1gY57ikl9m0KRdACzahJT9PnWQcA,31668
|
12
|
+
datamule/document/table.py,sha256=jC2itnolWG9DpCjw0x6Ma5yVJiHvQT3Gd_5yl53L5a4,12939
|
13
13
|
datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
datamule/document/mappings/atsn.py,sha256=qkZGNIhyPC3VTTOjQ8-FSCQIhUy4XeSycUGLShxNVCo,17743
|
15
15
|
datamule/document/mappings/cfportal.py,sha256=bR9d6DDY0kJ_HGx_hND2y1PNNkZjemYZ2KdyFAcv760,25257
|
16
|
+
datamule/document/mappings/d.py,sha256=nRpfxkHIf8wYBMmY84biU1Smci9fF9pFYCG_iUddGtU,7928
|
17
|
+
datamule/document/mappings/ex102_abs.py,sha256=FdGKvteRh_HsYgILF-8o4R6aSsjYwcaLpJxzdru4FTE,3976
|
16
18
|
datamule/document/mappings/ex99a_sdr.py,sha256=PNdj9I0ZhNicPObLelNmjp33EgTwzvukqkBDnwxarE0,19
|
17
19
|
datamule/document/mappings/ex99c_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
20
|
datamule/document/mappings/ex99g_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -54,7 +56,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
54
56
|
datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
|
55
57
|
datamule/seclibrary/downloader.py,sha256=PIgz_7ASUTZOHcUZGcD1SmLaGSbq7xe7EiJT0Z7HU4M,13653
|
56
58
|
datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
|
57
|
-
datamule-1.2.
|
58
|
-
datamule-1.2.
|
59
|
-
datamule-1.2.
|
60
|
-
datamule-1.2.
|
59
|
+
datamule-1.2.8.dist-info/METADATA,sha256=mQvixJX2sn9NJbH3LkZ1H3IeRIwHwCgBjgWVVXjOt-Q,490
|
60
|
+
datamule-1.2.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
61
|
+
datamule-1.2.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
62
|
+
datamule-1.2.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|