datamule 1.2.5__py3-none-any.whl → 1.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/__init__.py +1 -0
- datamule/document/document.py +12 -8
- datamule/document/mappings/d.py +125 -0
- datamule/document/mappings/ex102_abs.py +63 -0
- datamule/document/mappings/information_table.py +1 -0
- datamule/document/mappings/ownership.py +1 -1
- datamule/document/mappings/proxy_voting_record.py +17 -1
- datamule/document/mappings/submission_metadata.py +9 -0
- datamule/document/mappings/thirteenfhr.py +70 -3
- datamule/document/mappings/twentyfivense.py +1 -0
- datamule/document/processing.py +170 -42
- datamule/document/table.py +60 -5
- datamule/helper.py +10 -1
- datamule/index.py +8 -10
- datamule/portfolio.py +16 -11
- datamule/sec/submissions/monitor.py +173 -120
- datamule/sec/submissions/textsearch.py +0 -4
- datamule/sec/xbrl/streamcompanyfacts.py +1 -1
- datamule/seclibrary/downloader.py +2 -2
- datamule/submission.py +80 -14
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/METADATA +1 -2
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/RECORD +24 -23
- datamule/sec/rss/__init__.py +0 -0
- datamule/sec/rss/monitor.py +0 -416
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/WHEEL +0 -0
- {datamule-1.2.5.dist-info → datamule-1.2.9.dist-info}/top_level.txt +0 -0
datamule/__init__.py
CHANGED
datamule/document/document.py
CHANGED
@@ -118,10 +118,11 @@ class Document:
|
|
118
118
|
# will deprecate this when we add html2dict
|
119
119
|
elif self.extension in ['.htm', '.html','.txt']:
|
120
120
|
|
121
|
-
|
122
|
-
|
123
|
-
elif self.type == '10-Q':
|
121
|
+
|
122
|
+
if self.type == '10-Q':
|
124
123
|
mapping_dict = dict_10q
|
124
|
+
elif self.type == '10-K':
|
125
|
+
mapping_dict = dict_10k
|
125
126
|
elif self.type == '8-K':
|
126
127
|
mapping_dict = dict_8k
|
127
128
|
elif self.type == 'SC 13D':
|
@@ -140,18 +141,21 @@ class Document:
|
|
140
141
|
with open(output_filename, 'w',encoding='utf-8') as f:
|
141
142
|
json.dump(self.data, f, indent=2)
|
142
143
|
|
143
|
-
def
|
144
|
-
if self.
|
144
|
+
def tables(self):
    """Return the tabular representation of this document.

    Submission metadata is handed to ``process_tabular_data`` directly,
    without calling ``parse()``. Any other document that is not XML yields
    an empty list. XML documents are parsed first and then tabulated.
    """
    # Submission metadata takes this path regardless of its extension.
    if self.type == 'submission_metadata':
        return process_tabular_data(self)

    # Only XML documents are tabulated beyond this point.
    if self.extension != '.xml':
        return []

    self.parse()
    return process_tabular_data(self)
|
148
152
|
|
149
153
|
|
150
154
|
def write_csv(self, output_folder):
|
151
155
|
output_folder = Path(output_folder)
|
152
156
|
output_folder.mkdir(exist_ok=True)
|
153
157
|
|
154
|
-
tables = self.
|
158
|
+
tables = self.tables()
|
155
159
|
|
156
160
|
if not tables:
|
157
161
|
return
|
@@ -0,0 +1,125 @@
|
|
1
|
+
issuer_list_d_dict = {
|
2
|
+
'issuer_issuerAddress_street1': 'issuerStreet1',
|
3
|
+
'issuer_cik': 'issuerCik',
|
4
|
+
'issuer_issuerAddress_stateOrCountryDescription': 'issuerStateOrCountryDescription',
|
5
|
+
'issuer_issuerAddress_zipCode': 'issuerZipCode',
|
6
|
+
'issuer_issuerPhoneNumber': 'issuerPhoneNumber',
|
7
|
+
'issuer_yearOfInc_value': 'yearOfIncValue',
|
8
|
+
'issuer_issuerAddress_stateOrCountry': 'issuerStateOrCountry',
|
9
|
+
'issuer_jurisdictionOfInc': 'jurisdictionOfInc',
|
10
|
+
'issuer_entityType': 'entityType',
|
11
|
+
'issuer_issuerAddress_street2': 'issuerStreet2',
|
12
|
+
'issuer_entityName': 'entityName',
|
13
|
+
'accession': 'accession',
|
14
|
+
'issuer_edgarPreviousNameList_value': 'edgarPreviousNameListValue',
|
15
|
+
'issuer_entityTypeOtherDesc': 'entityTypeOtherDesc',
|
16
|
+
'issuer_yearOfInc_yetToBeFormed': 'yearOfIncYetToBeFormed',
|
17
|
+
'issuer_yearOfInc_withinFiveYears': 'yearOfIncWithinFiveYears',
|
18
|
+
'issuer_issuerPreviousNameList_value': 'issuerPreviousNameListValue',
|
19
|
+
'issuer_issuerAddress_city': 'issuerCity'
|
20
|
+
}
|
21
|
+
|
22
|
+
metadata_d_dict = {
|
23
|
+
"testOrLive" : "testOrLive",
|
24
|
+
"schemaVersion" : "schemaVersion",
|
25
|
+
"accession" : "accession",
|
26
|
+
"submissionType" : "submissionType",
|
27
|
+
}
|
28
|
+
offering_data_d_dict = {
|
29
|
+
'salesCompensationList_recipient_foreignSolicitation': 'foreignSolicitation',
|
30
|
+
'typeOfFiling_dateOfFirstSale_yetToOccur': 'dateOfFirstSaleYetToOccur',
|
31
|
+
'industryGroup_investmentFundInfo_is40Act': 'is40Act',
|
32
|
+
'salesCommissionsFindersFees_findersFees_dollarAmount': 'findersFeesAmount',
|
33
|
+
'offeringSalesAmounts_totalRemaining': 'totalRemaining',
|
34
|
+
'issuerSize_aggregateNetAssetValueRange': 'aggregateNetAssetValueRange',
|
35
|
+
'typesOfSecuritiesOffered_isSecurityToBeAcquiredType': 'isSecurityToBeAcquiredType',
|
36
|
+
'salesCompensationList_recipient_recipientAddress_stateOrCountryDescription': 'recipientStateOrCountryDescription',
|
37
|
+
'typesOfSecuritiesOffered_isEquityType': 'isEquityType',
|
38
|
+
'investors_totalNumberAlreadyInvested': 'totalNumberAlreadyInvested',
|
39
|
+
'minimumInvestmentAccepted': 'minimumInvestmentAccepted',
|
40
|
+
'salesCompensationList_recipient_associatedBDName': 'associatedBDName',
|
41
|
+
'salesCompensationList_recipient_statesOfSolicitationList_state': 'statesOfSolicitationState',
|
42
|
+
'businessCombinationTransaction_isBusinessCombinationTransaction': 'isBusinessCombinationTransaction',
|
43
|
+
'useOfProceeds_grossProceedsUsed_isEstimate': 'grossProceedsUsedIsEstimate',
|
44
|
+
'federalExemptionsExclusions_item': 'federalExemptionsExclusionsItem',
|
45
|
+
'useOfProceeds_grossProceedsUsed_dollarAmount': 'grossProceedsUsedAmount',
|
46
|
+
'industryGroup_industryGroupType': 'industryGroupType',
|
47
|
+
'signatureBlock_signature_nameOfSigner': 'nameOfSigner',
|
48
|
+
'signatureBlock_signature_signatureDate': 'signatureDate',
|
49
|
+
'salesCommissionsFindersFees_salesCommissions_isEstimate': 'salesCommissionsIsEstimate',
|
50
|
+
'typesOfSecuritiesOffered_isOtherType': 'isOtherType',
|
51
|
+
'salesCompensationList_recipient_associatedBDCRDNumber': 'associatedBDCRDNumber',
|
52
|
+
'salesCompensationList_recipient_recipientAddress_stateOrCountry': 'recipientStateOrCountry',
|
53
|
+
'typesOfSecuritiesOffered_descriptionOfOtherType': 'descriptionOfOtherType',
|
54
|
+
'salesCommissionsFindersFees_salesCommissions_dollarAmount': 'salesCommissionsAmount',
|
55
|
+
'useOfProceeds_clarificationOfResponse': 'useOfProceedsClarification',
|
56
|
+
'accession': 'accession',
|
57
|
+
'typesOfSecuritiesOffered_isPooledInvestmentFundType': 'isPooledInvestmentFundType',
|
58
|
+
'salesCompensationList_recipient_statesOfSolicitationList_value': 'statesOfSolicitationValue',
|
59
|
+
'signatureBlock_signature_signatureName': 'signatureName',
|
60
|
+
'typeOfFiling_newOrAmendment_isAmendment': 'isAmendment',
|
61
|
+
'issuerSize_revenueRange': 'revenueRange',
|
62
|
+
'salesCommissionsFindersFees_clarificationOfResponse': 'salesCommissionsFindersFeesClarification',
|
63
|
+
'salesCompensationList_recipient_recipientAddress_zipCode': 'recipientZipCode',
|
64
|
+
'salesCompensationList_recipient_recipientAddress_city': 'recipientCity',
|
65
|
+
'typesOfSecuritiesOffered_isOptionToAcquireType': 'isOptionToAcquireType',
|
66
|
+
'businessCombinationTransaction_clarificationOfResponse': 'businessCombinationClarification',
|
67
|
+
'typesOfSecuritiesOffered_isTenantInCommonType': 'isTenantInCommonType',
|
68
|
+
'salesCompensationList_recipient_statesOfSolicitationList_description': 'statesOfSolicitationDescription',
|
69
|
+
'offeringSalesAmounts_totalOfferingAmount': 'totalOfferingAmount',
|
70
|
+
'investors_numberNonAccreditedInvestors': 'numberNonAccreditedInvestors',
|
71
|
+
'signatureBlock_authorizedRepresentative': 'authorizedRepresentative',
|
72
|
+
'signatureBlock_signature_issuerName': 'issuerName',
|
73
|
+
'salesCompensationList_recipient_recipientAddress_street2': 'recipientStreet2',
|
74
|
+
'typesOfSecuritiesOffered_isDebtType': 'isDebtType',
|
75
|
+
'salesCompensationList_recipient_recipientAddress_street1': 'recipientStreet1',
|
76
|
+
'signatureBlock_signature_signatureTitle': 'signatureTitle',
|
77
|
+
'industryGroup_investmentFundInfo_investmentFundType': 'investmentFundType',
|
78
|
+
'salesCommissionsFindersFees_findersFees_isEstimate': 'findersFeesIsEstimate',
|
79
|
+
'typeOfFiling_dateOfFirstSale_value': 'dateOfFirstSaleValue',
|
80
|
+
'offeringSalesAmounts_totalAmountSold': 'totalAmountSold',
|
81
|
+
'offeringSalesAmounts_clarificationOfResponse': 'offeringSalesAmountsClarification',
|
82
|
+
'investors_hasNonAccreditedInvestors': 'hasNonAccreditedInvestors',
|
83
|
+
'salesCompensationList_recipient_recipientCRDNumber': 'recipientCRDNumber',
|
84
|
+
'typesOfSecuritiesOffered_isMineralPropertyType': 'isMineralPropertyType',
|
85
|
+
'salesCompensationList_recipient_recipientName': 'recipientName',
|
86
|
+
'durationOfOffering_moreThanOneYear': 'moreThanOneYear'
|
87
|
+
}
|
88
|
+
primary_issuer_d_dict = {
|
89
|
+
'yearOfInc_withinFiveYears': 'yearOfIncWithinFiveYears',
|
90
|
+
'entityTypeOtherDesc': 'entityTypeOtherDesc',
|
91
|
+
'jurisdictionOfInc': 'jurisdictionOfInc',
|
92
|
+
'issuerAddress_street1': 'issuerStreet1',
|
93
|
+
'issuerAddress_zipCode': 'issuerZipCode',
|
94
|
+
'issuerPreviousNameList_previousName': 'issuerPreviousName',
|
95
|
+
'entityType': 'entityType',
|
96
|
+
'issuerPreviousNameList_value': 'issuerPreviousNameListValue',
|
97
|
+
'issuerPhoneNumber': 'issuerPhoneNumber',
|
98
|
+
'yearOfInc_value': 'yearOfIncValue',
|
99
|
+
'yearOfInc_yetToBeFormed': 'yearOfIncYetToBeFormed',
|
100
|
+
'edgarPreviousNameList_previousName': 'edgarPreviousName',
|
101
|
+
'edgarPreviousNameList_value': 'edgarPreviousNameListValue',
|
102
|
+
'issuerAddress_stateOrCountry': 'issuerStateOrCountry',
|
103
|
+
'entityName': 'entityName',
|
104
|
+
'accession': 'accession',
|
105
|
+
'issuerAddress_street2': 'issuerStreet2',
|
106
|
+
'issuerAddress_city': 'issuerCity',
|
107
|
+
'issuerAddress_stateOrCountryDescription': 'issuerStateOrCountryDescription',
|
108
|
+
'cik': 'cik',
|
109
|
+
'yearOfInc_overFiveYears': 'yearOfIncOverFiveYears'
|
110
|
+
}
|
111
|
+
|
112
|
+
related_persons_d_dict = {
|
113
|
+
'relatedPersonInfo_relatedPersonAddress_stateOrCountry': 'relatedPersonStateOrCountry',
|
114
|
+
'relatedPersonInfo_relatedPersonRelationshipList_relationship': 'relatedPersonRelationship',
|
115
|
+
'relatedPersonInfo_relationshipClarification': 'relationshipClarification',
|
116
|
+
'relatedPersonInfo_relatedPersonName_lastName': 'relatedPersonLastName',
|
117
|
+
'accession': 'accession',
|
118
|
+
'relatedPersonInfo_relatedPersonName_middleName': 'relatedPersonMiddleName',
|
119
|
+
'relatedPersonInfo_relatedPersonAddress_zipCode': 'relatedPersonZipCode',
|
120
|
+
'relatedPersonInfo_relatedPersonAddress_city': 'relatedPersonCity',
|
121
|
+
'relatedPersonInfo_relatedPersonAddress_street1': 'relatedPersonStreet1',
|
122
|
+
'relatedPersonInfo_relatedPersonAddress_stateOrCountryDescription': 'relatedPersonStateOrCountryDescription',
|
123
|
+
'relatedPersonInfo_relatedPersonName_firstName': 'relatedPersonFirstName',
|
124
|
+
'relatedPersonInfo_relatedPersonAddress_street2': 'relatedPersonStreet2'
|
125
|
+
}
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Assets dictionary mapping
|
2
|
+
assets_dict_ex102_abs = {
|
3
|
+
'assetNumber': 'assetNumber',
|
4
|
+
'DefeasedStatusCode': 'DefeasedStatusCode',
|
5
|
+
'defeasanceOptionStartDate': 'defeasanceOptionStartDate',
|
6
|
+
'mostRecentDebtServiceCoverageNetOperatingIncomePercentage': 'mostRecentDebtServiceCoverageNetOperatingIncomePercentage',
|
7
|
+
'mostRecentDebtServiceAmount': 'mostRecentDebtServiceAmount',
|
8
|
+
'debtServiceCoverageSecuritizationCode': 'debtServiceCoverageSecuritizationCode',
|
9
|
+
'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage': 'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage',
|
10
|
+
'valuationSecuritizationDate': 'valuationSecuritizationDate',
|
11
|
+
'physicalOccupancySecuritizationPercentage': 'physicalOccupancySecuritizationPercentage',
|
12
|
+
'revenueSecuritizationAmount': 'revenueSecuritizationAmount',
|
13
|
+
'valuationSourceSecuritizationCode': 'valuationSourceSecuritizationCode',
|
14
|
+
'financialsSecuritizationDate': 'financialsSecuritizationDate',
|
15
|
+
'mostRecentNetCashFlowAmount': 'mostRecentNetCashFlowAmount',
|
16
|
+
'operatingExpensesAmount': 'operatingExpensesAmount',
|
17
|
+
'operatingExpensesSecuritizationAmount': 'operatingExpensesSecuritizationAmount',
|
18
|
+
'netOperatingIncomeNetCashFlowSecuritizationCode': 'netOperatingIncomeNetCashFlowSecuritizationCode',
|
19
|
+
'mostRecentValuationSourceCode': 'mostRecentValuationSourceCode',
|
20
|
+
'mostRecentDebtServiceCoverageNetCashFlowpercentage': 'mostRecentDebtServiceCoverageNetCashFlowpercentage',
|
21
|
+
'debtServiceCoverageNetCashFlowSecuritizationPercentage': 'debtServiceCoverageNetCashFlowSecuritizationPercentage',
|
22
|
+
'mostRecentAnnualLeaseRolloverReviewDate': 'mostRecentAnnualLeaseRolloverReviewDate',
|
23
|
+
'mostRecentRevenueAmount': 'mostRecentRevenueAmount',
|
24
|
+
'mostRecentPhysicalOccupancyPercentage': 'mostRecentPhysicalOccupancyPercentage',
|
25
|
+
'mostRecentNetOperatingIncomeAmount': 'mostRecentNetOperatingIncomeAmount',
|
26
|
+
'netOperatingIncomeSecuritizationAmount': 'netOperatingIncomeSecuritizationAmount',
|
27
|
+
'netOperatingIncomeNetCashFlowCode': 'netOperatingIncomeNetCashFlowCode',
|
28
|
+
'mostRecentFinancialsStartDate': 'mostRecentFinancialsStartDate',
|
29
|
+
'mostRecentFinancialsEndDate': 'mostRecentFinancialsEndDate',
|
30
|
+
'accession': 'accession',
|
31
|
+
'valuationSecuritizationAmount': 'valuationSecuritizationAmount',
|
32
|
+
'mostRecentValuationDate': 'mostRecentValuationDate',
|
33
|
+
'mostRecentValuationAmount': 'mostRecentValuationAmount',
|
34
|
+
'mostRecentDebtServiceCoverageCode': 'mostRecentDebtServiceCoverageCode',
|
35
|
+
'netCashFlowFlowSecuritizationAmount': 'netCashFlowFlowSecuritizationAmount'
|
36
|
+
}
|
37
|
+
|
38
|
+
# Properties dictionary mapping
|
39
|
+
properties_dict_ex102_abs = {
|
40
|
+
'unitsBedsRoomsNumber': 'unitsBedsRoomsNumber',
|
41
|
+
'propertyCounty': 'propertyCounty',
|
42
|
+
'squareFeetLargestTenantNumber': 'squareFeetLargestTenantNumber',
|
43
|
+
'netRentableSquareFeetNumber': 'netRentableSquareFeetNumber',
|
44
|
+
'leaseExpirationThirdLargestTenantDate': 'leaseExpirationThirdLargestTenantDate',
|
45
|
+
'leaseExpirationLargestTenantDate': 'leaseExpirationLargestTenantDate',
|
46
|
+
'propertyZip': 'propertyZip',
|
47
|
+
'squareFeetThirdLargestTenantNumber': 'squareFeetThirdLargestTenantNumber',
|
48
|
+
'propertyStatusCode': 'propertyStatusCode',
|
49
|
+
'propertyState': 'propertyState',
|
50
|
+
'yearBuiltNumber': 'yearBuiltNumber',
|
51
|
+
'propertyCity': 'propertyCity',
|
52
|
+
'propertyName': 'propertyName',
|
53
|
+
'propertyAddress': 'propertyAddress',
|
54
|
+
'yearLastRenovated': 'yearLastRenovated',
|
55
|
+
'leaseExpirationSecondLargestTenantDate': 'leaseExpirationSecondLargestTenantDate',
|
56
|
+
'thirdLargestTenant': 'thirdLargestTenant',
|
57
|
+
'unitsBedsRoomsSecuritizationNumber': 'unitsBedsRoomsSecuritizationNumber',
|
58
|
+
'propertyTypeCode': 'propertyTypeCode',
|
59
|
+
'largestTenant': 'largestTenant',
|
60
|
+
'squareFeetSecondLargestTenantNumber': 'squareFeetSecondLargestTenantNumber',
|
61
|
+
'netRentableSquareFeetSecuritizationNumber': 'netRentableSquareFeetSecuritizationNumber',
|
62
|
+
'secondLargestTenant': 'secondLargestTenant'
|
63
|
+
}
|
@@ -1 +1,17 @@
|
|
1
|
-
proxy_voting_record_dict = {
|
1
|
+
proxy_voting_record_dict = {
|
2
|
+
'meetingDate': 'meetingDate',
|
3
|
+
'accession': 'accessionNumber',
|
4
|
+
'vote_voteRecord_managementRecommendation': 'managementRecommendation',
|
5
|
+
'sharesVoted': 'sharesVoted', # Top-level sharesVoted
|
6
|
+
'vote_voteRecord_howVoted': 'howVoted',
|
7
|
+
'sharesOnLoan': 'sharesOnLoan',
|
8
|
+
'cusip': 'cusip',
|
9
|
+
'issuerName': 'issuerName',
|
10
|
+
'voteCategories_voteCategory_categoryType': 'categoryType',
|
11
|
+
'voteDescription': 'voteDescription',
|
12
|
+
'voteManager_otherManagers_otherManager': 'otherManager',
|
13
|
+
'vote_voteRecord_sharesVoted': 'recordSharesVoted', # To distinguish from top-level sharesVoted
|
14
|
+
'isin': 'isin',
|
15
|
+
'voteSource': 'voteSource',
|
16
|
+
'voteSeries': 'voteSeries'
|
17
|
+
}
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# Note: "submission_metadata" is this library's own designation (not an SEC term) for the header of the Submission tag.
|
2
|
+
|
3
|
+
document_submission_metadata_dict = {
|
4
|
+
'accession':'accession',
|
5
|
+
'type':'type',
|
6
|
+
'sequence' : 'sequence',
|
7
|
+
'filename' : 'filename',
|
8
|
+
'description':'description'
|
9
|
+
}
|
@@ -1,5 +1,72 @@
|
|
1
|
+
# Ready for mass testing
|
1
2
|
|
2
3
|
# 13F-HR (Institutional Investment Manager Holdings) mapping
|
3
|
-
thirteenfhr_dict =
|
4
|
-
|
5
|
-
|
4
|
+
thirteenfhr_dict = {
|
5
|
+
# Cover Page Mapping
|
6
|
+
'formData_coverPage_reportCalendarOrQuarter': 'reportCalendarOrQuarter',
|
7
|
+
'formData_coverPage_filingManager_name': 'filingManagerName',
|
8
|
+
'formData_coverPage_filingManager_address_street1': 'filingManagerStreet1',
|
9
|
+
'formData_coverPage_filingManager_address_street2': 'filingManagerStreet2',
|
10
|
+
'formData_coverPage_filingManager_address_city': 'filingManagerCity',
|
11
|
+
'formData_coverPage_filingManager_address_stateOrCountry': 'filingManagerStateOrCountry',
|
12
|
+
'formData_coverPage_filingManager_address_zipCode': 'filingManagerZipCode',
|
13
|
+
'formData_coverPage_crdNumber': 'crdNumber',
|
14
|
+
'formData_coverPage_secFileNumber': 'secFileNumber',
|
15
|
+
'formData_coverPage_form13FFileNumber': 'form13FFileNumber',
|
16
|
+
'formData_coverPage_reportType': 'reportType',
|
17
|
+
'formData_coverPage_isAmendment': 'isAmendment',
|
18
|
+
'formData_coverPage_amendmentNo': 'amendmentNo',
|
19
|
+
'formData_coverPage_amendmentInfo_amendmentType': 'amendmentType',
|
20
|
+
'formData_coverPage_amendmentInfo_confDeniedExpired': 'confDeniedExpired',
|
21
|
+
'formData_coverPage_additionalInformation': 'additionalInformation',
|
22
|
+
'formData_coverPage_provideInfoForInstruction5': 'provideInfoForInstruction5',
|
23
|
+
|
24
|
+
# Other Managers Info Mapping
|
25
|
+
'formData_coverPage_otherManagersInfo_otherManager': 'otherManager',
|
26
|
+
'formData_coverPage_otherManagersInfo_otherManager_cik': 'otherManagerCik',
|
27
|
+
'formData_coverPage_otherManagersInfo_otherManager_name': 'otherManagerName',
|
28
|
+
'formData_coverPage_otherManagersInfo_otherManager_crdNumber': 'otherManagerCrdNumber',
|
29
|
+
'formData_coverPage_otherManagersInfo_otherManager_secFileNumber': 'otherManagerSecFileNumber',
|
30
|
+
'formData_coverPage_otherManagersInfo_otherManager_form13FFileNumber': 'otherManagerForm13FFileNumber',
|
31
|
+
|
32
|
+
# Summary Page Mapping
|
33
|
+
'formData_summaryPage_isConfidentialOmitted': 'isConfidentialOmitted',
|
34
|
+
'formData_summaryPage_otherIncludedManagersCount': 'otherIncludedManagersCount',
|
35
|
+
'formData_summaryPage_tableEntryTotal': 'tableEntryTotal',
|
36
|
+
'formData_summaryPage_tableValueTotal': 'tableValueTotal',
|
37
|
+
|
38
|
+
# Other Managers 2 Info Mapping
|
39
|
+
'formData_summaryPage_otherManagers2Info_otherManager2': 'otherManager2',
|
40
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_sequenceNumber': 'otherManager2SequenceNumber',
|
41
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_cik': 'otherManager2Cik',
|
42
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_name': 'otherManager2Name',
|
43
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_crdNumber': 'otherManager2CrdNumber',
|
44
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_secFileNumber': 'otherManager2SecFileNumber',
|
45
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_form13FFileNumber': 'otherManager2Form13FFileNumber',
|
46
|
+
|
47
|
+
# Signature Block Mapping
|
48
|
+
'formData_signatureBlock_name': 'signatureName',
|
49
|
+
'formData_signatureBlock_title': 'signatureTitle',
|
50
|
+
'formData_signatureBlock_phone': 'signaturePhone',
|
51
|
+
'formData_signatureBlock_signature': 'signature',
|
52
|
+
'formData_signatureBlock_city': 'signatureCity',
|
53
|
+
'formData_signatureBlock_stateOrCountry': 'signatureStateOrCountry',
|
54
|
+
'formData_signatureBlock_signatureDate': 'signatureDate',
|
55
|
+
|
56
|
+
# Header Data Mapping
|
57
|
+
'headerData_filerInfo_periodOfReport': 'periodOfReport',
|
58
|
+
'headerData_filerInfo_filer_fileNumber': 'filerFileNumber',
|
59
|
+
'headerData_filerInfo_filer_credentials_cik': 'filerCik',
|
60
|
+
'headerData_filerInfo_filer_credentials_ccc': 'filerCcc',
|
61
|
+
'headerData_filerInfo_flags_confirmingCopyFlag': 'confirmingCopyFlag',
|
62
|
+
'headerData_filerInfo_flags_returnCopyFlag': 'returnCopyFlag',
|
63
|
+
'headerData_filerInfo_flags_overrideInternetFlag': 'overrideInternetFlag',
|
64
|
+
'headerData_filerInfo_denovoRequest': 'denovoRequest',
|
65
|
+
'headerData_filerInfo_liveTestFlag': 'liveTestFlag',
|
66
|
+
'headerData_submissionType': 'submissionType',
|
67
|
+
|
68
|
+
# Schema and Metadata Mapping
|
69
|
+
'schemaLocation': 'schemaLocation',
|
70
|
+
'schemaVersion': 'schemaVersion',
|
71
|
+
'accession': 'accessionNumber'
|
72
|
+
}
|
datamule/document/processing.py
CHANGED
@@ -17,6 +17,17 @@ def process_tabular_data(self):
|
|
17
17
|
tables = process_13fhr(self.data, self.accession)
|
18
18
|
elif self.type in ["INFORMATION TABLE"]:
|
19
19
|
tables = process_information_table(self.data, self.accession)
|
20
|
+
elif self.type in ["25-NSE", "25-NSE/A"]:
|
21
|
+
tables = process_25nse(self.data, self.accession)
|
22
|
+
# complete mark:
|
23
|
+
elif self.type in ["EX-102"]:
|
24
|
+
tables = process_ex102_abs(self.data, self.accession)
|
25
|
+
elif self.type in ["D","D/A"]:
|
26
|
+
tables = process_d(self.data, self.accession)
|
27
|
+
elif self.type in ["N-PX","N-PX/A"]:
|
28
|
+
tables = process_npx(self.data, self.accession)
|
29
|
+
|
30
|
+
|
20
31
|
elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
|
21
32
|
tables = process_sbsef(self.data, self.accession)
|
22
33
|
elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
|
@@ -33,8 +44,7 @@ def process_tabular_data(self):
|
|
33
44
|
tables = process_144(self.data, self.accession)
|
34
45
|
elif self.type in ["24F-2NT", "24F-2NT/A"]:
|
35
46
|
tables = process_24f2nt(self.data, self.accession)
|
36
|
-
|
37
|
-
tables = process_25nse(self.data, self.accession)
|
47
|
+
|
38
48
|
elif self.type in ["ATS-N", "ATS-N/A"]:
|
39
49
|
tables = process_ats(self.data, self.accession)
|
40
50
|
# elif self.type in ["C","C-W","C-U","C-U-W","C/A","C/A-W",
|
@@ -42,8 +52,7 @@ def process_tabular_data(self):
|
|
42
52
|
# tables = process_c(self.data, self.accession)
|
43
53
|
elif self.type in ["CFPORTAL","CFPORTAL/A","CFPORTAL-W"]:
|
44
54
|
tables = process_cfportal(self.data, self.accession)
|
45
|
-
|
46
|
-
# tables = process_d(self.data, self.accession)
|
55
|
+
|
47
56
|
# elif self.type in ["MA","MA-A","MA/A","MA-I","MA-I/A","MA-W"]:
|
48
57
|
# tables = process_ma(self.data, self.accession)
|
49
58
|
# elif self.type in ["N-CEN","N-CEN/A"]:
|
@@ -53,8 +62,7 @@ def process_tabular_data(self):
|
|
53
62
|
# tables = process_nmfp(self.data, self.accession)
|
54
63
|
# elif self.type in ["NPORT-P","NPORT-P/A"]:
|
55
64
|
# tables = process_nportp(self.data, self.accession)
|
56
|
-
|
57
|
-
tables = process_npx(self.data, self.accession)
|
65
|
+
|
58
66
|
# elif self.type in ["TA-1","TA-1/A","TA-W","TA-2","TA-2/A"]:
|
59
67
|
# tables = process_ta(self.data, self.accession)
|
60
68
|
elif self.type in ["X-17A-5","X-17A-5/A"]:
|
@@ -66,10 +74,11 @@ def process_tabular_data(self):
|
|
66
74
|
tables = process_reg_a(self.data, self.accession)
|
67
75
|
# elif self.type in ["SBSE","SBSE/A","SBSE-A","SBSE-A/A","SBSE-BD","SBSE-BD/A","SBSE-C","SBSE-W","SBSE-CCO-RPT","SBSE-CCO-RPT/A"]:
|
68
76
|
# tables = process_sbs(self.data, self.accession)
|
69
|
-
|
70
|
-
# tables = process_ex102_abs(self.data, self.accession)
|
77
|
+
|
71
78
|
elif self.type == "PROXY VOTING RECORD":
|
72
79
|
tables = process_proxy_voting_record(self.data, self.accession)
|
80
|
+
elif self.type == 'submission_metadata':
|
81
|
+
tables = process_submission_metadata(self.content, self.accession)
|
73
82
|
else:
|
74
83
|
warn(f"Processing for {self.type} is not implemented yet.")
|
75
84
|
return []
|
@@ -95,6 +104,67 @@ def _flatten_dict(d, parent_key=''):
|
|
95
104
|
|
96
105
|
return items
|
97
106
|
|
107
|
+
# Alternative flattener: expands nested lists into multiple rows (one row per combination of list items)
|
108
|
+
def _merge_row_product(rows, extra_rows):
    """Return each row of *rows* merged with each row of *extra_rows*.

    Output order is row-major: all combinations for the first row of
    *rows* come first. Keys from *extra_rows* win on collision. Inputs are
    not mutated; every output row is a new dict.
    """
    return [{**row, **extra} for row in rows for extra in extra_rows]


def flatten_dict_to_rows(d, parent_key='', sep='_'):
    """Flatten a nested dict/list structure into a list of flat row dicts.

    Nested dict keys are joined to their parent key with *sep*. Every list
    encountered multiplies the rows: each existing row is combined with each
    list item, so sibling lists produce a cross-product of rows.

    Args:
        d: A dict, a list, or a primitive value.
        parent_key: Key prefix accumulated from enclosing dicts.
        sep: Separator placed between key segments.

    Returns:
        A list of flat dicts. A list input returns the concatenated rows of
        its items. A primitive input returns ``[{parent_key: d}]``, or ``[]``
        when there is no ``parent_key`` to store it under. A dict input
        always returns at least one row (``[{}]`` for an empty dict).
    """
    if isinstance(d, list):
        # Each item contributes its own rows under the same prefix.
        all_rows = []
        for item in d:
            all_rows.extend(flatten_dict_to_rows(item, parent_key, sep))
        return all_rows

    if not isinstance(d, dict):
        # A bare primitive only forms a row when it has a key to live under.
        return [{parent_key: d}] if parent_key else []

    # Start from one empty row and widen/multiply it key by key.
    rows = [{}]

    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k

        if isinstance(v, dict):
            rows = _merge_row_product(rows, flatten_dict_to_rows(v, new_key, sep))

        elif isinstance(v, list):
            if not v:
                # An empty list still yields the column, as an empty string.
                for row in rows:
                    row[new_key] = ''
            else:
                new_rows = []
                for row in rows:
                    for list_item in v:
                        if isinstance(list_item, dict):
                            nested_rows = flatten_dict_to_rows(list_item, new_key, sep)
                            new_rows.extend(_merge_row_product([row], nested_rows))
                        else:
                            # Non-dict items (including nested lists) are
                            # stored as-is under the list's key.
                            new_rows.append({**row, new_key: list_item})
                rows = new_rows

        else:
            for row in rows:
                row[new_key] = v

    return rows
|
167
|
+
|
98
168
|
def process_ownership(data, accession):
|
99
169
|
tables = []
|
100
170
|
if 'ownershipDocument' not in data:
|
@@ -346,33 +416,41 @@ def process_cfportal(data, accession):
|
|
346
416
|
|
347
417
|
return tables
|
348
418
|
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
#
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
419
|
+
def process_d(data, accession):
    """Build the tables for a Form D / D/A document.

    Each known top-level group is popped out of ``data['edgarSubmission']``
    and turned into its own table. Whatever remains afterwards is emitted as
    the ``metadata_d`` table.

    Note:
        ``data['edgarSubmission']`` is mutated in place (groups are popped).

    Args:
        data: Parsed document; must contain an ``'edgarSubmission'`` key.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        List of ``Table`` objects: one per non-empty group, in the order
        listed below, followed by the ``metadata_d`` table.

    Raises:
        KeyError: If ``data`` has no ``'edgarSubmission'`` key.
    """
    tables = []
    groups = [
        ('contactData', 'contact_data_d'),
        ('notificationAddressList', 'notification_address_list_d'),
        ('primaryIssuer', 'primary_issuer_d'),
        ('issuerList', 'issuer_list_d'),
        ('relatedPersonsList', 'related_persons_list_d'),
        ('offeringData', 'offering_data_d'),
    ]
    # These groups go through flatten_dict_to_rows, which expands nested
    # lists into multiple rows; the rest use _flatten_dict.
    row_expanded_groups = ('relatedPersonsList', 'issuerList', 'offeringData')

    submission = data['edgarSubmission']

    for group, table_type in groups:
        # NOTE: an earlier special case compared against 'relatedPersonList'
        # (missing the 's') and so could never run; every group is simply
        # popped by its own key.
        group_data = submission.pop(group, None)
        if not group_data:
            continue

        if group in row_expanded_groups:
            flattened_rows = flatten_dict_to_rows(group_data)
            if flattened_rows:
                tables.append(Table(flattened_rows, table_type, accession))
        else:
            tables.append(Table(_flatten_dict(group_data), table_type, accession))

    # Everything not claimed by a group above is treated as metadata.
    tables.append(Table(_flatten_dict(submission), 'metadata_d', accession))

    return tables
|
376
454
|
|
377
455
|
# def process_nmfp(data, accession):
|
378
456
|
# tables = []
|
@@ -583,13 +661,39 @@ def process_reg_a(data, accession):
|
|
583
661
|
|
584
662
|
# return tables
|
585
663
|
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
#
|
591
|
-
|
592
|
-
|
664
|
+
def process_ex102_abs(data, accession):
    """Build the asset and property tables for an EX-102 document.

    Reads the asset entries at ``assetData -> assets`` and splits them into:

    * an assets table: every asset without its ``'property'`` field;
    * a properties table: every property dict, tagged with the
      ``'assetNumber'`` of the asset it came from.

    Args:
        data: Parsed document.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        List of ``Table`` objects (assets first, then properties); empty
        when no asset data is found.

    Raises:
        KeyError: If an asset carries a property but has no ``'assetNumber'``.
    """
    tables = []
    asset_items = safe_get(data, ['assetData', 'assets'])

    # Other callers of safe_get in this module test its result for
    # falsiness before use; do the same here rather than iterating it blindly.
    if not asset_items:
        return tables

    # NOTE(review): presumably a single asset can arrive as a bare dict
    # rather than a one-element list - confirm against the XML parser.
    # Wrapping it keeps the loops below from iterating over its keys.
    if isinstance(asset_items, dict):
        asset_items = [asset_items]

    # Assets: every item minus its 'property' field.
    assets = [
        {k: v for k, v in item.items() if k != 'property'}
        for item in asset_items
    ]

    # Properties: dict-valued properties are emitted first, then the
    # members of list-valued properties, so row order does not depend on
    # how the two kinds are interleaved in the input.
    single_properties = []
    listed_properties = []
    for item in asset_items:
        prop = item.get('property')
        if isinstance(prop, dict):
            single_properties.append(prop | {'assetNumber': item['assetNumber']})
        elif isinstance(prop, list):
            listed_properties.extend(
                entry | {'assetNumber': item['assetNumber']}
                for entry in prop
                if isinstance(entry, dict)
            )
    properties = single_properties + listed_properties

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
|
593
697
|
|
594
698
|
# def process_ma(data, accession):
|
595
699
|
# tables = []
|
@@ -601,4 +705,28 @@ def process_reg_a(data, accession):
|
|
601
705
|
# raise NotImplementedError("Need to implement the rest of the MA processing")
|
602
706
|
|
603
707
|
# def process_ncen(data, accession):
|
604
|
-
# raise NotImplementedError("Need to implement the N-CEN processing")
|
708
|
+
# raise NotImplementedError("Need to implement the N-CEN processing")
|
709
|
+
|
710
|
+
# WIP
|
711
|
+
# Note: going to pause this for now, as I don't have a great way of putting this in a csv.
|
712
|
+
def process_submission_metadata(data,accession):
    """Build tables from the submission header metadata.

    Looks up the ``documents``, ``reporting-owner`` and ``issuer`` sections
    of *data* (in that order) and emits one table for each section that is
    present and non-empty.

    Args:
        data: Submission metadata structure to read the sections from.
        accession: Accession identifier passed through to every ``Table``.

    Returns:
        List of ``Table`` objects, possibly empty.
    """
    sections = (
        ('documents', 'document_submission_metadata'),
        ('reporting-owner', 'reporting_owner_submission_metadata'),
        ('issuer', 'issuer_submission_metadata'),
    )

    tables = []
    for section_key, table_type in sections:
        section = safe_get(data, [section_key])
        if section:
            tables.append(Table(_flatten_dict(section), table_type, accession))

    # Not implemented yet:
    #   - metadata table: accession-number, date-of-filing-date-change,
    #     depositor-cik, effectiveness-date
    #   - other tables: depositor, securitizer

    return tables
|