datamule 1.2.5__py3-none-any.whl → 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .index import Index
8
8
  from .package_updater import PackageUpdater
9
9
 
10
10
 
11
+
11
12
  # Keep the notebook environment setup
12
13
  def _is_notebook_env():
13
14
  """Check if the code is running in a Jupyter or Colab environment."""
@@ -118,10 +118,11 @@ class Document:
118
118
  # will deprecate this when we add html2dict
119
119
  elif self.extension in ['.htm', '.html','.txt']:
120
120
 
121
- if self.type == '10-K':
122
- mapping_dict = dict_10k
123
- elif self.type == '10-Q':
121
+
122
+ if self.type == '10-Q':
124
123
  mapping_dict = dict_10q
124
+ elif self.type == '10-K':
125
+ mapping_dict = dict_10k
125
126
  elif self.type == '8-K':
126
127
  mapping_dict = dict_8k
127
128
  elif self.type == 'SC 13D':
@@ -140,18 +141,21 @@ class Document:
140
141
  with open(output_filename, 'w',encoding='utf-8') as f:
141
142
  json.dump(self.data, f, indent=2)
142
143
 
143
- def to_tabular(self):
144
- if self.extension != '.xml':
144
def tables(self):
    """Return the tables extracted from this document.

    Documents typed 'submission_metadata' are handed to
    process_tabular_data without parsing. Any other document must have
    the '.xml' extension: it is parsed first and then handed to
    process_tabular_data. Non-XML documents yield an empty list.
    """
    if self.type != 'submission_metadata':
        if self.extension != '.xml':
            return []
        # XML documents need their data populated before tabular processing.
        self.parse()
    return process_tabular_data(self)
148
152
 
149
153
 
150
154
  def write_csv(self, output_folder):
151
155
  output_folder = Path(output_folder)
152
156
  output_folder.mkdir(exist_ok=True)
153
157
 
154
- tables = self.to_tabular()
158
+ tables = self.tables()
155
159
 
156
160
  if not tables:
157
161
  return
@@ -0,0 +1,125 @@
1
# Form D issuer-list table: flattened source key -> output column name.
issuer_list_d_dict = {
    "issuer_issuerAddress_street1": "issuerStreet1",
    "issuer_cik": "issuerCik",
    "issuer_issuerAddress_stateOrCountryDescription": "issuerStateOrCountryDescription",
    "issuer_issuerAddress_zipCode": "issuerZipCode",
    "issuer_issuerPhoneNumber": "issuerPhoneNumber",
    "issuer_yearOfInc_value": "yearOfIncValue",
    "issuer_issuerAddress_stateOrCountry": "issuerStateOrCountry",
    "issuer_jurisdictionOfInc": "jurisdictionOfInc",
    "issuer_entityType": "entityType",
    "issuer_issuerAddress_street2": "issuerStreet2",
    "issuer_entityName": "entityName",
    "accession": "accession",
    "issuer_edgarPreviousNameList_value": "edgarPreviousNameListValue",
    "issuer_entityTypeOtherDesc": "entityTypeOtherDesc",
    "issuer_yearOfInc_yetToBeFormed": "yearOfIncYetToBeFormed",
    "issuer_yearOfInc_withinFiveYears": "yearOfIncWithinFiveYears",
    "issuer_issuerPreviousNameList_value": "issuerPreviousNameListValue",
    "issuer_issuerAddress_city": "issuerCity",
}
21
+
22
# Form D metadata table: every column keeps its source name.
metadata_d_dict = {
    name: name
    for name in ("testOrLive", "schemaVersion", "accession", "submissionType")
}
28
# Form D offering-data table: flattened source key -> output column name.
offering_data_d_dict = {
    "salesCompensationList_recipient_foreignSolicitation": "foreignSolicitation",
    "typeOfFiling_dateOfFirstSale_yetToOccur": "dateOfFirstSaleYetToOccur",
    "industryGroup_investmentFundInfo_is40Act": "is40Act",
    "salesCommissionsFindersFees_findersFees_dollarAmount": "findersFeesAmount",
    "offeringSalesAmounts_totalRemaining": "totalRemaining",
    "issuerSize_aggregateNetAssetValueRange": "aggregateNetAssetValueRange",
    "typesOfSecuritiesOffered_isSecurityToBeAcquiredType": "isSecurityToBeAcquiredType",
    "salesCompensationList_recipient_recipientAddress_stateOrCountryDescription": "recipientStateOrCountryDescription",
    "typesOfSecuritiesOffered_isEquityType": "isEquityType",
    "investors_totalNumberAlreadyInvested": "totalNumberAlreadyInvested",
    "minimumInvestmentAccepted": "minimumInvestmentAccepted",
    "salesCompensationList_recipient_associatedBDName": "associatedBDName",
    "salesCompensationList_recipient_statesOfSolicitationList_state": "statesOfSolicitationState",
    "businessCombinationTransaction_isBusinessCombinationTransaction": "isBusinessCombinationTransaction",
    "useOfProceeds_grossProceedsUsed_isEstimate": "grossProceedsUsedIsEstimate",
    "federalExemptionsExclusions_item": "federalExemptionsExclusionsItem",
    "useOfProceeds_grossProceedsUsed_dollarAmount": "grossProceedsUsedAmount",
    "industryGroup_industryGroupType": "industryGroupType",
    "signatureBlock_signature_nameOfSigner": "nameOfSigner",
    "signatureBlock_signature_signatureDate": "signatureDate",
    "salesCommissionsFindersFees_salesCommissions_isEstimate": "salesCommissionsIsEstimate",
    "typesOfSecuritiesOffered_isOtherType": "isOtherType",
    "salesCompensationList_recipient_associatedBDCRDNumber": "associatedBDCRDNumber",
    "salesCompensationList_recipient_recipientAddress_stateOrCountry": "recipientStateOrCountry",
    "typesOfSecuritiesOffered_descriptionOfOtherType": "descriptionOfOtherType",
    "salesCommissionsFindersFees_salesCommissions_dollarAmount": "salesCommissionsAmount",
    "useOfProceeds_clarificationOfResponse": "useOfProceedsClarification",
    "accession": "accession",
    "typesOfSecuritiesOffered_isPooledInvestmentFundType": "isPooledInvestmentFundType",
    "salesCompensationList_recipient_statesOfSolicitationList_value": "statesOfSolicitationValue",
    "signatureBlock_signature_signatureName": "signatureName",
    "typeOfFiling_newOrAmendment_isAmendment": "isAmendment",
    "issuerSize_revenueRange": "revenueRange",
    "salesCommissionsFindersFees_clarificationOfResponse": "salesCommissionsFindersFeesClarification",
    "salesCompensationList_recipient_recipientAddress_zipCode": "recipientZipCode",
    "salesCompensationList_recipient_recipientAddress_city": "recipientCity",
    "typesOfSecuritiesOffered_isOptionToAcquireType": "isOptionToAcquireType",
    "businessCombinationTransaction_clarificationOfResponse": "businessCombinationClarification",
    "typesOfSecuritiesOffered_isTenantInCommonType": "isTenantInCommonType",
    "salesCompensationList_recipient_statesOfSolicitationList_description": "statesOfSolicitationDescription",
    "offeringSalesAmounts_totalOfferingAmount": "totalOfferingAmount",
    "investors_numberNonAccreditedInvestors": "numberNonAccreditedInvestors",
    "signatureBlock_authorizedRepresentative": "authorizedRepresentative",
    "signatureBlock_signature_issuerName": "issuerName",
    "salesCompensationList_recipient_recipientAddress_street2": "recipientStreet2",
    "typesOfSecuritiesOffered_isDebtType": "isDebtType",
    "salesCompensationList_recipient_recipientAddress_street1": "recipientStreet1",
    "signatureBlock_signature_signatureTitle": "signatureTitle",
    "industryGroup_investmentFundInfo_investmentFundType": "investmentFundType",
    "salesCommissionsFindersFees_findersFees_isEstimate": "findersFeesIsEstimate",
    "typeOfFiling_dateOfFirstSale_value": "dateOfFirstSaleValue",
    "offeringSalesAmounts_totalAmountSold": "totalAmountSold",
    "offeringSalesAmounts_clarificationOfResponse": "offeringSalesAmountsClarification",
    "investors_hasNonAccreditedInvestors": "hasNonAccreditedInvestors",
    "salesCompensationList_recipient_recipientCRDNumber": "recipientCRDNumber",
    "typesOfSecuritiesOffered_isMineralPropertyType": "isMineralPropertyType",
    "salesCompensationList_recipient_recipientName": "recipientName",
    "durationOfOffering_moreThanOneYear": "moreThanOneYear",
}
88
# Form D primary-issuer table: flattened source key -> output column name.
primary_issuer_d_dict = {
    "yearOfInc_withinFiveYears": "yearOfIncWithinFiveYears",
    "entityTypeOtherDesc": "entityTypeOtherDesc",
    "jurisdictionOfInc": "jurisdictionOfInc",
    "issuerAddress_street1": "issuerStreet1",
    "issuerAddress_zipCode": "issuerZipCode",
    "issuerPreviousNameList_previousName": "issuerPreviousName",
    "entityType": "entityType",
    "issuerPreviousNameList_value": "issuerPreviousNameListValue",
    "issuerPhoneNumber": "issuerPhoneNumber",
    "yearOfInc_value": "yearOfIncValue",
    "yearOfInc_yetToBeFormed": "yearOfIncYetToBeFormed",
    "edgarPreviousNameList_previousName": "edgarPreviousName",
    "edgarPreviousNameList_value": "edgarPreviousNameListValue",
    "issuerAddress_stateOrCountry": "issuerStateOrCountry",
    "entityName": "entityName",
    "accession": "accession",
    "issuerAddress_street2": "issuerStreet2",
    "issuerAddress_city": "issuerCity",
    "issuerAddress_stateOrCountryDescription": "issuerStateOrCountryDescription",
    "cik": "cik",
    "yearOfInc_overFiveYears": "yearOfIncOverFiveYears",
}
111
+
112
# Form D related-persons table: flattened source key -> output column name.
related_persons_d_dict = {
    "relatedPersonInfo_relatedPersonAddress_stateOrCountry": "relatedPersonStateOrCountry",
    "relatedPersonInfo_relatedPersonRelationshipList_relationship": "relatedPersonRelationship",
    "relatedPersonInfo_relationshipClarification": "relationshipClarification",
    "relatedPersonInfo_relatedPersonName_lastName": "relatedPersonLastName",
    "accession": "accession",
    "relatedPersonInfo_relatedPersonName_middleName": "relatedPersonMiddleName",
    "relatedPersonInfo_relatedPersonAddress_zipCode": "relatedPersonZipCode",
    "relatedPersonInfo_relatedPersonAddress_city": "relatedPersonCity",
    "relatedPersonInfo_relatedPersonAddress_street1": "relatedPersonStreet1",
    "relatedPersonInfo_relatedPersonAddress_stateOrCountryDescription": "relatedPersonStateOrCountryDescription",
    "relatedPersonInfo_relatedPersonName_firstName": "relatedPersonFirstName",
    "relatedPersonInfo_relatedPersonAddress_street2": "relatedPersonStreet2",
}
@@ -0,0 +1,63 @@
1
+ # Assets dictionary mapping
2
# EX-102 asset table: every column keeps its source name, so the mapping is
# generated from the ordered list of field names (spelling kept as-is).
assets_dict_ex102_abs = {
    field: field
    for field in (
        "assetNumber",
        "DefeasedStatusCode",
        "defeasanceOptionStartDate",
        "mostRecentDebtServiceCoverageNetOperatingIncomePercentage",
        "mostRecentDebtServiceAmount",
        "debtServiceCoverageSecuritizationCode",
        "debtServiceCoverageNetOperatingIncomeSecuritizationPercentage",
        "valuationSecuritizationDate",
        "physicalOccupancySecuritizationPercentage",
        "revenueSecuritizationAmount",
        "valuationSourceSecuritizationCode",
        "financialsSecuritizationDate",
        "mostRecentNetCashFlowAmount",
        "operatingExpensesAmount",
        "operatingExpensesSecuritizationAmount",
        "netOperatingIncomeNetCashFlowSecuritizationCode",
        "mostRecentValuationSourceCode",
        "mostRecentDebtServiceCoverageNetCashFlowpercentage",
        "debtServiceCoverageNetCashFlowSecuritizationPercentage",
        "mostRecentAnnualLeaseRolloverReviewDate",
        "mostRecentRevenueAmount",
        "mostRecentPhysicalOccupancyPercentage",
        "mostRecentNetOperatingIncomeAmount",
        "netOperatingIncomeSecuritizationAmount",
        "netOperatingIncomeNetCashFlowCode",
        "mostRecentFinancialsStartDate",
        "mostRecentFinancialsEndDate",
        "accession",
        "valuationSecuritizationAmount",
        "mostRecentValuationDate",
        "mostRecentValuationAmount",
        "mostRecentDebtServiceCoverageCode",
        "netCashFlowFlowSecuritizationAmount",
    )
}
37
+
38
+ # Properties dictionary mapping
39
# EX-102 property table: every column keeps its source name, so the mapping is
# generated from the ordered list of field names.
# NOTE(review): unlike assets_dict_ex102_abs there is no 'assetNumber' or
# 'accession' entry here — confirm whether that is intentional.
properties_dict_ex102_abs = {
    field: field
    for field in (
        "unitsBedsRoomsNumber",
        "propertyCounty",
        "squareFeetLargestTenantNumber",
        "netRentableSquareFeetNumber",
        "leaseExpirationThirdLargestTenantDate",
        "leaseExpirationLargestTenantDate",
        "propertyZip",
        "squareFeetThirdLargestTenantNumber",
        "propertyStatusCode",
        "propertyState",
        "yearBuiltNumber",
        "propertyCity",
        "propertyName",
        "propertyAddress",
        "yearLastRenovated",
        "leaseExpirationSecondLargestTenantDate",
        "thirdLargestTenant",
        "unitsBedsRoomsSecuritizationNumber",
        "propertyTypeCode",
        "largestTenant",
        "squareFeetSecondLargestTenantNumber",
        "netRentableSquareFeetSecuritizationNumber",
        "secondLargestTenant",
    )
}
@@ -1,3 +1,4 @@
1
+ # Ready for mass testing
1
2
 
2
3
  # Information Table (13F-HR Securities) mapping
3
4
  information_table_dict = {
@@ -1,4 +1,4 @@
1
- # Mapping dictionaries for SEC filing table types based on actual field occurrences
1
+ # Ready for mass testing
2
2
 
3
3
  # Non-derivative transaction ownership mapping
4
4
  non_derivative_transaction_ownership_dict = {
@@ -1 +1,17 @@
1
- proxy_voting_record_dict = {}
1
# Proxy voting record table: flattened source key -> output column name.
proxy_voting_record_dict = {
    "meetingDate": "meetingDate",
    "accession": "accessionNumber",
    "vote_voteRecord_managementRecommendation": "managementRecommendation",
    # Top-level sharesVoted keeps its name ...
    "sharesVoted": "sharesVoted",
    "vote_voteRecord_howVoted": "howVoted",
    "sharesOnLoan": "sharesOnLoan",
    "cusip": "cusip",
    "issuerName": "issuerName",
    "voteCategories_voteCategory_categoryType": "categoryType",
    "voteDescription": "voteDescription",
    "voteManager_otherManagers_otherManager": "otherManager",
    # ... while the per-record sharesVoted is renamed so the two do not collide.
    "vote_voteRecord_sharesVoted": "recordSharesVoted",
    "isin": "isin",
    "voteSource": "voteSource",
    "voteSeries": "voteSeries",
}
@@ -0,0 +1,9 @@
1
+ # Note: "submission_metadata" is this package's own designation (not an SEC term) for the header of the Submission tag.
2
+
3
# Per-document submission metadata: every column keeps its source name.
document_submission_metadata_dict = {
    name: name
    for name in ("accession", "type", "sequence", "filename", "description")
}
@@ -1,5 +1,72 @@
1
+ # Ready for mass testing
1
2
 
2
3
  # 13F-HR (Institutional Investment Manager Holdings) mapping
3
- thirteenfhr_dict = {
4
-
5
- }
4
thirteenfhr_dict = {
    # --- formData / coverPage ---
    "formData_coverPage_reportCalendarOrQuarter": "reportCalendarOrQuarter",
    "formData_coverPage_filingManager_name": "filingManagerName",
    "formData_coverPage_filingManager_address_street1": "filingManagerStreet1",
    "formData_coverPage_filingManager_address_street2": "filingManagerStreet2",
    "formData_coverPage_filingManager_address_city": "filingManagerCity",
    "formData_coverPage_filingManager_address_stateOrCountry": "filingManagerStateOrCountry",
    "formData_coverPage_filingManager_address_zipCode": "filingManagerZipCode",
    "formData_coverPage_crdNumber": "crdNumber",
    "formData_coverPage_secFileNumber": "secFileNumber",
    "formData_coverPage_form13FFileNumber": "form13FFileNumber",
    "formData_coverPage_reportType": "reportType",
    "formData_coverPage_isAmendment": "isAmendment",
    "formData_coverPage_amendmentNo": "amendmentNo",
    "formData_coverPage_amendmentInfo_amendmentType": "amendmentType",
    "formData_coverPage_amendmentInfo_confDeniedExpired": "confDeniedExpired",
    "formData_coverPage_additionalInformation": "additionalInformation",
    "formData_coverPage_provideInfoForInstruction5": "provideInfoForInstruction5",

    # --- formData / coverPage / otherManagersInfo ---
    "formData_coverPage_otherManagersInfo_otherManager": "otherManager",
    "formData_coverPage_otherManagersInfo_otherManager_cik": "otherManagerCik",
    "formData_coverPage_otherManagersInfo_otherManager_name": "otherManagerName",
    "formData_coverPage_otherManagersInfo_otherManager_crdNumber": "otherManagerCrdNumber",
    "formData_coverPage_otherManagersInfo_otherManager_secFileNumber": "otherManagerSecFileNumber",
    "formData_coverPage_otherManagersInfo_otherManager_form13FFileNumber": "otherManagerForm13FFileNumber",

    # --- formData / summaryPage ---
    "formData_summaryPage_isConfidentialOmitted": "isConfidentialOmitted",
    "formData_summaryPage_otherIncludedManagersCount": "otherIncludedManagersCount",
    "formData_summaryPage_tableEntryTotal": "tableEntryTotal",
    "formData_summaryPage_tableValueTotal": "tableValueTotal",

    # --- formData / summaryPage / otherManagers2Info ---
    "formData_summaryPage_otherManagers2Info_otherManager2": "otherManager2",
    "formData_summaryPage_otherManagers2Info_otherManager2_sequenceNumber": "otherManager2SequenceNumber",
    "formData_summaryPage_otherManagers2Info_otherManager2_otherManager_cik": "otherManager2Cik",
    "formData_summaryPage_otherManagers2Info_otherManager2_otherManager_name": "otherManager2Name",
    "formData_summaryPage_otherManagers2Info_otherManager2_otherManager_crdNumber": "otherManager2CrdNumber",
    "formData_summaryPage_otherManagers2Info_otherManager2_otherManager_secFileNumber": "otherManager2SecFileNumber",
    "formData_summaryPage_otherManagers2Info_otherManager2_otherManager_form13FFileNumber": "otherManager2Form13FFileNumber",

    # --- formData / signatureBlock ---
    "formData_signatureBlock_name": "signatureName",
    "formData_signatureBlock_title": "signatureTitle",
    "formData_signatureBlock_phone": "signaturePhone",
    "formData_signatureBlock_signature": "signature",
    "formData_signatureBlock_city": "signatureCity",
    "formData_signatureBlock_stateOrCountry": "signatureStateOrCountry",
    "formData_signatureBlock_signatureDate": "signatureDate",

    # --- headerData ---
    "headerData_filerInfo_periodOfReport": "periodOfReport",
    "headerData_filerInfo_filer_fileNumber": "filerFileNumber",
    "headerData_filerInfo_filer_credentials_cik": "filerCik",
    "headerData_filerInfo_filer_credentials_ccc": "filerCcc",
    "headerData_filerInfo_flags_confirmingCopyFlag": "confirmingCopyFlag",
    "headerData_filerInfo_flags_returnCopyFlag": "returnCopyFlag",
    "headerData_filerInfo_flags_overrideInternetFlag": "overrideInternetFlag",
    "headerData_filerInfo_denovoRequest": "denovoRequest",
    "headerData_filerInfo_liveTestFlag": "liveTestFlag",
    "headerData_submissionType": "submissionType",

    # --- schema and accession ---
    "schemaLocation": "schemaLocation",
    "schemaVersion": "schemaVersion",
    "accession": "accessionNumber",
}
@@ -1,3 +1,4 @@
1
+ # Ready for mass testing
1
2
  # 25-NSE mapping
2
3
  twentyfive_nse_dict = {
3
4
  'descriptionClassSecurity': 'securityDescription',
@@ -17,6 +17,17 @@ def process_tabular_data(self):
17
17
  tables = process_13fhr(self.data, self.accession)
18
18
  elif self.type in ["INFORMATION TABLE"]:
19
19
  tables = process_information_table(self.data, self.accession)
20
+ elif self.type in ["25-NSE", "25-NSE/A"]:
21
+ tables = process_25nse(self.data, self.accession)
22
+ # complete mark:
23
+ elif self.type in ["EX-102"]:
24
+ tables = process_ex102_abs(self.data, self.accession)
25
+ elif self.type in ["D","D/A"]:
26
+ tables = process_d(self.data, self.accession)
27
+ elif self.type in ["N-PX","N-PX/A"]:
28
+ tables = process_npx(self.data, self.accession)
29
+
30
+
20
31
  elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
21
32
  tables = process_sbsef(self.data, self.accession)
22
33
  elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
@@ -33,8 +44,7 @@ def process_tabular_data(self):
33
44
  tables = process_144(self.data, self.accession)
34
45
  elif self.type in ["24F-2NT", "24F-2NT/A"]:
35
46
  tables = process_24f2nt(self.data, self.accession)
36
- elif self.type in ["25-NSE", "25-NSE/A"]:
37
- tables = process_25nse(self.data, self.accession)
47
+
38
48
  elif self.type in ["ATS-N", "ATS-N/A"]:
39
49
  tables = process_ats(self.data, self.accession)
40
50
  # elif self.type in ["C","C-W","C-U","C-U-W","C/A","C/A-W",
@@ -42,8 +52,7 @@ def process_tabular_data(self):
42
52
  # tables = process_c(self.data, self.accession)
43
53
  elif self.type in ["CFPORTAL","CFPORTAL/A","CFPORTAL-W"]:
44
54
  tables = process_cfportal(self.data, self.accession)
45
- # elif self.type in ["D","D/A"]:
46
- # tables = process_d(self.data, self.accession)
55
+
47
56
  # elif self.type in ["MA","MA-A","MA/A","MA-I","MA-I/A","MA-W"]:
48
57
  # tables = process_ma(self.data, self.accession)
49
58
  # elif self.type in ["N-CEN","N-CEN/A"]:
@@ -53,8 +62,7 @@ def process_tabular_data(self):
53
62
  # tables = process_nmfp(self.data, self.accession)
54
63
  # elif self.type in ["NPORT-P","NPORT-P/A"]:
55
64
  # tables = process_nportp(self.data, self.accession)
56
- elif self.type in ["N-PX","N-PX/A"]:
57
- tables = process_npx(self.data, self.accession)
65
+
58
66
  # elif self.type in ["TA-1","TA-1/A","TA-W","TA-2","TA-2/A"]:
59
67
  # tables = process_ta(self.data, self.accession)
60
68
  elif self.type in ["X-17A-5","X-17A-5/A"]:
@@ -66,10 +74,11 @@ def process_tabular_data(self):
66
74
  tables = process_reg_a(self.data, self.accession)
67
75
  # elif self.type in ["SBSE","SBSE/A","SBSE-A","SBSE-A/A","SBSE-BD","SBSE-BD/A","SBSE-C","SBSE-W","SBSE-CCO-RPT","SBSE-CCO-RPT/A"]:
68
76
  # tables = process_sbs(self.data, self.accession)
69
- # elif self.type in ["EX-102"]:
70
- # tables = process_ex102_abs(self.data, self.accession)
77
+
71
78
  elif self.type == "PROXY VOTING RECORD":
72
79
  tables = process_proxy_voting_record(self.data, self.accession)
80
+ elif self.type == 'submission_metadata':
81
+ tables = process_submission_metadata(self.content, self.accession)
73
82
  else:
74
83
  warn(f"Processing for {self.type} is not implemented yet.")
75
84
  return []
@@ -95,6 +104,67 @@ def _flatten_dict(d, parent_key=''):
95
104
 
96
105
  return items
97
106
 
107
+ # flattens in a different way
108
def _merge_row_product(rows, extra_rows):
    """Return each row of *rows* merged with each row of *extra_rows*, in order."""
    return [{**row, **extra} for row in rows for extra in extra_rows]


def flatten_dict_to_rows(d, parent_key='', sep='_'):
    """Flatten a nested structure into a list of flat row dictionaries.

    Nested dictionary keys are joined to their parent key with *sep*. Every
    non-empty list multiplies the current rows: one output row is produced
    per existing row and list element (a cross-product), so repeated
    elements become separate rows instead of being packed into one cell.

    Args:
        d: A dict, a list, or a primitive value.
        parent_key: Prefix prepended to every generated key.
        sep: Separator placed between nested key parts.

    Returns:
        A list of flat dicts. A dict input always yields at least one row;
        a list input yields the concatenated rows of its items; a primitive
        yields ``[{parent_key: d}]``, or ``[]`` when there is no parent key
        to store it under.
    """
    if isinstance(d, list):
        all_rows = []
        for item in d:
            all_rows.extend(flatten_dict_to_rows(item, parent_key, sep))
        return all_rows

    if not isinstance(d, dict):
        return [{parent_key: d}] if parent_key else []

    rows = [{}]
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k

        if isinstance(v, dict):
            rows = _merge_row_product(rows, flatten_dict_to_rows(v, new_key, sep))
        elif isinstance(v, list) and v:
            # Flatten each element once (not once per existing row), then
            # combine the results with the rows built so far.
            expansions = []
            for list_item in v:
                if isinstance(list_item, dict):
                    expansions.extend(flatten_dict_to_rows(list_item, new_key, sep))
                else:
                    # Non-dict elements (including nested lists) are stored as-is.
                    expansions.append({new_key: list_item})
            rows = _merge_row_product(rows, expansions)
        else:
            # Primitive value, or an empty list recorded as an empty string.
            value = '' if isinstance(v, list) else v
            for row in rows:
                row[new_key] = value

    return rows
167
+
98
168
  def process_ownership(data, accession):
99
169
  tables = []
100
170
  if 'ownershipDocument' not in data:
@@ -346,33 +416,41 @@ def process_cfportal(data, accession):
346
416
 
347
417
  return tables
348
418
 
349
- # def process_d(data, accession):
350
- # tables = []
351
- # primary_issuer = safe_get(data, ['edgarSubmission', 'primaryIssuer'])
352
- # if primary_issuer:
353
- # metadata = Table(_flatten_dict(primary_issuer), 'metadata_d', accession)
354
-
355
- # metadata_columns = ['schemaVersion', 'submissionType', 'testOrLive', 'returnCopy', 'contactData', 'notificationAddressList']
356
- # for col in metadata_columns:
357
- # col_data = safe_get(data, ['edgarSubmission', col])
358
- # if col_data:
359
- # metadata.add_column(col, col_data)
360
-
361
- # tables.append(metadata)
362
-
363
- # issuer_list = safe_get(data, ['edgarSubmission', 'issuerList'])
364
- # if issuer_list:
365
- # tables.append(Table(_flatten_dict(issuer_list), 'primary_issuer_d', accession))
366
-
367
- # offering_data = safe_get(data, ['edgarSubmission', 'offeringData'])
368
- # if offering_data:
369
- # tables.append(Table(_flatten_dict(offering_data), 'offering_data_d', accession))
370
-
371
- # related_persons_list = safe_get(data, ['edgarSubmission', 'relatedPersonsList'])
372
- # if related_persons_list:
373
- # tables.append(Table(_flatten_dict(related_persons_list), 'related_persons_list_d', accession))
419
def process_d(data, accession):
    """Build the tables for a Form D / D/A submission.

    Each known group under ``data['edgarSubmission']`` becomes its own
    table; whatever remains after those groups are taken out is flattened
    into the ``metadata_d`` table.

    Args:
        data: Parsed submission; must contain an 'edgarSubmission' mapping.
        accession: Accession identifier passed through to every Table.

    Returns:
        A list of Table objects, ending with the ``metadata_d`` table.

    Raises:
        KeyError: If 'edgarSubmission' is missing from *data*.
    """
    # (key under 'edgarSubmission', table type, expand repeated elements into rows)
    groups = [
        ('contactData', 'contact_data_d', False),
        ('notificationAddressList', 'notification_address_list_d', False),
        ('primaryIssuer', 'primary_issuer_d', False),
        ('issuerList', 'issuer_list_d', True),
        ('relatedPersonsList', 'related_persons_list_d', True),
        ('offeringData', 'offering_data_d', True),
    ]

    # Work on a shallow copy: groups are popped below so the metadata table
    # only sees the leftovers, and the caller's parsed data must stay intact
    # so the document can be processed more than once.
    submission = dict(data['edgarSubmission'])

    tables = []
    for group, table_type, one_row_per_item in groups:
        # Every group is stored under its own key, so a single lookup
        # covers all of them.
        group_data = submission.pop(group, None)
        if not group_data:
            continue

        if one_row_per_item:
            # These groups can hold repeated elements; give each its own row.
            flattened_rows = flatten_dict_to_rows(group_data)
            if flattened_rows:
                tables.append(Table(flattened_rows, table_type, accession))
        else:
            tables.append(Table(_flatten_dict(group_data), table_type, accession))

    # Everything that is not one of the groups above is submission metadata.
    tables.append(Table(_flatten_dict(submission), 'metadata_d', accession))

    return tables
376
454
 
377
455
  # def process_nmfp(data, accession):
378
456
  # tables = []
@@ -583,13 +661,39 @@ def process_reg_a(data, accession):
583
661
 
584
662
  # return tables
585
663
 
586
- # def process_ex102_abs(data, accession):
587
- # tables = []
588
- # asset_data = safe_get(data, ['assetData'])
589
- # if asset_data:
590
- # tables.append(Table(_flatten_dict(asset_data), 'abs', accession))
591
- # raise NotImplementedError("Need to implement the rest of the ABS processing")
592
- # return tables
664
def process_ex102_abs(data, accession):
    """Build the asset and property tables for an EX-102 document.

    The entries under ``assetData -> assets`` are split in two: the asset
    itself (every field except 'property') and its property records, each
    tagged with the owning asset's 'assetNumber' so the two tables can be
    joined.

    Args:
        data: Parsed document data.
        accession: Accession identifier passed through to every Table.

    Returns:
        A list with up to two Table objects (assets, then properties);
        empty when the document has no asset data.
    """
    tables = []
    asset_items = safe_get(data, ['assetData', 'assets'])
    # NOTE(review): safe_get presumably returns a falsy value when the path
    # is missing — nothing to tabulate in that case.
    if not asset_items:
        return tables
    # A lone asset may arrive as a single dict rather than a list
    # (assumption about the parser — confirm); treat it as a one-item list.
    if isinstance(asset_items, dict):
        asset_items = [asset_items]

    # Assets: every field of each item except its nested 'property'.
    assets = [
        {k: v for k, v in item.items() if k != 'property'}
        for item in asset_items
    ]

    # Properties given as a single dict come first ...
    properties = [
        item['property'] | {'assetNumber': item['assetNumber']}
        for item in asset_items
        if isinstance(item.get('property'), dict)
    ]
    # ... followed by properties given as a list of dicts.
    properties.extend(
        prop | {'assetNumber': item['assetNumber']}
        for item in asset_items
        if isinstance(item.get('property'), list)
        for prop in item['property']
        if isinstance(prop, dict)
    )

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
593
697
 
594
698
  # def process_ma(data, accession):
595
699
  # tables = []
@@ -601,4 +705,28 @@ def process_reg_a(data, accession):
601
705
  # raise NotImplementedError("Need to implement the rest of the MA processing")
602
706
 
603
707
  # def process_ncen(data, accession):
604
- # raise NotImplementedError("Need to implement the N-CEN processing")
708
+ # raise NotImplementedError("Need to implement the N-CEN processing")
709
+
710
+ # WIP
711
+ # Note: going to pause this for now, as I don't have a great way of putting this in a csv.
712
def process_submission_metadata(data,accession):
    """Build tables from the sections of a submission's metadata.

    Looks up the 'documents', 'reporting-owner' and 'issuer' sections (in
    that order) and emits one flattened Table for each section that is
    present and non-empty.
    """
    sections = (
        ('documents', 'document_submission_metadata'),
        ('reporting-owner', 'reporting_owner_submission_metadata'),
        ('issuer', 'issuer_submission_metadata'),
    )

    tables = []
    for section_key, table_type in sections:
        section = safe_get(data, [section_key])
        if section:
            tables.append(Table(_flatten_dict(section), table_type, accession))

    # Still to do:
    #   - construct metadata: accession-number, date-of-filing-date-change,
    #     depositor-cik, effectiveness-date
    #   - other tables: depositor, securitizer

    return tables