datamule 1.2.5__tar.gz → 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.2.5 → datamule-1.2.7}/PKG-INFO +1 -2
- {datamule-1.2.5 → datamule-1.2.7}/datamule/__init__.py +1 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/document.py +12 -8
- datamule-1.2.7/datamule/document/mappings/ex102_abs.py +63 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/information_table.py +1 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/ownership.py +1 -1
- datamule-1.2.7/datamule/document/mappings/proxy_voting_record.py +17 -0
- datamule-1.2.7/datamule/document/mappings/submission_metadata.py +9 -0
- datamule-1.2.7/datamule/document/mappings/thirteenfhr.py +72 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/twentyfivense.py +1 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/processing.py +71 -14
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/table.py +48 -5
- {datamule-1.2.5 → datamule-1.2.7}/datamule/helper.py +10 -1
- {datamule-1.2.5 → datamule-1.2.7}/datamule/index.py +8 -10
- {datamule-1.2.5 → datamule-1.2.7}/datamule/portfolio.py +16 -11
- datamule-1.2.7/datamule/sec/submissions/monitor.py +183 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/submissions/textsearch.py +0 -4
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/xbrl/streamcompanyfacts.py +1 -1
- {datamule-1.2.5 → datamule-1.2.7}/datamule/seclibrary/downloader.py +2 -2
- {datamule-1.2.5 → datamule-1.2.7}/datamule/submission.py +80 -14
- {datamule-1.2.5 → datamule-1.2.7}/datamule.egg-info/PKG-INFO +1 -2
- {datamule-1.2.5 → datamule-1.2.7}/datamule.egg-info/SOURCES.txt +2 -2
- {datamule-1.2.5 → datamule-1.2.7}/datamule.egg-info/requires.txt +0 -1
- {datamule-1.2.5 → datamule-1.2.7}/setup.py +1 -2
- datamule-1.2.5/datamule/document/mappings/proxy_voting_record.py +0 -1
- datamule-1.2.5/datamule/document/mappings/thirteenfhr.py +0 -5
- datamule-1.2.5/datamule/sec/rss/monitor.py +0 -416
- datamule-1.2.5/datamule/sec/submissions/monitor.py +0 -130
- datamule-1.2.5/datamule/seclibrary/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/config.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/atsn.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/cfportal.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/ex99a_sdr.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/ex99c_sdr.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/ex99g_sdr.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/ex99i_sdr.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/nmfp.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/npx.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/onefourtyfour.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/sbs.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/sbsef.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/schedule13.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/sdr.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/ta.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/document/mappings/twentyfourf2nt.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/package_updater.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-1.2.5/datamule/sec/rss → datamule-1.2.7/datamule/sec/submissions}/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/utils.py +0 -0
- {datamule-1.2.5/datamule/sec/submissions → datamule-1.2.7/datamule/sec/xbrl}/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-1.2.5/datamule/sec/xbrl → datamule-1.2.7/datamule/seclibrary}/__init__.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/seclibrary/bq.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/seclibrary/query.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule/sheet.py +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.2.5 → datamule-1.2.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.2.5
|
3
|
+
Version: 1.2.7
|
4
4
|
Summary: Making it easier to use SEC filings.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -10,7 +10,6 @@ Requires-Dist: tqdm
|
|
10
10
|
Requires-Dist: requests
|
11
11
|
Requires-Dist: nest_asyncio
|
12
12
|
Requires-Dist: aiofiles
|
13
|
-
Requires-Dist: polars
|
14
13
|
Requires-Dist: setuptools
|
15
14
|
Requires-Dist: selectolax
|
16
15
|
Requires-Dist: pytz
|
@@ -118,10 +118,11 @@ class Document:
|
|
118
118
|
# will deprecate this when we add html2dict
|
119
119
|
elif self.extension in ['.htm', '.html','.txt']:
|
120
120
|
|
121
|
-
|
122
|
-
|
123
|
-
elif self.type == '10-Q':
|
121
|
+
|
122
|
+
if self.type == '10-Q':
|
124
123
|
mapping_dict = dict_10q
|
124
|
+
elif self.type == '10-K':
|
125
|
+
mapping_dict = dict_10k
|
125
126
|
elif self.type == '8-K':
|
126
127
|
mapping_dict = dict_8k
|
127
128
|
elif self.type == 'SC 13D':
|
@@ -140,18 +141,21 @@ class Document:
|
|
140
141
|
with open(output_filename, 'w',encoding='utf-8') as f:
|
141
142
|
json.dump(self.data, f, indent=2)
|
142
143
|
|
143
|
-
def
|
144
|
-
if self.
|
144
|
+
def tables(self):
|
145
|
+
if self.type == 'submission_metadata':
|
146
|
+
return process_tabular_data(self)
|
147
|
+
elif self.extension != '.xml':
|
145
148
|
return []
|
146
|
-
|
147
|
-
|
149
|
+
else:
|
150
|
+
self.parse()
|
151
|
+
return process_tabular_data(self)
|
148
152
|
|
149
153
|
|
150
154
|
def write_csv(self, output_folder):
|
151
155
|
output_folder = Path(output_folder)
|
152
156
|
output_folder.mkdir(exist_ok=True)
|
153
157
|
|
154
|
-
tables = self.
|
158
|
+
tables = self.tables()
|
155
159
|
|
156
160
|
if not tables:
|
157
161
|
return
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Assets dictionary mapping
|
2
|
+
assets_dict_ex102_abs = {
|
3
|
+
'assetNumber': 'assetNumber',
|
4
|
+
'DefeasedStatusCode': 'DefeasedStatusCode',
|
5
|
+
'defeasanceOptionStartDate': 'defeasanceOptionStartDate',
|
6
|
+
'mostRecentDebtServiceCoverageNetOperatingIncomePercentage': 'mostRecentDebtServiceCoverageNetOperatingIncomePercentage',
|
7
|
+
'mostRecentDebtServiceAmount': 'mostRecentDebtServiceAmount',
|
8
|
+
'debtServiceCoverageSecuritizationCode': 'debtServiceCoverageSecuritizationCode',
|
9
|
+
'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage': 'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage',
|
10
|
+
'valuationSecuritizationDate': 'valuationSecuritizationDate',
|
11
|
+
'physicalOccupancySecuritizationPercentage': 'physicalOccupancySecuritizationPercentage',
|
12
|
+
'revenueSecuritizationAmount': 'revenueSecuritizationAmount',
|
13
|
+
'valuationSourceSecuritizationCode': 'valuationSourceSecuritizationCode',
|
14
|
+
'financialsSecuritizationDate': 'financialsSecuritizationDate',
|
15
|
+
'mostRecentNetCashFlowAmount': 'mostRecentNetCashFlowAmount',
|
16
|
+
'operatingExpensesAmount': 'operatingExpensesAmount',
|
17
|
+
'operatingExpensesSecuritizationAmount': 'operatingExpensesSecuritizationAmount',
|
18
|
+
'netOperatingIncomeNetCashFlowSecuritizationCode': 'netOperatingIncomeNetCashFlowSecuritizationCode',
|
19
|
+
'mostRecentValuationSourceCode': 'mostRecentValuationSourceCode',
|
20
|
+
'mostRecentDebtServiceCoverageNetCashFlowpercentage': 'mostRecentDebtServiceCoverageNetCashFlowpercentage',
|
21
|
+
'debtServiceCoverageNetCashFlowSecuritizationPercentage': 'debtServiceCoverageNetCashFlowSecuritizationPercentage',
|
22
|
+
'mostRecentAnnualLeaseRolloverReviewDate': 'mostRecentAnnualLeaseRolloverReviewDate',
|
23
|
+
'mostRecentRevenueAmount': 'mostRecentRevenueAmount',
|
24
|
+
'mostRecentPhysicalOccupancyPercentage': 'mostRecentPhysicalOccupancyPercentage',
|
25
|
+
'mostRecentNetOperatingIncomeAmount': 'mostRecentNetOperatingIncomeAmount',
|
26
|
+
'netOperatingIncomeSecuritizationAmount': 'netOperatingIncomeSecuritizationAmount',
|
27
|
+
'netOperatingIncomeNetCashFlowCode': 'netOperatingIncomeNetCashFlowCode',
|
28
|
+
'mostRecentFinancialsStartDate': 'mostRecentFinancialsStartDate',
|
29
|
+
'mostRecentFinancialsEndDate': 'mostRecentFinancialsEndDate',
|
30
|
+
'accession': 'accession',
|
31
|
+
'valuationSecuritizationAmount': 'valuationSecuritizationAmount',
|
32
|
+
'mostRecentValuationDate': 'mostRecentValuationDate',
|
33
|
+
'mostRecentValuationAmount': 'mostRecentValuationAmount',
|
34
|
+
'mostRecentDebtServiceCoverageCode': 'mostRecentDebtServiceCoverageCode',
|
35
|
+
'netCashFlowFlowSecuritizationAmount': 'netCashFlowFlowSecuritizationAmount'
|
36
|
+
}
|
37
|
+
|
38
|
+
# Properties dictionary mapping
|
39
|
+
properties_dict_ex102_abs = {
|
40
|
+
'unitsBedsRoomsNumber': 'unitsBedsRoomsNumber',
|
41
|
+
'propertyCounty': 'propertyCounty',
|
42
|
+
'squareFeetLargestTenantNumber': 'squareFeetLargestTenantNumber',
|
43
|
+
'netRentableSquareFeetNumber': 'netRentableSquareFeetNumber',
|
44
|
+
'leaseExpirationThirdLargestTenantDate': 'leaseExpirationThirdLargestTenantDate',
|
45
|
+
'leaseExpirationLargestTenantDate': 'leaseExpirationLargestTenantDate',
|
46
|
+
'propertyZip': 'propertyZip',
|
47
|
+
'squareFeetThirdLargestTenantNumber': 'squareFeetThirdLargestTenantNumber',
|
48
|
+
'propertyStatusCode': 'propertyStatusCode',
|
49
|
+
'propertyState': 'propertyState',
|
50
|
+
'yearBuiltNumber': 'yearBuiltNumber',
|
51
|
+
'propertyCity': 'propertyCity',
|
52
|
+
'propertyName': 'propertyName',
|
53
|
+
'propertyAddress': 'propertyAddress',
|
54
|
+
'yearLastRenovated': 'yearLastRenovated',
|
55
|
+
'leaseExpirationSecondLargestTenantDate': 'leaseExpirationSecondLargestTenantDate',
|
56
|
+
'thirdLargestTenant': 'thirdLargestTenant',
|
57
|
+
'unitsBedsRoomsSecuritizationNumber': 'unitsBedsRoomsSecuritizationNumber',
|
58
|
+
'propertyTypeCode': 'propertyTypeCode',
|
59
|
+
'largestTenant': 'largestTenant',
|
60
|
+
'squareFeetSecondLargestTenantNumber': 'squareFeetSecondLargestTenantNumber',
|
61
|
+
'netRentableSquareFeetSecuritizationNumber': 'netRentableSquareFeetSecuritizationNumber',
|
62
|
+
'secondLargestTenant': 'secondLargestTenant'
|
63
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
proxy_voting_record_dict = {
|
2
|
+
'meetingDate': 'meetingDate',
|
3
|
+
'accession': 'accessionNumber',
|
4
|
+
'vote_voteRecord_managementRecommendation': 'managementRecommendation',
|
5
|
+
'sharesVoted': 'sharesVoted', # Top-level sharesVoted
|
6
|
+
'vote_voteRecord_howVoted': 'howVoted',
|
7
|
+
'sharesOnLoan': 'sharesOnLoan',
|
8
|
+
'cusip': 'cusip',
|
9
|
+
'issuerName': 'issuerName',
|
10
|
+
'voteCategories_voteCategory_categoryType': 'categoryType',
|
11
|
+
'voteDescription': 'voteDescription',
|
12
|
+
'voteManager_otherManagers_otherManager': 'otherManager',
|
13
|
+
'vote_voteRecord_sharesVoted': 'recordSharesVoted', # To distinguish from top-level sharesVoted
|
14
|
+
'isin': 'isin',
|
15
|
+
'voteSource': 'voteSource',
|
16
|
+
'voteSeries': 'voteSeries'
|
17
|
+
}
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# Note: submission_metadata is my designation, not SEC for the header of the Submission tag
|
2
|
+
|
3
|
+
document_submission_metadata_dict = {
|
4
|
+
'accession':'accession',
|
5
|
+
'type':'type',
|
6
|
+
'sequence' : 'sequence',
|
7
|
+
'filename' : 'filename',
|
8
|
+
'description':'description'
|
9
|
+
}
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# Ready for mass testing
|
2
|
+
|
3
|
+
# 13F-HR (Institutional Investment Manager Holdings) mapping
|
4
|
+
thirteenfhr_dict = {
|
5
|
+
# Cover Page Mapping
|
6
|
+
'formData_coverPage_reportCalendarOrQuarter': 'reportCalendarOrQuarter',
|
7
|
+
'formData_coverPage_filingManager_name': 'filingManagerName',
|
8
|
+
'formData_coverPage_filingManager_address_street1': 'filingManagerStreet1',
|
9
|
+
'formData_coverPage_filingManager_address_street2': 'filingManagerStreet2',
|
10
|
+
'formData_coverPage_filingManager_address_city': 'filingManagerCity',
|
11
|
+
'formData_coverPage_filingManager_address_stateOrCountry': 'filingManagerStateOrCountry',
|
12
|
+
'formData_coverPage_filingManager_address_zipCode': 'filingManagerZipCode',
|
13
|
+
'formData_coverPage_crdNumber': 'crdNumber',
|
14
|
+
'formData_coverPage_secFileNumber': 'secFileNumber',
|
15
|
+
'formData_coverPage_form13FFileNumber': 'form13FFileNumber',
|
16
|
+
'formData_coverPage_reportType': 'reportType',
|
17
|
+
'formData_coverPage_isAmendment': 'isAmendment',
|
18
|
+
'formData_coverPage_amendmentNo': 'amendmentNo',
|
19
|
+
'formData_coverPage_amendmentInfo_amendmentType': 'amendmentType',
|
20
|
+
'formData_coverPage_amendmentInfo_confDeniedExpired': 'confDeniedExpired',
|
21
|
+
'formData_coverPage_additionalInformation': 'additionalInformation',
|
22
|
+
'formData_coverPage_provideInfoForInstruction5': 'provideInfoForInstruction5',
|
23
|
+
|
24
|
+
# Other Managers Info Mapping
|
25
|
+
'formData_coverPage_otherManagersInfo_otherManager': 'otherManager',
|
26
|
+
'formData_coverPage_otherManagersInfo_otherManager_cik': 'otherManagerCik',
|
27
|
+
'formData_coverPage_otherManagersInfo_otherManager_name': 'otherManagerName',
|
28
|
+
'formData_coverPage_otherManagersInfo_otherManager_crdNumber': 'otherManagerCrdNumber',
|
29
|
+
'formData_coverPage_otherManagersInfo_otherManager_secFileNumber': 'otherManagerSecFileNumber',
|
30
|
+
'formData_coverPage_otherManagersInfo_otherManager_form13FFileNumber': 'otherManagerForm13FFileNumber',
|
31
|
+
|
32
|
+
# Summary Page Mapping
|
33
|
+
'formData_summaryPage_isConfidentialOmitted': 'isConfidentialOmitted',
|
34
|
+
'formData_summaryPage_otherIncludedManagersCount': 'otherIncludedManagersCount',
|
35
|
+
'formData_summaryPage_tableEntryTotal': 'tableEntryTotal',
|
36
|
+
'formData_summaryPage_tableValueTotal': 'tableValueTotal',
|
37
|
+
|
38
|
+
# Other Managers 2 Info Mapping
|
39
|
+
'formData_summaryPage_otherManagers2Info_otherManager2': 'otherManager2',
|
40
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_sequenceNumber': 'otherManager2SequenceNumber',
|
41
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_cik': 'otherManager2Cik',
|
42
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_name': 'otherManager2Name',
|
43
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_crdNumber': 'otherManager2CrdNumber',
|
44
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_secFileNumber': 'otherManager2SecFileNumber',
|
45
|
+
'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_form13FFileNumber': 'otherManager2Form13FFileNumber',
|
46
|
+
|
47
|
+
# Signature Block Mapping
|
48
|
+
'formData_signatureBlock_name': 'signatureName',
|
49
|
+
'formData_signatureBlock_title': 'signatureTitle',
|
50
|
+
'formData_signatureBlock_phone': 'signaturePhone',
|
51
|
+
'formData_signatureBlock_signature': 'signature',
|
52
|
+
'formData_signatureBlock_city': 'signatureCity',
|
53
|
+
'formData_signatureBlock_stateOrCountry': 'signatureStateOrCountry',
|
54
|
+
'formData_signatureBlock_signatureDate': 'signatureDate',
|
55
|
+
|
56
|
+
# Header Data Mapping
|
57
|
+
'headerData_filerInfo_periodOfReport': 'periodOfReport',
|
58
|
+
'headerData_filerInfo_filer_fileNumber': 'filerFileNumber',
|
59
|
+
'headerData_filerInfo_filer_credentials_cik': 'filerCik',
|
60
|
+
'headerData_filerInfo_filer_credentials_ccc': 'filerCcc',
|
61
|
+
'headerData_filerInfo_flags_confirmingCopyFlag': 'confirmingCopyFlag',
|
62
|
+
'headerData_filerInfo_flags_returnCopyFlag': 'returnCopyFlag',
|
63
|
+
'headerData_filerInfo_flags_overrideInternetFlag': 'overrideInternetFlag',
|
64
|
+
'headerData_filerInfo_denovoRequest': 'denovoRequest',
|
65
|
+
'headerData_filerInfo_liveTestFlag': 'liveTestFlag',
|
66
|
+
'headerData_submissionType': 'submissionType',
|
67
|
+
|
68
|
+
# Schema and Metadata Mapping
|
69
|
+
'schemaLocation': 'schemaLocation',
|
70
|
+
'schemaVersion': 'schemaVersion',
|
71
|
+
'accession': 'accessionNumber'
|
72
|
+
}
|
@@ -17,6 +17,14 @@ def process_tabular_data(self):
|
|
17
17
|
tables = process_13fhr(self.data, self.accession)
|
18
18
|
elif self.type in ["INFORMATION TABLE"]:
|
19
19
|
tables = process_information_table(self.data, self.accession)
|
20
|
+
elif self.type in ["25-NSE", "25-NSE/A"]:
|
21
|
+
tables = process_25nse(self.data, self.accession)
|
22
|
+
# complete mark:
|
23
|
+
elif self.type in ["N-PX","N-PX/A"]:
|
24
|
+
tables = process_npx(self.data, self.accession)
|
25
|
+
elif self.type in ["EX-102"]:
|
26
|
+
tables = process_ex102_abs(self.data, self.accession)
|
27
|
+
|
20
28
|
elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
|
21
29
|
tables = process_sbsef(self.data, self.accession)
|
22
30
|
elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
|
@@ -33,8 +41,7 @@ def process_tabular_data(self):
|
|
33
41
|
tables = process_144(self.data, self.accession)
|
34
42
|
elif self.type in ["24F-2NT", "24F-2NT/A"]:
|
35
43
|
tables = process_24f2nt(self.data, self.accession)
|
36
|
-
|
37
|
-
tables = process_25nse(self.data, self.accession)
|
44
|
+
|
38
45
|
elif self.type in ["ATS-N", "ATS-N/A"]:
|
39
46
|
tables = process_ats(self.data, self.accession)
|
40
47
|
# elif self.type in ["C","C-W","C-U","C-U-W","C/A","C/A-W",
|
@@ -53,8 +60,7 @@ def process_tabular_data(self):
|
|
53
60
|
# tables = process_nmfp(self.data, self.accession)
|
54
61
|
# elif self.type in ["NPORT-P","NPORT-P/A"]:
|
55
62
|
# tables = process_nportp(self.data, self.accession)
|
56
|
-
|
57
|
-
tables = process_npx(self.data, self.accession)
|
63
|
+
|
58
64
|
# elif self.type in ["TA-1","TA-1/A","TA-W","TA-2","TA-2/A"]:
|
59
65
|
# tables = process_ta(self.data, self.accession)
|
60
66
|
elif self.type in ["X-17A-5","X-17A-5/A"]:
|
@@ -66,10 +72,11 @@ def process_tabular_data(self):
|
|
66
72
|
tables = process_reg_a(self.data, self.accession)
|
67
73
|
# elif self.type in ["SBSE","SBSE/A","SBSE-A","SBSE-A/A","SBSE-BD","SBSE-BD/A","SBSE-C","SBSE-W","SBSE-CCO-RPT","SBSE-CCO-RPT/A"]:
|
68
74
|
# tables = process_sbs(self.data, self.accession)
|
69
|
-
|
70
|
-
# tables = process_ex102_abs(self.data, self.accession)
|
75
|
+
|
71
76
|
elif self.type == "PROXY VOTING RECORD":
|
72
77
|
tables = process_proxy_voting_record(self.data, self.accession)
|
78
|
+
elif self.type == 'submission_metadata':
|
79
|
+
tables = process_submission_metadata(self.content, self.accession)
|
73
80
|
else:
|
74
81
|
warn(f"Processing for {self.type} is not implemented yet.")
|
75
82
|
return []
|
@@ -583,13 +590,39 @@ def process_reg_a(data, accession):
|
|
583
590
|
|
584
591
|
# return tables
|
585
592
|
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
#
|
591
|
-
|
592
|
-
|
593
|
+
def process_ex102_abs(data, accession):
    """Build the asset and property tables for an EX-102 (ABS) document.

    Reads the entries found under ``assetData -> assets`` and produces up to
    two tables:

    * ``assets_ex102_absee``: every entry with its ``property`` field removed.
    * ``properties_ex102_absee``: every property dict, tagged with the
      ``assetNumber`` of the entry it belongs to.

    Args:
        data: Parsed document content.
        accession: Accession identifier handed to each ``Table``.

    Returns:
        list: Zero, one or two ``Table`` objects.

    Raises:
        KeyError: If an entry carries a ``property`` but no ``assetNumber``.
    """
    tables = []
    entries = safe_get(data, ['assetData', 'assets'])

    # NOTE(review): assumes safe_get yields a falsy value (e.g. None) when the
    # path is absent -- confirm. Without this guard the loops below would try
    # to iterate that value.
    if not entries:
        return tables

    # 'property' is handled below as either a dict or a list of dicts; apply
    # the same normalisation to the asset entries themselves. Iterating a
    # bare dict would yield its string keys and break on ``item.items()``.
    if isinstance(entries, dict):
        entries = [entries]

    # Assets table: each entry minus its nested 'property' field.
    assets = [
        {key: value for key, value in item.items() if key != 'property'}
        for item in entries
    ]

    properties = []

    # Entries whose 'property' is a single dict come first ...
    properties.extend(
        item['property'] | {'assetNumber': item['assetNumber']}
        for item in entries
        if isinstance(item.get('property'), dict)
    )

    # ... followed by entries whose 'property' is a list; non-dict members of
    # such a list are ignored.
    properties.extend(
        prop | {'assetNumber': item['assetNumber']}
        for item in entries
        if isinstance(item.get('property'), list)
        for prop in item['property']
        if isinstance(prop, dict)
    )

    if assets:
        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))

    if properties:
        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))

    return tables
|
593
626
|
|
594
627
|
# def process_ma(data, accession):
|
595
628
|
# tables = []
|
@@ -601,4 +634,28 @@ def process_reg_a(data, accession):
|
|
601
634
|
# raise NotImplementedError("Need to implement the rest of the MA processing")
|
602
635
|
|
603
636
|
# def process_ncen(data, accession):
|
604
|
-
# raise NotImplementedError("Need to implement the N-CEN processing")
|
637
|
+
# raise NotImplementedError("Need to implement the N-CEN processing")
|
638
|
+
|
639
|
+
# WIP
|
640
|
+
# Note: going to pause this for now, as I don't have a great way of putting this in a csv.
|
641
|
+
def process_submission_metadata(data, accession):
    """Turn selected sections of the submission header into tables.

    For each of the ``documents``, ``reporting-owner`` and ``issuer`` sections
    that is looked up successfully and is truthy, one flattened ``Table`` is
    emitted.

    Args:
        data: Submission header content to read the sections from.
        accession: Accession identifier handed to each ``Table``.

    Returns:
        list: ``Table`` objects in the section order above (possibly empty).
    """
    # (key looked up in the header, table type given to Table)
    sections = (
        ('documents', 'document_submission_metadata'),
        ('reporting-owner', 'reporting_owner_submission_metadata'),
        ('issuer', 'issuer_submission_metadata'),
    )

    tables = []
    for section_key, table_type in sections:
        section = safe_get(data, [section_key])
        if not section:
            continue
        tables.append(Table(_flatten_dict(section), table_type, accession))

    # Not covered yet (carried over from the original notes):
    #   - a metadata table: accession-number, date-of-filing-date-change,
    #     depositor-cik, effectiveness-date
    #   - other tables: depositor, securitizer
    return tables
|
@@ -18,7 +18,11 @@ from .mappings.thirteenfhr import *
|
|
18
18
|
from .mappings.twentyfivense import *
|
19
19
|
from .mappings.twentyfourf2nt import *
|
20
20
|
from .mappings.information_table import *
|
21
|
+
from .mappings.submission_metadata import *
|
22
|
+
from .mappings.ex102_abs import *
|
21
23
|
|
24
|
+
from pathlib import Path
|
25
|
+
import csv
|
22
26
|
# need to check if mappings correctly create new columns
|
23
27
|
class Table():
|
24
28
|
def __init__(self, data, type,accession):
|
@@ -27,11 +31,18 @@ class Table():
|
|
27
31
|
self.type = type
|
28
32
|
self.data = data
|
29
33
|
self.accession = accession
|
30
|
-
self.columns = self.
|
34
|
+
self.columns = self.determine_columns_complete()
|
35
|
+
|
36
|
+
def determine_columns_complete(self):
    """Return every column name that occurs in any row of ``self.data``.

    All rows are scanned, so a key that first appears in a later row is
    still reported.

    Returns:
        list: Column names in first-seen order (row by row, then key order
        within each row). Empty when ``self.data`` is empty or None.
    """
    if not self.data:
        return []

    # A dict serves as an ordered set here. Building the union through
    # ``set`` gives an order that depends on string hashing, which is not
    # stable between interpreter runs and would make the column order of
    # written CSV files unpredictable.
    seen = {}
    for row in self.data:
        seen.update(dict.fromkeys(row))
    return list(seen)
|
40
|
+
|
31
41
|
|
32
42
|
def determine_columns(self):
|
33
43
|
if len(self.data) == 0:
|
34
44
|
return []
|
45
|
+
|
35
46
|
return self.data[0].keys()
|
36
47
|
|
37
48
|
def add_column(self,column_name,value):
|
@@ -227,6 +238,15 @@ class Table():
|
|
227
238
|
mapping_dict = item_9_24f2nt_dict
|
228
239
|
elif self.type == 'signature_info_schedule_a':
|
229
240
|
mapping_dict = signature_24f2nt_dict
|
241
|
+
# ABS
|
242
|
+
elif self.type == 'assets_ex102_absee':
|
243
|
+
mapping_dict = assets_dict_ex102_abs
|
244
|
+
elif self.type =='properties_ex102_absee':
|
245
|
+
mapping_dict = properties_dict_ex102_abs
|
246
|
+
# submission metadata
|
247
|
+
elif self.type == 'document_submission_metadata':
|
248
|
+
mapping_dict = document_submission_metadata_dict
|
249
|
+
|
230
250
|
|
231
251
|
else:
|
232
252
|
mapping_dict = {}
|
@@ -245,9 +265,6 @@ class Table():
|
|
245
265
|
for old_key, new_key in mapping_dict.items():
|
246
266
|
if old_key in row:
|
247
267
|
ordered_row[new_key] = row.pop(old_key)
|
248
|
-
else:
|
249
|
-
# if the old key is not present, set the new key to None
|
250
|
-
ordered_row[new_key] = None
|
251
268
|
|
252
269
|
# Then add any remaining keys that weren't in the mapping
|
253
270
|
for key, value in row.items():
|
@@ -257,4 +274,30 @@ class Table():
|
|
257
274
|
row.clear()
|
258
275
|
row.update(ordered_row)
|
259
276
|
|
260
|
-
|
277
|
+
# Update the columns after mapping
|
278
|
+
columns = set(self.columns)
|
279
|
+
# remove the old columns that are now in the mapping
|
280
|
+
columns.difference_update(mapping_dict.keys())
|
281
|
+
# add the new columns from the mapping
|
282
|
+
columns.update(mapping_dict.values())
|
283
|
+
# add the accession column to the columns
|
284
|
+
columns.add('accession')
|
285
|
+
|
286
|
+
self.columns = list(columns)
|
287
|
+
|
288
|
+
def write_csv(self, output_file):
    """Write ``self.data`` to ``output_file`` as a fully quoted CSV.

    Rows are appended when the file already exists; otherwise the file is
    created. The header row (``self.columns``) is written only when the file
    is new or empty, so repeated calls accumulate rows under one header.

    Args:
        output_file: Destination path (str or path-like).

    Raises:
        ValueError: Raised by ``csv.DictWriter`` if a row contains a key
            that is not listed in ``self.columns``.
    """
    output_file = Path(output_file)

    # An existing but empty file still needs its header row.
    needs_header = not output_file.exists() or output_file.stat().st_size == 0

    # NOTE(review): when appending, rows are written in this table's column
    # order; nothing here checks that it matches the header already on disk.
    #
    # UTF-8 is requested explicitly because the platform default encoding
    # may be unable to represent non-ASCII values. newline='' leaves line
    # endings to the csv module. Mode 'a' also creates a missing file.
    with open(output_file, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=self.columns, quoting=csv.QUOTE_ALL)
        if needs_header:
            writer.writeheader()
        writer.writerows(self.data)
|
@@ -79,7 +79,16 @@ def _process_cik_and_metadata_filters(cik=None, ticker=None, **kwargs):
|
|
79
79
|
|
80
80
|
# Convert ticker to CIK if provided
|
81
81
|
if ticker is not None:
|
82
|
-
|
82
|
+
if isinstance(ticker, str):
|
83
|
+
ticker = [ticker]
|
84
|
+
|
85
|
+
ciks_from_ticker = []
|
86
|
+
for t in ticker:
|
87
|
+
ciks = get_cik_from_dataset('listed_filer_metadata', 'ticker', t)
|
88
|
+
if ciks:
|
89
|
+
ciks_from_ticker.extend(ciks)
|
90
|
+
|
91
|
+
cik = ciks
|
83
92
|
|
84
93
|
# Normalize CIK format
|
85
94
|
if cik is not None:
|
@@ -1,16 +1,16 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
from .sec.submissions.textsearch import query
|
3
|
-
from .helper import _process_cik_and_metadata_filters
|
3
|
+
from .helper import _process_cik_and_metadata_filters
|
4
|
+
from pathlib import Path
|
4
5
|
|
5
6
|
class Index:
|
6
|
-
def __init__(self
|
7
|
-
|
7
|
+
def __init__(self):
|
8
|
+
pass
|
8
9
|
|
9
10
|
def search_submissions(
|
10
11
|
self,
|
11
12
|
text_query,
|
12
|
-
|
13
|
-
end_date=None,
|
13
|
+
filing_date=None,
|
14
14
|
submission_type=None,
|
15
15
|
cik=None,
|
16
16
|
ticker=None,
|
@@ -47,16 +47,14 @@ class Index:
|
|
47
47
|
# Execute the search query
|
48
48
|
results = query(
|
49
49
|
f'{text_query}',
|
50
|
-
filing_date=
|
50
|
+
filing_date=filing_date,
|
51
51
|
requests_per_second=requests_per_second,
|
52
52
|
quiet=quiet,
|
53
53
|
submission_type=submission_type,
|
54
54
|
**kwargs
|
55
55
|
)
|
56
56
|
|
57
|
-
|
58
|
-
if self.path:
|
59
|
-
self._save_results(results, text_query)
|
57
|
+
|
60
58
|
|
61
59
|
return results
|
62
60
|
|
@@ -9,22 +9,28 @@ import os
|
|
9
9
|
from .helper import _process_cik_and_metadata_filters
|
10
10
|
from .seclibrary.downloader import download as seclibrary_download
|
11
11
|
from .sec.xbrl.filter_xbrl import filter_xbrl
|
12
|
-
from .sec.submissions.monitor import
|
13
|
-
from .sec.xbrl.xbrlmonitor import XBRLMonitor
|
12
|
+
from .sec.submissions.monitor import Monitor
|
13
|
+
#from .sec.xbrl.xbrlmonitor import XBRLMonitor
|
14
14
|
|
15
15
|
|
16
16
|
class Portfolio:
|
17
17
|
def __init__(self, path):
|
18
18
|
self.path = Path(path)
|
19
|
+
self.api_key = None
|
19
20
|
self.submissions = []
|
20
21
|
self.submissions_loaded = False
|
21
22
|
self.MAX_WORKERS = os.cpu_count() - 1
|
23
|
+
|
24
|
+
self.monitor = Monitor()
|
22
25
|
|
23
26
|
if self.path.exists():
|
24
27
|
self._load_submissions()
|
25
28
|
self.submissions_loaded = True
|
26
29
|
else:
|
27
30
|
self.path.mkdir(parents=True, exist_ok=True)
|
31
|
+
|
32
|
+
def set_api_key(self, api_key):
|
33
|
+
self.api_key = api_key
|
28
34
|
|
29
35
|
def _load_submissions(self):
|
30
36
|
folders = [f for f in self.path.iterdir() if f.is_dir()]
|
@@ -132,6 +138,7 @@ class Portfolio:
|
|
132
138
|
seclibrary_download(
|
133
139
|
output_dir=self.path,
|
134
140
|
cik=cik,
|
141
|
+
api_key=self.api_key,
|
135
142
|
submission_type=submission_type,
|
136
143
|
filing_date=filing_date,
|
137
144
|
accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None,
|
@@ -149,20 +156,18 @@ class Portfolio:
|
|
149
156
|
)
|
150
157
|
|
151
158
|
self.submissions_loaded = False
|
152
|
-
def monitor_submissions(self,data_callback=None,
|
153
|
-
|
159
|
+
def monitor_submissions(self, data_callback=None, interval_callback=None,
|
160
|
+
polling_interval=1000, quiet=True, start_date=None,
|
161
|
+
validation_interval=600000):
|
154
162
|
|
155
|
-
cik = _process_cik_and_metadata_filters(cik, ticker, **kwargs)
|
156
163
|
|
157
|
-
monitor(
|
164
|
+
self.monitor.monitor_submissions(
|
158
165
|
data_callback=data_callback,
|
159
|
-
|
160
|
-
cik=cik,
|
161
|
-
submission_type=submission_type,
|
166
|
+
interval_callback=interval_callback,
|
162
167
|
polling_interval=polling_interval,
|
163
|
-
requests_per_second=requests_per_second,
|
164
168
|
quiet=quiet,
|
165
|
-
start_date=start_date
|
169
|
+
start_date=start_date,
|
170
|
+
validation_interval=validation_interval
|
166
171
|
)
|
167
172
|
|
168
173
|
|