datamule 2.0.5__py3-none-any.whl → 2.0.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- datamule/document/document.py +33 -18
- datamule/document/tables/tables.py +129 -0
- datamule/document/{mappings/thirteenfhr.py → tables/tables_13fhr.py} +8 -4
- datamule/document/{mappings/twentyfivense.py → tables/tables_25nse.py} +7 -2
- datamule/document/{mappings/information_table.py → tables/tables_informationtable.py} +7 -3
- datamule/document/{mappings/npx.py → tables/tables_npx.py} +7 -0
- datamule/document/{mappings/ownership.py → tables/tables_ownership.py} +37 -9
- datamule/document/{mappings/proxy_voting_record.py → tables/tables_proxyvotingrecord.py} +7 -0
- datamule/document/{mappings/sbsef.py → tables/tables_sbsef.py} +7 -0
- datamule/document/{mappings/sdr.py → tables/tables_sdr.py} +7 -0
- datamule/document/tables/utils.py +26 -0
- datamule/submission.py +47 -12
- {datamule-2.0.5.dist-info → datamule-2.0.6.dist-info}/METADATA +1 -1
- {datamule-2.0.5.dist-info → datamule-2.0.6.dist-info}/RECORD +17 -32
- datamule/document/mappings/atsn.py +0 -208
- datamule/document/mappings/cfportal.py +0 -346
- datamule/document/mappings/d.py +0 -125
- datamule/document/mappings/ex102_abs.py +0 -63
- datamule/document/mappings/ex99a_sdr.py +0 -1
- datamule/document/mappings/ex99c_sdr.py +0 -0
- datamule/document/mappings/ex99g_sdr.py +0 -0
- datamule/document/mappings/ex99i_sdr.py +0 -0
- datamule/document/mappings/nmfp.py +0 -275
- datamule/document/mappings/onefourtyfour.py +0 -68
- datamule/document/mappings/sbs.py +0 -0
- datamule/document/mappings/schedule13.py +0 -117
- datamule/document/mappings/submission_metadata.py +0 -9
- datamule/document/mappings/ta.py +0 -0
- datamule/document/mappings/twentyfourf2nt.py +0 -100
- datamule/document/processing.py +0 -732
- datamule/document/table.py +0 -315
- /datamule/document/{mappings → tables}/__init__.py +0 -0
- {datamule-2.0.5.dist-info → datamule-2.0.6.dist-info}/WHEEL +0 -0
- {datamule-2.0.5.dist-info → datamule-2.0.6.dist-info}/top_level.txt +0 -0
datamule/document/document.py
CHANGED
@@ -8,11 +8,12 @@ from ..mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_
 from ..mapping_dicts.xml_mapping_dicts import dict_345
 from ..mapping_dicts.html_mapping_dicts import *
 from selectolax.parser import HTMLParser
-
+
 from pathlib import Path
 import webbrowser
 from secsgml.utils import bytes_to_str
 
+from .tables.tables import Tables
 
 class Document:
     def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -33,7 +34,9 @@ class Document:
 
         self.extension = extension
         # this will be filled by parsed
-        self.
+        self._data = None
+        self._tables = None
+
 
 
         #_load_text_content
@@ -107,7 +110,7 @@ class Document:
     # Note: this method will be heavily modified in the future
     def parse(self):
         # check if we have already parsed the content
-        if self.
+        if self._data:
             return
 
         mapping_dict = None
@@ -125,8 +128,8 @@ class Document:
             elif self.type == 'SC 13G':
                 mapping_dict = dict_13g
 
-            self.
-            self.
+            self._data = {}
+            self._data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
         elif self.extension in ['.htm', '.html']:
 
             if self.type == '1-K':
@@ -204,16 +207,22 @@ class Document:
                 mapping_dict = dict_nt10k_html
 
             dct = html2dict(content=self.content, mapping_dict=mapping_dict)
-            self.
+            self._data = dct
         elif self.extension == '.xml':
             if self.type in ['3', '4', '5', '3/A', '4/A', '5/A']:
                 mapping_dict = dict_345
 
-            self.
+            self._data = xml2dict(content=self.content, mapping_dict=mapping_dict)
         elif self.extension == '.pdf':
-            self.
+            self._data = pdf2dict(content=self.content, mapping_dict=mapping_dict)
         else:
             pass
+
+    @property
+    def data(self):
+        if self._data is None:
+            self.parse()
+        return self._data
 
     def write_json(self, output_filename=None):
         if not self.data:
@@ -222,21 +231,27 @@ class Document:
         with open(output_filename, 'w',encoding='utf-8') as f:
             json.dump(self.data, f, indent=2)
 
-    def
-        if self.
-
-        elif self.extension != '.xml':
-            return []
+    def parse_tables(self):
+        if self.extension != '.xml':
+            self._tables = []
         else:
-
-
+            # Use the property to trigger parsing if needed
+            data = self.data
+            tables = Tables(document_type = self.type, accession=self.accession, data=data)
+            self._tables = tables.tables
+
+    @property
+    def tables(self):
+        if self._tables is None:
+            self.parse_tables()
+        return self._tables
 
 
     def write_csv(self, output_folder):
         output_folder = Path(output_folder)
         output_folder.mkdir(exist_ok=True)
 
-        tables = self.tables
+        tables = self.tables
 
         if not tables:
             return
@@ -315,13 +330,13 @@ class Document:
 
     # TODO CHANGE THIS
     def __iter__(self):
-
+        # Use the property to trigger parsing if needed
+        document_data = self.data
 
         # Let's remove XML iterable for now
 
         # Handle text-based documents
         if self.extension in ['.txt', '.htm', '.html']:
-            document_data = self.data
             if not document_data:
                 return iter([])
 
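Taken together, the document.py changes move parsing behind lazy accessors: _data and _tables start as None, and the new data and tables properties call parse() or parse_tables() on first access and reuse the cached result afterwards. A minimal usage sketch, assuming Document is imported from its module path; the filing values are placeholders:

    from datamule.document.document import Document

    # Hypothetical Form 4 XML; any document type the parser supports behaves the same way.
    doc = Document(
        type='4',
        content=b'<ownershipDocument>...</ownershipDocument>',
        extension='.xml',
        accession='0000000000-25-000001',   # placeholder accession
        filing_date='2025-01-01',
    )

    parsed = doc.data         # first access runs parse() and caches the result in _data
    for table in doc.tables:  # first access runs parse_tables(), which delegates to Tables
        print(table.name)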
datamule/document/tables/tables.py
ADDED
@@ -0,0 +1,129 @@
+from .tables_ownership import config_ownership
+from .tables_13fhr import mapping_13fhr
+from .tables_informationtable import config_information_table
+from .tables_25nse import config_25nse
+from .tables_npx import config_npx
+from .tables_sbsef import config_sbsef
+from .tables_sdr import config_sdr
+from .tables_proxyvotingrecord import config_proxyvotingrecord
+
+from .utils import safe_get, flatten_dict
+# will add filing date param later? or extension
+all_tables_dict = {
+    '3' : config_ownership,
+    '3/A' : config_ownership,
+    '4' : config_ownership,
+    '4/A' : config_ownership,
+    '5' : config_ownership,
+    '5/A' : config_ownership,
+    '13F-HR' : mapping_13fhr,
+    '13F-HR/A' : mapping_13fhr,
+    '13F-NT' : mapping_13fhr,
+    '13F-NT/A' : mapping_13fhr,
+    'INFORMATION TABLE' : config_information_table,
+    '25-NSE' : config_25nse,
+    '25-NSE/A' : config_25nse,
+    'N-PX' : config_npx,
+    'N-PX/A' : config_npx,
+    'SBSEF' : config_sbsef,
+    'SBSEF/A' : config_sbsef,
+    'SBSEF-V' : config_sbsef,
+    'SBSEF-W' : config_sbsef,
+    'SDR' : config_sdr,
+    'SDR/A' : config_sdr,
+    'SDR-W' : config_sdr,
+    'SDR-A' : config_sdr,
+    'PROXY VOTING RECORD' : config_proxyvotingrecord,
+}
+
+# process_ex102_abs will need to be done later
+# process d
+# 144
+
+def seperate_data(tables_dict, data):
+    data_list = []
+
+    for table_name, config in tables_dict.items():
+        path = config['path']
+
+        # Extract data at the specific path
+        table_data = safe_get(data, path.split('.'))
+        if not table_data:
+            continue
+
+        # Find sub-paths to exclude (only for paths that have sub-tables)
+        sub_paths = [other_path for other_path in [c['path'] for c in tables_dict.values()]
+                     if other_path.startswith(path + '.')]
+
+        # Only apply exclusions if this path has sub-paths AND the data is a dict
+        if sub_paths and isinstance(table_data, dict):
+            exclude_keys = {sp.split('.')[len(path.split('.'))] for sp in sub_paths}
+            table_data = {k: v for k, v in table_data.items() if k not in exclude_keys}
+
+        data_list.append((table_name, table_data))
+
+    return data_list
+
+def apply_mapping(flattened_data, mapping_dict, accession):
+    """Apply mapping to flattened data and add accession"""
+
+    # Handle case where flattened_data is a list of dictionaries
+    if isinstance(flattened_data, list):
+        results = []
+        for data_dict in flattened_data:
+            results.append(apply_mapping(data_dict, mapping_dict, accession))
+        return results
+
+    # Original logic for single dictionary
+    ordered_row = {'accession': accession}
+
+    # Apply mapping for all other keys
+    for old_key, new_key in mapping_dict.items():
+        if old_key in flattened_data:
+            ordered_row[new_key] = flattened_data.pop(old_key)
+        else:
+            ordered_row[new_key] = None
+
+    # Add any remaining keys that weren't in the mapping
+    for key, value in flattened_data.items():
+        ordered_row[key] = value
+
+    return ordered_row
+
+# should have table type, accession, data
+class Table:
+    def __init__(self,data,name,accession):
+        self.data = data
+        self.name = name
+        self.accession = accession
+
+
+class Tables():
+    def __init__(self,document_type,accession,data):
+        self.document_type = document_type
+        self.accession = accession
+        self.data = data
+
+        # to fill in
+        self.tables = []
+
+        self.parse_tables()
+
+    def parse_tables(self):
+        # first select dict
+
+        try:
+            tables_dict = all_tables_dict[self.document_type]
+        except:
+            raise ValueError(f"Table not found: {self.document_type}.")
+
+        # now get the dicts from the data
+        data_dicts = seperate_data(tables_dict,self.data)
+
+        # now flatten
+        data_dicts = [(x,flatten_dict(y)) for x,y in data_dicts]
+
+        for table_name, flattened_data in data_dicts:
+            mapping_dict = tables_dict[table_name]['mapping']
+            mapped_data = apply_mapping(flattened_data, mapping_dict, self.accession)
+            self.tables.append(Table(mapped_data, table_name, self.accession))
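The pipeline in this new module is: look up the per-form config for the document type, pull each configured sub-dict out of the parsed data with seperate_data, flatten it with flatten_dict, then rename keys and prepend the accession with apply_mapping. A small sketch against a hand-built dict whose shape mirrors what Document.data holds for a 25-NSE filing; the field values are invented:

    from datamule.document.tables.tables import Tables

    # Stand-in for Document.data on a 25-NSE filing.
    data = {
        'notificationOfRemoval': {
            'descriptionClassSecurity': 'Common Stock',
            'issuerTradingSymbol': 'XMPL',
        }
    }

    tables = Tables(document_type='25-NSE',
                    accession='0000000000-25-000001', data=data)

    for table in tables.tables:
        # Each Table carries the mapped row(s), the config key ('25nse'), and the accession.
        print(table.name, table.data)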
datamule/document/{mappings/thirteenfhr.py → tables/tables_13fhr.py}
RENAMED
@@ -1,7 +1,4 @@
-
-
-# 13F-HR (Institutional Investment Manager Holdings) mapping
-thirteenfhr_dict = {
+mapping_13fhr = {
     # Cover Page Mapping
     'formData_coverPage_reportCalendarOrQuarter': 'reportCalendarOrQuarter',
     'formData_coverPage_filingManager_name': 'filingManagerName',
@@ -69,4 +66,11 @@ thirteenfhr_dict = {
     'schemaLocation': 'schemaLocation',
     'schemaVersion': 'schemaVersion',
     'accession': 'accessionNumber'
+}
+
+config_13fhr = {
+    '13fhr': {
+        'path': 'edgarSubmission',
+        'mapping': mapping_13fhr
+    }
 }
datamule/document/{mappings/twentyfivense.py → tables/tables_25nse.py}
RENAMED
@@ -1,5 +1,3 @@
-# Ready for mass testing
-# 25-NSE mapping
 twentyfive_nse_dict = {
     'descriptionClassSecurity': 'securityDescription',
     'exchange_cik': 'exchangeCik',
@@ -19,4 +17,11 @@ twentyfive_nse_dict = {
     'signatureData_signatureDate': 'signatureDate',
     'signatureData_signatureName': 'signatureName',
     'signatureData_signatureTitle': 'signatureTitle'
+}
+
+config_25nse = {
+    '25nse': {
+        'path': 'notificationOfRemoval',
+        'mapping': twentyfive_nse_dict
+    }
 }
datamule/document/{mappings/information_table.py → tables/tables_informationtable.py}
RENAMED
@@ -1,6 +1,3 @@
-# Ready for mass testing
-
-# Information Table (13F-HR Securities) mapping
 information_table_dict = {
     'nameOfIssuer': 'nameOfIssuer',
     'titleOfClass': 'titleOfClass',
@@ -33,3 +30,10 @@ information_table_dict = {
     'infoTable_votingAuthority_Sole': 'infoTableVotingAuthoritySole',
     'schemaLocation': 'schemaLocation'
 }
+
+config_information_table = {
+    'informationtable': {
+        'path': 'informationTable.infoTable',
+        'mapping': information_table_dict
+    }
+}
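Because 'informationTable.infoTable' usually resolves to a list of holdings, this config exercises the list branches of flatten_dict and apply_mapping: each entry becomes its own mapped row. A sketch with two made-up holdings:

    from datamule.document.tables.tables import Tables

    # Stand-in for Document.data on an INFORMATION TABLE document; values are invented.
    data = {
        'informationTable': {
            'infoTable': [
                {'nameOfIssuer': 'EXAMPLE CORP', 'titleOfClass': 'COM', 'value': 1000},
                {'nameOfIssuer': 'SAMPLE INC', 'titleOfClass': 'COM', 'value': 2500},
            ]
        }
    }

    tables = Tables(document_type='INFORMATION TABLE',
                    accession='0000000000-25-000001', data=data)

    rows = tables.tables[0].data   # one mapped row per infoTable entry
    print(rows[0]['accession'], rows[0]['nameOfIssuer'], rows[1]['nameOfIssuer'])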
datamule/document/{mappings/npx.py → tables/tables_npx.py}
RENAMED
@@ -82,4 +82,11 @@ npx_dict = {
     'headerData_seriesClass_reportSeriesClass_rptSeriesClassInfo_includeAllClassesFlag': 'includeAllClassesFlag',
     'headerData_seriesClass_reportSeriesClass_rptSeriesClassInfo_seriesId': 'headerSeriesId',
     'headerData_submissionType': 'submissionType'
+}
+
+config_npx = {
+    'npx': {
+        'path': 'edgarSubmission',
+        'mapping': npx_dict
+    }
 }
datamule/document/{mappings/ownership.py → tables/tables_ownership.py}
RENAMED
@@ -1,7 +1,5 @@
-# Ready for mass testing
-
 # Non-derivative transaction ownership mapping
-non_derivative_transaction_ownership_dict = {
+non_derivative_transaction_mapping = {
     'securityTitle_value': 'securityTitle',
     'securityTitle_footnote': 'securityTitleFootnote',
     'transactionDate_value': 'transactionDate',
@@ -31,7 +29,7 @@ non_derivative_transaction_ownership_dict = {
 }
 
 # Derivative transaction ownership mapping
-derivative_transaction_ownership_dict = {
+derivative_transaction_mapping = {
     'securityTitle_value': 'securityTitle',
     'securityTitle_footnote': 'securityTitleFootnote',
     'conversionOrExercisePrice_value': 'conversionOrExercisePrice',
@@ -75,7 +73,7 @@ derivative_transaction_ownership_dict = {
 }
 
 # Non-derivative holding ownership mapping
-non_derivative_holding_ownership_dict = {
+non_derivative_holding_mapping = {
     'securityTitle_value': 'securityTitle',
     'securityTitle_footnote': 'securityTitleFootnote',
     'postTransactionAmounts_sharesOwnedFollowingTransaction_value': 'sharesOwnedFollowingTransaction',
@@ -91,7 +89,7 @@ non_derivative_holding_ownership_dict = {
 }
 
 # Derivative holding ownership mapping
-derivative_holding_ownership_dict = {
+derivative_holding_mapping = {
     'securityTitle_value': 'securityTitle',
     'securityTitle_footnote': 'securityTitleFootnote',
     'conversionOrExercisePrice_value': 'conversionOrExercisePrice',
@@ -119,7 +117,7 @@ derivative_holding_ownership_dict = {
 }
 
 # Reporting owner ownership mapping
-reporting_owner_ownership_dict = {
+reporting_owner_mapping = {
     'reportingOwnerAddress_rptOwnerCity': 'rptOwnerCity',
     'reportingOwnerAddress_rptOwnerState': 'rptOwnerState',
     'reportingOwnerAddress_rptOwnerStateDescription': 'rptOwnerStateDescription',
@@ -137,7 +135,7 @@ reporting_owner_ownership_dict = {
 }
 
 # Metadata ownership mapping
-metadata_ownership_dict = {
+metadata_mapping = {
     'periodOfReport': 'periodOfReport',
     'issuer_issuerCik': 'issuerCik',
     'issuer_issuerName': 'issuerName',
@@ -156,8 +154,38 @@ metadata_ownership_dict = {
 }
 
 # Owner signature ownership mapping
-owner_signature_ownership_dict = {
+owner_signature_mapping = {
     'signatureName': 'signatureName',
     'signatureDate': 'signatureDate'
 }
 
+config_ownership = {
+    'non_derivative_holding_ownership': {
+        'path': 'ownershipDocument.nonDerivativeTable.nonDerivativeHolding',
+        'mapping': non_derivative_holding_mapping
+    },
+    'non_derivative_transaction_ownership': {
+        'path': 'ownershipDocument.nonDerivativeTable.nonDerivativeTransaction',
+        'mapping': non_derivative_transaction_mapping
+    },
+    'derivative_holding_ownership': {
+        'path': 'ownershipDocument.derivativeTable.derivativeHolding',
+        'mapping': derivative_holding_mapping
+    },
+    'derivative_transaction_ownership': {
+        'path': 'ownershipDocument.derivativeTable.derivativeTransaction',
+        'mapping': derivative_transaction_mapping
+    },
+    'reporting_owner_ownership': {
+        'path': 'ownershipDocument.reportingOwner',
+        'mapping': reporting_owner_mapping
+    },
+    'owner_signature_ownership': {
+        'path': 'ownershipDocument.ownerSignature',
+        'mapping': owner_signature_mapping
+    },
+    'metadata_ownership': {
+        'path': 'ownershipDocument',
+        'mapping': metadata_mapping
+    }
+}
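Note that config_ownership nests several paths under 'ownershipDocument'. seperate_data detects that the 'metadata_ownership' path has sub-paths such as 'ownershipDocument.reportingOwner' and strips those keys from the metadata table, so the same data is not emitted twice. A sketch of just that exclusion step, run against a trimmed stand-in for a parsed Form 4 (field values are placeholders):

    from datamule.document.tables.tables import seperate_data
    from datamule.document.tables.tables_ownership import config_ownership

    data = {
        'ownershipDocument': {
            'periodOfReport': '2025-01-01',
            'issuer': {'issuerCik': '0000123456'},
            'reportingOwner': {'reportingOwnerId': {'rptOwnerCik': '0000654321'}},
        }
    }

    for table_name, table_data in seperate_data(config_ownership, data):
        print(table_name, sorted(table_data))
    # reporting_owner_ownership -> only the reportingOwner sub-dict
    # metadata_ownership        -> periodOfReport and issuer, with reportingOwner excluded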
datamule/document/{mappings/proxy_voting_record.py → tables/tables_proxyvotingrecord.py}
RENAMED
@@ -14,4 +14,11 @@ proxy_voting_record_dict = {
     'isin': 'isin',
     'voteSource': 'voteSource',
     'voteSeries': 'voteSeries'
+}
+
+config_proxyvotingrecord = {
+    'proxyvotingrecord': {
+        'path': 'proxyVoteTable.proxyTable',
+        'mapping': proxy_voting_record_dict
+    }
 }
datamule/document/tables/utils.py
ADDED
@@ -0,0 +1,26 @@
+def safe_get(d, keys, default=None):
+    """Safely access nested dictionary keys"""
+    current = d
+    for key in keys:
+        if isinstance(current, dict) and key in current:
+            current = current[key]
+        else:
+            return default
+    return current
+
+# may modify this in the future to better account for lsits
+def flatten_dict(d, parent_key=''):
+    items = {}
+
+    if isinstance(d, list):
+        return [flatten_dict(item) for item in d]
+
+    for k, v in d.items():
+        new_key = f"{parent_key}_{k}" if parent_key else k
+
+        if isinstance(v, dict):
+            items.update(flatten_dict(v, new_key))
+        else:
+            items[new_key] = str(v)
+
+    return items
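These two helpers back the Tables pipeline: safe_get walks a list of nested keys without raising, and flatten_dict collapses nested dicts into underscore-joined keys, recursing into lists and stringifying leaf values. For example:

    from datamule.document.tables.utils import safe_get, flatten_dict

    record = {'infoTable': {'shrsOrPrnAmt': {'sshPrnamt': 100, 'sshPrnamtType': 'SH'}}}

    safe_get(record, ['infoTable', 'shrsOrPrnAmt', 'sshPrnamt'])   # -> 100
    safe_get(record, ['infoTable', 'missing'], default=None)       # -> None

    flatten_dict(record)
    # -> {'infoTable_shrsOrPrnAmt_sshPrnamt': '100',
    #     'infoTable_shrsOrPrnAmt_sshPrnamtType': 'SH'}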
datamule/submission.py
CHANGED
@@ -20,8 +20,8 @@ class Submission:
 
 
         # declare vars to be filled later
-        self.
-        self.
+        self._xbrl = None
+        self._fundamentals_cache = {}
 
         # Validate parameters
         param_count = sum(x is not None for x in [path, sgml_content, batch_tar_path,url])
@@ -248,31 +248,46 @@ class Submission:
             yield self._load_document_by_index(idx)
 
     def parse_xbrl(self):
-        if self.
+        if self._xbrl:
             return
 
         for idx, doc in enumerate(self.metadata.content['documents']):
             if doc['type'] in ['EX-100.INS','EX-101.INS']:
                 document = self._load_document_by_index(idx)
-                self.
+                self._xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
                 return
 
             if doc['filename'].endswith('_htm.xml'):
                 document = self._load_document_by_index(idx)
-                self.
+                self._xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
                 return
 
+    @property
+    def xbrl(self):
+        if self._xbrl is None:
+            self.parse_xbrl()
+        return self._xbrl
 
-    def parse_fundamentals(self,categories=None):
-
+    def parse_fundamentals(self, categories=None):
+        # Create cache key based on categories
+        categories_key = tuple(sorted(categories)) if categories else 'all'
+
+        # Return cached result if available
+        if categories_key in self._fundamentals_cache:
+            return self._fundamentals_cache[categories_key]
+
+        # Use the property to trigger XBRL parsing if needed
+        xbrl_data = self.xbrl
 
-        # if no xbrl return
-        if not
-
+        # if no xbrl return None
+        if not xbrl_data:
+            self._fundamentals_cache[categories_key] = None
+            return None
+
         # Transform XBRL records into the format needed by construct_fundamentals
         xbrl = []
 
-        for xbrl_record in
+        for xbrl_record in xbrl_data:
             try:
                 # Extract basic fields
                 value = xbrl_record.get('_val', None)
@@ -322,6 +337,26 @@ class Submission:
                                               end_date_key='period_end_date',
                                               categories=categories)
 
-
+        # Cache the result
+        self._fundamentals_cache[categories_key] = fundamentals
+        return fundamentals
+
+    @property
+    def fundamentals(self):
+        """Get all fundamental data"""
+        return self.parse_fundamentals(categories=None)
+
+    def __getattr__(self, name):
+        # Check if it's a fundamentals property request
+        if name.endswith('_fundamentals'):
+            category = name.replace('_fundamentals', '')
+            return self.parse_fundamentals(categories=[category])
 
+        # For any other unknown attribute, try it as a fundamentals category
+        # Let parse_fundamentals handle whether it's valid or not
+        result = self.parse_fundamentals(categories=[name])
+        if result is not None:
+            return result
 
+        # Only raise AttributeError if parse_fundamentals returns None/empty
+        raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
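On the Submission side, XBRL and fundamentals now sit behind a cached xbrl property, a parse_fundamentals cache keyed by the sorted category tuple, and a __getattr__ hook that resolves <category>_fundamentals attributes dynamically. A hedged sketch: the top-level import, the local path argument, and the 'balance_sheet' category name are assumptions, not confirmed by this diff:

    from datamule import Submission  # assuming Submission is re-exported at package level

    sub = Submission(path='filings/0000320193-25-000001')  # hypothetical local submission dir

    records = sub.xbrl                   # parses inline XBRL once, then reuses sub._xbrl
    everything = sub.fundamentals        # parse_fundamentals(categories=None), cached under 'all'
    bs = sub.balance_sheet_fundamentals  # __getattr__ -> parse_fundamentals(categories=['balance_sheet'])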
{datamule-2.0.5.dist-info → datamule-2.0.6.dist-info}/RECORD
CHANGED
@@ -6,7 +6,7 @@ datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,9
 datamule/portfolio.py,sha256=YViG1JgJ9SFhg8N3tOOhBI8oc6Pmi2vwnHeHmlkC_5U,12119
 datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
 datamule/sheet.py,sha256=Ws_YRtpvewLVioarngVMe8cgG_sp11MP9_goGbRaiWE,23952
-datamule/submission.py,sha256=
+datamule/submission.py,sha256=tX1r-KeytnvZRcyeVeBVOS9KbT0baOs7eOgE9qGPU0g,15918
 datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
 datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
@@ -14,33 +14,18 @@ datamule/datamule/datamule_mysql_rds.py,sha256=P5vL3RJnOwLz25hPKuoYmxSX7XeDe83YE
 datamule/datamule/downloader.py,sha256=aTyVUuIwynPtHB0Z9BvCasy9Ao5wfHptNAsjN-7yDTk,18525
 datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/document.py,sha256=
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/
-datamule/document/mappings/information_table.py,sha256=6l2Via728I59RS0y9Pit37NoOSAbaT-vclArYxU1vtY,1585
-datamule/document/mappings/nmfp.py,sha256=WuTyM1SkBiiLVAHqFF4DTZ_8AvsIuonT2w7pwYDPTDw,17767
-datamule/document/mappings/npx.py,sha256=xwruBueC09kfWhXV3fNUnQWYwCWrdrhQoVO3cKfPTO4,6556
-datamule/document/mappings/onefourtyfour.py,sha256=_-w9h6wGINGH5pQqQvPrd0cgB5QfCtPG5M40ewf_w8Q,2604
-datamule/document/mappings/ownership.py,sha256=piD9vs4WFrB4yvp6c0pT5bibLKXgsM7hpnBUzaY0Xxs,10155
-datamule/document/mappings/proxy_voting_record.py,sha256=tSqLH065EOUq7U80P5GP1JBqipmAiqniPpP3E4adA1A,721
-datamule/document/mappings/sbs.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings/sbsef.py,sha256=Zw58rbYcnitynk1mh9g1jDrCfqmFlY60OEjPM6p9iF0,534
-datamule/document/mappings/schedule13.py,sha256=lh9sukpEte514Gid77Nz9zh3uBEFZEemrZ2Uau0qsgk,6295
-datamule/document/mappings/sdr.py,sha256=UekqZId5PFMMWRAJSaPvCpN4c1Hx-SLAQPEN8GW_Gbg,4829
-datamule/document/mappings/submission_metadata.py,sha256=pi1eW-tnoAQ6y3laRI29Op80E9BPqqmcfe45owKYStw,271
-datamule/document/mappings/ta.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings/thirteenfhr.py,sha256=XpYRIMPZnGLfEE4TqBI0BPXbyuq0xf3hut1fePOF6kU,4250
-datamule/document/mappings/twentyfivense.py,sha256=lKyj0ZBhkHX9gQJMTUPrQlxYFg3k-aBnWqtoS5bujZM,905
-datamule/document/mappings/twentyfourf2nt.py,sha256=Q7RPT3JgJHjYdjMuaSyAxclt6QPT_LgCQloxp-ByDuI,4118
+datamule/document/document.py,sha256=SMsofx2xmO92pGEM0lQt9cm7lxfuGT8WqcFvL4rxOVk,14406
+datamule/document/tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamule/document/tables/tables.py,sha256=qGGMG2p85AKfS5uBY96m77qREyTtsA7BqfqjELT-x0U,4273
+datamule/document/tables/tables_13fhr.py,sha256=-6tWcaTyNsb0XuW0WMBrYir9Zn1wLZL0laKxRYfPNyg,4265
+datamule/document/tables/tables_25nse.py,sha256=kpoOcIpra6i3Wx_6pUCj1fkx0wUbMhx7pc8yUkrBJb4,980
+datamule/document/tables/tables_informationtable.py,sha256=L7qSNTiyr45iIa8z4gm7pKw_meHjIqMFc1SF5Y6XAGg,1658
+datamule/document/tables/tables_npx.py,sha256=tZDBAonAQWLsgecVK_OwIgNcUJhuV5L2gkTSNbXAgNE,6652
+datamule/document/tables/tables_ownership.py,sha256=pRoFFRGLWp8gKAAvvUbVRxIU2xDFAQhwi9bgwddsT8A,11185
+datamule/document/tables/tables_proxyvotingrecord.py,sha256=t5h6iQtlg28Rqt1NJ1FDHCFHFjj_4_aelnFbWLtQcs0,875
+datamule/document/tables/tables_sbsef.py,sha256=X6VKVnAdWxn2TgRmaAd1WWlxPhcLPQ-53s0qDokkPI0,635
+datamule/document/tables/tables_sdr.py,sha256=BwHRJvtijiYvNJ2lIc_30kct6VEmLimIzX28JjZBBqo,4924
+datamule/document/tables/utils.py,sha256=2-X_1NsiWj_XsD9djxCXwTeIVlg-ip78gG11xACJiDs,738
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/html_mapping_dicts.py,sha256=G2PWB__FNg4VH9iFJFkflM0u-qOEtk67IWtGoqesb0k,5388
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
@@ -65,7 +50,7 @@ datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,180
 datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
 datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
-datamule-2.0.
-datamule-2.0.
-datamule-2.0.
-datamule-2.0.
+datamule-2.0.6.dist-info/METADATA,sha256=j3-QjEqEptbptQCKn52ImVShwyjvurA0v-Bqt4aXel8,560
+datamule-2.0.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-2.0.6.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-2.0.6.dist-info/RECORD,,