datamule 1.2.5__py3-none-any.whl → 1.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .index import Index
 from .package_updater import PackageUpdater
 
 
+
 # Keep the notebook environment setup
 def _is_notebook_env():
     """Check if the code is running in a Jupyter or Colab environment."""
@@ -118,10 +118,11 @@ class Document:
         # will deprecate this when we add html2dict
         elif self.extension in ['.htm', '.html','.txt']:
 
-            if self.type == '10-K':
-                mapping_dict = dict_10k
-            elif self.type == '10-Q':
+
+            if self.type == '10-Q':
                 mapping_dict = dict_10q
+            elif self.type == '10-K':
+                mapping_dict = dict_10k
             elif self.type == '8-K':
                 mapping_dict = dict_8k
             elif self.type == 'SC 13D':
@@ -140,18 +141,21 @@ class Document:
         with open(output_filename, 'w',encoding='utf-8') as f:
             json.dump(self.data, f, indent=2)
 
-    def to_tabular(self):
-        if self.extension != '.xml':
+    def tables(self):
+        if self.type == 'submission_metadata':
+            return process_tabular_data(self)
+        elif self.extension != '.xml':
             return []
-        self.parse()
-        return process_tabular_data(self)
+        else:
+            self.parse()
+            return process_tabular_data(self)
 
 
     def write_csv(self, output_folder):
         output_folder = Path(output_folder)
         output_folder.mkdir(exist_ok=True)
 
-        tables = self.to_tabular()
+        tables = self.tables()
 
         if not tables:
             return
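A standalone sketch of the new dispatch order in tables(): the submission_metadata branch is checked before the extension guard, so metadata documents (which are not .xml files) can still yield tables. The sample types below are illustrative.

# Runnable illustration of the branch order in Document.tables() above.
def tables_dispatch(doc_type, extension):
    if doc_type == 'submission_metadata':
        return 'process_tabular_data'   # handled regardless of extension
    elif extension != '.xml':
        return 'no tables'              # non-XML documents return []
    else:
        return 'parse, then process_tabular_data'

print(tables_dispatch('submission_metadata', '.txt'))  # process_tabular_data
print(tables_dispatch('10-K', '.htm'))                 # no tables
print(tables_dispatch('4', '.xml'))                    # parse, then process_tabular_data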
@@ -0,0 +1,63 @@
+# Assets dictionary mapping
+assets_dict_ex102_abs = {
+    'assetNumber': 'assetNumber',
+    'DefeasedStatusCode': 'DefeasedStatusCode',
+    'defeasanceOptionStartDate': 'defeasanceOptionStartDate',
+    'mostRecentDebtServiceCoverageNetOperatingIncomePercentage': 'mostRecentDebtServiceCoverageNetOperatingIncomePercentage',
+    'mostRecentDebtServiceAmount': 'mostRecentDebtServiceAmount',
+    'debtServiceCoverageSecuritizationCode': 'debtServiceCoverageSecuritizationCode',
+    'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage': 'debtServiceCoverageNetOperatingIncomeSecuritizationPercentage',
+    'valuationSecuritizationDate': 'valuationSecuritizationDate',
+    'physicalOccupancySecuritizationPercentage': 'physicalOccupancySecuritizationPercentage',
+    'revenueSecuritizationAmount': 'revenueSecuritizationAmount',
+    'valuationSourceSecuritizationCode': 'valuationSourceSecuritizationCode',
+    'financialsSecuritizationDate': 'financialsSecuritizationDate',
+    'mostRecentNetCashFlowAmount': 'mostRecentNetCashFlowAmount',
+    'operatingExpensesAmount': 'operatingExpensesAmount',
+    'operatingExpensesSecuritizationAmount': 'operatingExpensesSecuritizationAmount',
+    'netOperatingIncomeNetCashFlowSecuritizationCode': 'netOperatingIncomeNetCashFlowSecuritizationCode',
+    'mostRecentValuationSourceCode': 'mostRecentValuationSourceCode',
+    'mostRecentDebtServiceCoverageNetCashFlowpercentage': 'mostRecentDebtServiceCoverageNetCashFlowpercentage',
+    'debtServiceCoverageNetCashFlowSecuritizationPercentage': 'debtServiceCoverageNetCashFlowSecuritizationPercentage',
+    'mostRecentAnnualLeaseRolloverReviewDate': 'mostRecentAnnualLeaseRolloverReviewDate',
+    'mostRecentRevenueAmount': 'mostRecentRevenueAmount',
+    'mostRecentPhysicalOccupancyPercentage': 'mostRecentPhysicalOccupancyPercentage',
+    'mostRecentNetOperatingIncomeAmount': 'mostRecentNetOperatingIncomeAmount',
+    'netOperatingIncomeSecuritizationAmount': 'netOperatingIncomeSecuritizationAmount',
+    'netOperatingIncomeNetCashFlowCode': 'netOperatingIncomeNetCashFlowCode',
+    'mostRecentFinancialsStartDate': 'mostRecentFinancialsStartDate',
+    'mostRecentFinancialsEndDate': 'mostRecentFinancialsEndDate',
+    'accession': 'accession',
+    'valuationSecuritizationAmount': 'valuationSecuritizationAmount',
+    'mostRecentValuationDate': 'mostRecentValuationDate',
+    'mostRecentValuationAmount': 'mostRecentValuationAmount',
+    'mostRecentDebtServiceCoverageCode': 'mostRecentDebtServiceCoverageCode',
+    'netCashFlowFlowSecuritizationAmount': 'netCashFlowFlowSecuritizationAmount'
+}
+
+# Properties dictionary mapping
+properties_dict_ex102_abs = {
+    'unitsBedsRoomsNumber': 'unitsBedsRoomsNumber',
+    'propertyCounty': 'propertyCounty',
+    'squareFeetLargestTenantNumber': 'squareFeetLargestTenantNumber',
+    'netRentableSquareFeetNumber': 'netRentableSquareFeetNumber',
+    'leaseExpirationThirdLargestTenantDate': 'leaseExpirationThirdLargestTenantDate',
+    'leaseExpirationLargestTenantDate': 'leaseExpirationLargestTenantDate',
+    'propertyZip': 'propertyZip',
+    'squareFeetThirdLargestTenantNumber': 'squareFeetThirdLargestTenantNumber',
+    'propertyStatusCode': 'propertyStatusCode',
+    'propertyState': 'propertyState',
+    'yearBuiltNumber': 'yearBuiltNumber',
+    'propertyCity': 'propertyCity',
+    'propertyName': 'propertyName',
+    'propertyAddress': 'propertyAddress',
+    'yearLastRenovated': 'yearLastRenovated',
+    'leaseExpirationSecondLargestTenantDate': 'leaseExpirationSecondLargestTenantDate',
+    'thirdLargestTenant': 'thirdLargestTenant',
+    'unitsBedsRoomsSecuritizationNumber': 'unitsBedsRoomsSecuritizationNumber',
+    'propertyTypeCode': 'propertyTypeCode',
+    'largestTenant': 'largestTenant',
+    'squareFeetSecondLargestTenantNumber': 'squareFeetSecondLargestTenantNumber',
+    'netRentableSquareFeetSecuritizationNumber': 'netRentableSquareFeetSecuritizationNumber',
+    'secondLargestTenant': 'secondLargestTenant'
+}
@@ -1,3 +1,4 @@
+# Ready for mass testing
 
 # Information Table (13F-HR Securities) mapping
 information_table_dict = {
@@ -1,4 +1,4 @@
-# Mapping dictionaries for SEC filing table types based on actual field occurrences
+# Ready for mass testing
 
 # Non-derivative transaction ownership mapping
 non_derivative_transaction_ownership_dict = {
@@ -1 +1,17 @@
-proxy_voting_record_dict = {}
+proxy_voting_record_dict = {
+    'meetingDate': 'meetingDate',
+    'accession': 'accessionNumber',
+    'vote_voteRecord_managementRecommendation': 'managementRecommendation',
+    'sharesVoted': 'sharesVoted',  # Top-level sharesVoted
+    'vote_voteRecord_howVoted': 'howVoted',
+    'sharesOnLoan': 'sharesOnLoan',
+    'cusip': 'cusip',
+    'issuerName': 'issuerName',
+    'voteCategories_voteCategory_categoryType': 'categoryType',
+    'voteDescription': 'voteDescription',
+    'voteManager_otherManagers_otherManager': 'otherManager',
+    'vote_voteRecord_sharesVoted': 'recordSharesVoted',  # To distinguish from top-level sharesVoted
+    'isin': 'isin',
+    'voteSource': 'voteSource',
+    'voteSeries': 'voteSeries'
+}
@@ -0,0 +1,9 @@
+# Note: 'submission_metadata' is my designation, not the SEC's, for the header of the Submission tag
+
+document_submission_metadata_dict = {
+    'accession': 'accession',
+    'type': 'type',
+    'sequence': 'sequence',
+    'filename': 'filename',
+    'description': 'description'
+}
@@ -1,5 +1,72 @@
+# Ready for mass testing
 
 # 13F-HR (Institutional Investment Manager Holdings) mapping
-thirteenfhr_dict = {
-
-}
+thirteenfhr_dict = {
+    # Cover Page Mapping
+    'formData_coverPage_reportCalendarOrQuarter': 'reportCalendarOrQuarter',
+    'formData_coverPage_filingManager_name': 'filingManagerName',
+    'formData_coverPage_filingManager_address_street1': 'filingManagerStreet1',
+    'formData_coverPage_filingManager_address_street2': 'filingManagerStreet2',
+    'formData_coverPage_filingManager_address_city': 'filingManagerCity',
+    'formData_coverPage_filingManager_address_stateOrCountry': 'filingManagerStateOrCountry',
+    'formData_coverPage_filingManager_address_zipCode': 'filingManagerZipCode',
+    'formData_coverPage_crdNumber': 'crdNumber',
+    'formData_coverPage_secFileNumber': 'secFileNumber',
+    'formData_coverPage_form13FFileNumber': 'form13FFileNumber',
+    'formData_coverPage_reportType': 'reportType',
+    'formData_coverPage_isAmendment': 'isAmendment',
+    'formData_coverPage_amendmentNo': 'amendmentNo',
+    'formData_coverPage_amendmentInfo_amendmentType': 'amendmentType',
+    'formData_coverPage_amendmentInfo_confDeniedExpired': 'confDeniedExpired',
+    'formData_coverPage_additionalInformation': 'additionalInformation',
+    'formData_coverPage_provideInfoForInstruction5': 'provideInfoForInstruction5',
+
+    # Other Managers Info Mapping
+    'formData_coverPage_otherManagersInfo_otherManager': 'otherManager',
+    'formData_coverPage_otherManagersInfo_otherManager_cik': 'otherManagerCik',
+    'formData_coverPage_otherManagersInfo_otherManager_name': 'otherManagerName',
+    'formData_coverPage_otherManagersInfo_otherManager_crdNumber': 'otherManagerCrdNumber',
+    'formData_coverPage_otherManagersInfo_otherManager_secFileNumber': 'otherManagerSecFileNumber',
+    'formData_coverPage_otherManagersInfo_otherManager_form13FFileNumber': 'otherManagerForm13FFileNumber',
+
+    # Summary Page Mapping
+    'formData_summaryPage_isConfidentialOmitted': 'isConfidentialOmitted',
+    'formData_summaryPage_otherIncludedManagersCount': 'otherIncludedManagersCount',
+    'formData_summaryPage_tableEntryTotal': 'tableEntryTotal',
+    'formData_summaryPage_tableValueTotal': 'tableValueTotal',
+
+    # Other Managers 2 Info Mapping
+    'formData_summaryPage_otherManagers2Info_otherManager2': 'otherManager2',
+    'formData_summaryPage_otherManagers2Info_otherManager2_sequenceNumber': 'otherManager2SequenceNumber',
+    'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_cik': 'otherManager2Cik',
+    'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_name': 'otherManager2Name',
+    'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_crdNumber': 'otherManager2CrdNumber',
+    'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_secFileNumber': 'otherManager2SecFileNumber',
+    'formData_summaryPage_otherManagers2Info_otherManager2_otherManager_form13FFileNumber': 'otherManager2Form13FFileNumber',
+
+    # Signature Block Mapping
+    'formData_signatureBlock_name': 'signatureName',
+    'formData_signatureBlock_title': 'signatureTitle',
+    'formData_signatureBlock_phone': 'signaturePhone',
+    'formData_signatureBlock_signature': 'signature',
+    'formData_signatureBlock_city': 'signatureCity',
+    'formData_signatureBlock_stateOrCountry': 'signatureStateOrCountry',
+    'formData_signatureBlock_signatureDate': 'signatureDate',
+
+    # Header Data Mapping
+    'headerData_filerInfo_periodOfReport': 'periodOfReport',
+    'headerData_filerInfo_filer_fileNumber': 'filerFileNumber',
+    'headerData_filerInfo_filer_credentials_cik': 'filerCik',
+    'headerData_filerInfo_filer_credentials_ccc': 'filerCcc',
+    'headerData_filerInfo_flags_confirmingCopyFlag': 'confirmingCopyFlag',
+    'headerData_filerInfo_flags_returnCopyFlag': 'returnCopyFlag',
+    'headerData_filerInfo_flags_overrideInternetFlag': 'overrideInternetFlag',
+    'headerData_filerInfo_denovoRequest': 'denovoRequest',
+    'headerData_filerInfo_liveTestFlag': 'liveTestFlag',
+    'headerData_submissionType': 'submissionType',
+
+    # Schema and Metadata Mapping
+    'schemaLocation': 'schemaLocation',
+    'schemaVersion': 'schemaVersion',
+    'accession': 'accessionNumber'
+}
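The keys on the left follow the flattened-XML convention used throughout these mappings: nested paths such as formData > coverPage > filingManager > name are assumed to be underscore-joined by the package's _flatten_dict helper (not shown in this diff) before renaming. A simplified sketch of the rename these dicts drive, using thirteenfhr_dict from above; Table.map_data applies additional ordering rules on top of this:

flat_row = {'formData_coverPage_filingManager_name': 'Example Capital LP',
            'formData_summaryPage_tableValueTotal': '123456'}
renamed = {thirteenfhr_dict.get(key, key): value for key, value in flat_row.items()}
print(renamed)  # {'filingManagerName': 'Example Capital LP', 'tableValueTotal': '123456'}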
@@ -1,3 +1,4 @@
+# Ready for mass testing
 # 25-NSE mapping
 twentyfive_nse_dict = {
     'descriptionClassSecurity': 'securityDescription',
@@ -17,6 +17,14 @@ def process_tabular_data(self):
         tables = process_13fhr(self.data, self.accession)
     elif self.type in ["INFORMATION TABLE"]:
         tables = process_information_table(self.data, self.accession)
+    elif self.type in ["25-NSE", "25-NSE/A"]:
+        tables = process_25nse(self.data, self.accession)
+    # complete mark:
+    elif self.type in ["N-PX","N-PX/A"]:
+        tables = process_npx(self.data, self.accession)
+    elif self.type in ["EX-102"]:
+        tables = process_ex102_abs(self.data, self.accession)
+
     elif self.type in ["SBSEF","SBSEF/A","SBSEF-V","SBSEF-W"]:
         tables = process_sbsef(self.data, self.accession)
     elif self.type in ["SDR","SDR/A","SDR-W","SDR-A"]:
@@ -33,8 +41,7 @@
         tables = process_144(self.data, self.accession)
     elif self.type in ["24F-2NT", "24F-2NT/A"]:
         tables = process_24f2nt(self.data, self.accession)
-    elif self.type in ["25-NSE", "25-NSE/A"]:
-        tables = process_25nse(self.data, self.accession)
+
     elif self.type in ["ATS-N", "ATS-N/A"]:
         tables = process_ats(self.data, self.accession)
     # elif self.type in ["C","C-W","C-U","C-U-W","C/A","C/A-W",
@@ -53,8 +60,7 @@
    #     tables = process_nmfp(self.data, self.accession)
    # elif self.type in ["NPORT-P","NPORT-P/A"]:
    #     tables = process_nportp(self.data, self.accession)
-    elif self.type in ["N-PX","N-PX/A"]:
-        tables = process_npx(self.data, self.accession)
+
    # elif self.type in ["TA-1","TA-1/A","TA-W","TA-2","TA-2/A"]:
    #     tables = process_ta(self.data, self.accession)
     elif self.type in ["X-17A-5","X-17A-5/A"]:
@@ -66,10 +72,11 @@
         tables = process_reg_a(self.data, self.accession)
    # elif self.type in ["SBSE","SBSE/A","SBSE-A","SBSE-A/A","SBSE-BD","SBSE-BD/A","SBSE-C","SBSE-W","SBSE-CCO-RPT","SBSE-CCO-RPT/A"]:
    #     tables = process_sbs(self.data, self.accession)
-    # elif self.type in ["EX-102"]:
-    #     tables = process_ex102_abs(self.data, self.accession)
+
     elif self.type == "PROXY VOTING RECORD":
         tables = process_proxy_voting_record(self.data, self.accession)
+    elif self.type == 'submission_metadata':
+        tables = process_submission_metadata(self.content, self.accession)
     else:
         warn(f"Processing for {self.type} is not implemented yet.")
         return []
@@ -583,13 +590,39 @@ def process_reg_a(data, accession):
 
     # return tables
 
-# def process_ex102_abs(data, accession):
-#     tables = []
-#     asset_data = safe_get(data, ['assetData'])
-#     if asset_data:
-#         tables.append(Table(_flatten_dict(asset_data), 'abs', accession))
-#     raise NotImplementedError("Need to implement the rest of the ABS processing")
-#     return tables
+def process_ex102_abs(data, accession):
+    tables = []
+    data = safe_get(data, ['assetData', 'assets'])
+
+    # Create assets list: all items without their 'property' field
+    assets = [{k: v for k, v in item.items() if k != 'property'} for item in data]
+
+    # Create properties list in a more vectorized way
+    properties = []
+
+    # Handle dictionary properties
+    properties.extend([
+        item['property'] | {'assetNumber': item['assetNumber']}
+        for item in data
+        if 'property' in item and isinstance(item['property'], dict)
+    ])
+
+    # Handle list properties - flatten in one operation
+    properties.extend([
+        prop | {'assetNumber': item['assetNumber']}
+        for item in data
+        if 'property' in item and isinstance(item['property'], list)
+        for prop in item['property']
+        if isinstance(prop, dict)
+    ])
+
+    if assets:
+        tables.append(Table(_flatten_dict(assets), 'assets_ex102_absee', accession))
+
+    if properties:
+        tables.append(Table(_flatten_dict(properties), 'properties_ex102_absee', accession))
+
+    return tables
 
 # def process_ma(data, accession):
 #     tables = []
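A self-contained sketch of the asset/property split above, with invented sample data; the dict union operator | requires Python 3.9+:

data = [
    {'assetNumber': 'A1', 'DefeasedStatusCode': 'N',
     'property': {'propertyName': 'Plaza'}},                               # dict-valued property
    {'assetNumber': 'A2',
     'property': [{'propertyName': 'Tower'}, {'propertyName': 'Annex'}]},  # list-valued property
]

# Assets: every item minus its nested 'property' field
assets = [{k: v for k, v in item.items() if k != 'property'} for item in data]

# Properties: each property dict tagged with its parent assetNumber
properties = [item['property'] | {'assetNumber': item['assetNumber']}
              for item in data if isinstance(item.get('property'), dict)]
properties += [prop | {'assetNumber': item['assetNumber']}
               for item in data if isinstance(item.get('property'), list)
               for prop in item['property'] if isinstance(prop, dict)]

print(assets)      # [{'assetNumber': 'A1', 'DefeasedStatusCode': 'N'}, {'assetNumber': 'A2'}]
print(properties)  # three property rows, each carrying its assetNumber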
@@ -601,4 +634,28 @@
#     raise NotImplementedError("Need to implement the rest of the MA processing")
 
 # def process_ncen(data, accession):
-#     raise NotImplementedError("Need to implement the N-CEN processing")
+#     raise NotImplementedError("Need to implement the N-CEN processing")
+
+# WIP
+# Note: going to pause this for now, as I don't have a great way of putting this in a csv.
+def process_submission_metadata(data, accession):
+    tables = []
+    document_data = safe_get(data, ['documents'])
+    if document_data:
+        tables.append(Table(_flatten_dict(document_data), 'document_submission_metadata', accession))
+
+    reporting_owner_data = safe_get(data, ['reporting-owner'])
+    if reporting_owner_data:
+        tables.append(Table(_flatten_dict(reporting_owner_data), 'reporting_owner_submission_metadata', accession))
+
+    issuer_data = safe_get(data, ['issuer'])
+    if issuer_data:
+        tables.append(Table(_flatten_dict(issuer_data), 'issuer_submission_metadata', accession))
+
+    # # construct metadata
+    # accession-number date-of-filing-date-change, depositor-cik effectiveness-date
+
+    # # other tables
+    # depositor, securitizer
+
+    return tables
@@ -18,7 +18,11 @@ from .mappings.thirteenfhr import *
 from .mappings.twentyfivense import *
 from .mappings.twentyfourf2nt import *
 from .mappings.information_table import *
+from .mappings.submission_metadata import *
+from .mappings.ex102_abs import *
 
+from pathlib import Path
+import csv
 # need to check if mappings correctly create new columns
 class Table():
     def __init__(self, data, type, accession):
@@ -27,11 +31,18 @@ class Table():
         self.type = type
         self.data = data
         self.accession = accession
-        self.columns = self.determine_columns()
+        self.columns = self.determine_columns_complete()
+
+    def determine_columns_complete(self):
+        if not self.data:
+            return []
+        return list(set().union(*(row.keys() for row in self.data)))
+
 
     def determine_columns(self):
         if len(self.data) == 0:
             return []
+
         return self.data[0].keys()
 
     def add_column(self, column_name, value):
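A quick standalone illustration of why determine_columns_complete replaces determine_columns: the old method read only the first row's keys, so any column that first appears in a later row was silently dropped from the header.

rows = [{'a': 1}, {'a': 2, 'b': 3}]
old_columns = list(rows[0].keys())                          # ['a'], misses 'b'
new_columns = list(set().union(*(r.keys() for r in rows)))  # ['a', 'b'] in arbitrary order
print(old_columns, sorted(new_columns))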
@@ -227,6 +238,15 @@ class Table():
             mapping_dict = item_9_24f2nt_dict
         elif self.type == 'signature_info_schedule_a':
             mapping_dict = signature_24f2nt_dict
+        # ABS
+        elif self.type == 'assets_ex102_absee':
+            mapping_dict = assets_dict_ex102_abs
+        elif self.type == 'properties_ex102_absee':
+            mapping_dict = properties_dict_ex102_abs
+        # submission metadata
+        elif self.type == 'document_submission_metadata':
+            mapping_dict = document_submission_metadata_dict
+
 
         else:
             mapping_dict = {}
@@ -245,9 +265,6 @@
             for old_key, new_key in mapping_dict.items():
                 if old_key in row:
                     ordered_row[new_key] = row.pop(old_key)
-                else:
-                    # if the old key is not present, set the new key to None
-                    ordered_row[new_key] = None
 
             # Then add any remaining keys that weren't in the mapping
             for key, value in row.items():
@@ -257,4 +274,30 @@
             row.clear()
             row.update(ordered_row)
 
-        self.determine_columns()
+        # Update the columns after mapping
+        columns = set(self.columns)
+        # remove the old columns that are now in the mapping
+        columns.difference_update(mapping_dict.keys())
+        # add the new columns from the mapping
+        columns.update(mapping_dict.values())
+        # add the accession column to the columns
+        columns.add('accession')
+
+        self.columns = list(columns)
+
+    def write_csv(self, output_file):
+        output_file = Path(output_file)
+        fieldnames = self.columns
+
+        # Check if the file already exists
+        if output_file.exists():
+            # Append to existing file without writing header
+            with open(output_file, 'a', newline='') as csvfile:
+                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
+                writer.writerows(self.data)
+        else:
+            # Create new file with header
+            with open(output_file, 'w', newline='') as csvfile:
+                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
+                writer.writeheader()
+                writer.writerows(self.data)
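A self-contained sketch of the write-or-append pattern used by Table.write_csv above: the header is written once on file creation, later calls append rows, and every field is quoted (csv.QUOTE_ALL). Note that csv.DictWriter fills fields missing from a row with empty strings.

import csv
from pathlib import Path

def write_rows(path, rows, fieldnames):
    path = Path(path)
    mode = 'a' if path.exists() else 'w'
    with open(path, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        if mode == 'w':
            writer.writeheader()  # header only on first write
        writer.writerows(rows)

write_rows('demo.csv', [{'a': 1, 'b': 2}], ['a', 'b'])
write_rows('demo.csv', [{'a': 3}], ['a', 'b'])  # appends; 'b' written as empty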
datamule/helper.py CHANGED
@@ -79,7 +79,16 @@ def _process_cik_and_metadata_filters(cik=None, ticker=None, **kwargs):
 
     # Convert ticker to CIK if provided
     if ticker is not None:
-        cik = get_cik_from_dataset('listed_filer_metadata', 'ticker', ticker)
+        if isinstance(ticker, str):
+            ticker = [ticker]
+
+        ciks_from_ticker = []
+        for t in ticker:
+            ciks = get_cik_from_dataset('listed_filer_metadata', 'ticker', t)
+            if ciks:
+                ciks_from_ticker.extend(ciks)
+
+        cik = ciks_from_ticker
 
     # Normalize CIK format
     if cik is not None:
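A standalone sketch of the new ticker handling: a single ticker string is wrapped in a list, and CIKs from every ticker are accumulated. Here lookup() stands in for get_cik_from_dataset('listed_filer_metadata', 'ticker', t), with invented sample data.

def lookup(t):
    # Invented sample data in place of the package's dataset lookup
    return {'AAPL': [320193], 'MSFT': [789019]}.get(t, [])

ticker = ['AAPL', 'MSFT']   # a bare 'AAPL' would be wrapped in a list first
if isinstance(ticker, str):
    ticker = [ticker]

ciks_from_ticker = []
for t in ticker:
    ciks = lookup(t)
    if ciks:
        ciks_from_ticker.extend(ciks)

print(ciks_from_ticker)  # [320193, 789019]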
datamule/index.py CHANGED
@@ -1,16 +1,16 @@
-from pathlib import Path
+
 from .sec.submissions.textsearch import query
-from .helper import _process_cik_and_metadata_filters, load_package_dataset
+from .helper import _process_cik_and_metadata_filters
+from pathlib import Path
 
 class Index:
-    def __init__(self, path=None):
-        self.path = Path(path) if path else None
+    def __init__(self):
+        pass
 
     def search_submissions(
         self,
         text_query,
-        start_date=None,
-        end_date=None,
+        filing_date=None,
         submission_type=None,
         cik=None,
         ticker=None,
@@ -47,16 +47,14 @@ class Index:
         # Execute the search query
         results = query(
             f'{text_query}',
-            filing_date=(start_date, end_date),
+            filing_date=filing_date,
             requests_per_second=requests_per_second,
             quiet=quiet,
             submission_type=submission_type,
             **kwargs
         )
 
-        # Save results to path if specified
-        if self.path:
-            self._save_results(results, text_query)
+
 
         return results
 
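A hypothetical usage sketch of the reworked interface: Index no longer takes a path or saves results, and the separate start_date/end_date parameters collapse into a single filing_date passed through to query() unchanged. The (start, end) tuple below mirrors how the old code packed the two dates; it is an assumption, not a documented contract.

from datamule import Index

index = Index()  # no longer takes a path; results are simply returned
results = index.search_submissions(
    'climate risk',
    filing_date=('2024-01-01', '2024-12-31'),  # assumed tuple form
    submission_type='10-K',
)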
datamule/portfolio.py CHANGED
@@ -9,22 +9,28 @@ import os
 from .helper import _process_cik_and_metadata_filters
 from .seclibrary.downloader import download as seclibrary_download
 from .sec.xbrl.filter_xbrl import filter_xbrl
-from .sec.submissions.monitor import monitor
-from .sec.xbrl.xbrlmonitor import XBRLMonitor
+from .sec.submissions.monitor import Monitor
+#from .sec.xbrl.xbrlmonitor import XBRLMonitor
 
 
 class Portfolio:
     def __init__(self, path):
         self.path = Path(path)
+        self.api_key = None
         self.submissions = []
         self.submissions_loaded = False
         self.MAX_WORKERS = os.cpu_count() - 1
+
+        self.monitor = Monitor()
 
         if self.path.exists():
             self._load_submissions()
             self.submissions_loaded = True
         else:
             self.path.mkdir(parents=True, exist_ok=True)
+
+    def set_api_key(self, api_key):
+        self.api_key = api_key
 
     def _load_submissions(self):
         folders = [f for f in self.path.iterdir() if f.is_dir()]
@@ -132,6 +138,7 @@ class Portfolio:
         seclibrary_download(
             output_dir=self.path,
             cik=cik,
+            api_key=self.api_key,
             submission_type=submission_type,
             filing_date=filing_date,
             accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None,
@@ -149,20 +156,18 @@
         )
 
         self.submissions_loaded = False
-    def monitor_submissions(self, data_callback=None, poll_callback=None, submission_type=None, cik=None,
-                            polling_interval=200, requests_per_second=5, quiet=False, start_date=None, ticker=None, **kwargs):
+    def monitor_submissions(self, data_callback=None, interval_callback=None,
+                            polling_interval=1000, quiet=True, start_date=None,
+                            validation_interval=600000):
 
-        cik = _process_cik_and_metadata_filters(cik, ticker, **kwargs)
 
-        monitor(
+        self.monitor.monitor_submissions(
             data_callback=data_callback,
-            poll_callback=poll_callback,
-            cik=cik,
-            submission_type=submission_type,
+            interval_callback=interval_callback,
             polling_interval=polling_interval,
-            requests_per_second=requests_per_second,
             quiet=quiet,
-            start_date=start_date
+            start_date=start_date,
+            validation_interval=validation_interval
         )
 
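A hypothetical usage sketch of the reworked monitoring API: Portfolio now owns a Monitor instance and forwards to its monitor_submissions; cik/ticker filtering and requests_per_second are gone from this path, and interval_callback replaces poll_callback. The callback shape and the units of the two intervals are not shown in this diff, so both are assumptions.

from datamule import Portfolio

portfolio = Portfolio('filings')
portfolio.set_api_key('YOUR_API_KEY')  # stored on the instance for seclibrary downloads

def on_new_submissions(batch):         # assumed callback shape
    print(f'received {len(batch)} new submissions')

portfolio.monitor_submissions(
    data_callback=on_new_submissions,
    polling_interval=1000,             # default per the new signature
    validation_interval=600000,        # default per the new signature
)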