datamule 1.8.6__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff compares the contents of two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the released packages exactly as they appear in that registry.
@@ -3,7 +3,6 @@ import asyncio
  import aiohttp
  import urllib.parse
  import ssl
- import json
  import time
  from tqdm import tqdm
 
@@ -0,0 +1,275 @@
+ import os
+ import asyncio
+ import aiohttp
+ import json
+ import ssl
+ import time
+ from tqdm import tqdm
+
+ class DatamuleMySQL:
+     def __init__(self, api_key=None):
+         self.API_BASE_URL = "https://datamule-mysql-rds.jgfriedman99.workers.dev"
+         self._api_key = api_key
+         self.total_cost = 0
+         self.remaining_balance = None
+         self.start_time = None
+
+     @property
+     def api_key(self):
+         return getattr(self, '_api_key', None) or os.getenv('DATAMULE_API_KEY')
+
+     @api_key.setter
+     def api_key(self, value):
+         if not value:
+             raise ValueError("API key cannot be empty")
+         self._api_key = value
+
+     async def _fetch_page(self, session, table, database, filters, page=1, page_size=25000):
+         payload = {
+             "table": table,
+             "database": database,
+             "filters": filters,
+             "page": page,
+             "pageSize": page_size
+         }
+
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.api_key}",
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+         }
+
+         async with session.post(self.API_BASE_URL, json=payload, headers=headers) as response:
+             data = await response.json()
+             if not data.get('success'):
+                 raise ValueError(f"API request failed: {data.get('error')}")
+
+             # Track costs and balance
+             billing = data['metadata']['billing']
+             page_cost = billing['total_charge']
+             self.total_cost += page_cost
+             self.remaining_balance = billing['remaining_balance']
+
+             return data['data'], data['metadata']['pagination'], page_cost
+
+     async def execute_query(self, table, **kwargs):
+         if self.api_key is None:
+             raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
+
+         # Extract pagination and display options
+         page_size = kwargs.pop('page_size', 25000)
+         quiet = kwargs.pop('quiet', False)
+
+         # Determine database from table
+         if table == 'simple_xbrl':
+             database = 'xbrl_db'
+         elif table == 'accession_cik':
+             database = 'lookup_db'
+         elif table == 'submission_details':
+             database = 'lookup_db'
+         else:
+             raise ValueError(f"Unsupported table: {table}")
+
+         # Process filters: tuples = range, lists = OR, single = exact
+         filters = {}
+         for key, value in kwargs.items():
+             # Skip None values entirely
+             if value is None:
+                 continue
+
+             # Special logic for cik
+             if key == 'cik':
+                 if isinstance(value, list):
+                     value = [int(val) for val in value]
+                 else:
+                     value = [int(value)]
+                 filters[key] = {"type": "or", "values": value}
+             elif isinstance(value, tuple):
+                 filters[key] = {"type": "range", "values": list(value)}
+             elif isinstance(value, list):
+                 filters[key] = {"type": "or", "values": value}
+             else:
+                 filters[key] = {"type": "or", "values": [value]}
+
+         self.start_time = time.time()
+         total_items = 0
+         pages_processed = 0
+
+         # Display query parameters
+         query_desc = [f"Table={table}"]
+         for key, filter_obj in filters.items():
+             if filter_obj["type"] == "range":
+                 query_desc.append(f"{key}={filter_obj['values'][0]} to {filter_obj['values'][1]}")
+             elif len(filter_obj["values"]) == 1:
+                 query_desc.append(f"{key}={filter_obj['values'][0]}")
+             else:
+                 query_desc.append(f"{key}={filter_obj['values']}")
+
+         if not quiet:
+             print(f"QUERY: {', '.join(query_desc)}")
+
+         connector = aiohttp.TCPConnector(ssl=ssl.create_default_context())
+         async with aiohttp.ClientSession(connector=connector) as session:
+             # Initialize progress bar only if not quiet
+             if not quiet:
+                 pbar = tqdm(unit="page", bar_format="{desc}: {n_fmt} {unit} [{elapsed}<{remaining}, {rate_fmt}{postfix}]")
+                 pbar.set_description("Fetching data")
+
+             current_page = 1
+             has_more = True
+             results = []
+
+             while has_more:
+                 # Fetch page
+                 page_results, pagination, page_cost = await self._fetch_page(
+                     session,
+                     table=table,
+                     database=database,
+                     filters=filters,
+                     page=current_page,
+                     page_size=page_size
+                 )
+
+                 # Accumulate results
+                 results.extend(page_results)
+
+                 pages_processed += 1
+                 total_items += len(page_results)
+
+                 # Update progress bar only if not quiet
+                 if not quiet:
+                     pbar.set_description(f"Fetching data (page {current_page})")
+                     pbar.set_postfix_str(f"cost=${self.total_cost:.4f} | balance=${self.remaining_balance:.2f}")
+                     pbar.update(1)
+
+                 # Check if we need to fetch more pages
+                 has_more = pagination.get('hasMore', False)
+                 current_page += 1
+
+                 # For the first page, display record info only if not quiet
+                 if pages_processed == 1 and not quiet:
+                     records_per_page = pagination.get('currentPageRecords', len(page_results))
+                     if records_per_page > 0:
+                         pbar.write(f"Retrieved {records_per_page} records (page 1) - Fetching additional pages...")
+                     else:
+                         pbar.write("No records found matching criteria")
+                         break
+
+             if not quiet:
+                 pbar.close()
+
+         # Final summary only if not quiet
+         if not quiet:
+             elapsed_time = time.time() - self.start_time
+             print("\nQuery complete:")
+             print(f"- Retrieved {total_items} records across {pages_processed} pages")
+             print(f"- Total cost: ${self.total_cost:.4f}")
+             print(f"- Remaining balance: ${self.remaining_balance:.2f}")
+             print(f"- Time: {elapsed_time:.1f} seconds")
+
+         return results
+
+
+ def query_mysql_rds(table, api_key=None, **kwargs):
+     """
+     Query MySQL RDS data from Datamule with optional filtering and automatic pagination
+
+     Parameters:
+     - table: Table name (e.g., 'simple_xbrl')
+     - cik: Company CIK number(s), can be int, string, or list
+     - Any other filter parameters as keyword arguments
+     - page_size: Number of records per page (max 25000, default 25000)
+     - quiet: Boolean, whether to suppress progress output and summary (default False)
+     - api_key: Optional API key (can also use DATAMULE_API_KEY environment variable)
+
+     Filter value types:
+     - Single value: Exact match
+     - List: OR condition (any of the values)
+     - Tuple: Range condition (between first and second values)
+
+     Returns:
+     - List of dictionaries containing the requested data (ready for pandas DataFrame)
+     """
+     # For backwards compatibility, handle non-paginated single requests
+     if kwargs.get('_single_page', False):
+         # Remove the flag and use original synchronous implementation
+         kwargs.pop('_single_page')
+         return _query_mysql_rds_single(table, api_key, **kwargs)
+
+     # Create a DatamuleMySQL instance for this request
+     dm = DatamuleMySQL(api_key=api_key)
+
+     # Run the paginated query and return results
+     return asyncio.run(dm.execute_query(table=table, **kwargs))
+
+
+ def _query_mysql_rds_single(table, api_key=None, **kwargs):
+     """Original synchronous implementation for single page requests"""
+     import urllib.request
+     import urllib.error
+
+     endpoint_url = "https://datamule-mysql-rds.jgfriedman99.workers.dev"
+
+     # Get API key from parameter or environment
+     if api_key is None:
+         api_key = os.getenv('DATAMULE_API_KEY')
+
+     if not api_key:
+         return {"error": "API key required. Pass api_key parameter or set DATAMULE_API_KEY environment variable"}
+
+     # Process filters: tuples = range, lists = OR, single = exact
+     filters = {}
+     for key, value in kwargs.items():
+         # Skip None values entirely
+         if value is None:
+             continue
+
+         # special logic for cik
+         if key == 'cik':
+             if isinstance(value, list):
+                 value = [int(val) for val in value]
+             else:
+                 value = [int(value)]
+             filters[key] = {"type": "or", "values": value}
+         elif isinstance(value, tuple):
+             filters[key] = {"type": "range", "values": list(value)}
+         elif isinstance(value, list):
+             filters[key] = {"type": "or", "values": value}
+         else:
+             filters[key] = {"type": "or", "values": [value]}
+
+     payload = {"filters": filters}
+     # add table to payload
+     payload['table'] = table
+
+     if table == 'simple_xbrl':
+         payload['database'] = 'xbrl_db'
+     else:
+         raise ValueError("table not found")
+
+     data = json.dumps(payload).encode('utf-8')
+     req = urllib.request.Request(
+         endpoint_url,
+         data=data,
+         headers={
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {api_key}",
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+         }
+     )
+
+     try:
+         with urllib.request.urlopen(req, timeout=6000) as response:
+             result = json.loads(response.read().decode('utf-8'))
+             # Return just the data for single page requests
+             return result.get('data', []) if result.get('success') else result
+     except urllib.error.HTTPError as e:
+         # Print the error response body
+         error_body = e.read().decode('utf-8')
+         print(f"HTTP Error {e.code}: {error_body}")
+         try:
+             error_json = json.loads(error_body)
+             print(f"Error details: {error_json}")
+         except json.JSONDecodeError:
+             print(f"Raw error response: {error_body}")
+         raise
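The docstring above fully specifies the filter semantics: a single value is an exact match, a list is an OR condition, and a tuple is a range. A minimal usage sketch, assuming DATAMULE_API_KEY is set in the environment and that report_date is a valid filter column for simple_xbrl (the column name is illustrative, not confirmed by this diff):

    from datamule.datamule.datamule_mysql_rds import query_mysql_rds

    # list -> OR condition (cik values are coerced to int);
    # tuple -> range; single value -> exact match
    rows = query_mysql_rds(
        table='simple_xbrl',
        cik=[320193, 789019],                      # Apple, Microsoft
        report_date=('2023-01-01', '2023-12-31'),  # hypothetical range filter
        quiet=True,                                # suppress tqdm output and summary
    )
    # rows is a list of dicts, e.g. pandas.DataFrame(rows)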
@@ -13,6 +13,7 @@ from pathlib import Path
  import webbrowser
  from secsgml.utils import bytes_to_str
  from secxbrl import parse_inline_xbrl
+ from company_fundamentals import construct_fundamentals
 
  class Document:
      def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -35,6 +36,7 @@ class Document:
          # this will be filled by parsed
          self.data = None
          self.xbrl = None
+         self.fundamentals = None
 
      #_load_text_content
      def _preprocess_txt_content(self):
@@ -113,6 +115,59 @@ class Document:
              self.xbrl = parse_inline_xbrl(self.content)
          else:
              raise ValueError("Only inline has been implemented so far.")
+
+     def parse_fundamentals(self,categories=None):
+         self.parse_xbrl()
+         # Transform XBRL records into the format needed by construct_fundamentals
+         xbrl = []
+
+         for xbrl_record in self.xbrl:
+             try:
+                 # Extract basic fields
+                 value = xbrl_record.get('_val', None)
+                 taxonomy, name = xbrl_record['_attributes']['name'].split(':')
+
+                 # Handle scaling if present
+                 if xbrl_record.get('_attributes', {}).get('scale') is not None:
+                     scale = int(xbrl_record['_attributes']['scale'])
+                     try:
+                         value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
+                     except:
+                         pass
+
+                 # Extract period dates
+                 period_start_date = None
+                 period_end_date = None
+
+                 if xbrl_record.get('_context'):
+                     context = xbrl_record['_context']
+                     period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
+                     period_end_date = context.get('context_period_enddate')
+
+                 # Create record in the format expected by construct_fundamentals
+                 record = {
+                     'taxonomy': taxonomy,
+                     'name': name,
+                     'value': value,
+                     'period_start_date': period_start_date,
+                     'period_end_date': period_end_date
+                 }
+
+                 xbrl.append(record)
+
+             except Exception as e:
+                 # Skip malformed records
+                 continue
+
+         # Call construct_fundamentals with the transformed data
+         fundamentals = construct_fundamentals(xbrl,
+                                               taxonomy_key='taxonomy',
+                                               concept_key='name',
+                                               start_date_key='period_start_date',
+                                               end_date_key='period_end_date',
+                                               categories=categories)
+
+         self.fundamentals = fundamentals
 
      # Note: this method will be heavily modified in the future
      def parse(self):
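Two details in parse_fundamentals are easy to miss: the scale attribute is applied as value x 10**scale via Decimal, which is not imported in this hunk (presumably it is imported elsewhere in document.py), and the bare except leaves the raw string untouched when scaling fails. A standalone sketch of the per-record transformation, using a hypothetical inline-XBRL record:

    from decimal import Decimal

    xbrl_record = {   # hypothetical record in the shape the parser emits
        '_val': '1,234.5',
        '_attributes': {'name': 'us-gaap:Revenues', 'scale': '3'},
        '_context': {'context_period_startdate': '2023-01-01',
                     'context_period_enddate': '2023-12-31'},
    }

    value = xbrl_record['_val']
    scale = int(xbrl_record['_attributes']['scale'])
    value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
    taxonomy, name = xbrl_record['_attributes']['name'].split(':')
    print(taxonomy, name, value)   # us-gaap Revenues 1234500.0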
datamule/portfolio.py CHANGED
@@ -9,14 +9,11 @@ import os
  import tarfile
  from threading import Lock
  from .helper import _process_cik_and_metadata_filters
- from .seclibrary.downloader import download as seclibrary_download
+ from .datamule.downloader import download as seclibrary_download
  from .sec.xbrl.filter_xbrl import filter_xbrl
  from .sec.submissions.monitor import Monitor
  from .portfolio_compression_utils import CompressionManager
  from .datamule.sec_connector import SecConnector
- from secsgml.utils import bytes_to_str, calculate_documents_locations_in_tar
- import json
- import io
  import shutil
 
datamule/sheet.py CHANGED
@@ -3,8 +3,8 @@ import csv
  import os
  from .helper import _process_cik_and_metadata_filters, load_package_dataset
  from .sec.xbrl.downloadcompanyfacts import download_company_facts
- from .seclibrary.datamule_lookup import datamule_lookup
-
+ from .datamule.datamule_lookup import datamule_lookup
+ from .datamule.datamule_mysql_rds import query_mysql_rds
  # slated for deprecation?
  from .seclibrary.bq import get_information_table, get_345, get_proxy_voting_record
 
@@ -12,11 +12,16 @@ class Sheet:
      def __init__(self, path):
          self.path = Path(path)
 
+     # Keep
      def get_submissions(self,cik=None, accession_number=None, submission_type=None, filing_date=None,
                          columns=None, distinct=False, page_size=25000, quiet=False, api_key=None):
 
          return datamule_lookup(cik, accession_number, submission_type, filing_date,
                                 columns, distinct, page_size, quiet, api_key)
+
+     def get_table(self,table,cik=None,ticker=None,**kwargs):
+         cik = _process_cik_and_metadata_filters(cik, ticker)
+         return query_mysql_rds(table=table,cik=cik,**kwargs)
 
      def download_xbrl(
          self,
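get_table is a thin wrapper: it resolves cik/ticker through _process_cik_and_metadata_filters, then forwards everything else to query_mysql_rds. A usage sketch, assuming Sheet is exported at the package root (not confirmed by this diff):

    from datamule import Sheet   # assumption: exported at package root

    sheet = Sheet('my_portfolio')
    rows = sheet.get_table(table='simple_xbrl', ticker='AAPL', quiet=True)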
datamule/submission.py CHANGED
@@ -10,8 +10,6 @@ import zstandard as zstd
  import gzip
  import urllib.request
 
-
-
  class Submission:
      def __init__(self, path=None, sgml_content=None, keep_document_types=None,
                   batch_tar_path=None, accession_prefix=None, portfolio_ref=None,url=None):
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datamule
- Version: 1.8.6
+ Version: 2.0.0
  Summary: Work with SEC submissions at scale.
  Home-page: https://github.com/john-friedman/datamule-python
  Author: John Friedman
@@ -18,4 +18,5 @@ Requires-Dist: doc2dict
  Requires-Dist: secxbrl
  Requires-Dist: secsgml
  Requires-Dist: websocket-client
+ Requires-Dist: company-fundamentals
 
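Beyond the version bump, the only metadata change is a new hard dependency on company-fundamentals, which backs Document.parse_fundamentals above. A post-install sanity check using only the standard library:

    import importlib.metadata

    print(importlib.metadata.version('datamule'))          # expect: 2.0.0
    print([r for r in importlib.metadata.requires('datamule')
           if 'company-fundamentals' in r])                # expect one entry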
@@ -3,15 +3,18 @@ datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
  datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
  datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
- datamule/portfolio.py,sha256=wtNADsrzjYoyIKAv7Xnj5uXhpT6NS1ORemC75vyeLs0,12220
+ datamule/portfolio.py,sha256=YViG1JgJ9SFhg8N3tOOhBI8oc6Pmi2vwnHeHmlkC_5U,12119
  datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
- datamule/sheet.py,sha256=V5iR9_LkuwTFxfHCfzgadO6qgB6qOhzWiCAED-y8ZJQ,22744
- datamule/submission.py,sha256=G2Y93VtvbEEokyL5ixemKrs-Gd2Q-GarOx3RHqQyAqk,11207
+ datamule/sheet.py,sha256=GnF9wA42iDw6purPmgshALymFssBp7gjmfjIs86CNJY,22997
+ datamule/submission.py,sha256=TkD_SVCEGjxOmHm5hjQm69j8DqQWr3YtgjTdKRWm26k,11205
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
  datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
+ datamule/datamule/datamule_mysql_rds.py,sha256=Oj_xPTBKkzWsuRlb_tphjJrBW1eua1cOuxjGwJx581k,10591
+ datamule/datamule/downloader.py,sha256=IbeBkvc4-xefHq37qktTxzCXh90cG8ayx80qQWehRvU,18527
  datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datamule/document/document.py,sha256=5h_tCO82ZBpGAuGhTgY63OIk-db_3q4RlRWZMwtAxxg,14426
+ datamule/document/document.py,sha256=wr7gup139l7wJl8xiipROMo9in_44wAl27Rb__tpb84,16770
  datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
  datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
  datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -59,13 +62,10 @@ datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H
  datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
  datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
- datamule/seclibrary/datamule_lookup.py,sha256=-xsATUVwm58Y1nNP287c-1pHB2uttngIiJ5Zy3DRi-s,9480
- datamule/seclibrary/downloader.py,sha256=IbeBkvc4-xefHq37qktTxzCXh90cG8ayx80qQWehRvU,18527
- datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
  datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
  datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
- datamule-1.8.6.dist-info/METADATA,sha256=nNBBh1IU4lWxxSx0Noh9Eb11Iz2VDa8aVruVb1wrKm8,524
- datamule-1.8.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- datamule-1.8.6.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
- datamule-1.8.6.dist-info/RECORD,,
+ datamule-2.0.0.dist-info/METADATA,sha256=Gg6gAtm4lGxYuXLiqNB8VtP0pb1A922QvKRw9kgCegk,560
+ datamule-2.0.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ datamule-2.0.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+ datamule-2.0.0.dist-info/RECORD,,
@@ -1,181 +0,0 @@
- import os
- import asyncio
- import aiohttp
- import urllib.parse
- import ssl
- import json
- import time
- from tqdm import tqdm
-
- class Query:
-     def __init__(self, api_key=None):
-         self.API_BASE_URL = "https://sec-library.jgfriedman99.workers.dev/"
-         self._api_key = api_key
-         self.total_cost = 0
-         self.remaining_balance = None
-         self.start_time = None
-
-     @property
-     def api_key(self):
-         return getattr(self, '_api_key', None) or os.getenv('DATAMULE_API_KEY')
-
-     @api_key.setter
-     def api_key(self, value):
-         if not value:
-             raise ValueError("API key cannot be empty")
-         self._api_key = value
-
-     async def _fetch_page(self, session, submission_type=None, cik=None, filing_date=None, page=1):
-         params = {
-             'api_key': self.api_key,
-             'page': page
-         }
-
-         # Handle submission_type parameter
-         if submission_type:
-             if isinstance(submission_type, list):
-                 params['submission_type'] = ','.join(str(x) for x in submission_type)
-             else:
-                 params['submission_type'] = str(submission_type)
-
-         # Handle CIK parameter
-         if cik:
-             if isinstance(cik, list):
-                 params['cik'] = ','.join(str(x) for x in cik)
-             else:
-                 params['cik'] = str(cik)
-
-         # Handle filing_date parameter
-         if filing_date:
-             if isinstance(filing_date, tuple):
-                 params['startdt'] = str(filing_date[0])
-                 params['enddt'] = str(filing_date[1])
-             else:
-                 if isinstance(filing_date, list):
-                     params['filing_date'] = ','.join(str(x) for x in filing_date)
-                 else:
-                     params['filing_date'] = str(filing_date)
-
-         url = f"{self.API_BASE_URL}?{urllib.parse.urlencode(params)}"
-
-         async with session.get(url) as response:
-             data = await response.json()
-             if not data.get('success'):
-                 raise ValueError(f"API request failed: {data.get('error')}")
-
-             # Track costs and balance
-             charges = data['metadata']['billing']['charges']
-             page_cost = charges['total']
-             self.total_cost += page_cost
-             self.remaining_balance = data['metadata']['billing']['remaining_balance']
-
-             return data['data'], data['metadata']['pagination'], page_cost
-
-     async def execute_query(self, submission_type=None, cik=None, filing_date=None):
-         if self.api_key is None:
-             raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
-
-         self.start_time = time.time()
-         total_items = 0
-         pages_processed = 0
-
-         # Display query parameters
-         query_desc = []
-         if cik:
-             query_desc.append(f"CIK={cik}")
-         if submission_type:
-             query_desc.append(f"Type={submission_type}")
-         if filing_date:
-             if isinstance(filing_date, tuple):
-                 query_desc.append(f"Date={filing_date[0]} to {filing_date[1]}")
-             else:
-                 query_desc.append(f"Date={filing_date}")
-
-         if query_desc:
-             print(f"QUERY: {', '.join(query_desc)}")
-
-         connector = aiohttp.TCPConnector(ssl=ssl.create_default_context())
-         async with aiohttp.ClientSession(connector=connector) as session:
-             # Initialize progress bar with a custom format to avoid extra colons
-             pbar = tqdm(unit="page", bar_format="{desc}: {n_fmt} {unit} [{elapsed}<{remaining}, {rate_fmt}{postfix}]")
-             pbar.set_description("Fetching data")
-
-             current_page = 1
-             has_more = True
-             results = []
-
-             while has_more:
-                 # Fetch page
-                 page_results, pagination, page_cost = await self._fetch_page(session,
-                                                                              submission_type=submission_type,
-                                                                              cik=cik,
-                                                                              filing_date=filing_date,
-                                                                              page=current_page)
-
-                 # Accumulate results
-                 results.extend(page_results)
-
-                 pages_processed += 1
-                 total_items += len(page_results)
-
-                 # Update progress bar with cleaner format
-                 pbar.set_description(f"Fetching data (page {current_page})")
-                 pbar.set_postfix_str(f"cost=${self.total_cost:.2f} | balance=${self.remaining_balance:.2f}")
-                 pbar.update(1)
-
-                 # Check if we need to fetch more pages
-                 has_more = pagination.get('hasMore', False)
-                 current_page += 1
-
-                 # For the first page, display record info using pbar.write instead of print
-                 if pages_processed == 1:
-                     records_per_page = pagination.get('currentPageRecords', len(page_results))
-                     total_records = pagination.get('totalRecords', None)
-                     if total_records:
-                         pbar.write(f"Retrieved {records_per_page} records (page 1) of {total_records} total - Fetching additional pages...")
-                     else:
-                         pbar.write(f"Retrieved {records_per_page} records (page 1) - Fetching additional pages...")
-
-             pbar.close()
-
-         # Final summary
-         elapsed_time = time.time() - self.start_time
-         print("\nQuery complete:")
-         print(f"- Retrieved {total_items} filings across {pages_processed} pages")
-         print(f"- Total cost: ${self.total_cost:.2f}")
-         print(f"- Remaining balance: ${self.remaining_balance:.2f}")
-         print(f"- Time: {elapsed_time:.1f} seconds")
-
-         return results
-
-
- def query(cik=None, submission_type=None, filing_date=None, api_key=None):
-     """
-     Query SEC filings data with optional filtering
-
-     Parameters:
-     - cik: Company CIK number(s), can be string, int, or list
-     - submission_type: Filing type(s), can be string or list (e.g., '10-K', ['10-K', '10-Q'])
-     - filing_date: Filing date(s), can be string, list, or tuple of (start_date, end_date)
-     - api_key: Optional API key (can also use DATAMULE_API_KEY environment variable)
-
-     Returns:
-     - List of all matching submission data
-     """
-     # Create a Query instance for this request
-     q = Query(api_key=api_key)
-     # remove dash from filing_date
-     if isinstance(filing_date, tuple):
-         filing_date = (filing_date[0].replace('-', ''), filing_date[1].replace('-', ''))
-     elif isinstance(filing_date, str):
-         filing_date = filing_date.replace('-', '')
-     elif isinstance(filing_date, list):
-         filing_date = [x.replace('-', '') for x in filing_date]
-
-     print(filing_date)
-     # Run the query and return results
-     return asyncio.run(q.execute_query(
-         submission_type=submission_type,
-         cik=cik,
-         filing_date=filing_date
-     ))
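With seclibrary/query.py deleted, nothing in this diff re-exports the old query() helper; the surviving lookup path is Sheet.get_submissions, backed by datamule_lookup. A hedged migration sketch based on the two signatures shown in this diff (the Sheet import path is an assumption):

    # datamule 1.8.6 (module removed in 2.0.0):
    # from datamule.seclibrary.query import query
    # results = query(cik=320193, submission_type='10-K',
    #                 filing_date=('2023-01-01', '2023-12-31'))

    # datamule 2.0.0:
    from datamule import Sheet   # assumption: exported at package root
    sheet = Sheet('my_portfolio')
    results = sheet.get_submissions(cik=320193, submission_type='10-K',
                                    filing_date=('2023-01-01', '2023-12-31'))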