datamule 1.9.0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {datamule-1.9.0 → datamule-2.0.1}/PKG-INFO +1 -1
  2. datamule-2.0.1/datamule/datamule/datamule_mysql_rds.py +275 -0
  3. {datamule-1.9.0 → datamule-2.0.1}/datamule/sheet.py +24 -4
  4. {datamule-1.9.0 → datamule-2.0.1}/datamule/submission.py +0 -2
  5. {datamule-1.9.0 → datamule-2.0.1}/datamule.egg-info/PKG-INFO +1 -1
  6. {datamule-1.9.0 → datamule-2.0.1}/setup.py +1 -1
  7. datamule-1.9.0/datamule/datamule/datamule_mysql_rds.py +0 -3
  8. {datamule-1.9.0 → datamule-2.0.1}/datamule/__init__.py +0 -0
  9. {datamule-1.9.0 → datamule-2.0.1}/datamule/config.py +0 -0
  10. {datamule-1.9.0 → datamule-2.0.1}/datamule/data/listed_filer_metadata.csv +0 -0
  11. {datamule-1.9.0 → datamule-2.0.1}/datamule/datamule/__init__.py +0 -0
  12. {datamule-1.9.0 → datamule-2.0.1}/datamule/datamule/datamule_lookup.py +0 -0
  13. {datamule-1.9.0 → datamule-2.0.1}/datamule/datamule/downloader.py +0 -0
  14. {datamule-1.9.0 → datamule-2.0.1}/datamule/datamule/sec_connector.py +0 -0
  15. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/__init__.py +0 -0
  16. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/document.py +0 -0
  17. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/__init__.py +0 -0
  18. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/atsn.py +0 -0
  19. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/cfportal.py +0 -0
  20. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/d.py +0 -0
  21. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ex102_abs.py +0 -0
  22. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ex99a_sdr.py +0 -0
  23. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ex99c_sdr.py +0 -0
  24. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ex99g_sdr.py +0 -0
  25. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ex99i_sdr.py +0 -0
  26. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/information_table.py +0 -0
  27. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/nmfp.py +0 -0
  28. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/npx.py +0 -0
  29. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/onefourtyfour.py +0 -0
  30. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ownership.py +0 -0
  31. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/proxy_voting_record.py +0 -0
  32. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/sbs.py +0 -0
  33. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/sbsef.py +0 -0
  34. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/schedule13.py +0 -0
  35. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/sdr.py +0 -0
  36. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/submission_metadata.py +0 -0
  37. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/ta.py +0 -0
  38. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/thirteenfhr.py +0 -0
  39. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/twentyfivense.py +0 -0
  40. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/mappings/twentyfourf2nt.py +0 -0
  41. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/processing.py +0 -0
  42. {datamule-1.9.0 → datamule-2.0.1}/datamule/document/table.py +0 -0
  43. {datamule-1.9.0 → datamule-2.0.1}/datamule/helper.py +0 -0
  44. {datamule-1.9.0 → datamule-2.0.1}/datamule/index.py +0 -0
  45. {datamule-1.9.0 → datamule-2.0.1}/datamule/mapping_dicts/__init__.py +0 -0
  46. {datamule-1.9.0 → datamule-2.0.1}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
  47. {datamule-1.9.0 → datamule-2.0.1}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  48. {datamule-1.9.0 → datamule-2.0.1}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  49. {datamule-1.9.0 → datamule-2.0.1}/datamule/package_updater.py +0 -0
  50. {datamule-1.9.0 → datamule-2.0.1}/datamule/portfolio.py +0 -0
  51. {datamule-1.9.0 → datamule-2.0.1}/datamule/portfolio_compression_utils.py +0 -0
  52. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/__init__.py +0 -0
  53. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/infrastructure/__init__.py +0 -0
  54. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  55. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/submissions/__init__.py +0 -0
  56. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/submissions/downloader.py +0 -0
  57. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/submissions/eftsquery.py +0 -0
  58. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/submissions/monitor.py +0 -0
  59. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/submissions/streamer.py +0 -0
  60. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/submissions/textsearch.py +0 -0
  61. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/utils.py +0 -0
  62. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/xbrl/__init__.py +0 -0
  63. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  64. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  65. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  66. {datamule-1.9.0 → datamule-2.0.1}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  67. {datamule-1.9.0 → datamule-2.0.1}/datamule/seclibrary/__init__.py +0 -0
  68. {datamule-1.9.0 → datamule-2.0.1}/datamule/seclibrary/bq.py +0 -0
  69. {datamule-1.9.0 → datamule-2.0.1}/datamule/utils/__init__.py +0 -0
  70. {datamule-1.9.0 → datamule-2.0.1}/datamule/utils/construct_submissions_data.py +0 -0
  71. {datamule-1.9.0 → datamule-2.0.1}/datamule/utils/format_accession.py +0 -0
  72. {datamule-1.9.0 → datamule-2.0.1}/datamule.egg-info/SOURCES.txt +0 -0
  73. {datamule-1.9.0 → datamule-2.0.1}/datamule.egg-info/dependency_links.txt +0 -0
  74. {datamule-1.9.0 → datamule-2.0.1}/datamule.egg-info/requires.txt +0 -0
  75. {datamule-1.9.0 → datamule-2.0.1}/datamule.egg-info/top_level.txt +0 -0
  76. {datamule-1.9.0 → datamule-2.0.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.9.0
3
+ Version: 2.0.1
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -0,0 +1,275 @@
1
+ import os
2
+ import asyncio
3
+ import aiohttp
4
+ import json
5
+ import ssl
6
+ import time
7
+ from tqdm import tqdm
8
+
9
+ class DatamuleMySQL:
10
+ def __init__(self, api_key=None):
11
+ self.API_BASE_URL = "https://datamule-mysql-rds.jgfriedman99.workers.dev"
12
+ self._api_key = api_key
13
+ self.total_cost = 0
14
+ self.remaining_balance = None
15
+ self.start_time = None
16
+
17
+ @property
18
+ def api_key(self):
19
+ return getattr(self, '_api_key', None) or os.getenv('DATAMULE_API_KEY')
20
+
21
+ @api_key.setter
22
+ def api_key(self, value):
23
+ if not value:
24
+ raise ValueError("API key cannot be empty")
25
+ self._api_key = value
26
+
27
+ async def _fetch_page(self, session, table, database, filters, page=1, page_size=25000):
28
+ payload = {
29
+ "table": table,
30
+ "database": database,
31
+ "filters": filters,
32
+ "page": page,
33
+ "pageSize": page_size
34
+ }
35
+
36
+ headers = {
37
+ "Content-Type": "application/json",
38
+ "Authorization": f"Bearer {self.api_key}",
39
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
40
+ }
41
+
42
+ async with session.post(self.API_BASE_URL, json=payload, headers=headers) as response:
43
+ data = await response.json()
44
+ if not data.get('success'):
45
+ raise ValueError(f"API request failed: {data.get('error')}")
46
+
47
+ # Track costs and balance
48
+ billing = data['metadata']['billing']
49
+ page_cost = billing['total_charge']
50
+ self.total_cost += page_cost
51
+ self.remaining_balance = billing['remaining_balance']
52
+
53
+ return data['data'], data['metadata']['pagination'], page_cost
54
+
55
+ async def execute_query(self, table, **kwargs):
56
+ if self.api_key is None:
57
+ raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
58
+
59
+ # Extract pagination and display options
60
+ page_size = kwargs.pop('page_size', 25000)
61
+ quiet = kwargs.pop('quiet', False)
62
+
63
+ # Determine database from table
64
+ if table == 'simple_xbrl':
65
+ database = 'xbrl_db'
66
+ elif table == 'accession_cik':
67
+ database = 'lookup_db'
68
+ elif table == 'submission_details':
69
+ database = 'lookup_db'
70
+ else:
71
+ raise ValueError(f"Unsupported table: {table}")
72
+
73
+ # Process filters: tuples = range, lists = OR, single = exact
74
+ filters = {}
75
+ for key, value in kwargs.items():
76
+ # Skip None values entirely
77
+ if value is None:
78
+ continue
79
+
80
+ # Special logic for cik
81
+ if key == 'cik':
82
+ if isinstance(value, list):
83
+ value = [int(val) for val in value]
84
+ else:
85
+ value = [int(value)]
86
+ filters[key] = {"type": "or", "values": value}
87
+ elif isinstance(value, tuple):
88
+ filters[key] = {"type": "range", "values": list(value)}
89
+ elif isinstance(value, list):
90
+ filters[key] = {"type": "or", "values": value}
91
+ else:
92
+ filters[key] = {"type": "or", "values": [value]}
93
+
94
+ self.start_time = time.time()
95
+ total_items = 0
96
+ pages_processed = 0
97
+
98
+ # Display query parameters
99
+ query_desc = [f"Table={table}"]
100
+ for key, filter_obj in filters.items():
101
+ if filter_obj["type"] == "range":
102
+ query_desc.append(f"{key}={filter_obj['values'][0]} to {filter_obj['values'][1]}")
103
+ elif len(filter_obj["values"]) == 1:
104
+ query_desc.append(f"{key}={filter_obj['values'][0]}")
105
+ else:
106
+ query_desc.append(f"{key}={filter_obj['values']}")
107
+
108
+ if not quiet:
109
+ print(f"QUERY: {', '.join(query_desc)}")
110
+
111
+ connector = aiohttp.TCPConnector(ssl=ssl.create_default_context())
112
+ async with aiohttp.ClientSession(connector=connector) as session:
113
+ # Initialize progress bar only if not quiet
114
+ if not quiet:
115
+ pbar = tqdm(unit="page", bar_format="{desc}: {n_fmt} {unit} [{elapsed}<{remaining}, {rate_fmt}{postfix}]")
116
+ pbar.set_description("Fetching data")
117
+
118
+ current_page = 1
119
+ has_more = True
120
+ results = []
121
+
122
+ while has_more:
123
+ # Fetch page
124
+ page_results, pagination, page_cost = await self._fetch_page(
125
+ session,
126
+ table=table,
127
+ database=database,
128
+ filters=filters,
129
+ page=current_page,
130
+ page_size=page_size
131
+ )
132
+
133
+ # Accumulate results
134
+ results.extend(page_results)
135
+
136
+ pages_processed += 1
137
+ total_items += len(page_results)
138
+
139
+ # Update progress bar only if not quiet
140
+ if not quiet:
141
+ pbar.set_description(f"Fetching data (page {current_page})")
142
+ pbar.set_postfix_str(f"cost=${self.total_cost:.4f} | balance=${self.remaining_balance:.2f}")
143
+ pbar.update(1)
144
+
145
+ # Check if we need to fetch more pages
146
+ has_more = pagination.get('hasMore', False)
147
+ current_page += 1
148
+
149
+ # For the first page, display record info only if not quiet
150
+ if pages_processed == 1 and not quiet:
151
+ records_per_page = pagination.get('currentPageRecords', len(page_results))
152
+ if records_per_page > 0:
153
+ pbar.write(f"Retrieved {records_per_page} records (page 1) - Fetching additional pages...")
154
+ else:
155
+ pbar.write("No records found matching criteria")
156
+ break
157
+
158
+ if not quiet:
159
+ pbar.close()
160
+
161
+ # Final summary only if not quiet
162
+ if not quiet:
163
+ elapsed_time = time.time() - self.start_time
164
+ print("\nQuery complete:")
165
+ print(f"- Retrieved {total_items} records across {pages_processed} pages")
166
+ print(f"- Total cost: ${self.total_cost:.4f}")
167
+ print(f"- Remaining balance: ${self.remaining_balance:.2f}")
168
+ print(f"- Time: {elapsed_time:.1f} seconds")
169
+
170
+ return results
171
+
172
+
173
+ def query_mysql_rds(table, api_key=None, **kwargs):
174
+ """
175
+ Query MySQL RDS data from Datamule with optional filtering and automatic pagination
176
+
177
+ Parameters:
178
+ - table: Table name (e.g., 'simple_xbrl')
179
+ - cik: Company CIK number(s), can be int, string, or list
180
+ - Any other filter parameters as keyword arguments
181
+ - page_size: Number of records per page (max 25000, default 25000)
182
+ - quiet: Boolean, whether to suppress progress output and summary (default False)
183
+ - api_key: Optional API key (can also use DATAMULE_API_KEY environment variable)
184
+
185
+ Filter value types:
186
+ - Single value: Exact match
187
+ - List: OR condition (any of the values)
188
+ - Tuple: Range condition (between first and second values)
189
+
190
+ Returns:
191
+ - List of dictionaries containing the requested data (ready for pandas DataFrame)
192
+ """
193
+ # For backwards compatibility, handle non-paginated single requests
194
+ if kwargs.get('_single_page', False):
195
+ # Remove the flag and use original synchronous implementation
196
+ kwargs.pop('_single_page')
197
+ return _query_mysql_rds_single(table, api_key, **kwargs)
198
+
199
+ # Create a DatamuleMySQL instance for this request
200
+ dm = DatamuleMySQL(api_key=api_key)
201
+
202
+ # Run the paginated query and return results
203
+ return asyncio.run(dm.execute_query(table=table, **kwargs))
204
+
205
+
206
+ def _query_mysql_rds_single(table, api_key=None, **kwargs):
207
+ """Original synchronous implementation for single page requests"""
208
+ import urllib.request
209
+ import urllib.error
210
+
211
+ endpoint_url = "https://datamule-mysql-rds.jgfriedman99.workers.dev"
212
+
213
+ # Get API key from parameter or environment
214
+ if api_key is None:
215
+ api_key = os.getenv('DATAMULE_API_KEY')
216
+
217
+ if not api_key:
218
+ return {"error": "API key required. Pass api_key parameter or set DATAMULE_API_KEY environment variable"}
219
+
220
+ # Process filters: tuples = range, lists = OR, single = exact
221
+ filters = {}
222
+ for key, value in kwargs.items():
223
+ # Skip None values entirely
224
+ if value is None:
225
+ continue
226
+
227
+ # special logic for cik
228
+ if key == 'cik':
229
+ if isinstance(value, list):
230
+ value = [int(val) for val in value]
231
+ else:
232
+ value = [int(value)]
233
+ filters[key] = {"type": "or", "values": value}
234
+ elif isinstance(value, tuple):
235
+ filters[key] = {"type": "range", "values": list(value)}
236
+ elif isinstance(value, list):
237
+ filters[key] = {"type": "or", "values": value}
238
+ else:
239
+ filters[key] = {"type": "or", "values": [value]}
240
+
241
+ payload = {"filters": filters}
242
+ # add table to payload
243
+ payload['table'] = table
244
+
245
+ if table == 'simple_xbrl':
246
+ payload['database'] = 'xbrl_db'
247
+ else:
248
+ raise ValueError("table not found")
249
+
250
+ data = json.dumps(payload).encode('utf-8')
251
+ req = urllib.request.Request(
252
+ endpoint_url,
253
+ data=data,
254
+ headers={
255
+ "Content-Type": "application/json",
256
+ "Authorization": f"Bearer {api_key}",
257
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
258
+ }
259
+ )
260
+
261
+ try:
262
+ with urllib.request.urlopen(req, timeout=6000) as response:
263
+ result = json.loads(response.read().decode('utf-8'))
264
+ # Return just the data for single page requests
265
+ return result.get('data', []) if result.get('success') else result
266
+ except urllib.error.HTTPError as e:
267
+ # Print the error response body
268
+ error_body = e.read().decode('utf-8')
269
+ print(f"HTTP Error {e.code}: {error_body}")
270
+ try:
271
+ error_json = json.loads(error_body)
272
+ print(f"Error details: {error_json}")
273
+ except json.JSONDecodeError:
274
+ print(f"Raw error response: {error_body}")
275
+ raise
@@ -4,7 +4,9 @@ import os
4
4
  from .helper import _process_cik_and_metadata_filters, load_package_dataset
5
5
  from .sec.xbrl.downloadcompanyfacts import download_company_facts
6
6
  from .datamule.datamule_lookup import datamule_lookup
7
-
7
+ from .datamule.datamule_mysql_rds import query_mysql_rds
8
+ from company_fundamentals.utils import get_fundamental_mappings
9
+ from company_fundamentals import construct_fundamentals
8
10
  # slated for deprecation?
9
11
  from .seclibrary.bq import get_information_table, get_345, get_proxy_voting_record
10
12
 
@@ -19,9 +21,27 @@ class Sheet:
19
21
  return datamule_lookup(cik, accession_number, submission_type, filing_date,
20
22
  columns, distinct, page_size, quiet, api_key)
21
23
 
22
- # Implement
23
- def get_table(self,table,**kwargs):
24
- pass
24
+ def get_table(self,table,cik=None,ticker=None,**kwargs):
25
+ cik = _process_cik_and_metadata_filters(cik, ticker)
26
+
27
+ if table == 'fundamentals':
28
+ fundamentals = kwargs.pop('fundamentals', None)
29
+ if fundamentals is None:
30
+ raise ValueError("fundamentals parameter required for fundamentals table")
31
+
32
+ categories = kwargs.pop('categories',None)
33
+
34
+ mappings = get_fundamental_mappings(fundamentals=fundamentals)
35
+ #print(mappings)
36
+ taxonomies = [item[0] for item in mappings]
37
+ names = [item[1] for item in mappings]
38
+ xbrl = query_mysql_rds(table='simple_xbrl',cik=cik,taxonomy=taxonomies,name=names,**kwargs)
39
+ #print(xbrl)
40
+
41
+ return construct_fundamentals(xbrl, 'taxonomy', 'name', 'period_start_date', 'period_end_date', categories=categories,fundamentals=fundamentals)
42
+
43
+ else:
44
+ return query_mysql_rds(table=table,cik=cik,**kwargs)
25
45
 
26
46
  def download_xbrl(
27
47
  self,
@@ -10,8 +10,6 @@ import zstandard as zstd
10
10
  import gzip
11
11
  import urllib.request
12
12
 
13
-
14
-
15
13
  class Submission:
16
14
  def __init__(self, path=None, sgml_content=None, keep_document_types=None,
17
15
  batch_tar_path=None, accession_prefix=None, portfolio_ref=None,url=None):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.9.0
3
+ Version: 2.0.1
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
32
32
  setup(
33
33
  name="datamule",
34
34
  author="John Friedman",
35
- version="1.9.0",
35
+ version="2.0.1",
36
36
  description="Work with SEC submissions at scale.",
37
37
  packages=find_packages(include=['datamule', 'datamule.*']),
38
38
  url="https://github.com/john-friedman/datamule-python",
@@ -1,3 +0,0 @@
1
- # connection to worker
2
- # basically everything should be handled on worker end
3
- # except for like dates. - nah even dates
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes