datamule 1.5.4__tar.gz → 1.5.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.5.4 → datamule-1.5.8}/PKG-INFO +2 -1
- datamule-1.5.8/datamule/datamule/sec_connector.py +73 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/portfolio.py +6 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/submissions/monitor.py +19 -6
- datamule-1.5.8/datamule/seclibrary/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule.egg-info/PKG-INFO +2 -1
- {datamule-1.5.4 → datamule-1.5.8}/datamule.egg-info/SOURCES.txt +2 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule.egg-info/requires.txt +1 -0
- {datamule-1.5.4 → datamule-1.5.8}/setup.py +2 -1
- {datamule-1.5.4 → datamule-1.5.8}/datamule/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/config.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-1.5.4/datamule/document → datamule-1.5.8/datamule/datamule}/__init__.py +0 -0
- {datamule-1.5.4/datamule/document/mappings → datamule-1.5.8/datamule/document}/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/document.py +0 -0
- {datamule-1.5.4/datamule/mapping_dicts → datamule-1.5.8/datamule/document/mappings}/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/atsn.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/cfportal.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/d.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ex102_abs.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ex99a_sdr.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ex99c_sdr.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ex99g_sdr.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ex99i_sdr.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/information_table.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/nmfp.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/npx.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/onefourtyfour.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ownership.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/proxy_voting_record.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/sbs.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/sbsef.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/schedule13.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/sdr.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/submission_metadata.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/ta.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/thirteenfhr.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/twentyfivense.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/mappings/twentyfourf2nt.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/processing.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/document/table.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/helper.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/index.py +0 -0
- {datamule-1.5.4/datamule/sec → datamule-1.5.8/datamule/mapping_dicts}/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/package_updater.py +0 -0
- {datamule-1.5.4/datamule/sec/infrastructure → datamule-1.5.8/datamule/sec}/__init__.py +0 -0
- {datamule-1.5.4/datamule/sec/submissions → datamule-1.5.8/datamule/sec/infrastructure}/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-1.5.4/datamule/sec/xbrl → datamule-1.5.8/datamule/sec/submissions}/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/utils.py +0 -0
- {datamule-1.5.4/datamule/seclibrary → datamule-1.5.8/datamule/sec/xbrl}/__init__.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/seclibrary/bq.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/seclibrary/downloader.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/seclibrary/query.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/sheet.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule/submission.py +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.5.4 → datamule-1.5.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.5.4
|
3
|
+
Version: 1.5.8
|
4
4
|
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -16,3 +16,4 @@ Requires-Dist: pytz
|
|
16
16
|
Requires-Dist: zstandard
|
17
17
|
Requires-Dist: doc2dict
|
18
18
|
Requires-Dist: secsgml
|
19
|
+
Requires-Dist: websocket-client
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import urllib.request
|
4
|
+
import websocket
|
5
|
+
|
6
|
+
|
7
|
+
class SecConnector:
    """Client for the datamule SEC submissions WebSocket feed.

    The connector exchanges an API key for a JWT at ``auth_url`` and then
    opens a WebSocket to ``websocket_url`` with that token as a query
    parameter.  Incoming messages are JSON-decoded and their ``data`` entry
    is forwarded to an optional callback.

    Args:
        api_key: API key to authenticate with.  Falls back to the
            ``DATAMULE_API_KEY`` environment variable when omitted.
        quiet: When true, suppress the progress messages printed to stdout.

    Raises:
        ValueError: If no API key is given and the environment variable is
            unset or empty.
    """

    # Upper bound (seconds) on the auth HTTP request so a stalled endpoint
    # cannot block the caller indefinitely.
    AUTH_TIMEOUT = 30

    def __init__(self, api_key=None, quiet=False):
        self.api_key = api_key or os.getenv('DATAMULE_API_KEY')
        if not self.api_key:
            raise ValueError("API key not found. Set DATAMULE_API_KEY or provide api_key parameter.")

        self.quiet = quiet
        self.auth_url = "https://sec-websocket-auth-worker.jgfriedman99.workers.dev/"
        # NOTE(review): plain-text ws:// to a fixed IP means the JWT travels
        # unencrypted; switch to wss:// if the server supports it.
        self.websocket_url = "ws://3.80.249.191:8080/ws"

    def _log(self, message):
        """Print *message* unless the connector was created with quiet=True."""
        if not self.quiet:
            print(message)

    def _build_auth_url(self):
        """Return the auth endpoint URL with the API key safely URL-encoded."""
        from urllib.parse import urlencode

        # NOTE(review): the key is sent in the query string, so it may end up
        # in proxy/server logs; a header would be safer if the endpoint
        # accepts one.
        return f"{self.auth_url}?{urlencode({'api_key': self.api_key})}"

    def _get_jwt_token(self):
        """Request a JWT from the auth endpoint and return it.

        Raises:
            Exception: If the endpoint's JSON response has a falsy
                ``success`` field; the message carries its ``error`` field.
        """
        self._log("Getting JWT token...")

        req = urllib.request.Request(self._build_auth_url())
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        req.add_header('Accept', 'application/json')

        with urllib.request.urlopen(req, timeout=self.AUTH_TIMEOUT) as response:
            data = json.loads(response.read().decode())

        if not data.get('success'):
            raise Exception(f"Auth failed: {data.get('error')}")

        self._log("JWT token obtained")

        return data['token']

    def connect(self, data_callback=None):
        """Authenticate, open the WebSocket and process messages until it closes.

        Args:
            data_callback: Optional callable invoked with the ``data`` entry
                of every decoded message (an empty list when the message has
                no ``data`` key).
        """
        from urllib.parse import urlencode

        token = self._get_jwt_token()
        # Encode the token so reserved characters cannot corrupt the query.
        ws_url = f"{self.websocket_url}?{urlencode({'token': token})}"

        self._log("Connecting to WebSocket...")

        def on_open(ws):
            self._log("WebSocket connected")

        def on_message(ws, message):
            response = json.loads(message)
            data = response.get('data', [])
            self._log(f"Received data: {len(data)} items")
            if data_callback:
                data_callback(data)  # Pass just the data array

        def on_error(ws, error):
            self._log(f"WebSocket error: {error}")

        def on_close(ws, close_status_code, close_msg):
            self._log("WebSocket closed")

        ws = websocket.WebSocketApp(
            ws_url,
            on_open=on_open,
            on_message=on_message,
            on_error=on_error,
            on_close=on_close
        )

        ws.run_forever()
|
@@ -11,6 +11,7 @@ from .seclibrary.downloader import download as seclibrary_download
|
|
11
11
|
from .sec.xbrl.filter_xbrl import filter_xbrl
|
12
12
|
from .sec.submissions.monitor import Monitor
|
13
13
|
#from .sec.xbrl.xbrlmonitor import XBRLMonitor
|
14
|
+
from .datamule.sec_connector import SecConnector
|
14
15
|
|
15
16
|
|
16
17
|
class Portfolio:
|
@@ -175,6 +176,11 @@ class Portfolio:
|
|
175
176
|
validation_interval=validation_interval
|
176
177
|
)
|
177
178
|
|
179
|
+
def stream_submissions(self, data_callback=None, quiet=False):
    """Stream submissions through a SecConnector built from this portfolio's API key.

    Args:
        data_callback: Optional callable handed to ``SecConnector.connect``.
        quiet: Forwarded to ``SecConnector`` as its ``quiet`` flag.
    """
    # NOTE(review): relies on self.api_key being set elsewhere on Portfolio —
    # not visible in this view; confirm.
    SecConnector(
        api_key=self.api_key,
        quiet=quiet,
    ).connect(data_callback=data_callback)
|
183
|
+
|
178
184
|
|
179
185
|
def __iter__(self):
|
180
186
|
if not self.submissions_loaded:
|
@@ -7,7 +7,7 @@ import asyncio
|
|
7
7
|
from ..utils import headers, PreciseRateLimiter
|
8
8
|
from .eftsquery import EFTSQuery
|
9
9
|
import aiohttp
|
10
|
-
|
10
|
+
from zoneinfo import ZoneInfo
|
11
11
|
|
12
12
|
async def poll_rss(limiter):
|
13
13
|
base_url = 'https://www.sec.gov/cgi-bin/browse-edgar?count=100&action=getcurrent&output=rss'
|
@@ -47,9 +47,22 @@ async def poll_rss(limiter):
|
|
47
47
|
return results
|
48
48
|
|
49
49
|
def clean_efts_hits(hits):
    """Reduce raw EFTS hits to the fields used downstream.

    Each hit's ``_source`` is mapped to a dict with:

    * ``accession``: ``adsh`` with dashes removed, as an int
    * ``filing_date``: ``file_date`` unchanged
    * ``ciks``: CIKs normalised to ``str(int(cik))`` (leading zeros dropped);
      entries that are not purely decimal are skipped
    * ``submission_type``: ``file_type`` unchanged

    Args:
        hits: Iterable of hit dicts, each carrying a ``_source`` mapping.

    Returns:
        A list of cleaned dicts, one per input hit, in input order.
    """
    cleaned_hits = []
    for hit in hits:
        source = hit['_source']

        # A missing key and an explicit null are both treated as "no CIKs".
        raw_ciks = source.get('ciks') or []

        standardized_ciks = []
        for cik in raw_ciks:
            cik_text = str(cik)  # tolerate CIKs that arrive as ints
            # isdecimal() (unlike isdigit()) rejects characters such as
            # superscript digits that int() cannot parse.
            if cik_text.isdecimal():
                standardized_ciks.append(str(int(cik_text)))

        cleaned_hits.append({
            'accession': int(source['adsh'].replace('-', '')),
            'filing_date': source['file_date'],
            'ciks': standardized_ciks,
            'submission_type': source['file_type'],
        })
    return cleaned_hits
|
53
66
|
|
54
67
|
class Monitor():
|
55
68
|
def __init__(self):
|
@@ -92,7 +105,7 @@ class Monitor():
|
|
92
105
|
|
93
106
|
# Backfill if start_date is provided
|
94
107
|
if start_date is not None:
|
95
|
-
today_date = datetime.now().strftime('%Y-%m-%d')
|
108
|
+
today_date = datetime.now(ZoneInfo("America/New_York")).strftime('%Y-%m-%d')
|
96
109
|
if not quiet:
|
97
110
|
print(f"Backfilling from {start_date} to {today_date}")
|
98
111
|
|
@@ -135,7 +148,7 @@ class Monitor():
|
|
135
148
|
# EFTS validation (if enabled)
|
136
149
|
if do_validation and (current_time - last_validation_time) >= validation_interval/1000:
|
137
150
|
# Get submissions from the last 24 hours for validation
|
138
|
-
today_date = datetime.now().strftime('%Y-%m-%d')
|
151
|
+
today_date = datetime.now(ZoneInfo("America/New_York")).strftime('%Y-%m-%d')
|
139
152
|
if not quiet:
|
140
153
|
print(f"Validating submissions from {today_date}")
|
141
154
|
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.5.4
|
3
|
+
Version: 1.5.8
|
4
4
|
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -16,3 +16,4 @@ Requires-Dist: pytz
|
|
16
16
|
Requires-Dist: zstandard
|
17
17
|
Requires-Dist: doc2dict
|
18
18
|
Requires-Dist: secsgml
|
19
|
+
Requires-Dist: websocket-client
|
@@ -13,6 +13,8 @@ datamule.egg-info/dependency_links.txt
|
|
13
13
|
datamule.egg-info/requires.txt
|
14
14
|
datamule.egg-info/top_level.txt
|
15
15
|
datamule/data/listed_filer_metadata.csv
|
16
|
+
datamule/datamule/__init__.py
|
17
|
+
datamule/datamule/sec_connector.py
|
16
18
|
datamule/document/__init__.py
|
17
19
|
datamule/document/document.py
|
18
20
|
datamule/document/processing.py
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="1.5.4",
|
35
|
+
version="1.5.8",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
@@ -49,6 +49,7 @@ setup(
|
|
49
49
|
'zstandard',
|
50
50
|
'doc2dict',
|
51
51
|
'secsgml',
|
52
|
+
'websocket-client',
|
52
53
|
],
|
53
54
|
# Include the data directory in the package
|
54
55
|
package_data={
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{datamule-1.5.4/datamule/mapping_dicts → datamule-1.5.8/datamule/document/mappings}/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{datamule-1.5.4/datamule/sec/submissions → datamule-1.5.8/datamule/sec/infrastructure}/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|