datamule 1.5.5__py3-none-any.whl → 1.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/datamule/__init__.py +0 -0
- datamule/datamule/sec_connector.py +73 -0
- datamule/portfolio.py +6 -0
- datamule/sec/submissions/monitor.py +16 -3
- {datamule-1.5.5.dist-info → datamule-1.5.8.dist-info}/METADATA +2 -1
- {datamule-1.5.5.dist-info → datamule-1.5.8.dist-info}/RECORD +8 -6
- {datamule-1.5.5.dist-info → datamule-1.5.8.dist-info}/WHEEL +0 -0
- {datamule-1.5.5.dist-info → datamule-1.5.8.dist-info}/top_level.txt +0 -0
File without changes
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import urllib.request
|
4
|
+
import websocket
|
5
|
+
|
6
|
+
|
7
|
+
class SecConnector:
    """Stream data from the datamule SEC WebSocket service.

    The connector exchanges an API key for a JWT at ``auth_url`` and then
    opens a WebSocket connection to ``websocket_url`` authenticated with that
    token. The ``data`` field of every received message is handed to a
    user-supplied callback.
    """

    # Seconds to wait for the auth endpoint. Without a timeout a stalled
    # server would block the caller indefinitely.
    AUTH_TIMEOUT = 30

    def __init__(self, api_key=None, quiet=False):
        """Create a connector.

        Args:
            api_key: API key to authenticate with. Falls back to the
                ``DATAMULE_API_KEY`` environment variable when omitted.
            quiet: When true, suppress the progress messages printed to
                stdout.

        Raises:
            ValueError: If no API key was given and the environment variable
                is unset or empty.
        """
        self.api_key = api_key or os.getenv('DATAMULE_API_KEY')
        if not self.api_key:
            raise ValueError("API key not found. Set DATAMULE_API_KEY or provide api_key parameter.")

        self.quiet = quiet
        self.auth_url = "https://sec-websocket-auth-worker.jgfriedman99.workers.dev/"
        # NOTE(review): plaintext ws:// to a fixed IP means the JWT travels
        # unencrypted. Switch to wss:// once the server supports TLS.
        self.websocket_url = "ws://3.80.249.191:8080/ws"

    @staticmethod
    def _with_query(base_url, **params):
        """Return *base_url* with *params* appended as an encoded query string."""
        # Function-scope import keeps this block self-contained.
        from urllib.parse import urlencode

        return f"{base_url}?{urlencode(params)}"

    def _log(self, message):
        """Print *message* unless the connector was created with ``quiet=True``."""
        if not self.quiet:
            print(message)

    def _get_jwt_token(self):
        """Exchange the API key for a JWT and return the token string.

        Raises:
            Exception: If the auth endpoint reports ``success`` as falsy.
            urllib.error.URLError: On network failure or timeout.
        """
        self._log("Getting JWT token...")

        # NOTE(review): the API key is sent as a query parameter, which can
        # end up in proxy/server logs. A header would be safer but requires a
        # server-side change. URL-encoding at least keeps keys containing
        # reserved characters (&, =, #, spaces) from corrupting the request.
        url = self._with_query(self.auth_url, api_key=self.api_key)

        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        req.add_header('Accept', 'application/json')

        with urllib.request.urlopen(req, timeout=self.AUTH_TIMEOUT) as response:
            data = json.loads(response.read().decode())

        if not data.get('success'):
            raise Exception(f"Auth failed: {data.get('error')}")

        self._log("JWT token obtained")

        return data['token']

    def connect(self, data_callback=None):
        """Authenticate, open the WebSocket and run until it closes.

        This call blocks inside ``run_forever()``.

        Args:
            data_callback: Optional callable invoked with the ``data`` value
                of each received JSON message (an empty list when the message
                has no ``data`` key).
        """
        token = self._get_jwt_token()
        ws_url = self._with_query(self.websocket_url, token=token)

        self._log("Connecting to WebSocket...")

        def on_open(ws):
            self._log("WebSocket connected")

        def on_message(ws, message):
            response = json.loads(message)
            data = response.get('data', [])
            self._log(f"Received data: {len(data)} items")
            if data_callback:
                data_callback(data)  # Pass just the data array

        def on_error(ws, error):
            self._log(f"WebSocket error: {error}")

        def on_close(ws, close_status_code, close_msg):
            self._log("WebSocket closed")

        ws = websocket.WebSocketApp(
            ws_url,
            on_open=on_open,
            on_message=on_message,
            on_error=on_error,
            on_close=on_close,
        )

        ws.run_forever()
|
datamule/portfolio.py
CHANGED
@@ -11,6 +11,7 @@ from .seclibrary.downloader import download as seclibrary_download
|
|
11
11
|
from .sec.xbrl.filter_xbrl import filter_xbrl
|
12
12
|
from .sec.submissions.monitor import Monitor
|
13
13
|
#from .sec.xbrl.xbrlmonitor import XBRLMonitor
|
14
|
+
from .datamule.sec_connector import SecConnector
|
14
15
|
|
15
16
|
|
16
17
|
class Portfolio:
|
@@ -175,6 +176,11 @@ class Portfolio:
|
|
175
176
|
validation_interval=validation_interval
|
176
177
|
)
|
177
178
|
|
179
|
+
def stream_submissions(self, data_callback=None, quiet=False):
    """Stream submissions through a ``SecConnector``.

    Builds a connector from this object's ``api_key`` and calls its
    ``connect`` method, passing ``data_callback`` through unchanged.

    Args:
        data_callback: Optional callable forwarded to ``SecConnector.connect``.
        quiet: Forwarded to ``SecConnector`` to silence its progress output.
    """
    stream = SecConnector(quiet=quiet, api_key=self.api_key)
    stream.connect(data_callback=data_callback)
|
183
|
+
|
178
184
|
|
179
185
|
def __iter__(self):
|
180
186
|
if not self.submissions_loaded:
|
@@ -47,9 +47,22 @@ async def poll_rss(limiter):
|
|
47
47
|
return results
|
48
48
|
|
49
49
|
def clean_efts_hits(hits):
    """Flatten raw search hits into records with standardized CIKs.

    Args:
        hits: Iterable of dicts, each with a ``_source`` mapping containing
            ``adsh``, ``file_date``, ``file_type`` and optionally ``ciks``.

    Returns:
        A list of dicts with keys ``accession`` (int, dashes removed),
        ``filing_date``, ``ciks`` (list of digit strings without leading
        zeros) and ``submission_type``. CIK values that are not numeric are
        dropped.

    Raises:
        KeyError: If a hit lacks ``_source`` or one of the required fields.
    """
    cleaned_hits = []
    for hit in hits:
        source = hit['_source']

        # `or []` covers an explicit null "ciks" value as well as a missing key.
        raw_ciks = source.get('ciks') or []
        standardized_ciks = [
            cik for cik in map(_standardize_cik, raw_ciks) if cik is not None
        ]

        cleaned_hits.append({
            'accession': int(source['adsh'].replace('-', '')),
            'filing_date': source['file_date'],
            'ciks': standardized_ciks,
            'submission_type': source['file_type'],
        })
    return cleaned_hits


def _standardize_cik(cik):
    """Return *cik* as a string without leading zeros, or None if not numeric."""
    if isinstance(cik, bool):
        return None
    if isinstance(cik, int):
        return str(cik) if cik >= 0 else None
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so values such as a superscript two are dropped instead of raising.
    if isinstance(cik, str) and cik.isdecimal():
        return str(int(cik))
    return None
|
53
66
|
|
54
67
|
class Monitor():
|
55
68
|
def __init__(self):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.5.5
|
3
|
+
Version: 1.5.8
|
4
4
|
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -16,4 +16,5 @@ Requires-Dist: pytz
|
|
16
16
|
Requires-Dist: zstandard
|
17
17
|
Requires-Dist: doc2dict
|
18
18
|
Requires-Dist: secsgml
|
19
|
+
Requires-Dist: websocket-client
|
19
20
|
|
@@ -3,10 +3,12 @@ datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
|
|
3
3
|
datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
|
4
4
|
datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
|
5
5
|
datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
|
6
|
-
datamule/portfolio.py,sha256=
|
6
|
+
datamule/portfolio.py,sha256=Ijx4JFRHSzPoGJRdOTv8c90x79M80LlAXUhUncwYZSo,7755
|
7
7
|
datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
|
8
8
|
datamule/submission.py,sha256=6JIi-ayLL-jENVj6Q4IhmrYlAreJI7xBAHP_NYaDB6k,12918
|
9
9
|
datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
|
10
|
+
datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
datamule/datamule/sec_connector.py,sha256=T3edE7I-d4oHysqj7zYlIOxH3Fuauj9tfw39UdFWvB8,2393
|
10
12
|
datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
13
|
datamule/document/document.py,sha256=04Rivdphq0D1HEGIBjtl1LelJr-IyQU1qCMi8yNJajw,14038
|
12
14
|
datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
|
@@ -46,7 +48,7 @@ datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNs
|
|
46
48
|
datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
49
|
datamule/sec/submissions/downloader.py,sha256=tDWn8bsK9XabQo2pBGYSiqTw37MmqM8rEma8Ph7zp-o,1391
|
48
50
|
datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
|
49
|
-
datamule/sec/submissions/monitor.py,sha256
|
51
|
+
datamule/sec/submissions/monitor.py,sha256=ll0nfHzG8FI3bA8zVFrfsfZGnbt5qAD4rRZ4LG2SORY,9567
|
50
52
|
datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
|
51
53
|
datamule/sec/submissions/textsearch.py,sha256=MKDXEz_VI_0ljl73_aw2lx4MVzJW5uDt8KxjvJBwPwM,5794
|
52
54
|
datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -58,7 +60,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
58
60
|
datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
|
59
61
|
datamule/seclibrary/downloader.py,sha256=wNRURTGb3eqg12Ltt4578L0WcAm7DmCWg0Rm0Om6Z4U,17959
|
60
62
|
datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
|
61
|
-
datamule-1.5.
|
62
|
-
datamule-1.5.
|
63
|
-
datamule-1.5.
|
64
|
-
datamule-1.5.
|
63
|
+
datamule-1.5.8.dist-info/METADATA,sha256=kfV8_aDjqzk6OZKmJn4GIffpvTW-SYi55O1qSOEnsGQ,501
|
64
|
+
datamule-1.5.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
65
|
+
datamule-1.5.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
66
|
+
datamule-1.5.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|