datamule 1.5.4__py3-none-any.whl → 1.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,73 @@
1
+ import os
2
+ import json
3
+ import urllib.request
4
+ import websocket
5
+
6
+
7
class SecConnector:
    """Stream SEC submission data from the datamule WebSocket service.

    The connector exchanges an API key for a JWT at ``auth_url`` and then
    opens a WebSocket to ``websocket_url`` using that token.

    Args:
        api_key: API key sent to the auth endpoint. Falls back to the
            ``DATAMULE_API_KEY`` environment variable when omitted or empty.
        quiet: When true, suppress the progress messages printed to stdout.

    Raises:
        ValueError: If no API key is supplied and ``DATAMULE_API_KEY`` is
            unset or empty.
    """

    # Upper bound, in seconds, on the auth HTTP request so an unresponsive
    # endpoint cannot block the caller indefinitely.
    AUTH_TIMEOUT = 30

    def __init__(self, api_key=None, quiet=False):
        self.api_key = api_key or os.getenv('DATAMULE_API_KEY')
        if not self.api_key:
            raise ValueError("API key not found. Set DATAMULE_API_KEY or provide api_key parameter.")

        self.quiet = quiet
        self.auth_url = "https://sec-websocket-auth-worker.jgfriedman99.workers.dev/"
        # NOTE(review): this is an unencrypted ws:// endpoint addressed by raw
        # IP, so the JWT in the query string travels in clear text. Switch to
        # wss:// once the server offers TLS -- TODO confirm server support.
        self.websocket_url = "ws://3.80.249.191:8080/ws"

    @staticmethod
    def _with_query(base_url, **params):
        """Return ``base_url`` with ``params`` appended as an encoded query string."""
        # Imported locally so the block does not depend on ``urllib.parse``
        # having been loaded as a side effect of ``import urllib.request``.
        from urllib.parse import urlencode

        # Percent-encoding keeps keys/tokens containing '&', '=', '#', spaces
        # etc. from corrupting the request.
        return f"{base_url}?{urlencode(params)}"

    def _log(self, message):
        """Print ``message`` unless the connector was created with ``quiet=True``."""
        if not self.quiet:
            print(message)

    def _get_jwt_token(self):
        """Exchange the API key for a JWT at the auth endpoint.

        Returns:
            The value of the ``token`` field of the JSON auth response.

        Raises:
            RuntimeError: If the response's ``success`` field is missing or
                falsy. (Subclass of ``Exception``, which earlier versions
                raised directly.)
            KeyError: If a successful response carries no ``token`` field.
            urllib.error.URLError: On network/HTTP failures, including the
                request exceeding ``AUTH_TIMEOUT`` seconds.
        """
        self._log("Getting JWT token...")

        # NOTE(review): the API key is sent as a query parameter, so it can
        # end up in proxy/server access logs; a header would be safer if the
        # auth worker accepts one.
        url = self._with_query(self.auth_url, api_key=self.api_key)

        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        req.add_header('Accept', 'application/json')

        with urllib.request.urlopen(req, timeout=self.AUTH_TIMEOUT) as response:
            data = json.loads(response.read().decode())

        if not data.get('success'):
            raise RuntimeError(f"Auth failed: {data.get('error')}")

        self._log("JWT token obtained")

        return data['token']

    def connect(self, data_callback=None):
        """Authenticate, open the WebSocket and dispatch incoming data.

        Each incoming message is parsed as JSON and its ``data`` field
        (default: empty list) is handed to ``data_callback``. The call runs
        ``WebSocketApp.run_forever()`` and only returns when that does.

        Args:
            data_callback: Optional callable invoked with the ``data`` value
                of every received message.
        """
        token = self._get_jwt_token()
        ws_url = self._with_query(self.websocket_url, token=token)

        self._log("Connecting to WebSocket...")

        def on_open(ws):
            self._log("WebSocket connected")

        def on_message(ws, message):
            response = json.loads(message)
            data = response.get('data', [])
            self._log(f"Received data: {len(data)} items")
            if data_callback:
                data_callback(data)  # Pass just the data array

        def on_error(ws, error):
            self._log(f"WebSocket error: {error}")

        def on_close(ws, close_status_code, close_msg):
            self._log("WebSocket closed")

        ws = websocket.WebSocketApp(
            ws_url,
            on_open=on_open,
            on_message=on_message,
            on_error=on_error,
            on_close=on_close,
        )

        ws.run_forever()
datamule/portfolio.py CHANGED
@@ -11,6 +11,7 @@ from .seclibrary.downloader import download as seclibrary_download
11
11
  from .sec.xbrl.filter_xbrl import filter_xbrl
12
12
  from .sec.submissions.monitor import Monitor
13
13
  #from .sec.xbrl.xbrlmonitor import XBRLMonitor
14
+ from .datamule.sec_connector import SecConnector
14
15
 
15
16
 
16
17
  class Portfolio:
@@ -175,6 +176,11 @@ class Portfolio:
175
176
  validation_interval=validation_interval
176
177
  )
177
178
 
179
def stream_submissions(self, data_callback=None, quiet=False):
    """Open a SecConnector stream using this portfolio's API key.

    Args:
        data_callback: Optional callable forwarded to
            ``SecConnector.connect``.
        quiet: Forwarded to ``SecConnector``; when true its progress
            messages are suppressed.
    """
    # NOTE(review): connect() presumably blocks for the lifetime of the
    # WebSocket connection -- confirm before calling from an event loop.
    SecConnector(api_key=self.api_key, quiet=quiet).connect(
        data_callback=data_callback
    )
183
+
178
184
 
179
185
  def __iter__(self):
180
186
  if not self.submissions_loaded:
@@ -7,7 +7,7 @@ import asyncio
7
7
  from ..utils import headers, PreciseRateLimiter
8
8
  from .eftsquery import EFTSQuery
9
9
  import aiohttp
10
-
10
+ from zoneinfo import ZoneInfo
11
11
 
12
12
  async def poll_rss(limiter):
13
13
  base_url = 'https://www.sec.gov/cgi-bin/browse-edgar?count=100&action=getcurrent&output=rss'
@@ -47,9 +47,22 @@ async def poll_rss(limiter):
47
47
  return results
48
48
 
49
49
def _standardize_cik(cik):
    """Return ``cik`` as a decimal string without leading zeros, or None if it is not a plain non-negative integer."""
    text = str(cik).strip()
    # ``isascii`` guards against characters such as '²' that satisfy
    # ``isdigit`` but make ``int()`` raise ValueError.
    if text.isascii() and text.isdigit():
        return str(int(text))
    return None


def clean_efts_hits(hits):
    """Reduce raw EFTS search hits to the fields used downstream.

    Args:
        hits: Iterable of EFTS hit dicts, each with a ``_source`` mapping
            containing ``adsh``, ``file_date``, ``file_type`` and optionally
            ``ciks``.

    Returns:
        A list of dicts with keys ``accession`` (the ``adsh`` value with
        dashes removed, as an int), ``filing_date``, ``ciks`` (list of CIK
        strings with leading zeros stripped) and ``submission_type``.
        CIK entries that are not plain decimal numbers are dropped.

    Raises:
        KeyError: If a hit lacks ``_source``, ``adsh``, ``file_date`` or
            ``file_type``.
    """
    cleaned_hits = []
    for hit in hits:
        source = hit['_source']

        # A missing key and an explicit null are both treated as "no CIKs".
        raw_ciks = source.get('ciks') or []

        # Accept CIKs given as zero-padded strings or as integers.
        standardized_ciks = [
            cik
            for cik in map(_standardize_cik, raw_ciks)
            if cik is not None
        ]

        cleaned_hits.append({
            'accession': int(source['adsh'].replace('-', '')),
            'filing_date': source['file_date'],
            'ciks': standardized_ciks,
            'submission_type': source['file_type'],
        })
    return cleaned_hits
53
66
 
54
67
  class Monitor():
55
68
  def __init__(self):
@@ -92,7 +105,7 @@ class Monitor():
92
105
 
93
106
  # Backfill if start_date is provided
94
107
  if start_date is not None:
95
- today_date = datetime.now().date().strftime('%Y-%m-%d')
108
+ today_date = datetime.now(ZoneInfo("America/New_York")).strftime('%Y-%m-%d')
96
109
  if not quiet:
97
110
  print(f"Backfilling from {start_date} to {today_date}")
98
111
 
@@ -135,7 +148,7 @@ class Monitor():
135
148
  # EFTS validation (if enabled)
136
149
  if do_validation and (current_time - last_validation_time) >= validation_interval/1000:
137
150
  # Get submissions from the last 24 hours for validation
138
- today_date = datetime.now().strftime('%Y-%m-%d')
151
+ today_date = datetime.now(ZoneInfo("America/New_York")).strftime('%Y-%m-%d')
139
152
  if not quiet:
140
153
  print(f"Validating submissions from {today_date}")
141
154
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.5.4
3
+ Version: 1.5.8
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -16,4 +16,5 @@ Requires-Dist: pytz
16
16
  Requires-Dist: zstandard
17
17
  Requires-Dist: doc2dict
18
18
  Requires-Dist: secsgml
19
+ Requires-Dist: websocket-client
19
20
 
@@ -3,10 +3,12 @@ datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
3
3
  datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
4
4
  datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
5
5
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
6
- datamule/portfolio.py,sha256=iW54frGfoCQb-6aYfocDqQQPe0gc_22voedv0It_1q0,7517
6
+ datamule/portfolio.py,sha256=Ijx4JFRHSzPoGJRdOTv8c90x79M80LlAXUhUncwYZSo,7755
7
7
  datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
8
8
  datamule/submission.py,sha256=6JIi-ayLL-jENVj6Q4IhmrYlAreJI7xBAHP_NYaDB6k,12918
9
9
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
10
+ datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ datamule/datamule/sec_connector.py,sha256=T3edE7I-d4oHysqj7zYlIOxH3Fuauj9tfw39UdFWvB8,2393
10
12
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
13
  datamule/document/document.py,sha256=04Rivdphq0D1HEGIBjtl1LelJr-IyQU1qCMi8yNJajw,14038
12
14
  datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
@@ -46,7 +48,7 @@ datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNs
46
48
  datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
49
  datamule/sec/submissions/downloader.py,sha256=tDWn8bsK9XabQo2pBGYSiqTw37MmqM8rEma8Ph7zp-o,1391
48
50
  datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
49
- datamule/sec/submissions/monitor.py,sha256=CvpHywnrn4Lwk_3rWRE5K5UNYrdJ9Gyon97Uo0Ocq-4,8985
51
+ datamule/sec/submissions/monitor.py,sha256=ll0nfHzG8FI3bA8zVFrfsfZGnbt5qAD4rRZ4LG2SORY,9567
50
52
  datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
51
53
  datamule/sec/submissions/textsearch.py,sha256=MKDXEz_VI_0ljl73_aw2lx4MVzJW5uDt8KxjvJBwPwM,5794
52
54
  datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -58,7 +60,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
58
60
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
59
61
  datamule/seclibrary/downloader.py,sha256=wNRURTGb3eqg12Ltt4578L0WcAm7DmCWg0Rm0Om6Z4U,17959
60
62
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
61
- datamule-1.5.4.dist-info/METADATA,sha256=jl-zXUtvVrWz4Etn1BW8zsZ2AQ7CaE-zDF18sS0Lf7E,469
62
- datamule-1.5.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
63
- datamule-1.5.4.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
64
- datamule-1.5.4.dist-info/RECORD,,
63
+ datamule-1.5.8.dist-info/METADATA,sha256=kfV8_aDjqzk6OZKmJn4GIffpvTW-SYi55O1qSOEnsGQ,501
64
+ datamule-1.5.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
65
+ datamule-1.5.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
66
+ datamule-1.5.8.dist-info/RECORD,,