datamule 1.5.5__tar.gz → 1.5.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {datamule-1.5.5 → datamule-1.5.8}/PKG-INFO +2 -1
  2. datamule-1.5.8/datamule/datamule/sec_connector.py +73 -0
  3. {datamule-1.5.5 → datamule-1.5.8}/datamule/portfolio.py +6 -0
  4. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/submissions/monitor.py +16 -3
  5. datamule-1.5.8/datamule/seclibrary/__init__.py +0 -0
  6. {datamule-1.5.5 → datamule-1.5.8}/datamule.egg-info/PKG-INFO +2 -1
  7. {datamule-1.5.5 → datamule-1.5.8}/datamule.egg-info/SOURCES.txt +2 -0
  8. {datamule-1.5.5 → datamule-1.5.8}/datamule.egg-info/requires.txt +1 -0
  9. {datamule-1.5.5 → datamule-1.5.8}/setup.py +2 -1
  10. {datamule-1.5.5 → datamule-1.5.8}/datamule/__init__.py +0 -0
  11. {datamule-1.5.5 → datamule-1.5.8}/datamule/config.py +0 -0
  12. {datamule-1.5.5 → datamule-1.5.8}/datamule/data/listed_filer_metadata.csv +0 -0
  13. {datamule-1.5.5/datamule/document → datamule-1.5.8/datamule/datamule}/__init__.py +0 -0
  14. {datamule-1.5.5/datamule/document/mappings → datamule-1.5.8/datamule/document}/__init__.py +0 -0
  15. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/document.py +0 -0
  16. {datamule-1.5.5/datamule/mapping_dicts → datamule-1.5.8/datamule/document/mappings}/__init__.py +0 -0
  17. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/atsn.py +0 -0
  18. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/cfportal.py +0 -0
  19. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/d.py +0 -0
  20. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ex102_abs.py +0 -0
  21. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ex99a_sdr.py +0 -0
  22. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ex99c_sdr.py +0 -0
  23. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ex99g_sdr.py +0 -0
  24. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ex99i_sdr.py +0 -0
  25. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/information_table.py +0 -0
  26. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/nmfp.py +0 -0
  27. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/npx.py +0 -0
  28. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/onefourtyfour.py +0 -0
  29. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ownership.py +0 -0
  30. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/proxy_voting_record.py +0 -0
  31. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/sbs.py +0 -0
  32. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/sbsef.py +0 -0
  33. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/schedule13.py +0 -0
  34. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/sdr.py +0 -0
  35. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/submission_metadata.py +0 -0
  36. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/ta.py +0 -0
  37. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/thirteenfhr.py +0 -0
  38. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/twentyfivense.py +0 -0
  39. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/mappings/twentyfourf2nt.py +0 -0
  40. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/processing.py +0 -0
  41. {datamule-1.5.5 → datamule-1.5.8}/datamule/document/table.py +0 -0
  42. {datamule-1.5.5 → datamule-1.5.8}/datamule/helper.py +0 -0
  43. {datamule-1.5.5 → datamule-1.5.8}/datamule/index.py +0 -0
  44. {datamule-1.5.5/datamule/sec → datamule-1.5.8/datamule/mapping_dicts}/__init__.py +0 -0
  45. {datamule-1.5.5 → datamule-1.5.8}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
  46. {datamule-1.5.5 → datamule-1.5.8}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  47. {datamule-1.5.5 → datamule-1.5.8}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  48. {datamule-1.5.5 → datamule-1.5.8}/datamule/package_updater.py +0 -0
  49. {datamule-1.5.5/datamule/sec/infrastructure → datamule-1.5.8/datamule/sec}/__init__.py +0 -0
  50. {datamule-1.5.5/datamule/sec/submissions → datamule-1.5.8/datamule/sec/infrastructure}/__init__.py +0 -0
  51. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  52. {datamule-1.5.5/datamule/sec/xbrl → datamule-1.5.8/datamule/sec/submissions}/__init__.py +0 -0
  53. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/submissions/downloader.py +0 -0
  54. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/submissions/eftsquery.py +0 -0
  55. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/submissions/streamer.py +0 -0
  56. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/submissions/textsearch.py +0 -0
  57. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/utils.py +0 -0
  58. {datamule-1.5.5/datamule/seclibrary → datamule-1.5.8/datamule/sec/xbrl}/__init__.py +0 -0
  59. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  60. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  61. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  62. {datamule-1.5.5 → datamule-1.5.8}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  63. {datamule-1.5.5 → datamule-1.5.8}/datamule/seclibrary/bq.py +0 -0
  64. {datamule-1.5.5 → datamule-1.5.8}/datamule/seclibrary/downloader.py +0 -0
  65. {datamule-1.5.5 → datamule-1.5.8}/datamule/seclibrary/query.py +0 -0
  66. {datamule-1.5.5 → datamule-1.5.8}/datamule/sheet.py +0 -0
  67. {datamule-1.5.5 → datamule-1.5.8}/datamule/submission.py +0 -0
  68. {datamule-1.5.5 → datamule-1.5.8}/datamule.egg-info/dependency_links.txt +0 -0
  69. {datamule-1.5.5 → datamule-1.5.8}/datamule.egg-info/top_level.txt +0 -0
  70. {datamule-1.5.5 → datamule-1.5.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.5.5
3
+ Version: 1.5.8
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -16,3 +16,4 @@ Requires-Dist: pytz
16
16
  Requires-Dist: zstandard
17
17
  Requires-Dist: doc2dict
18
18
  Requires-Dist: secsgml
19
+ Requires-Dist: websocket-client
@@ -0,0 +1,73 @@
1
+ import os
2
+ import json
3
+ import urllib.request
4
+ import websocket
5
+
6
+
7
+ class SecConnector:
8
+ def __init__(self, api_key=None, quiet=False):
9
+ self.api_key = api_key or os.getenv('DATAMULE_API_KEY')
10
+ if not self.api_key:
11
+ raise ValueError("API key not found. Set DATAMULE_API_KEY or provide api_key parameter.")
12
+
13
+ self.quiet = quiet
14
+ self.auth_url = "https://sec-websocket-auth-worker.jgfriedman99.workers.dev/"
15
+ self.websocket_url = "ws://3.80.249.191:8080/ws"
16
+
17
+ def _get_jwt_token(self):
18
+ if not self.quiet:
19
+ print("Getting JWT token...")
20
+
21
+ url = f"{self.auth_url}?api_key={self.api_key}"
22
+
23
+ req = urllib.request.Request(url)
24
+ req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
25
+ req.add_header('Accept', 'application/json')
26
+
27
+ with urllib.request.urlopen(req) as response:
28
+ data = json.loads(response.read().decode())
29
+
30
+ if not data.get('success'):
31
+ raise Exception(f"Auth failed: {data.get('error')}")
32
+
33
+ if not self.quiet:
34
+ print("JWT token obtained")
35
+
36
+ return data['token']
37
+
38
+ def connect(self, data_callback=None):
39
+ token = self._get_jwt_token()
40
+ ws_url = f"{self.websocket_url}?token={token}"
41
+
42
+ if not self.quiet:
43
+ print("Connecting to WebSocket...")
44
+
45
+ def on_open(ws):
46
+ if not self.quiet:
47
+ print("WebSocket connected")
48
+
49
+ def on_message(ws, message):
50
+ response = json.loads(message)
51
+ data = response.get('data', [])
52
+ if not self.quiet:
53
+ print(f"Received data: {len(data)} items")
54
+ if data_callback:
55
+ data_callback(data) # Pass just the data array
56
+
57
+ def on_error(ws, error):
58
+ if not self.quiet:
59
+ print(f"WebSocket error: {error}")
60
+
61
+ def on_close(ws, close_status_code, close_msg):
62
+ if not self.quiet:
63
+ print("WebSocket closed")
64
+
65
+ ws = websocket.WebSocketApp(
66
+ ws_url,
67
+ on_open=on_open,
68
+ on_message=on_message,
69
+ on_error=on_error,
70
+ on_close=on_close
71
+ )
72
+
73
+ ws.run_forever()
@@ -11,6 +11,7 @@ from .seclibrary.downloader import download as seclibrary_download
11
11
  from .sec.xbrl.filter_xbrl import filter_xbrl
12
12
  from .sec.submissions.monitor import Monitor
13
13
  #from .sec.xbrl.xbrlmonitor import XBRLMonitor
14
+ from .datamule.sec_connector import SecConnector
14
15
 
15
16
 
16
17
  class Portfolio:
@@ -175,6 +176,11 @@ class Portfolio:
175
176
  validation_interval=validation_interval
176
177
  )
177
178
 
179
+ def stream_submissions(self,data_callback=None,quiet=False):
180
+
181
+ connector = SecConnector(api_key=self.api_key,quiet=quiet)
182
+ connector.connect(data_callback=data_callback)
183
+
178
184
 
179
185
  def __iter__(self):
180
186
  if not self.submissions_loaded:
@@ -47,9 +47,22 @@ async def poll_rss(limiter):
47
47
  return results
48
48
 
49
49
  def clean_efts_hits(hits):
50
- # clean hits
51
- hits = [{'accession': int(hit['_source']['adsh'].replace('-','')), 'filing_date': hit['_source']['file_date'], 'ciks': hit['_source']['ciks'], 'submission_type': hit['_source']['file_type']} for hit in hits]
52
- return hits
50
+ # clean hits and standardize CIKs to string(int)
51
+ cleaned_hits = []
52
+ for hit in hits:
53
+ # Get CIKs from the source, ensure it's a list
54
+ raw_ciks = hit['_source'].get('ciks', [])
55
+
56
+ # Standardize each CIK: convert to int (removes leading zeros) then back to string
57
+ standardized_ciks = [str(int(cik)) for cik in raw_ciks if cik.isdigit()] # Added .isdigit() for robustness
58
+
59
+ cleaned_hits.append({
60
+ 'accession': int(hit['_source']['adsh'].replace('-','')),
61
+ 'filing_date': hit['_source']['file_date'],
62
+ 'ciks': standardized_ciks, # Use the standardized CIKs here
63
+ 'submission_type': hit['_source']['file_type']
64
+ })
65
+ return cleaned_hits
53
66
 
54
67
  class Monitor():
55
68
  def __init__(self):
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.5.5
3
+ Version: 1.5.8
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -16,3 +16,4 @@ Requires-Dist: pytz
16
16
  Requires-Dist: zstandard
17
17
  Requires-Dist: doc2dict
18
18
  Requires-Dist: secsgml
19
+ Requires-Dist: websocket-client
@@ -13,6 +13,8 @@ datamule.egg-info/dependency_links.txt
13
13
  datamule.egg-info/requires.txt
14
14
  datamule.egg-info/top_level.txt
15
15
  datamule/data/listed_filer_metadata.csv
16
+ datamule/datamule/__init__.py
17
+ datamule/datamule/sec_connector.py
16
18
  datamule/document/__init__.py
17
19
  datamule/document/document.py
18
20
  datamule/document/processing.py
@@ -10,3 +10,4 @@ pytz
10
10
  zstandard
11
11
  doc2dict
12
12
  secsgml
13
+ websocket-client
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
32
32
  setup(
33
33
  name="datamule",
34
34
  author="John Friedman",
35
- version="1.5.5",
35
+ version="1.5.8",
36
36
  description="Work with SEC submissions at scale.",
37
37
  packages=find_packages(include=['datamule', 'datamule.*']),
38
38
  url="https://github.com/john-friedman/datamule-python",
@@ -49,6 +49,7 @@ setup(
49
49
  'zstandard',
50
50
  'doc2dict',
51
51
  'secsgml',
52
+ 'websocket-client',
52
53
  ],
53
54
  # Include the data directory in the package
54
55
  package_data={
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes