datamule 1.5.5__tar.gz → 1.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.5.5 → datamule-1.5.9}/PKG-INFO +2 -1
- datamule-1.5.9/datamule/datamule/sec_connector.py +73 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/portfolio.py +16 -3
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/submissions/downloader.py +3 -1
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/submissions/monitor.py +16 -3
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/submissions/streamer.py +7 -2
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/xbrl/filter_xbrl.py +0 -2
- datamule-1.5.9/datamule/seclibrary/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/seclibrary/downloader.py +14 -27
- {datamule-1.5.5 → datamule-1.5.9}/datamule.egg-info/PKG-INFO +2 -1
- {datamule-1.5.5 → datamule-1.5.9}/datamule.egg-info/SOURCES.txt +2 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule.egg-info/requires.txt +1 -0
- {datamule-1.5.5 → datamule-1.5.9}/setup.py +2 -1
- {datamule-1.5.5 → datamule-1.5.9}/datamule/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/config.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-1.5.5/datamule/document → datamule-1.5.9/datamule/datamule}/__init__.py +0 -0
- {datamule-1.5.5/datamule/document/mappings → datamule-1.5.9/datamule/document}/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/document.py +0 -0
- {datamule-1.5.5/datamule/mapping_dicts → datamule-1.5.9/datamule/document/mappings}/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/atsn.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/cfportal.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/d.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ex102_abs.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ex99a_sdr.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ex99c_sdr.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ex99g_sdr.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ex99i_sdr.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/information_table.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/nmfp.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/npx.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/onefourtyfour.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ownership.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/proxy_voting_record.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/sbs.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/sbsef.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/schedule13.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/sdr.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/submission_metadata.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/ta.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/thirteenfhr.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/twentyfivense.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/mappings/twentyfourf2nt.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/processing.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/document/table.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/helper.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/index.py +0 -0
- {datamule-1.5.5/datamule/sec → datamule-1.5.9/datamule/mapping_dicts}/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/package_updater.py +0 -0
- {datamule-1.5.5/datamule/sec/infrastructure → datamule-1.5.9/datamule/sec}/__init__.py +0 -0
- {datamule-1.5.5/datamule/sec/submissions → datamule-1.5.9/datamule/sec/infrastructure}/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-1.5.5/datamule/sec/xbrl → datamule-1.5.9/datamule/sec/submissions}/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/utils.py +0 -0
- {datamule-1.5.5/datamule/seclibrary → datamule-1.5.9/datamule/sec/xbrl}/__init__.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/seclibrary/bq.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/seclibrary/query.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/sheet.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule/submission.py +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.5.5 → datamule-1.5.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.5.5
|
3
|
+
Version: 1.5.9
|
4
4
|
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -16,3 +16,4 @@ Requires-Dist: pytz
|
|
16
16
|
Requires-Dist: zstandard
|
17
17
|
Requires-Dist: doc2dict
|
18
18
|
Requires-Dist: secsgml
|
19
|
+
Requires-Dist: websocket-client
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import urllib.request
|
4
|
+
import websocket
|
5
|
+
|
6
|
+
|
7
|
+
class SecConnector:
|
8
|
+
def __init__(self, api_key=None, quiet=False):
|
9
|
+
self.api_key = api_key or os.getenv('DATAMULE_API_KEY')
|
10
|
+
if not self.api_key:
|
11
|
+
raise ValueError("API key not found. Set DATAMULE_API_KEY or provide api_key parameter.")
|
12
|
+
|
13
|
+
self.quiet = quiet
|
14
|
+
self.auth_url = "https://sec-websocket-auth-worker.jgfriedman99.workers.dev/"
|
15
|
+
self.websocket_url = "ws://3.80.249.191:8080/ws"
|
16
|
+
|
17
|
+
def _get_jwt_token(self):
|
18
|
+
if not self.quiet:
|
19
|
+
print("Getting JWT token...")
|
20
|
+
|
21
|
+
url = f"{self.auth_url}?api_key={self.api_key}"
|
22
|
+
|
23
|
+
req = urllib.request.Request(url)
|
24
|
+
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
|
25
|
+
req.add_header('Accept', 'application/json')
|
26
|
+
|
27
|
+
with urllib.request.urlopen(req) as response:
|
28
|
+
data = json.loads(response.read().decode())
|
29
|
+
|
30
|
+
if not data.get('success'):
|
31
|
+
raise Exception(f"Auth failed: {data.get('error')}")
|
32
|
+
|
33
|
+
if not self.quiet:
|
34
|
+
print("JWT token obtained")
|
35
|
+
|
36
|
+
return data['token']
|
37
|
+
|
38
|
+
def connect(self, data_callback=None):
|
39
|
+
token = self._get_jwt_token()
|
40
|
+
ws_url = f"{self.websocket_url}?token={token}"
|
41
|
+
|
42
|
+
if not self.quiet:
|
43
|
+
print("Connecting to WebSocket...")
|
44
|
+
|
45
|
+
def on_open(ws):
|
46
|
+
if not self.quiet:
|
47
|
+
print("WebSocket connected")
|
48
|
+
|
49
|
+
def on_message(ws, message):
|
50
|
+
response = json.loads(message)
|
51
|
+
data = response.get('data', [])
|
52
|
+
if not self.quiet:
|
53
|
+
print(f"Received data: {len(data)} items")
|
54
|
+
if data_callback:
|
55
|
+
data_callback(data) # Pass just the data array
|
56
|
+
|
57
|
+
def on_error(ws, error):
|
58
|
+
if not self.quiet:
|
59
|
+
print(f"WebSocket error: {error}")
|
60
|
+
|
61
|
+
def on_close(ws, close_status_code, close_msg):
|
62
|
+
if not self.quiet:
|
63
|
+
print("WebSocket closed")
|
64
|
+
|
65
|
+
ws = websocket.WebSocketApp(
|
66
|
+
ws_url,
|
67
|
+
on_open=on_open,
|
68
|
+
on_message=on_message,
|
69
|
+
on_error=on_error,
|
70
|
+
on_close=on_close
|
71
|
+
)
|
72
|
+
|
73
|
+
ws.run_forever()
|
@@ -11,6 +11,7 @@ from .seclibrary.downloader import download as seclibrary_download
|
|
11
11
|
from .sec.xbrl.filter_xbrl import filter_xbrl
|
12
12
|
from .sec.submissions.monitor import Monitor
|
13
13
|
#from .sec.xbrl.xbrlmonitor import XBRLMonitor
|
14
|
+
from .datamule.sec_connector import SecConnector
|
14
15
|
|
15
16
|
|
16
17
|
class Portfolio:
|
@@ -126,7 +127,7 @@ class Portfolio:
|
|
126
127
|
self.accession_numbers = new_accession_numbers
|
127
128
|
|
128
129
|
def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,document_type=[],
|
129
|
-
requests_per_second=5,keep_filtered_metadata=False,standardize_metadata=True, **kwargs):
|
130
|
+
requests_per_second=5,keep_filtered_metadata=False,standardize_metadata=True,skip_existing=True, **kwargs):
|
130
131
|
if provider is None:
|
131
132
|
config = Config()
|
132
133
|
provider = config.get_default_source()
|
@@ -134,6 +135,11 @@ class Portfolio:
|
|
134
135
|
# Process CIK and metadata filters
|
135
136
|
cik = _process_cik_and_metadata_filters(cik, ticker, **kwargs)
|
136
137
|
|
138
|
+
accession_numbers = self.accession_numbers if hasattr(self, 'accession_numbers') else None
|
139
|
+
skip_accession_numbers = []
|
140
|
+
if skip_existing:
|
141
|
+
skip_accession_numbers = [sub.accession for sub in self]
|
142
|
+
|
137
143
|
if provider == 'datamule':
|
138
144
|
|
139
145
|
seclibrary_download(
|
@@ -142,10 +148,11 @@ class Portfolio:
|
|
142
148
|
api_key=self.api_key,
|
143
149
|
submission_type=submission_type,
|
144
150
|
filing_date=filing_date,
|
145
|
-
accession_numbers=
|
151
|
+
accession_numbers=accession_numbers,
|
146
152
|
keep_document_types=document_type,
|
147
153
|
keep_filtered_metadata=keep_filtered_metadata,
|
148
154
|
standardize_metadata=standardize_metadata,
|
155
|
+
skip_accession_numbers=skip_accession_numbers
|
149
156
|
)
|
150
157
|
else:
|
151
158
|
sec_download(
|
@@ -154,10 +161,11 @@ class Portfolio:
|
|
154
161
|
submission_type=submission_type,
|
155
162
|
filing_date=filing_date,
|
156
163
|
requests_per_second=requests_per_second,
|
157
|
-
accession_numbers=
|
164
|
+
accession_numbers=accession_numbers,
|
158
165
|
keep_document_types=document_type,
|
159
166
|
keep_filtered_metadata=keep_filtered_metadata,
|
160
167
|
standardize_metadata=standardize_metadata,
|
168
|
+
skip_accession_numbers=skip_accession_numbers
|
161
169
|
)
|
162
170
|
|
163
171
|
self.submissions_loaded = False
|
@@ -175,6 +183,11 @@ class Portfolio:
|
|
175
183
|
validation_interval=validation_interval
|
176
184
|
)
|
177
185
|
|
186
|
+
def stream_submissions(self,data_callback=None,quiet=False):
|
187
|
+
|
188
|
+
connector = SecConnector(api_key=self.api_key,quiet=quiet)
|
189
|
+
connector.connect(data_callback=data_callback)
|
190
|
+
|
178
191
|
|
179
192
|
def __iter__(self):
|
180
193
|
if not self.submissions_loaded:
|
@@ -5,7 +5,8 @@ from tqdm import tqdm
|
|
5
5
|
|
6
6
|
def download(cik=None, submission_type=None, filing_date=None, location=None, name=None,
|
7
7
|
requests_per_second=5, output_dir="filings", accession_numbers=None,
|
8
|
-
quiet=False, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True):
|
8
|
+
quiet=False, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True,
|
9
|
+
skip_accession_numbers=[]):
|
9
10
|
# Make sure output directory exists
|
10
11
|
os.makedirs(output_dir, exist_ok=True)
|
11
12
|
|
@@ -29,5 +30,6 @@ def download(cik=None, submission_type=None, filing_date=None, location=None, na
|
|
29
30
|
requests_per_second=requests_per_second,
|
30
31
|
document_callback=callback_wrapper,
|
31
32
|
accession_numbers=accession_numbers,
|
33
|
+
skip_accession_numbers=skip_accession_numbers,
|
32
34
|
quiet=quiet
|
33
35
|
)
|
@@ -47,9 +47,22 @@ async def poll_rss(limiter):
|
|
47
47
|
return results
|
48
48
|
|
49
49
|
def clean_efts_hits(hits):
|
50
|
-
# clean hits
|
51
|
-
|
52
|
-
|
50
|
+
# clean hits and standardize CIKs to string(int)
|
51
|
+
cleaned_hits = []
|
52
|
+
for hit in hits:
|
53
|
+
# Get CIKs from the source, ensure it's a list
|
54
|
+
raw_ciks = hit['_source'].get('ciks', [])
|
55
|
+
|
56
|
+
# Standardize each CIK: convert to int (removes leading zeros) then back to string
|
57
|
+
standardized_ciks = [str(int(cik)) for cik in raw_ciks if cik.isdigit()] # Added .isdigit() for robustness
|
58
|
+
|
59
|
+
cleaned_hits.append({
|
60
|
+
'accession': int(hit['_source']['adsh'].replace('-','')),
|
61
|
+
'filing_date': hit['_source']['file_date'],
|
62
|
+
'ciks': standardized_ciks, # Use the standardized CIKs here
|
63
|
+
'submission_type': hit['_source']['file_type']
|
64
|
+
})
|
65
|
+
return cleaned_hits
|
53
66
|
|
54
67
|
class Monitor():
|
55
68
|
def __init__(self):
|
@@ -21,7 +21,7 @@ def fix_filing_url(url):
|
|
21
21
|
return url
|
22
22
|
|
23
23
|
class Streamer(EFTSQuery):
|
24
|
-
def __init__(self, requests_per_second=5.0, document_callback=None, accession_numbers=None, quiet=False):
|
24
|
+
def __init__(self, requests_per_second=5.0, document_callback=None, accession_numbers=None,skip_accession_numbers=None, quiet=False):
|
25
25
|
super().__init__(requests_per_second=requests_per_second, quiet=quiet)
|
26
26
|
self.document_callback = document_callback
|
27
27
|
self.document_queue = asyncio.Queue()
|
@@ -32,6 +32,7 @@ class Streamer(EFTSQuery):
|
|
32
32
|
self.documents_processed = 0
|
33
33
|
self.total_documents = 0
|
34
34
|
self.accession_numbers = accession_numbers
|
35
|
+
self.skip_accession_numbers = skip_accession_numbers
|
35
36
|
self.skipped_documents = 0
|
36
37
|
|
37
38
|
async def _fetch_worker(self):
|
@@ -81,6 +82,9 @@ class Streamer(EFTSQuery):
|
|
81
82
|
if self.accession_numbers is not None and accno_w_dash not in self.accession_numbers:
|
82
83
|
return None, None, None
|
83
84
|
|
85
|
+
if self.skip_accession_numbers is not None and accno_w_dash in self.skip_accession_numbers:
|
86
|
+
return None, None, None
|
87
|
+
|
84
88
|
# Construct the URL
|
85
89
|
url = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accno_no_dash}/{accno_w_dash}.txt"
|
86
90
|
url = fix_filing_url(url)
|
@@ -218,7 +222,7 @@ class Streamer(EFTSQuery):
|
|
218
222
|
return results
|
219
223
|
|
220
224
|
def stream(cik=None, submission_type=None, filing_date=None, location=None,
|
221
|
-
requests_per_second=5.0, document_callback=None, accession_numbers=None,
|
225
|
+
requests_per_second=5.0, document_callback=None, accession_numbers=None,skip_accession_numbers=[],
|
222
226
|
quiet=False, name=None):
|
223
227
|
"""
|
224
228
|
Stream EFTS results and download documents into memory.
|
@@ -257,6 +261,7 @@ def stream(cik=None, submission_type=None, filing_date=None, location=None,
|
|
257
261
|
requests_per_second=requests_per_second,
|
258
262
|
document_callback=document_callback,
|
259
263
|
accession_numbers=accession_numbers,
|
264
|
+
skip_accession_numbers=skip_accession_numbers,
|
260
265
|
quiet=quiet
|
261
266
|
)
|
262
267
|
return await streamer.stream(cik, submission_type, filing_date, location, name)
|
@@ -5,8 +5,6 @@ from ..utils import headers
|
|
5
5
|
def fetch_frame(taxonomy, concept, unit, period):
|
6
6
|
url = f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{concept}/{unit}/{period}.json"
|
7
7
|
response = requests.get(url, headers=headers)
|
8
|
-
print(url)
|
9
|
-
print(response)
|
10
8
|
return response.json()
|
11
9
|
|
12
10
|
|
File without changes
|
@@ -14,7 +14,6 @@ from queue import Queue, Empty
|
|
14
14
|
from threading import Thread
|
15
15
|
from .query import query
|
16
16
|
from os import cpu_count
|
17
|
-
from ..submission import Submission
|
18
17
|
from secsgml import write_sgml_file_to_tar
|
19
18
|
|
20
19
|
|
@@ -235,7 +234,8 @@ class Downloader:
|
|
235
234
|
processor.stop_workers()
|
236
235
|
decompression_pool.shutdown()
|
237
236
|
|
238
|
-
def download(self, submission_type=None, cik=None, filing_date=None, output_dir="downloads", accession_numbers=None, keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True):
|
237
|
+
def download(self, submission_type=None, cik=None, filing_date=None, output_dir="downloads", accession_numbers=None, keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True,
|
238
|
+
skip_accession_numbers=[]):
|
239
239
|
"""
|
240
240
|
Query SEC filings and download/process them.
|
241
241
|
|
@@ -259,10 +259,18 @@ class Downloader:
|
|
259
259
|
filing_date=filing_date,
|
260
260
|
api_key=self.api_key
|
261
261
|
)
|
262
|
+
|
263
|
+
|
262
264
|
# After querying but before generating URLs
|
263
265
|
if accession_numbers:
|
266
|
+
accession_numbers = [str(int(item.replace('-',''))) for item in accession_numbers]
|
264
267
|
filings = [filing for filing in filings if filing['accession_number'] in accession_numbers]
|
265
268
|
|
269
|
+
|
270
|
+
if skip_accession_numbers:
|
271
|
+
skip_accession_numbers = [int(item.replace('-','')) for item in skip_accession_numbers]
|
272
|
+
filings = [filing for filing in filings if filing['accession_number'] not in skip_accession_numbers]
|
273
|
+
|
266
274
|
# Generate URLs from the query results
|
267
275
|
|
268
276
|
print(f"Generating URLs for {len(filings)} filings...")
|
@@ -355,7 +363,8 @@ class Downloader:
|
|
355
363
|
print(f"Processing speed: {len(urls)/elapsed_time:.2f} files/second")
|
356
364
|
|
357
365
|
|
358
|
-
def download(submission_type=None, cik=None, filing_date=None, api_key=None, output_dir="downloads", accession_numbers=None, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True):
|
366
|
+
def download(submission_type=None, cik=None, filing_date=None, api_key=None, output_dir="downloads", accession_numbers=None, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True,
|
367
|
+
skip_accession_numbers=[]):
|
359
368
|
"""
|
360
369
|
Query SEC filings and download/process them.
|
361
370
|
|
@@ -383,28 +392,6 @@ def download(submission_type=None, cik=None, filing_date=None, api_key=None, out
|
|
383
392
|
accession_numbers=accession_numbers,
|
384
393
|
keep_document_types=keep_document_types,
|
385
394
|
keep_filtered_metadata=keep_filtered_metadata,
|
386
|
-
standardize_metadata=standardize_metadata
|
395
|
+
standardize_metadata=standardize_metadata,
|
396
|
+
skip_accession_numbers=skip_accession_numbers
|
387
397
|
)
|
388
|
-
|
389
|
-
def download_files_using_filename(filenames, api_key=None, output_dir="downloads", keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True):
|
390
|
-
"""
|
391
|
-
Download and process SEC filings using specific filenames.
|
392
|
-
|
393
|
-
Parameters:
|
394
|
-
- filenames: List of specific filenames to download (e.g., ['000091205797006494.sgml', '000100704297000007.sgml.zst'])
|
395
|
-
- api_key: API key for datamule service (optional if DATAMULE_API_KEY env var is set)
|
396
|
-
- output_dir: Directory to save downloaded files
|
397
|
-
- keep_document_types: List of document types to keep (e.g., ['10-K', 'EX-10.1'])
|
398
|
-
- keep_filtered_metadata: Whether to keep metadata for filtered documents
|
399
|
-
- standardize_metadata: Whether to standardize metadata format
|
400
|
-
"""
|
401
|
-
downloader = Downloader(api_key=api_key)
|
402
|
-
downloader.QUEUE_SIZE = 1
|
403
|
-
downloader.MAX_CONCURRENT_DOWNLOADS = 1
|
404
|
-
downloader.download_files_using_filename(
|
405
|
-
filenames=filenames,
|
406
|
-
output_dir=output_dir,
|
407
|
-
keep_document_types=keep_document_types,
|
408
|
-
keep_filtered_metadata=keep_filtered_metadata,
|
409
|
-
standardize_metadata=standardize_metadata
|
410
|
-
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.5.5
|
3
|
+
Version: 1.5.9
|
4
4
|
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
@@ -16,3 +16,4 @@ Requires-Dist: pytz
|
|
16
16
|
Requires-Dist: zstandard
|
17
17
|
Requires-Dist: doc2dict
|
18
18
|
Requires-Dist: secsgml
|
19
|
+
Requires-Dist: websocket-client
|
@@ -13,6 +13,8 @@ datamule.egg-info/dependency_links.txt
|
|
13
13
|
datamule.egg-info/requires.txt
|
14
14
|
datamule.egg-info/top_level.txt
|
15
15
|
datamule/data/listed_filer_metadata.csv
|
16
|
+
datamule/datamule/__init__.py
|
17
|
+
datamule/datamule/sec_connector.py
|
16
18
|
datamule/document/__init__.py
|
17
19
|
datamule/document/document.py
|
18
20
|
datamule/document/processing.py
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="1.5.5",
|
35
|
+
version="1.5.9",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
@@ -49,6 +49,7 @@ setup(
|
|
49
49
|
'zstandard',
|
50
50
|
'doc2dict',
|
51
51
|
'secsgml',
|
52
|
+
'websocket-client',
|
52
53
|
],
|
53
54
|
# Include the data directory in the package
|
54
55
|
package_data={
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{datamule-1.5.5/datamule/mapping_dicts → datamule-1.5.9/datamule/document/mappings}/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{datamule-1.5.5/datamule/sec/submissions → datamule-1.5.9/datamule/sec/infrastructure}/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|