datamule-1.2.5-py3-none-any.whl → datamule-1.2.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/__init__.py +1 -0
- datamule/document/document.py +12 -8
- datamule/document/mappings/ex102_abs.py +63 -0
- datamule/document/mappings/information_table.py +1 -0
- datamule/document/mappings/ownership.py +1 -1
- datamule/document/mappings/proxy_voting_record.py +17 -1
- datamule/document/mappings/submission_metadata.py +9 -0
- datamule/document/mappings/thirteenfhr.py +70 -3
- datamule/document/mappings/twentyfivense.py +1 -0
- datamule/document/processing.py +71 -14
- datamule/document/table.py +48 -5
- datamule/helper.py +10 -1
- datamule/index.py +8 -10
- datamule/portfolio.py +16 -11
- datamule/sec/submissions/monitor.py +173 -120
- datamule/sec/submissions/textsearch.py +0 -4
- datamule/sec/xbrl/streamcompanyfacts.py +1 -1
- datamule/seclibrary/downloader.py +2 -2
- datamule/submission.py +80 -14
- {datamule-1.2.5.dist-info → datamule-1.2.7.dist-info}/METADATA +1 -2
- {datamule-1.2.5.dist-info → datamule-1.2.7.dist-info}/RECORD +23 -23
- datamule/sec/rss/__init__.py +0 -0
- datamule/sec/rss/monitor.py +0 -416
- {datamule-1.2.5.dist-info → datamule-1.2.7.dist-info}/WHEEL +0 -0
- {datamule-1.2.5.dist-info → datamule-1.2.7.dist-info}/top_level.txt +0 -0
datamule/sec/submissions/monitor.py
CHANGED
@@ -1,130 +1,183 @@
+import time
+from collections import deque
+from datetime import datetime
+import xml.etree.ElementTree as ET
+import re
 import asyncio
-from
-from .eftsquery import EFTSQuery
-
-import pytz
+from ..utils import headers, PreciseRateLimiter
+from .eftsquery import EFTSQuery
+import aiohttp
 
 
-async def
-
-    processed_hits = []
+async def poll_rss(limiter):
+    base_url = 'https://www.sec.gov/cgi-bin/browse-edgar?count=100&action=getcurrent&output=rss'
 
-    for
-
-
-
-        #
-
-
-        # Extract submission_type (form) and ciks
-        submission_type = source.get('form')
-        ciks = source.get('ciks', [])
-        ciks = [str(int(cik)) for cik in ciks]
-
-        filing_date = source.get('file_date')
-
-        # Create standardized filing record
-        filing = {
-            'accession_number': accession_number,
-            'submission_type': submission_type,
-            'ciks': ciks,
-            'filing_date': filing_date,
-
-        }
-
-        processed_hits.append(filing)
-        collected_accession_numbers.add(accession_number)  # Changed append to add for set operation
-
-    except Exception as e:
-        print(f"Error processing EFTS hit: {e}")
+    # Create a session specifically for this RSS polling operation
+    async with aiohttp.ClientSession(headers=headers) as session:
+        # Use the rate limiter before making the request
+        async with limiter:
+            # Make the HTTP request with the session
+            async with session.get(base_url) as response:
+                content = await response.read()
 
-    #
-
-
+    # Process the content
+    content_str = content.decode('utf-8')
+    root = ET.fromstring(content_str)
+    namespace = {'atom': 'http://www.w3.org/2005/Atom'}
+    entries = root.findall('atom:entry', namespace)
+    grouped = {}
+
+    for entry in entries:
+        url = entry.find('atom:link', namespace).get('href')
+        accession = re.search(r'/(\d{10})-(\d{2})-(\d{6})', url)
+        accession = accession.group(1) + accession.group(2) + accession.group(3)
+        cik = re.search(r'/data/(\d+)/', url).group(1)
 
-
-
-async def _master_monitor_impl(data_callback=None, poll_callback=None, submission_type=None, cik=None,
-                               polling_interval=200, requests_per_second=2.0, quiet=True, start_date=None):
-    """Implementation of the master monitor."""
-    # Set default start date to today if not provided (eastern)
-    eastern_tz = pytz.timezone('US/Eastern')
-    current_date = datetime.now(eastern_tz).strftime('%Y-%m-%d')
-    if not start_date:
-        start_date = current_date
+        if accession not in grouped:
+            grouped[accession] = {'submission_type': '', 'ciks': set(), 'filing_date': ''}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        grouped[accession]['ciks'].add(cik)
+        grouped[accession]['submission_type'] = entry.find('atom:category', namespace).get('term')
+        summary_text = entry.find('atom:summary', namespace).text
+        filing_date_match = re.search(r'Filed:</b>\s*(\d{4}-\d{2}-\d{2})', summary_text)
+        if filing_date_match:
+            grouped[accession]['filing_date'] = filing_date_match.group(1)
+
+    results = [{'accession': int(k.replace('-', '')), 'submission_type': v['submission_type'], 'ciks': list(v['ciks']), 'filing_date': v['filing_date']} for k, v in grouped.items()]
+    return results
+
+def clean_efts_hits(hits):
+    # clean hits
+    hits = [{'accession': int(hit['_source']['adsh'].replace('-','')), 'filing_date': hit['_source']['file_date'], 'ciks': hit['_source']['ciks']} for hit in hits]
+    return hits
+
+class Monitor():
+    def __init__(self):
+        self.accessions = deque(maxlen=50000)
+        self.ratelimiters = {'sec.gov': PreciseRateLimiter(rate=5)}
+        self.efts_query = EFTSQuery(quiet=True)
+        self.efts_query.limiter = self.ratelimiters['sec.gov']
+
+    def set_domain_rate_limit(self, domain, rate):
+        self.ratelimiters[domain] = PreciseRateLimiter(rate=rate)
+        if domain == 'sec.gov':
+            self.efts_query.limiter = self.ratelimiters[domain]
 
-
-
-
-
-
-
-
+    async def _async_run_efts_query(self, **kwargs):
+        """Async helper method to run EFTS query without creating a new event loop"""
+        # Make sure to set quiet parameter if provided in kwargs
+        self.efts_query.quiet = kwargs.get('quiet', True)
+        return await self.efts_query.query(
+            cik=kwargs.get('cik'),
+            submission_type=kwargs.get('submission_type'),
+            filing_date=kwargs.get('filing_date'),
+            location=kwargs.get('location'),
+            callback=kwargs.get('callback'),
+            name=kwargs.get('name')
         )
+
+    async def _async_monitor_submissions(self, data_callback=None, interval_callback=None,
+                                         polling_interval=1000, quiet=True, start_date=None,
+                                         validation_interval=60000):
+        """
+        Async implementation of monitor_submissions.
+        """
+
+        # Backfill if start_date is provided
+        if start_date is not None:
+            today_date = datetime.now().date().strftime('%Y-%m-%d')
+            if not quiet:
+                print(f"Backfilling from {start_date} to {today_date}")
+
+            hits = clean_efts_hits(await self._async_run_efts_query(
+                filing_date=(start_date, today_date),
+                quiet=quiet
+            ))
+
+            new_hits = self._filter_new_accessions(hits)
+            if not quiet:
+                print(f"New submissions found: {len(new_hits)}")
+            if new_hits and data_callback:
+                data_callback(new_hits)
+
+        last_polling_time = time.time()
+        last_validation_time = last_polling_time
+        current_time = last_polling_time
+
+        while True:
+            # RSS polling
+            if not quiet:
+                print(f"Polling RSS feed")
+            results = await poll_rss(self.ratelimiters['sec.gov'])
+            new_results = self._filter_new_accessions(results)
+            if new_results:
+                if not quiet:
+                    print(f"Found {len(new_results)} new submissions via RSS")
+                if data_callback:
+                    data_callback(new_results)
+
+            # EFTS validation
+            if validation_interval and (current_time - last_validation_time) >= validation_interval/1000:
+                # Get submissions from the last 24 hours for validation
+                today_date = datetime.now().strftime('%Y-%m-%d')
+                if not quiet:
+                    print(f"Validating submissions from {today_date}")
+
+                hits = clean_efts_hits(await self._async_run_efts_query(
+                    filing_date=(today_date, today_date),
+                    quiet=quiet
+                ))
+
+                new_hits = self._filter_new_accessions(hits)
+                if new_hits:
+                    if not quiet:
+                        print(f"Found {len(new_hits)} new submissions via EFTS validation")
+                    if data_callback:
+                        data_callback(new_hits)
+                last_polling_time = time.time()
+                last_validation_time = current_time
+
+            # Interval callback
+            if interval_callback:
+                interval_callback()
+
+            next_poll_time = last_polling_time + (polling_interval / 1000)
+            current_time = time.time()
+            time_to_sleep = max(0, next_poll_time - current_time)
+            await asyncio.sleep(time_to_sleep)
+            last_polling_time = next_poll_time
+
+
+    def monitor_submissions(self, data_callback=None, interval_callback=None,
+                            polling_interval=1000, quiet=True, start_date=None,
+                            validation_interval=60000):
+        """
+        Monitor SEC submissions using the EDGAR system.
+        :param data_callback: function to call with the data
+        :param interval_callback: function that executes between polls
+        :param polling_interval: interval between polls in milliseconds
+        :param quiet: if True, suppresses output
+        :param start_date: backfill start date in YYYY-MM-DD format
+        :param validation_interval: interval between validation in milliseconds
+
+        This function combines the speed of the RSS feed (fast, but misses some submissions) with the accuracy of the EFTS system.
+        """
+        # This is now a synchronous wrapper around the async implementation
+        return asyncio.run(self._async_monitor_submissions(
+            data_callback=data_callback,
+            interval_callback=interval_callback,
+            polling_interval=polling_interval,
+            quiet=quiet,
+            start_date=start_date,
+            validation_interval=validation_interval
+        ))
 
-
-
-
-
-
-
-
-
-
-    await start_monitor(
-        data_callback=data_callback,
-        poll_callback=poll_callback,
-        submission_type=submission_type,
-        cik=cik,
-        polling_interval=polling_interval,
-        requests_per_second=requests_per_second,
-        quiet=quiet,
-        known_accession_numbers=[],  # Start with an empty list for ongoing tracking
-        skip_initial_accession_numbers=collected_accession_numbers  # Pass the EFTS accession numbers as the skip list
-    )
-
-def monitor(data_callback=None, poll_callback=None, submission_type=None, cik=None,
-            polling_interval=200, requests_per_second=2.0, quiet=True, start_date=None):
-    """
-    Monitor SEC filings by combining EFTS historical queries with real-time RSS monitoring.
-
-    Parameters:
-        data_callback (callable): Async function to call when new filings are found.
-                                  Will be called with a list of dicts containing
-                                  'accession_number', 'submission_type', and 'ciks'.
-        poll_callback (callable): Async function to call during RSS polling wait periods.
-        submission_type (str or list): Form type(s) to monitor (e.g., "8-K", "10-Q").
-        cik (str or list): CIK(s) to monitor.
-        polling_interval (int): Polling interval in milliseconds for RSS monitor.
-        requests_per_second (float): Maximum requests per second.
-        quiet (bool): Suppress verbose output.
-        start_date (str): ISO format date (YYYY-MM-DD) from which to start monitoring.
-                          If None, will start from current date. (EASTERN TIME)
-    """
-    return asyncio.run(_master_monitor_impl(
-        data_callback=data_callback,
-        poll_callback=poll_callback,
-        submission_type=submission_type,
-        cik=cik,
-        polling_interval=polling_interval,
-        requests_per_second=requests_per_second,
-        quiet=quiet,
-        start_date=start_date
-    ))
+    def _filter_new_accessions(self, items):
+        """Filter items to only include those with new accession numbers."""
+        new_items = []
+        for item in items:
+            accession = item['accession']
+            if accession not in self.accessions:
+                self.accessions.append(accession)
+                new_items.append(item)
+        return new_items
datamule/sec/xbrl/streamcompanyfacts.py
CHANGED
@@ -2,7 +2,7 @@ import asyncio
 import aiohttp
 import json
 from tqdm import tqdm
-from ..utils import PreciseRateLimiter, RateMonitor
+from ..utils import PreciseRateLimiter, RateMonitor, headers
 
 async def fetch_company_facts(session, cik, rate_limiter, rate_monitor, pbar):
     # Format CIK with leading zeros to 10 digits
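For reference, the last context line's comment describes the usual zero-padding of CIKs before building a company-facts URL. A sketch of that step, where the endpoint shown is SEC's public XBRL companyfacts API and is included only for illustration:

# Sketch of the CIK-formatting step named in the comment above.
cik = 320193                     # Apple's CIK, as an example
padded = str(cik).zfill(10)      # '0000320193'
url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{padded}.json"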
datamule/seclibrary/downloader.py
CHANGED
@@ -1,7 +1,6 @@
 import os
 import asyncio
 import aiohttp
-from pathlib import Path
 from tqdm import tqdm
 import time
 import shutil
@@ -13,11 +12,12 @@ from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from queue import Queue, Empty
 from threading import Thread
-from secsgml import parse_sgml_submission
 from .query import query
 from os import cpu_count
 from ..submission import Submission
 
+
+
 class Downloader:
     def __init__(self, api_key=None):
         self.BASE_URL = "https://library.datamule.xyz/original/nc/"
datamule/submission.py
CHANGED
@@ -4,6 +4,70 @@ from .document.document import Document
 from secsgml import parse_sgml_submission_into_memory
 import os
 import aiofiles
+import tempfile
+
+
+# # NEW CODE YAY. probably will remove
+
+# def save_metadata_atomically(metadata_file_path, metadata_content):
+#     """Save metadata to a JSONL file atomically, works on any filesystem"""
+
+#     # Create directory if it doesn't exist
+#     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
+
+#     # Format the JSON with newline
+#     json_str = json.dumps(metadata_content, indent=4) + "\n"
+
+#     # Write complete content to a temporary file first
+#     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
+#     try:
+#         with os.fdopen(fd, 'w') as temp_file:
+#             temp_file.write(json_str)
+#             temp_file.flush()
+#             os.fsync(temp_file.fileno())  # Force write to disk
+
+#         # Append the temporary file to the main file
+#         with open(metadata_file_path, 'a') as target_file:
+#             with open(temp_path, 'r') as temp_read:
+#                 content = temp_read.read()
+#                 target_file.write(content)
+#                 target_file.flush()
+#                 os.fsync(target_file.fileno())  # Force write to disk
+#     finally:
+#         # Clean up the temporary file
+#         if os.path.exists(temp_path):
+#             os.unlink(temp_path)
+
+# async def save_metadata_atomically_async(metadata_file_path, metadata_content):
+#     """Save metadata to a JSONL file atomically in async mode"""
+
+#     # Create directory if it doesn't exist
+#     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
+
+#     # Format the JSON with newline
+#     json_str = json.dumps(metadata_content, indent=4) + "\n"
+
+#     # Write to a temporary file first
+#     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
+#     os.close(fd)  # Close the file descriptor
+
+#     try:
+#         async with aiofiles.open(temp_path, 'w') as temp_file:
+#             await temp_file.write(json_str)
+#             await temp_file.flush()
+
+#         # Append the temporary file to the main file
+#         async with aiofiles.open(metadata_file_path, 'a') as target_file:
+#             async with aiofiles.open(temp_path, 'r') as temp_read:
+#                 content = await temp_read.read()
+#                 await target_file.write(content)
+#                 await target_file.flush()
+#     finally:
+#         # Clean up the temporary file
+#         if os.path.exists(temp_path):
+#             os.unlink(temp_path)
+
+# # END OF NEW CODE
 
 
 class Submission:
@@ -15,16 +79,17 @@ class Submission:
 
         if sgml_content is not None:
             self.path = None
-
+            metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
+            self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=None)
 
            # code dupe
-            self.accession = self.metadata['accession-number']
-            self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
+            self.accession = self.metadata.content['accession-number']
+            self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
 
            self.documents = []
            filtered_metadata_documents = []
 
-            for idx,doc in enumerate(self.metadata['documents']):
+            for idx,doc in enumerate(self.metadata.content['documents']):
                type = doc.get('type')
 
                # Keep only specified types
@@ -36,17 +101,18 @@
 
                filtered_metadata_documents.append(doc)
 
-            self.metadata['documents'] = filtered_metadata_documents
+            self.metadata.content['documents'] = filtered_metadata_documents
 
        if path is not None:
            self.path = Path(path)
            metadata_path = self.path / 'metadata.json'
            with metadata_path.open('r') as f:
-
+                metadata = json.load(f)
+            self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
 
            # Code dupe
-            self.accession = self.metadata['accession-number']
-            self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
+            self.accession = self.metadata.content['accession-number']
+            self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
 
 
 
@@ -58,7 +124,7 @@
        else:
            document_types = document_type
 
-        for idx,doc in enumerate(self.metadata['documents']):
+        for idx,doc in enumerate(self.metadata.content['documents']):
            if doc['type'] in document_types:
 
                # if loaded from path
@@ -84,7 +150,7 @@
 
 
    def __iter__(self):
-        for idx,doc in enumerate(self.metadata['documents']):
+        for idx,doc in enumerate(self.metadata.content['documents']):
            # if loaded from path
            if self.path is not None:
                filename = doc.get('filename')
@@ -121,9 +187,9 @@
 
        metadata_path = file_dir / "metadata.json"
        with open(metadata_path, 'w') as f:
-            json.dump(self.metadata, f, indent=4)
+            json.dump(self.metadata.content, f, indent=4)
 
-        for idx, doc in enumerate(self.metadata['documents']):
+        for idx, doc in enumerate(self.metadata.content['documents']):
            try:
                filename = doc.get('filename')
                if filename is None:
@@ -162,9 +228,9 @@
 
        metadata_path = file_dir / "metadata.json"
        async with aiofiles.open(metadata_path, 'w') as f:
-            await f.write(json.dumps(self.metadata, indent=4))
+            await f.write(json.dumps(self.metadata.content, indent=4))
 
-        for idx, doc in enumerate(self.metadata['documents']):
+        for idx, doc in enumerate(self.metadata.content['documents']):
            try:
                filename = doc.get('filename')
                if filename is None:
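The net effect of these hunks: Submission.metadata is now a Document (type 'submission_metadata') rather than a raw dict, so the parsed mapping moves under .content. A before/after sketch for downstream callers, where sub is a hypothetical Submission instance:

# Before (1.2.5): metadata was a plain dict
accession = sub.metadata['accession-number']

# After (1.2.7): metadata is a Document; the dict now lives on .content
accession = sub.metadata.content['accession-number']
documents = sub.metadata.content['documents']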
{datamule-1.2.5.dist-info → datamule-1.2.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.2.5
+Version: 1.2.7
 Summary: Making it easier to use SEC filings.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
@@ -10,7 +10,6 @@ Requires-Dist: tqdm
 Requires-Dist: requests
 Requires-Dist: nest-asyncio
 Requires-Dist: aiofiles
-Requires-Dist: polars
 Requires-Dist: setuptools
 Requires-Dist: selectolax
 Requires-Dist: pytz
{datamule-1.2.5.dist-info → datamule-1.2.7.dist-info}/RECORD
CHANGED
@@ -1,35 +1,37 @@
-datamule/__init__.py,sha256=
+datamule/__init__.py,sha256=glzwBeGJEE6-TG7mRule9GH6L59XaIRR9T7ALcdpMus,1067
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
-datamule/helper.py,sha256=
-datamule/index.py,sha256=
+datamule/helper.py,sha256=g9Kb1DWbViCoON06PjOkSX5Ucu0uG7zPwhsO2LQ6C1g,3579
+datamule/index.py,sha256=_7Ox5hyF_7RWdblVFr5rNyv_ARwBP7VY4f703pk9qQ8,2074
 datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
-datamule/portfolio.py,sha256=
+datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
 datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
-datamule/submission.py,sha256=
+datamule/submission.py,sha256=Yh5nG3ioumhl6z30wJdIEmKjDDNSuo0r2xycZSIaeIg,11035
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/document.py,sha256=
-datamule/document/processing.py,sha256=
-datamule/document/table.py,sha256=
+datamule/document/document.py,sha256=menUFoeWwiY0rJnBkQiqY4NWnO0J17-qs8jFvO_1jiY,9969
+datamule/document/processing.py,sha256=MLgOtVNmsUotfxj5XvQqw5Q3idhvK6FdHcQz3U4ud7s,29333
+datamule/document/table.py,sha256=NdDQh7EWOf2qSx5ZxydCHpaNPd7J7wtVFoooiVzbmgk,12443
 datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/atsn.py,sha256=qkZGNIhyPC3VTTOjQ8-FSCQIhUy4XeSycUGLShxNVCo,17743
 datamule/document/mappings/cfportal.py,sha256=bR9d6DDY0kJ_HGx_hND2y1PNNkZjemYZ2KdyFAcv760,25257
+datamule/document/mappings/ex102_abs.py,sha256=FdGKvteRh_HsYgILF-8o4R6aSsjYwcaLpJxzdru4FTE,3976
 datamule/document/mappings/ex99a_sdr.py,sha256=PNdj9I0ZhNicPObLelNmjp33EgTwzvukqkBDnwxarE0,19
 datamule/document/mappings/ex99c_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/ex99g_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/ex99i_sdr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings/information_table.py,sha256=
+datamule/document/mappings/information_table.py,sha256=6l2Via728I59RS0y9Pit37NoOSAbaT-vclArYxU1vtY,1585
 datamule/document/mappings/nmfp.py,sha256=WuTyM1SkBiiLVAHqFF4DTZ_8AvsIuonT2w7pwYDPTDw,17767
 datamule/document/mappings/npx.py,sha256=xwruBueC09kfWhXV3fNUnQWYwCWrdrhQoVO3cKfPTO4,6556
 datamule/document/mappings/onefourtyfour.py,sha256=_-w9h6wGINGH5pQqQvPrd0cgB5QfCtPG5M40ewf_w8Q,2604
-datamule/document/mappings/ownership.py,sha256=
-datamule/document/mappings/proxy_voting_record.py,sha256=
+datamule/document/mappings/ownership.py,sha256=piD9vs4WFrB4yvp6c0pT5bibLKXgsM7hpnBUzaY0Xxs,10155
+datamule/document/mappings/proxy_voting_record.py,sha256=tSqLH065EOUq7U80P5GP1JBqipmAiqniPpP3E4adA1A,721
 datamule/document/mappings/sbs.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/sbsef.py,sha256=Zw58rbYcnitynk1mh9g1jDrCfqmFlY60OEjPM6p9iF0,534
 datamule/document/mappings/schedule13.py,sha256=lh9sukpEte514Gid77Nz9zh3uBEFZEemrZ2Uau0qsgk,6295
 datamule/document/mappings/sdr.py,sha256=UekqZId5PFMMWRAJSaPvCpN4c1Hx-SLAQPEN8GW_Gbg,4829
+datamule/document/mappings/submission_metadata.py,sha256=pi1eW-tnoAQ6y3laRI29Op80E9BPqqmcfe45owKYStw,271
 datamule/document/mappings/ta.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings/thirteenfhr.py,sha256=
-datamule/document/mappings/twentyfivense.py,sha256=
+datamule/document/mappings/thirteenfhr.py,sha256=XpYRIMPZnGLfEE4TqBI0BPXbyuq0xf3hut1fePOF6kU,4250
+datamule/document/mappings/twentyfivense.py,sha256=lKyj0ZBhkHX9gQJMTUPrQlxYFg3k-aBnWqtoS5bujZM,905
 datamule/document/mappings/twentyfourf2nt.py,sha256=Q7RPT3JgJHjYdjMuaSyAxclt6QPT_LgCQloxp-ByDuI,4118
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
@@ -38,24 +40,22 @@ datamule/sec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
 datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
-datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
 datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/submissions/downloader.py,sha256=60wX2Yml1UCuxOtU0xMxqqeyHhrypCmlDQ0jZF-StJo,2665
 datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
-datamule/sec/submissions/monitor.py,sha256=
+datamule/sec/submissions/monitor.py,sha256=s6uknn1dF1EemiI3Hl4nEq3txwK7nYl6wmayuUPYpRs,7844
 datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
-datamule/sec/submissions/textsearch.py,sha256=
+datamule/sec/submissions/textsearch.py,sha256=zEr3NXdhVFL8eMh2jruVXIt7taUZTMdNy2hOAyRM2pA,5706
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
 datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
-datamule/sec/xbrl/streamcompanyfacts.py,sha256=
+datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H5N2tbvKUzM,3307
 datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
-datamule/seclibrary/downloader.py,sha256=
+datamule/seclibrary/downloader.py,sha256=PIgz_7ASUTZOHcUZGcD1SmLaGSbq7xe7EiJT0Z7HU4M,13653
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.2.5.dist-info/METADATA,sha256=
-datamule-1.2.5.dist-info/WHEEL,sha256=
-datamule-1.2.5.dist-info/top_level.txt,sha256=
-datamule-1.2.5.dist-info/RECORD,,
+datamule-1.2.7.dist-info/METADATA,sha256=EvHa-0eCP6v0GRF6tq4QqnIDQpqM0m9yIXc68BO8Wck,490
+datamule-1.2.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-1.2.7.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.2.7.dist-info/RECORD,,
datamule/sec/rss/__init__.py
DELETED
File without changes