datamule 2.1.3__tar.gz → 2.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-2.1.3 → datamule-2.1.5}/PKG-INFO +1 -1
- datamule-2.1.5/datamule/datasets.py +51 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_proxyvotingrecord.py +2 -1
- {datamule-2.1.3 → datamule-2.1.5}/datamule/portfolio.py +10 -6
- {datamule-2.1.3 → datamule-2.1.5}/datamule/submission.py +4 -4
- {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/PKG-INFO +1 -1
- {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/SOURCES.txt +1 -0
- {datamule-2.1.3 → datamule-2.1.5}/setup.py +1 -1
- {datamule-2.1.3 → datamule-2.1.5}/datamule/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/config.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/data/listed_filer_metadata.csv +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/datamule_lookup.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/datamule_mysql_rds.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/downloader.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/sec_connector.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/document.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_13fhr.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_25nse.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_informationtable.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_npx.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_ownership.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_sbsef.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_sdr.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/utils.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/helper.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/index.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/package_updater.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/portfolio_compression_utils.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/infrastructure/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/downloader.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/eftsquery.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/monitor.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/streamer.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/textsearch.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/utils.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/filter_xbrl.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/seclibrary/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/seclibrary/bq.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/sheet.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/utils/__init__.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/utils/construct_submissions_data.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule/utils/format_accession.py +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/requires.txt +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/top_level.txt +0 -0
- {datamule-2.1.3 → datamule-2.1.5}/setup.cfg +0 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
# datamule/datasets.py
|
2
|
+
from pathlib import Path
|
3
|
+
import requests
|
4
|
+
import gzip
|
5
|
+
import shutil
|
6
|
+
import csv
|
7
|
+
|
8
|
+
# Dataset URLs
|
9
|
+
DATASET_URLS = {
|
10
|
+
"cik_cusip_crosswalk": "https://github.com/john-friedman/datamule-data/raw/refs/heads/master/data/datasets/cik_cusip_crosswalk.csv.gz",
|
11
|
+
"financial_security_identifiers_crosswalk" : "https://github.com/john-friedman/datamule-data/raw/refs/heads/master/data/datasets/financial_security_identifiers_crosswalk.csv.gz"
|
12
|
+
}
|
13
|
+
|
14
|
+
def update_dataset(name):
|
15
|
+
"""Force update a dataset by re-downloading it."""
|
16
|
+
return _get_dataset(name, update=True)
|
17
|
+
|
18
|
+
def _get_dataset(name, update=False):
|
19
|
+
"""Internal function to get dataset as list of dicts, downloading if necessary."""
|
20
|
+
if name not in DATASET_URLS:
|
21
|
+
raise ValueError(f"Unknown dataset: {name}")
|
22
|
+
|
23
|
+
url = DATASET_URLS[name]
|
24
|
+
data_dir = Path.home() / ".datamule" / "datasets"
|
25
|
+
file_path = data_dir / f"{name}.csv"
|
26
|
+
|
27
|
+
if not file_path.exists() or update:
|
28
|
+
print(f"Downloading {name}...")
|
29
|
+
data_dir.mkdir(parents=True, exist_ok=True)
|
30
|
+
|
31
|
+
response = requests.get(url, stream=True)
|
32
|
+
response.raise_for_status()
|
33
|
+
|
34
|
+
gz_path = file_path.with_suffix('.csv.gz')
|
35
|
+
with open(gz_path, 'wb') as f:
|
36
|
+
for chunk in response.iter_content(chunk_size=8192):
|
37
|
+
f.write(chunk)
|
38
|
+
|
39
|
+
with gzip.open(gz_path, 'rb') as f_in:
|
40
|
+
with open(file_path, 'wb') as f_out:
|
41
|
+
shutil.copyfileobj(f_in, f_out)
|
42
|
+
|
43
|
+
gz_path.unlink()
|
44
|
+
|
45
|
+
# Read CSV and return as list of dicts
|
46
|
+
with open(file_path, 'r') as f:
|
47
|
+
return list(csv.DictReader(f))
|
48
|
+
|
49
|
+
# Dataset available as list of dicts on import
|
50
|
+
cik_cusip_crosswalk = _get_dataset("cik_cusip_crosswalk")
|
51
|
+
financial_security_identifiers_crosswalk = _get_dataset("financial_security_identifiers_crosswalk")
|
@@ -13,7 +13,8 @@ proxy_voting_record_dict = {
|
|
13
13
|
'vote_voteRecord_sharesVoted': 'recordSharesVoted', # To distinguish from top-level sharesVoted
|
14
14
|
'isin': 'isin',
|
15
15
|
'voteSource': 'voteSource',
|
16
|
-
'voteSeries': 'voteSeries'
|
16
|
+
'voteSeries': 'voteSeries',
|
17
|
+
'figi': 'figi',
|
17
18
|
}
|
18
19
|
|
19
20
|
config_proxyvotingrecord = {
|
@@ -96,12 +96,16 @@ class Portfolio:
|
|
96
96
|
# Create submissions for each accession
|
97
97
|
submissions = []
|
98
98
|
for accession_prefix in accession_prefixes:
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
99
|
+
try:
|
100
|
+
submission = Submission(
|
101
|
+
batch_tar_path=batch_tar_path,
|
102
|
+
accession_prefix=accession_prefix,
|
103
|
+
portfolio_ref=self
|
104
|
+
)
|
105
|
+
submissions.append(submission)
|
106
|
+
except Exception as e:
|
107
|
+
pass
|
108
|
+
#print(f"Path: {batch_tar_path}. Exception: {e}")
|
105
109
|
pbar.update(1) # Update progress for each successful submission
|
106
110
|
|
107
111
|
return submissions
|
@@ -12,6 +12,7 @@ import urllib.request
|
|
12
12
|
from secxbrl import parse_inline_xbrl
|
13
13
|
from company_fundamentals import construct_fundamentals
|
14
14
|
from decimal import Decimal
|
15
|
+
from .utils.format_accession import format_accession
|
15
16
|
|
16
17
|
|
17
18
|
class Submission:
|
@@ -93,11 +94,10 @@ class Submission:
|
|
93
94
|
# standardize metadata
|
94
95
|
metadata = transform_metadata_string(metadata)
|
95
96
|
self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
|
96
|
-
|
97
|
+
|
98
|
+
# lets just use accesion-prefix, to get around malformed metadata files (1995 has a lot!)
|
99
|
+
self.accession = format_accession(self.accession_prefix,'dash')
|
97
100
|
|
98
|
-
# Band-aid fix: some SGML files in the SEC are bad lol, so they have TWO header sections. Will fix post w/ my cleaned archive
|
99
|
-
if isinstance(self.accession,list):
|
100
|
-
self.accession = self.accession[0]
|
101
101
|
#print(f"s: {self.metadata.content['accession-number']} : {batch_tar_path}")
|
102
102
|
self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
|
103
103
|
|
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
|
|
32
32
|
setup(
|
33
33
|
name="datamule",
|
34
34
|
author="John Friedman",
|
35
|
-
version="2.1.3",
|
35
|
+
version="2.1.5",
|
36
36
|
description="Work with SEC submissions at scale.",
|
37
37
|
packages=find_packages(include=['datamule', 'datamule.*']),
|
38
38
|
url="https://github.com/john-friedman/datamule-python",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|