datamule 2.1.3__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/datasets.py ADDED
@@ -0,0 +1,51 @@
1
+ # datamule/datasets.py
2
+ from pathlib import Path
3
+ import requests
4
+ import gzip
5
+ import shutil
6
+ import csv
7
+
8
+ # Dataset URLs
9
+ DATASET_URLS = {
10
+ "cik_cusip_crosswalk": "https://github.com/john-friedman/datamule-data/raw/refs/heads/master/data/datasets/cik_cusip_crosswalk.csv.gz",
11
+ "financial_security_identifiers_crosswalk" : "https://github.com/john-friedman/datamule-data/raw/refs/heads/master/data/datasets/financial_security_identifiers_crosswalk.csv.gz"
12
+ }
13
+
14
+ def update_dataset(name):
15
+ """Force update a dataset by re-downloading it."""
16
+ return _get_dataset(name, update=True)
17
+
18
+ def _get_dataset(name, update=False):
19
+ """Internal function to get dataset as list of dicts, downloading if necessary."""
20
+ if name not in DATASET_URLS:
21
+ raise ValueError(f"Unknown dataset: {name}")
22
+
23
+ url = DATASET_URLS[name]
24
+ data_dir = Path.home() / ".datamule" / "datasets"
25
+ file_path = data_dir / f"{name}.csv"
26
+
27
+ if not file_path.exists() or update:
28
+ print(f"Downloading {name}...")
29
+ data_dir.mkdir(parents=True, exist_ok=True)
30
+
31
+ response = requests.get(url, stream=True)
32
+ response.raise_for_status()
33
+
34
+ gz_path = file_path.with_suffix('.csv.gz')
35
+ with open(gz_path, 'wb') as f:
36
+ for chunk in response.iter_content(chunk_size=8192):
37
+ f.write(chunk)
38
+
39
+ with gzip.open(gz_path, 'rb') as f_in:
40
+ with open(file_path, 'wb') as f_out:
41
+ shutil.copyfileobj(f_in, f_out)
42
+
43
+ gz_path.unlink()
44
+
45
+ # Read CSV and return as list of dicts
46
+ with open(file_path, 'r') as f:
47
+ return list(csv.DictReader(f))
48
+
49
+ # Dataset available as list of dicts on import
50
+ cik_cusip_crosswalk = _get_dataset("cik_cusip_crosswalk")
51
+ financial_security_identifiers_crosswalk = _get_dataset("financial_security_identifiers_crosswalk")
@@ -13,7 +13,8 @@ proxy_voting_record_dict = {
13
13
  'vote_voteRecord_sharesVoted': 'recordSharesVoted', # To distinguish from top-level sharesVoted
14
14
  'isin': 'isin',
15
15
  'voteSource': 'voteSource',
16
- 'voteSeries': 'voteSeries'
16
+ 'voteSeries': 'voteSeries',
17
+ 'figi': 'figi',
17
18
  }
18
19
 
19
20
  config_proxyvotingrecord = {
datamule/portfolio.py CHANGED
@@ -96,12 +96,16 @@ class Portfolio:
96
96
  # Create submissions for each accession
97
97
  submissions = []
98
98
  for accession_prefix in accession_prefixes:
99
- submission = Submission(
100
- batch_tar_path=batch_tar_path,
101
- accession_prefix=accession_prefix,
102
- portfolio_ref=self
103
- )
104
- submissions.append(submission)
99
+ try:
100
+ submission = Submission(
101
+ batch_tar_path=batch_tar_path,
102
+ accession_prefix=accession_prefix,
103
+ portfolio_ref=self
104
+ )
105
+ submissions.append(submission)
106
+ except Exception as e:
107
+ pass
108
+ #print(f"Path: {batch_tar_path}. Exception: {e}")
105
109
  pbar.update(1) # Update progress for each successful submission
106
110
 
107
111
  return submissions
datamule/submission.py CHANGED
@@ -12,6 +12,7 @@ import urllib.request
12
12
  from secxbrl import parse_inline_xbrl
13
13
  from company_fundamentals import construct_fundamentals
14
14
  from decimal import Decimal
15
+ from .utils.format_accession import format_accession
15
16
 
16
17
 
17
18
  class Submission:
@@ -93,11 +94,10 @@ class Submission:
93
94
  # standardize metadata
94
95
  metadata = transform_metadata_string(metadata)
95
96
  self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
96
- self.accession = self.metadata.content['accession-number']
97
+
98
+ # lets just use accesion-prefix, to get around malformed metadata files (1995 has a lot!)
99
+ self.accession = format_accession(self.accession_prefix,'dash')
97
100
 
98
- # Band-aid fix: some SGML files in the SEC are bad lol, so they have TWO header sections. Will fix post w/ my cleaned archive
99
- if isinstance(self.accession,list):
100
- self.accession = self.accession[0]
101
101
  #print(f"s: {self.metadata.content['accession-number']} : {batch_tar_path}")
102
102
  self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
103
103
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.1.3
3
+ Version: 2.1.5
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -1,12 +1,13 @@
1
1
  datamule/__init__.py,sha256=sY9rYx9z4LADjOLmwjL3BXssIzHs8MQM6gt9IWMS85U,1192
2
2
  datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
3
+ datamule/datasets.py,sha256=-2_5kTRS3mxlkKbXwBg8aiistYljLYRnZjDLZNhV8bk,1867
3
4
  datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
4
5
  datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
5
6
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
6
- datamule/portfolio.py,sha256=YViG1JgJ9SFhg8N3tOOhBI8oc6Pmi2vwnHeHmlkC_5U,12119
7
+ datamule/portfolio.py,sha256=0-E1ZSEjJ8hba7HxF8oCrRneNuF_KKISOY6K4dRg0Cg,12282
7
8
  datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
8
9
  datamule/sheet.py,sha256=Ws_YRtpvewLVioarngVMe8cgG_sp11MP9_goGbRaiWE,23952
9
- datamule/submission.py,sha256=piMtTyoMZrKmLBpjyi0BBFhlkugi_CEuyox7J-jnusQ,15898
10
+ datamule/submission.py,sha256=TdQDfFjOKXy2qAZcD6hc9kjDSxmuZLqk8WRhtMjjC-g,15822
10
11
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
11
12
  datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
13
  datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
@@ -22,7 +23,7 @@ datamule/document/tables/tables_25nse.py,sha256=kpoOcIpra6i3Wx_6pUCj1fkx0wUbMhx7
22
23
  datamule/document/tables/tables_informationtable.py,sha256=3yjuxYuLoBjRd6O0BNd0jQDmS1XUDjA6xp51Csq2cH8,649
23
24
  datamule/document/tables/tables_npx.py,sha256=tZDBAonAQWLsgecVK_OwIgNcUJhuV5L2gkTSNbXAgNE,6652
24
25
  datamule/document/tables/tables_ownership.py,sha256=pRoFFRGLWp8gKAAvvUbVRxIU2xDFAQhwi9bgwddsT8A,11185
25
- datamule/document/tables/tables_proxyvotingrecord.py,sha256=t5h6iQtlg28Rqt1NJ1FDHCFHFjj_4_aelnFbWLtQcs0,875
26
+ datamule/document/tables/tables_proxyvotingrecord.py,sha256=S_Th294-KWRL-QVXkexNWIksSaFePZGSVq6EU8iiK0o,896
26
27
  datamule/document/tables/tables_sbsef.py,sha256=X6VKVnAdWxn2TgRmaAd1WWlxPhcLPQ-53s0qDokkPI0,635
27
28
  datamule/document/tables/tables_sdr.py,sha256=BwHRJvtijiYvNJ2lIc_30kct6VEmLimIzX28JjZBBqo,4924
28
29
  datamule/document/tables/utils.py,sha256=2-X_1NsiWj_XsD9djxCXwTeIVlg-ip78gG11xACJiDs,738
@@ -50,7 +51,7 @@ datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,180
50
51
  datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
52
  datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
52
53
  datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
53
- datamule-2.1.3.dist-info/METADATA,sha256=jE2eNeO223VJUDysOc9ENyiOIDbVsv5DEXdCnXrhFRA,560
54
- datamule-2.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
55
- datamule-2.1.3.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
56
- datamule-2.1.3.dist-info/RECORD,,
54
+ datamule-2.1.5.dist-info/METADATA,sha256=O7H7morwBBDgqmyytWiui81VS4Wt5PoyIsNGv3_j3rk,560
55
+ datamule-2.1.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
56
+ datamule-2.1.5.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
57
+ datamule-2.1.5.dist-info/RECORD,,