datamule 2.1.3__tar.gz → 2.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {datamule-2.1.3 → datamule-2.1.5}/PKG-INFO +1 -1
  2. datamule-2.1.5/datamule/datasets.py +51 -0
  3. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_proxyvotingrecord.py +2 -1
  4. {datamule-2.1.3 → datamule-2.1.5}/datamule/portfolio.py +10 -6
  5. {datamule-2.1.3 → datamule-2.1.5}/datamule/submission.py +4 -4
  6. {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/PKG-INFO +1 -1
  7. {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/SOURCES.txt +1 -0
  8. {datamule-2.1.3 → datamule-2.1.5}/setup.py +1 -1
  9. {datamule-2.1.3 → datamule-2.1.5}/datamule/__init__.py +0 -0
  10. {datamule-2.1.3 → datamule-2.1.5}/datamule/config.py +0 -0
  11. {datamule-2.1.3 → datamule-2.1.5}/datamule/data/listed_filer_metadata.csv +0 -0
  12. {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/__init__.py +0 -0
  13. {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/datamule_lookup.py +0 -0
  14. {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/datamule_mysql_rds.py +0 -0
  15. {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/downloader.py +0 -0
  16. {datamule-2.1.3 → datamule-2.1.5}/datamule/datamule/sec_connector.py +0 -0
  17. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/__init__.py +0 -0
  18. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/document.py +0 -0
  19. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/__init__.py +0 -0
  20. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables.py +0 -0
  21. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_13fhr.py +0 -0
  22. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_25nse.py +0 -0
  23. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_informationtable.py +0 -0
  24. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_npx.py +0 -0
  25. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_ownership.py +0 -0
  26. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_sbsef.py +0 -0
  27. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/tables_sdr.py +0 -0
  28. {datamule-2.1.3 → datamule-2.1.5}/datamule/document/tables/utils.py +0 -0
  29. {datamule-2.1.3 → datamule-2.1.5}/datamule/helper.py +0 -0
  30. {datamule-2.1.3 → datamule-2.1.5}/datamule/index.py +0 -0
  31. {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/__init__.py +0 -0
  32. {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
  33. {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  34. {datamule-2.1.3 → datamule-2.1.5}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  35. {datamule-2.1.3 → datamule-2.1.5}/datamule/package_updater.py +0 -0
  36. {datamule-2.1.3 → datamule-2.1.5}/datamule/portfolio_compression_utils.py +0 -0
  37. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/__init__.py +0 -0
  38. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/infrastructure/__init__.py +0 -0
  39. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  40. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/__init__.py +0 -0
  41. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/downloader.py +0 -0
  42. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/eftsquery.py +0 -0
  43. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/monitor.py +0 -0
  44. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/streamer.py +0 -0
  45. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/submissions/textsearch.py +0 -0
  46. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/utils.py +0 -0
  47. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/__init__.py +0 -0
  48. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  49. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  50. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  51. {datamule-2.1.3 → datamule-2.1.5}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  52. {datamule-2.1.3 → datamule-2.1.5}/datamule/seclibrary/__init__.py +0 -0
  53. {datamule-2.1.3 → datamule-2.1.5}/datamule/seclibrary/bq.py +0 -0
  54. {datamule-2.1.3 → datamule-2.1.5}/datamule/sheet.py +0 -0
  55. {datamule-2.1.3 → datamule-2.1.5}/datamule/utils/__init__.py +0 -0
  56. {datamule-2.1.3 → datamule-2.1.5}/datamule/utils/construct_submissions_data.py +0 -0
  57. {datamule-2.1.3 → datamule-2.1.5}/datamule/utils/format_accession.py +0 -0
  58. {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/dependency_links.txt +0 -0
  59. {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/requires.txt +0 -0
  60. {datamule-2.1.3 → datamule-2.1.5}/datamule.egg-info/top_level.txt +0 -0
  61. {datamule-2.1.3 → datamule-2.1.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.1.3
3
+ Version: 2.1.5
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -0,0 +1,51 @@
1
+ # datamule/datasets.py
2
+ from pathlib import Path
3
+ import requests
4
+ import gzip
5
+ import shutil
6
+ import csv
7
+
8
+ # Dataset URLs
9
+ DATASET_URLS = {
10
+ "cik_cusip_crosswalk": "https://github.com/john-friedman/datamule-data/raw/refs/heads/master/data/datasets/cik_cusip_crosswalk.csv.gz",
11
+ "financial_security_identifiers_crosswalk" : "https://github.com/john-friedman/datamule-data/raw/refs/heads/master/data/datasets/financial_security_identifiers_crosswalk.csv.gz"
12
+ }
13
+
14
+ def update_dataset(name):
15
+ """Force update a dataset by re-downloading it."""
16
+ return _get_dataset(name, update=True)
17
+
18
+ def _get_dataset(name, update=False):
19
+ """Internal function to get dataset as list of dicts, downloading if necessary."""
20
+ if name not in DATASET_URLS:
21
+ raise ValueError(f"Unknown dataset: {name}")
22
+
23
+ url = DATASET_URLS[name]
24
+ data_dir = Path.home() / ".datamule" / "datasets"
25
+ file_path = data_dir / f"{name}.csv"
26
+
27
+ if not file_path.exists() or update:
28
+ print(f"Downloading {name}...")
29
+ data_dir.mkdir(parents=True, exist_ok=True)
30
+
31
+ response = requests.get(url, stream=True)
32
+ response.raise_for_status()
33
+
34
+ gz_path = file_path.with_suffix('.csv.gz')
35
+ with open(gz_path, 'wb') as f:
36
+ for chunk in response.iter_content(chunk_size=8192):
37
+ f.write(chunk)
38
+
39
+ with gzip.open(gz_path, 'rb') as f_in:
40
+ with open(file_path, 'wb') as f_out:
41
+ shutil.copyfileobj(f_in, f_out)
42
+
43
+ gz_path.unlink()
44
+
45
+ # Read CSV and return as list of dicts
46
+ with open(file_path, 'r') as f:
47
+ return list(csv.DictReader(f))
48
+
49
+ # Dataset available as list of dicts on import
50
+ cik_cusip_crosswalk = _get_dataset("cik_cusip_crosswalk")
51
+ financial_security_identifiers_crosswalk = _get_dataset("financial_security_identifiers_crosswalk")
@@ -13,7 +13,8 @@ proxy_voting_record_dict = {
13
13
  'vote_voteRecord_sharesVoted': 'recordSharesVoted', # To distinguish from top-level sharesVoted
14
14
  'isin': 'isin',
15
15
  'voteSource': 'voteSource',
16
- 'voteSeries': 'voteSeries'
16
+ 'voteSeries': 'voteSeries',
17
+ 'figi': 'figi',
17
18
  }
18
19
 
19
20
  config_proxyvotingrecord = {
@@ -96,12 +96,16 @@ class Portfolio:
96
96
  # Create submissions for each accession
97
97
  submissions = []
98
98
  for accession_prefix in accession_prefixes:
99
- submission = Submission(
100
- batch_tar_path=batch_tar_path,
101
- accession_prefix=accession_prefix,
102
- portfolio_ref=self
103
- )
104
- submissions.append(submission)
99
+ try:
100
+ submission = Submission(
101
+ batch_tar_path=batch_tar_path,
102
+ accession_prefix=accession_prefix,
103
+ portfolio_ref=self
104
+ )
105
+ submissions.append(submission)
106
+ except Exception as e:
107
+ pass
108
+ #print(f"Path: {batch_tar_path}. Exception: {e}")
105
109
  pbar.update(1) # Update progress for each successful submission
106
110
 
107
111
  return submissions
@@ -12,6 +12,7 @@ import urllib.request
12
12
  from secxbrl import parse_inline_xbrl
13
13
  from company_fundamentals import construct_fundamentals
14
14
  from decimal import Decimal
15
+ from .utils.format_accession import format_accession
15
16
 
16
17
 
17
18
  class Submission:
@@ -93,11 +94,10 @@ class Submission:
93
94
  # standardize metadata
94
95
  metadata = transform_metadata_string(metadata)
95
96
  self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
96
- self.accession = self.metadata.content['accession-number']
97
+
98
+ # lets just use accesion-prefix, to get around malformed metadata files (1995 has a lot!)
99
+ self.accession = format_accession(self.accession_prefix,'dash')
97
100
 
98
- # Band-aid fix: some SGML files in the SEC are bad lol, so they have TWO header sections. Will fix post w/ my cleaned archive
99
- if isinstance(self.accession,list):
100
- self.accession = self.accession[0]
101
101
  #print(f"s: {self.metadata.content['accession-number']} : {batch_tar_path}")
102
102
  self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
103
103
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.1.3
3
+ Version: 2.1.5
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -1,6 +1,7 @@
1
1
  setup.py
2
2
  datamule/__init__.py
3
3
  datamule/config.py
4
+ datamule/datasets.py
4
5
  datamule/helper.py
5
6
  datamule/index.py
6
7
  datamule/package_updater.py
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
32
32
  setup(
33
33
  name="datamule",
34
34
  author="John Friedman",
35
- version="2.1.3",
35
+ version="2.1.5",
36
36
  description="Work with SEC submissions at scale.",
37
37
  packages=find_packages(include=['datamule', 'datamule.*']),
38
38
  url="https://github.com/john-friedman/datamule-python",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes