datamule 1.8.6__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {datamule-1.8.6 → datamule-1.9.0}/PKG-INFO +2 -1
  2. {datamule-1.8.6/datamule/seclibrary → datamule-1.9.0/datamule/datamule}/datamule_lookup.py +0 -1
  3. datamule-1.9.0/datamule/datamule/datamule_mysql_rds.py +3 -0
  4. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/document.py +55 -0
  5. {datamule-1.8.6 → datamule-1.9.0}/datamule/portfolio.py +1 -4
  6. {datamule-1.8.6 → datamule-1.9.0}/datamule/sheet.py +6 -1
  7. {datamule-1.8.6 → datamule-1.9.0}/datamule.egg-info/PKG-INFO +2 -1
  8. {datamule-1.8.6 → datamule-1.9.0}/datamule.egg-info/SOURCES.txt +3 -3
  9. {datamule-1.8.6 → datamule-1.9.0}/datamule.egg-info/requires.txt +1 -0
  10. {datamule-1.8.6 → datamule-1.9.0}/setup.py +2 -1
  11. datamule-1.8.6/datamule/seclibrary/query.py +0 -181
  12. {datamule-1.8.6 → datamule-1.9.0}/datamule/__init__.py +0 -0
  13. {datamule-1.8.6 → datamule-1.9.0}/datamule/config.py +0 -0
  14. {datamule-1.8.6 → datamule-1.9.0}/datamule/data/listed_filer_metadata.csv +0 -0
  15. {datamule-1.8.6 → datamule-1.9.0}/datamule/datamule/__init__.py +0 -0
  16. {datamule-1.8.6/datamule/seclibrary → datamule-1.9.0/datamule/datamule}/downloader.py +0 -0
  17. {datamule-1.8.6 → datamule-1.9.0}/datamule/datamule/sec_connector.py +0 -0
  18. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/__init__.py +0 -0
  19. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/__init__.py +0 -0
  20. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/atsn.py +0 -0
  21. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/cfportal.py +0 -0
  22. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/d.py +0 -0
  23. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ex102_abs.py +0 -0
  24. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ex99a_sdr.py +0 -0
  25. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ex99c_sdr.py +0 -0
  26. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ex99g_sdr.py +0 -0
  27. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ex99i_sdr.py +0 -0
  28. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/information_table.py +0 -0
  29. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/nmfp.py +0 -0
  30. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/npx.py +0 -0
  31. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/onefourtyfour.py +0 -0
  32. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ownership.py +0 -0
  33. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/proxy_voting_record.py +0 -0
  34. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/sbs.py +0 -0
  35. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/sbsef.py +0 -0
  36. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/schedule13.py +0 -0
  37. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/sdr.py +0 -0
  38. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/submission_metadata.py +0 -0
  39. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/ta.py +0 -0
  40. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/thirteenfhr.py +0 -0
  41. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/twentyfivense.py +0 -0
  42. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/mappings/twentyfourf2nt.py +0 -0
  43. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/processing.py +0 -0
  44. {datamule-1.8.6 → datamule-1.9.0}/datamule/document/table.py +0 -0
  45. {datamule-1.8.6 → datamule-1.9.0}/datamule/helper.py +0 -0
  46. {datamule-1.8.6 → datamule-1.9.0}/datamule/index.py +0 -0
  47. {datamule-1.8.6 → datamule-1.9.0}/datamule/mapping_dicts/__init__.py +0 -0
  48. {datamule-1.8.6 → datamule-1.9.0}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
  49. {datamule-1.8.6 → datamule-1.9.0}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  50. {datamule-1.8.6 → datamule-1.9.0}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  51. {datamule-1.8.6 → datamule-1.9.0}/datamule/package_updater.py +0 -0
  52. {datamule-1.8.6 → datamule-1.9.0}/datamule/portfolio_compression_utils.py +0 -0
  53. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/__init__.py +0 -0
  54. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/infrastructure/__init__.py +0 -0
  55. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  56. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/submissions/__init__.py +0 -0
  57. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/submissions/downloader.py +0 -0
  58. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/submissions/eftsquery.py +0 -0
  59. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/submissions/monitor.py +0 -0
  60. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/submissions/streamer.py +0 -0
  61. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/submissions/textsearch.py +0 -0
  62. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/utils.py +0 -0
  63. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/xbrl/__init__.py +0 -0
  64. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  65. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  66. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  67. {datamule-1.8.6 → datamule-1.9.0}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  68. {datamule-1.8.6 → datamule-1.9.0}/datamule/seclibrary/__init__.py +0 -0
  69. {datamule-1.8.6 → datamule-1.9.0}/datamule/seclibrary/bq.py +0 -0
  70. {datamule-1.8.6 → datamule-1.9.0}/datamule/submission.py +0 -0
  71. {datamule-1.8.6 → datamule-1.9.0}/datamule/utils/__init__.py +0 -0
  72. {datamule-1.8.6 → datamule-1.9.0}/datamule/utils/construct_submissions_data.py +0 -0
  73. {datamule-1.8.6 → datamule-1.9.0}/datamule/utils/format_accession.py +0 -0
  74. {datamule-1.8.6 → datamule-1.9.0}/datamule.egg-info/dependency_links.txt +0 -0
  75. {datamule-1.8.6 → datamule-1.9.0}/datamule.egg-info/top_level.txt +0 -0
  76. {datamule-1.8.6 → datamule-1.9.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.8.6
3
+ Version: 1.9.0
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -18,3 +18,4 @@ Requires-Dist: doc2dict
18
18
  Requires-Dist: secxbrl
19
19
  Requires-Dist: secsgml
20
20
  Requires-Dist: websocket-client
21
+ Requires-Dist: company_fundamentals
@@ -3,7 +3,6 @@ import asyncio
3
3
  import aiohttp
4
4
  import urllib.parse
5
5
  import ssl
6
- import json
7
6
  import time
8
7
  from tqdm import tqdm
9
8
 
@@ -0,0 +1,3 @@
1
+ # connection to worker
2
+ # basically everything should be handled on worker end
3
+ # except for like dates. - nah even dates
@@ -13,6 +13,7 @@ from pathlib import Path
13
13
  import webbrowser
14
14
  from secsgml.utils import bytes_to_str
15
15
  from secxbrl import parse_inline_xbrl
16
+ from company_fundamentals import construct_fundamentals
16
17
 
17
18
  class Document:
18
19
  def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -35,6 +36,7 @@ class Document:
35
36
  # this will be filled by parsed
36
37
  self.data = None
37
38
  self.xbrl = None
39
+ self.fundamentals = None
38
40
 
39
41
  #_load_text_content
40
42
  def _preprocess_txt_content(self):
@@ -113,6 +115,59 @@ class Document:
113
115
  self.xbrl = parse_inline_xbrl(self.content)
114
116
  else:
115
117
  raise ValueError("Only inline has been implemented so far.")
118
+
119
+ def parse_fundamentals(self,categories=None):
120
+ self.parse_xbrl()
121
+ # Transform XBRL records into the format needed by construct_fundamentals
122
+ xbrl = []
123
+
124
+ for xbrl_record in self.xbrl:
125
+ try:
126
+ # Extract basic fields
127
+ value = xbrl_record.get('_val', None)
128
+ taxonomy, name = xbrl_record['_attributes']['name'].split(':')
129
+
130
+ # Handle scaling if present
131
+ if xbrl_record.get('_attributes', {}).get('scale') is not None:
132
+ scale = int(xbrl_record['_attributes']['scale'])
133
+ try:
134
+ value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
135
+ except:
136
+ pass
137
+
138
+ # Extract period dates
139
+ period_start_date = None
140
+ period_end_date = None
141
+
142
+ if xbrl_record.get('_context'):
143
+ context = xbrl_record['_context']
144
+ period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
145
+ period_end_date = context.get('context_period_enddate')
146
+
147
+ # Create record in the format expected by construct_fundamentals
148
+ record = {
149
+ 'taxonomy': taxonomy,
150
+ 'name': name,
151
+ 'value': value,
152
+ 'period_start_date': period_start_date,
153
+ 'period_end_date': period_end_date
154
+ }
155
+
156
+ xbrl.append(record)
157
+
158
+ except Exception as e:
159
+ # Skip malformed records
160
+ continue
161
+
162
+ # Call construct_fundamentals with the transformed data
163
+ fundamentals = construct_fundamentals(xbrl,
164
+ taxonomy_key='taxonomy',
165
+ concept_key='name',
166
+ start_date_key='period_start_date',
167
+ end_date_key='period_end_date',
168
+ categories=categories)
169
+
170
+ self.fundamentals = fundamentals
116
171
 
117
172
  # Note: this method will be heavily modified in the future
118
173
  def parse(self):
@@ -9,14 +9,11 @@ import os
9
9
  import tarfile
10
10
  from threading import Lock
11
11
  from .helper import _process_cik_and_metadata_filters
12
- from .seclibrary.downloader import download as seclibrary_download
12
+ from .datamule.downloader import download as seclibrary_download
13
13
  from .sec.xbrl.filter_xbrl import filter_xbrl
14
14
  from .sec.submissions.monitor import Monitor
15
15
  from .portfolio_compression_utils import CompressionManager
16
16
  from .datamule.sec_connector import SecConnector
17
- from secsgml.utils import bytes_to_str, calculate_documents_locations_in_tar
18
- import json
19
- import io
20
17
  import shutil
21
18
 
22
19
 
@@ -3,7 +3,7 @@ import csv
3
3
  import os
4
4
  from .helper import _process_cik_and_metadata_filters, load_package_dataset
5
5
  from .sec.xbrl.downloadcompanyfacts import download_company_facts
6
- from .seclibrary.datamule_lookup import datamule_lookup
6
+ from .datamule.datamule_lookup import datamule_lookup
7
7
 
8
8
  # slated for deprecation?
9
9
  from .seclibrary.bq import get_information_table, get_345, get_proxy_voting_record
@@ -12,11 +12,16 @@ class Sheet:
12
12
  def __init__(self, path):
13
13
  self.path = Path(path)
14
14
 
15
+ # Keep
15
16
  def get_submissions(self,cik=None, accession_number=None, submission_type=None, filing_date=None,
16
17
  columns=None, distinct=False, page_size=25000, quiet=False, api_key=None):
17
18
 
18
19
  return datamule_lookup(cik, accession_number, submission_type, filing_date,
19
20
  columns, distinct, page_size, quiet, api_key)
21
+
22
+ # Implement
23
+ def get_table(self,table,**kwargs):
24
+ pass
20
25
 
21
26
  def download_xbrl(
22
27
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.8.6
3
+ Version: 1.9.0
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -18,3 +18,4 @@ Requires-Dist: doc2dict
18
18
  Requires-Dist: secxbrl
19
19
  Requires-Dist: secsgml
20
20
  Requires-Dist: websocket-client
21
+ Requires-Dist: company_fundamentals
@@ -15,6 +15,9 @@ datamule.egg-info/requires.txt
15
15
  datamule.egg-info/top_level.txt
16
16
  datamule/data/listed_filer_metadata.csv
17
17
  datamule/datamule/__init__.py
18
+ datamule/datamule/datamule_lookup.py
19
+ datamule/datamule/datamule_mysql_rds.py
20
+ datamule/datamule/downloader.py
18
21
  datamule/datamule/sec_connector.py
19
22
  datamule/document/__init__.py
20
23
  datamule/document/document.py
@@ -65,9 +68,6 @@ datamule/sec/xbrl/streamcompanyfacts.py
65
68
  datamule/sec/xbrl/xbrlmonitor.py
66
69
  datamule/seclibrary/__init__.py
67
70
  datamule/seclibrary/bq.py
68
- datamule/seclibrary/datamule_lookup.py
69
- datamule/seclibrary/downloader.py
70
- datamule/seclibrary/query.py
71
71
  datamule/utils/__init__.py
72
72
  datamule/utils/construct_submissions_data.py
73
73
  datamule/utils/format_accession.py
@@ -12,3 +12,4 @@ doc2dict
12
12
  secxbrl
13
13
  secsgml
14
14
  websocket-client
15
+ company_fundamentals
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
32
32
  setup(
33
33
  name="datamule",
34
34
  author="John Friedman",
35
- version="1.8.6",
35
+ version="1.9.0",
36
36
  description="Work with SEC submissions at scale.",
37
37
  packages=find_packages(include=['datamule', 'datamule.*']),
38
38
  url="https://github.com/john-friedman/datamule-python",
@@ -51,6 +51,7 @@ setup(
51
51
  'secxbrl',
52
52
  'secsgml',
53
53
  'websocket-client',
54
+ 'company_fundamentals'
54
55
  ],
55
56
  # Include the data directory in the package
56
57
  package_data={
@@ -1,181 +0,0 @@
1
- import os
2
- import asyncio
3
- import aiohttp
4
- import urllib.parse
5
- import ssl
6
- import json
7
- import time
8
- from tqdm import tqdm
9
-
10
- class Query:
11
- def __init__(self, api_key=None):
12
- self.API_BASE_URL = "https://sec-library.jgfriedman99.workers.dev/"
13
- self._api_key = api_key
14
- self.total_cost = 0
15
- self.remaining_balance = None
16
- self.start_time = None
17
-
18
- @property
19
- def api_key(self):
20
- return getattr(self, '_api_key', None) or os.getenv('DATAMULE_API_KEY')
21
-
22
- @api_key.setter
23
- def api_key(self, value):
24
- if not value:
25
- raise ValueError("API key cannot be empty")
26
- self._api_key = value
27
-
28
- async def _fetch_page(self, session, submission_type=None, cik=None, filing_date=None, page=1):
29
- params = {
30
- 'api_key': self.api_key,
31
- 'page': page
32
- }
33
-
34
- # Handle submission_type parameter
35
- if submission_type:
36
- if isinstance(submission_type, list):
37
- params['submission_type'] = ','.join(str(x) for x in submission_type)
38
- else:
39
- params['submission_type'] = str(submission_type)
40
-
41
- # Handle CIK parameter
42
- if cik:
43
- if isinstance(cik, list):
44
- params['cik'] = ','.join(str(x) for x in cik)
45
- else:
46
- params['cik'] = str(cik)
47
-
48
- # Handle filing_date parameter
49
- if filing_date:
50
- if isinstance(filing_date, tuple):
51
- params['startdt'] = str(filing_date[0])
52
- params['enddt'] = str(filing_date[1])
53
- else:
54
- if isinstance(filing_date, list):
55
- params['filing_date'] = ','.join(str(x) for x in filing_date)
56
- else:
57
- params['filing_date'] = str(filing_date)
58
-
59
- url = f"{self.API_BASE_URL}?{urllib.parse.urlencode(params)}"
60
-
61
- async with session.get(url) as response:
62
- data = await response.json()
63
- if not data.get('success'):
64
- raise ValueError(f"API request failed: {data.get('error')}")
65
-
66
- # Track costs and balance
67
- charges = data['metadata']['billing']['charges']
68
- page_cost = charges['total']
69
- self.total_cost += page_cost
70
- self.remaining_balance = data['metadata']['billing']['remaining_balance']
71
-
72
- return data['data'], data['metadata']['pagination'], page_cost
73
-
74
- async def execute_query(self, submission_type=None, cik=None, filing_date=None):
75
- if self.api_key is None:
76
- raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
77
-
78
- self.start_time = time.time()
79
- total_items = 0
80
- pages_processed = 0
81
-
82
- # Display query parameters
83
- query_desc = []
84
- if cik:
85
- query_desc.append(f"CIK={cik}")
86
- if submission_type:
87
- query_desc.append(f"Type={submission_type}")
88
- if filing_date:
89
- if isinstance(filing_date, tuple):
90
- query_desc.append(f"Date={filing_date[0]} to {filing_date[1]}")
91
- else:
92
- query_desc.append(f"Date={filing_date}")
93
-
94
- if query_desc:
95
- print(f"QUERY: {', '.join(query_desc)}")
96
-
97
- connector = aiohttp.TCPConnector(ssl=ssl.create_default_context())
98
- async with aiohttp.ClientSession(connector=connector) as session:
99
- # Initialize progress bar with a custom format to avoid extra colons
100
- pbar = tqdm(unit="page", bar_format="{desc}: {n_fmt} {unit} [{elapsed}<{remaining}, {rate_fmt}{postfix}]")
101
- pbar.set_description("Fetching data")
102
-
103
- current_page = 1
104
- has_more = True
105
- results = []
106
-
107
- while has_more:
108
- # Fetch page
109
- page_results, pagination, page_cost = await self._fetch_page(session,
110
- submission_type=submission_type,
111
- cik=cik,
112
- filing_date=filing_date,
113
- page=current_page)
114
-
115
- # Accumulate results
116
- results.extend(page_results)
117
-
118
- pages_processed += 1
119
- total_items += len(page_results)
120
-
121
- # Update progress bar with cleaner format
122
- pbar.set_description(f"Fetching data (page {current_page})")
123
- pbar.set_postfix_str(f"cost=${self.total_cost:.2f} | balance=${self.remaining_balance:.2f}")
124
- pbar.update(1)
125
-
126
- # Check if we need to fetch more pages
127
- has_more = pagination.get('hasMore', False)
128
- current_page += 1
129
-
130
- # For the first page, display record info using pbar.write instead of print
131
- if pages_processed == 1:
132
- records_per_page = pagination.get('currentPageRecords', len(page_results))
133
- total_records = pagination.get('totalRecords', None)
134
- if total_records:
135
- pbar.write(f"Retrieved {records_per_page} records (page 1) of {total_records} total - Fetching additional pages...")
136
- else:
137
- pbar.write(f"Retrieved {records_per_page} records (page 1) - Fetching additional pages...")
138
-
139
- pbar.close()
140
-
141
- # Final summary
142
- elapsed_time = time.time() - self.start_time
143
- print("\nQuery complete:")
144
- print(f"- Retrieved {total_items} filings across {pages_processed} pages")
145
- print(f"- Total cost: ${self.total_cost:.2f}")
146
- print(f"- Remaining balance: ${self.remaining_balance:.2f}")
147
- print(f"- Time: {elapsed_time:.1f} seconds")
148
-
149
- return results
150
-
151
-
152
- def query(cik=None, submission_type=None, filing_date=None, api_key=None):
153
- """
154
- Query SEC filings data with optional filtering
155
-
156
- Parameters:
157
- - cik: Company CIK number(s), can be string, int, or list
158
- - submission_type: Filing type(s), can be string or list (e.g., '10-K', ['10-K', '10-Q'])
159
- - filing_date: Filing date(s), can be string, list, or tuple of (start_date, end_date)
160
- - api_key: Optional API key (can also use DATAMULE_API_KEY environment variable)
161
-
162
- Returns:
163
- - List of all matching submission data
164
- """
165
- # Create a Query instance for this request
166
- q = Query(api_key=api_key)
167
- # remove dash from filing_date
168
- if isinstance(filing_date, tuple):
169
- filing_date = (filing_date[0].replace('-', ''), filing_date[1].replace('-', ''))
170
- elif isinstance(filing_date, str):
171
- filing_date = filing_date.replace('-', '')
172
- elif isinstance(filing_date, list):
173
- filing_date = [x.replace('-', '') for x in filing_date]
174
-
175
- print(filing_date)
176
- # Run the query and return results
177
- return asyncio.run(q.execute_query(
178
- submission_type=submission_type,
179
- cik=cik,
180
- filing_date=filing_date
181
- ))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes