datamule 0.380__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. datamule/__init__.py +46 -86
  2. datamule/book.py +16 -0
  3. datamule/config.py +29 -0
  4. datamule/data/company_former_names.csv +8148 -8148
  5. datamule/data/company_metadata.csv +10049 -10049
  6. datamule/data/company_tickers.csv +9999 -10168
  7. datamule/data/sec-glossary.csv +728 -728
  8. datamule/data/xbrl_descriptions.csv +10024 -10024
  9. datamule/document.py +278 -0
  10. datamule/downloader/downloader.py +374 -0
  11. datamule/downloader/premiumdownloader.py +335 -0
  12. datamule/helper.py +123 -136
  13. datamule/mapping_dicts/txt_mapping_dicts.py +232 -0
  14. datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
  15. datamule/monitor.py +238 -0
  16. datamule/mulebot/__init__.py +1 -1
  17. datamule/mulebot/helper.py +34 -34
  18. datamule/mulebot/mulebot.py +129 -129
  19. datamule/mulebot/mulebot_server/server.py +86 -86
  20. datamule/mulebot/mulebot_server/static/css/minimalist.css +173 -173
  21. datamule/mulebot/mulebot_server/static/scripts/artifacts.js +67 -67
  22. datamule/mulebot/mulebot_server/static/scripts/chat.js +91 -91
  23. datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +55 -55
  24. datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +14 -14
  25. datamule/mulebot/mulebot_server/static/scripts/main.js +56 -56
  26. datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +26 -26
  27. datamule/mulebot/mulebot_server/static/scripts/suggestions.js +46 -46
  28. datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +128 -128
  29. datamule/mulebot/mulebot_server/static/scripts/utils.js +27 -27
  30. datamule/mulebot/mulebot_server/templates/chat-minimalist.html +90 -90
  31. datamule/mulebot/search.py +51 -51
  32. datamule/mulebot/tools.py +82 -82
  33. datamule/packageupdater.py +207 -0
  34. datamule/portfolio.py +106 -0
  35. datamule/submission.py +76 -0
  36. datamule-1.0.0.dist-info/METADATA +27 -0
  37. datamule-1.0.0.dist-info/RECORD +40 -0
  38. {datamule-0.380.dist-info → datamule-1.0.0.dist-info}/WHEEL +1 -1
  39. datamule/data/filing_types.csv +0 -485
  40. datamule/data/ftd_locations.csv +0 -388
  41. datamule/datamule_api.py +0 -21
  42. datamule/dataset_builder/_init.py +0 -1
  43. datamule/dataset_builder/dataset_builder.py +0 -260
  44. datamule/downloader/__init__.py +0 -0
  45. datamule/downloader/dropbox_downloader.py +0 -225
  46. datamule/downloader/ftd.py +0 -216
  47. datamule/downloader/information_table_13f.py +0 -231
  48. datamule/downloader/sec_downloader.py +0 -635
  49. datamule/filing_viewer/__init__.py +0 -1
  50. datamule/filing_viewer/filing_viewer.py +0 -256
  51. datamule/global_vars.py +0 -202
  52. datamule/parser/__init__.py +0 -1
  53. datamule/parser/basic_10k_parser.py +0 -82
  54. datamule/parser/basic_10q_parser.py +0 -73
  55. datamule/parser/basic_13d_parser.py +0 -58
  56. datamule/parser/basic_13g_parser.py +0 -61
  57. datamule/parser/basic_8k_parser.py +0 -84
  58. datamule/parser/company_concepts_parser.py +0 -0
  59. datamule/parser/form_d_parser.py +0 -70
  60. datamule/parser/generalized_item_parser.py +0 -78
  61. datamule/parser/generalized_xml_parser.py +0 -0
  62. datamule/parser/helper.py +0 -75
  63. datamule/parser/information_table_parser_13fhr.py +0 -41
  64. datamule/parser/insider_trading_parser.py +0 -158
  65. datamule/parser/mappings.py +0 -95
  66. datamule/parser/n_port_p_parser.py +0 -70
  67. datamule/parser/sec_parser.py +0 -79
  68. datamule/parser/sgml_parser.py +0 -180
  69. datamule/sec_filing.py +0 -126
  70. datamule/sec_search.py +0 -20
  71. datamule-0.380.dist-info/METADATA +0 -110
  72. datamule-0.380.dist-info/RECORD +0 -61
  73. {datamule-0.380.dist-info → datamule-1.0.0.dist-info}/top_level.txt +0 -0
datamule/mulebot/tools.py CHANGED
@@ -1,82 +1,82 @@
-
- get_company_concept_tool = {
-     "type": "function",
-     "function": {
-         "name": "get_company_concept",
-         "description": "ONLY use this when explicitly asked to get company XBRL concepts or facts for a given ticker",
-         "parameters": {
-             "type": "object",
-             "properties": {
-                 "ticker": {"type": "string", "description": "The ticker of the company to get facts for"}
-             },
-             "required": ["ticker"]
-         }
-     }
- }
-
- identifier_to_cik_tool = {
-     "type": "function",
-     "function": {
-         "name": "identifier_to_cik",
-         "description": "ONLY use this when explicitly asked to convert a company's ticker to a CIK.",
-         "parameters": {
-             "type": "object",
-             "properties": {
-                 "ticker": {"type": "string", "description": "The ticker to convert to a CIK"},
-             },
-             "required": ["ticker"]
-         }
-     }
- }
-
- get_filing_urls_tool = {
-     "type": "function",
-     "function": {
-         "name": "get_filing_urls",
-         "description": "ONLY use this when explicitly asked to get URLs of filings for a given company or multiple",
-         "parameters": {
-             "type": "object",
-             "properties": {
-                 "ticker": {"type": "string", "description": "Ticker symbol of the company. Can be a single ticker or a list of tickers."},
-                 "form": {"type": "string", "description": "Form type to get (e.g., '10-K', '10-Q')"},
-                 "date": {"type": "string", "description": "Date of the filing, can be a single date, a range, or a list of dates. Format: 'YYYY-MM-DD'. If range use a tuple of two dates."},
-             },
-             "required": ["ticker"]
-         }
-     }
- }
-
- find_filing_section_by_title_tool = {
-     "type": "function",
-     "function": {
-         "name": "find_filing_section_by_title",
-         "description": "ONLY use this when explicitly given a filing URL and told to find a specific section",
-         "parameters": {
-             "type": "object",
-             "properties": {
-                 "url": {"type": "string", "description": "URL of the filing to parse"},
-                 "title": {"type": "string", "description": "The section title to search for in the filing"}
-             },
-             "required": ["url","title"]
-         }
-     }
- }
-
- return_title_tool = {
-     "type": "function",
-     "function": {
-         "name": "return_title",
-         "description": "use this to select a title",
-         "parameters": {
-             "type": "object",
-             "properties": {
-                 "title": {"type": "string", "description": "The title to return"}
-             },
-             "required": ["title"]
-         }
-     }
- }
-
-
-
- tools = [get_company_concept_tool, identifier_to_cik_tool, get_filing_urls_tool, find_filing_section_by_title_tool]
+
+ get_company_concept_tool = {
+     "type": "function",
+     "function": {
+         "name": "get_company_concept",
+         "description": "ONLY use this when explicitly asked to get company XBRL concepts or facts for a given ticker",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "ticker": {"type": "string", "description": "The ticker of the company to get facts for"}
+             },
+             "required": ["ticker"]
+         }
+     }
+ }
+
+ identifier_to_cik_tool = {
+     "type": "function",
+     "function": {
+         "name": "identifier_to_cik",
+         "description": "ONLY use this when explicitly asked to convert a company's ticker to a CIK.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "ticker": {"type": "string", "description": "The ticker to convert to a CIK"},
+             },
+             "required": ["ticker"]
+         }
+     }
+ }
+
+ get_filing_urls_tool = {
+     "type": "function",
+     "function": {
+         "name": "get_filing_urls",
+         "description": "ONLY use this when explicitly asked to get URLs of filings for a given company or multiple",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "ticker": {"type": "string", "description": "Ticker symbol of the company. Can be a single ticker or a list of tickers."},
+                 "form": {"type": "string", "description": "Form type to get (e.g., '10-K', '10-Q')"},
+                 "date": {"type": "string", "description": "Date of the filing, can be a single date, a range, or a list of dates. Format: 'YYYY-MM-DD'. If range use a tuple of two dates."},
+             },
+             "required": ["ticker"]
+         }
+     }
+ }
+
+ find_filing_section_by_title_tool = {
+     "type": "function",
+     "function": {
+         "name": "find_filing_section_by_title",
+         "description": "ONLY use this when explicitly given a filing URL and told to find a specific section",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "url": {"type": "string", "description": "URL of the filing to parse"},
+                 "title": {"type": "string", "description": "The section title to search for in the filing"}
+             },
+             "required": ["url","title"]
+         }
+     }
+ }
+
+ return_title_tool = {
+     "type": "function",
+     "function": {
+         "name": "return_title",
+         "description": "use this to select a title",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "title": {"type": "string", "description": "The title to return"}
+             },
+             "required": ["title"]
+         }
+     }
+ }
+
+
+
+ tools = [get_company_concept_tool, identifier_to_cik_tool, get_filing_urls_tool, find_filing_section_by_title_tool]
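These tool schemas follow the OpenAI function-calling format, so the exported tools list can be passed directly to a chat completion request. A minimal sketch (not taken from the package itself, assuming the openai extra is installed and OPENAI_API_KEY is set; the model name and prompt are illustrative):

    from openai import OpenAI
    from datamule.mulebot.tools import tools

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model choice, not prescribed by the package
        messages=[{"role": "user", "content": "Get the 10-K filing URLs for AAPL"}],
        tools=tools,
    )
    # If the model decides to call one of the tools, its name and JSON arguments appear here
    for call in response.choices[0].message.tool_calls or []:
        print(call.function.name, call.function.arguments)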
datamule/packageupdater.py ADDED
@@ -0,0 +1,207 @@
+ import asyncio
+ import aiohttp
+ import json
+ import csv
+ import os
+ from pkg_resources import resource_filename
+ from .helper import headers
+ from .downloader.downloader import PreciseRateLimiter, RateMonitor
+
+ class PackageUpdater:
+     def __init__(self):
+         self.limiter = PreciseRateLimiter(5)  # 5 requests per second
+         self.rate_monitor = RateMonitor()
+         self.headers = headers
+
+     async def _fetch_json(self, session, url):
+         """Fetch JSON with rate limiting and monitoring."""
+         async with self.limiter:
+             try:
+                 async with session.get(url) as response:
+                     response.raise_for_status()
+                     content = await response.read()
+                     await self.rate_monitor.add_request(len(content))
+                     return await response.json()
+             except Exception as e:
+                 print(f"Error fetching {url}: {str(e)}")
+                 return None
+
+     async def _update_company_tickers(self):
+         """Update company tickers data files."""
+         url = 'https://www.sec.gov/files/company_tickers.json'
+
+         # Define file paths
+         json_file = resource_filename('datamule', 'data/company_tickers.json')
+         csv_file = resource_filename('datamule', 'data/company_tickers.csv')
+
+         # Define temporary file paths
+         temp_json_file = json_file + '.temp'
+         temp_csv_file = csv_file + '.temp'
+
+         async with aiohttp.ClientSession(headers=self.headers) as session:
+             try:
+                 data = await self._fetch_json(session, url)
+                 if not data:
+                     raise Exception("Failed to fetch company tickers data")
+
+                 # Save the raw JSON file
+                 with open(temp_json_file, 'w') as f:
+                     json.dump(data, f, indent=4)
+
+                 # Convert to CSV
+                 with open(temp_csv_file, 'w', newline='') as csvfile:
+                     fieldnames = ['cik', 'ticker', 'title']
+                     writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+                     writer.writeheader()
+                     for _, company in data.items():
+                         writer.writerow({
+                             'cik': str(company['cik_str']).zfill(10),
+                             'ticker': company['ticker'],
+                             'title': company['title']
+                         })
+
+                 # Replace original files
+                 for src, dst in [(temp_json_file, json_file), (temp_csv_file, csv_file)]:
+                     if os.path.exists(dst):
+                         os.remove(dst)
+                     os.rename(src, dst)
+
+                 print(f"Company tickers successfully updated")
+                 return True
+
+             except Exception as e:
+                 print(f"Error updating company tickers: {str(e)}")
+                 return False
+
+             finally:
+                 # Clean up temp files
+                 for temp_file in [temp_json_file, temp_csv_file]:
+                     if os.path.exists(temp_file):
+                         try:
+                             os.remove(temp_file)
+                         except Exception as e:
+                             print(f"Warning: Could not remove {temp_file}: {str(e)}")
+
+     async def _process_metadata_batch(self, session, companies, metadata_writer, former_names_writer):
+         """Process a batch of companies for metadata updates."""
+         metadata_fields = [
+             'cik', 'name', 'entityType', 'sic', 'sicDescription', 'ownerOrg',
+             'insiderTransactionForOwnerExists', 'insiderTransactionForIssuerExists',
+             'tickers', 'exchanges', 'ein', 'description', 'website', 'investorWebsite',
+             'category', 'fiscalYearEnd', 'stateOfIncorporation', 'stateOfIncorporationDescription',
+             'phone', 'flags', 'mailing_street1', 'mailing_street2', 'mailing_city',
+             'mailing_stateOrCountry', 'mailing_zipCode', 'mailing_stateOrCountryDescription',
+             'business_street1', 'business_street2', 'business_city', 'business_stateOrCountry',
+             'business_zipCode', 'business_stateOrCountryDescription'
+         ]
+
+         tasks = []
+         for company in companies:
+             cik = company['cik']
+             url = f'https://data.sec.gov/submissions/CIK{str(cik).zfill(10)}.json'
+             tasks.append(self._fetch_json(session, url))
+
+         results = await asyncio.gather(*tasks, return_exceptions=True)
+
+         for company, result in zip(companies, results):
+             if isinstance(result, Exception) or not result:
+                 print(f"Error processing CIK {company['cik']}: {str(result) if isinstance(result, Exception) else 'No data'}")
+                 continue
+
+             try:
+                 metadata = {field: result.get(field, '') for field in metadata_fields if field not in ['tickers', 'exchanges']}
+                 metadata['cik'] = company['cik']
+                 metadata['tickers'] = ','.join(result.get('tickers', []))
+                 metadata['exchanges'] = ','.join(result.get('exchanges', []))
+
+                 # Add address information
+                 for address_type in ['mailing', 'business']:
+                     address = result.get('addresses', {}).get(address_type, {})
+                     for key, value in address.items():
+                         metadata[f'{address_type}_{key}'] = value if value is not None else ''
+
+                 metadata_writer.writerow(metadata)
+
+                 for former_name in result.get('formerNames', []):
+                     former_names_writer.writerow({
+                         'cik': company['cik'],
+                         'former_name': former_name['name'],
+                         'from_date': former_name['from'],
+                         'to_date': former_name['to']
+                     })
+
+             except Exception as e:
+                 print(f"Error processing metadata for CIK {company['cik']}: {str(e)}")
+
+     async def _update_company_metadata(self):
+         """Update company metadata and former names files."""
+         metadata_file = resource_filename('datamule', 'data/company_metadata.csv')
+         former_names_file = resource_filename('datamule', 'data/company_former_names.csv')
+
+         temp_metadata_file = metadata_file + '.temp'
+         temp_former_names_file = former_names_file + '.temp'
+
+         # Load current company tickers
+         with open(resource_filename('datamule', 'data/company_tickers.csv'), 'r') as f:
+             company_tickers = list(csv.DictReader(f))
+
+         metadata_fields = ['cik', 'name', 'entityType', 'sic', 'sicDescription', 'ownerOrg',
+                            'insiderTransactionForOwnerExists', 'insiderTransactionForIssuerExists',
+                            'tickers', 'exchanges', 'ein', 'description', 'website', 'investorWebsite',
+                            'category', 'fiscalYearEnd', 'stateOfIncorporation', 'stateOfIncorporationDescription',
+                            'phone', 'flags', 'mailing_street1', 'mailing_street2', 'mailing_city',
+                            'mailing_stateOrCountry', 'mailing_zipCode', 'mailing_stateOrCountryDescription',
+                            'business_street1', 'business_street2', 'business_city', 'business_stateOrCountry',
+                            'business_zipCode', 'business_stateOrCountryDescription']
+
+         former_names_fields = ['cik', 'former_name', 'from_date', 'to_date']
+
+         try:
+             async with aiohttp.ClientSession(headers=self.headers) as session:
+                 with open(temp_metadata_file, 'w', newline='') as mf, \
+                      open(temp_former_names_file, 'w', newline='') as fnf:
+
+                     metadata_writer = csv.DictWriter(mf, fieldnames=metadata_fields)
+                     metadata_writer.writeheader()
+
+                     former_names_writer = csv.DictWriter(fnf, fieldnames=former_names_fields)
+                     former_names_writer.writeheader()
+
+                     # Process in batches of 10 companies
+                     batch_size = 10
+                     for i in range(0, len(company_tickers), batch_size):
+                         batch = company_tickers[i:i + batch_size]
+                         await self._process_metadata_batch(
+                             session, batch, metadata_writer, former_names_writer
+                         )
+
+             # Replace original files
+             for src, dst in [(temp_metadata_file, metadata_file),
+                              (temp_former_names_file, former_names_file)]:
+                 if os.path.exists(dst):
+                     os.remove(dst)
+                 os.rename(src, dst)
+
+             print("Company metadata successfully updated")
+             return True
+
+         except Exception as e:
+             print(f"Error updating company metadata: {str(e)}")
+             return False
+
+         finally:
+             # Clean up temp files
+             for temp_file in [temp_metadata_file, temp_former_names_file]:
+                 if os.path.exists(temp_file):
+                     try:
+                         os.remove(temp_file)
+                     except Exception as e:
+                         print(f"Warning: Could not remove {temp_file}: {str(e)}")
+
+     def update_company_tickers(self):
+         """Update company tickers data files."""
+         return asyncio.run(self._update_company_tickers())
+
+     def update_company_metadata(self):
+         """Update company metadata and former names files."""
+         return asyncio.run(self._update_company_metadata())
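PackageUpdater exposes both refresh routines behind synchronous entry points, so updating the bundled SEC reference data is a two-call affair. A minimal usage sketch based on the methods added above (network access to sec.gov is required; nothing beyond the shown API is assumed):

    from datamule.packageupdater import PackageUpdater

    updater = PackageUpdater()
    # Refresh data/company_tickers.json and data/company_tickers.csv from sec.gov
    if updater.update_company_tickers():
        # Then rebuild company_metadata.csv and company_former_names.csv,
        # which iterates over every CIK in the refreshed ticker file in batches of 10
        updater.update_company_metadata()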
datamule/portfolio.py ADDED
@@ -0,0 +1,106 @@
+ from pathlib import Path
+ from tqdm import tqdm
+ from concurrent.futures import ThreadPoolExecutor
+ from .submission import Submission
+ from .downloader.premiumdownloader import PremiumDownloader
+ from .downloader.downloader import Downloader
+ from .config import Config
+ import os
+
+ class Portfolio:
+     def __init__(self, path):
+         self.path = Path(path)
+         self.submissions = []
+         self.MAX_WORKERS = os.cpu_count() - 1
+
+         if self.path.exists():
+             self._load_submissions()
+
+     def _load_submissions(self):
+         folders = [f for f in self.path.iterdir() if f.is_dir()]
+         print(f"Loading {len(folders)} submissions")
+
+         def load_submission(folder):
+             try:
+                 return Submission(folder)
+             except Exception as e:
+                 print(f"Error loading submission from {folder}: {str(e)}")
+                 return None
+
+         with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
+             self.submissions = list(tqdm(
+                 executor.map(load_submission, folders),
+                 total=len(folders),
+                 desc="Loading submissions"
+             ))
+
+         # Filter out None values from failed submissions
+         self.submissions = [s for s in self.submissions if s is not None]
+         print(f"Successfully loaded {len(self.submissions)} submissions")
+
+     def process_submissions(self, callback):
+         """Process all submissions using a thread pool."""
+         with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
+             results = list(tqdm(
+                 executor.map(callback, self.submissions),
+                 total=len(self.submissions),
+                 desc="Processing submissions"
+             ))
+         return results
+
+     def process_documents(self, callback):
+         """Process all documents using a thread pool."""
+         documents = [doc for sub in self.submissions for doc in sub]
+
+         with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
+             results = list(tqdm(
+                 executor.map(callback, documents),
+                 total=len(documents),
+                 desc="Processing documents"
+             ))
+         return results
+
+     def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None):
+         if provider is None:
+             config = Config()
+             provider = config.get_default_source()
+
+         downloader = PremiumDownloader() if provider == 'datamule' else Downloader()
+         downloader.download_submissions(
+             output_dir=self.path,
+             cik=cik,
+             ticker=ticker,
+             submission_type=submission_type,
+             filing_date=filing_date
+         )
+
+         # Reload submissions after download
+         self._load_submissions()
+
+     def __iter__(self):
+         return iter(self.submissions)
+
+     def document_type(self, document_types):
+         """Filter documents by type(s)."""
+         if isinstance(document_types, str):
+             document_types = [document_types]
+
+         for submission in self.submissions:
+             yield from submission.document_type(document_types)
+
+     def contains_string(self, pattern, document_types=None):
+         """Search for pattern in documents, with optional type filter."""
+         def check_document(document):
+             return document if document.contains_string(pattern) else None
+
+         # Get documents, filtered by type if specified
+         documents = list(self.document_type(document_types)) if document_types else [
+             doc for sub in self.submissions for doc in sub
+         ]
+
+         with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
+             results = executor.map(check_document, documents)
+
+             for doc in tqdm(results, total=len(documents), desc=f"Searching for '{pattern}'"):
+                 if doc is not None:
+                     yield doc
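Portfolio ties the downloaders and Submission objects together: point it at a directory, optionally download filings into it, then filter or search across every document. A minimal sketch using only the methods shown above (the directory name, ticker, form type, and search string are illustrative):

    from datamule.portfolio import Portfolio

    portfolio = Portfolio("filings/apple")
    # Fetch 10-K submissions; uses the free SEC downloader unless the
    # configured default source is 'datamule' (premium)
    portfolio.download_submissions(ticker="AAPL", submission_type="10-K")

    # Iterate only the 10-K documents across all loaded submissions
    for document in portfolio.document_type("10-K"):
        print(document)

    # Threaded full-text search with an optional type filter
    for document in portfolio.contains_string("climate risk", document_types=["10-K"]):
        print(document)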
datamule/submission.py ADDED
@@ -0,0 +1,76 @@
+ from pathlib import Path
+ import json
+ from .document import Document
+
+ class Submission:
+     def __init__(self, path):
+         self.path = Path(path)
+         self._load_metadata()
+
+     def _load_metadata(self):
+         metadata_path = self.path / 'metadata.json'
+         with metadata_path.open('r') as f:
+             self.metadata = json.load(f)
+
+     def keep(self, document_types):
+         """Keep files of specified document types, delete others
+         Args:
+             document_types: string or list of strings representing document types to keep
+         """
+         # Convert single string to list for consistent handling
+         if isinstance(document_types, str):
+             document_types = [document_types]
+
+         for doc in self.metadata['documents']:
+             filename = doc.get('filename')
+             if filename is None:
+                 continue
+
+             filepath = self.path / filename
+             # Delete if document type isn't in our keep list
+             if doc['type'] not in document_types and filepath.exists():
+                 filepath.unlink()
+
+     def drop(self, document_types):
+         """Delete files of specified document types, keep others
+         Args:
+             document_types: string or list of strings representing document types to drop
+         """
+         # Convert single string to list for consistent handling
+         if isinstance(document_types, str):
+             document_types = [document_types]
+
+         for doc in self.metadata['documents']:
+             filename = doc.get('filename')
+             if filename is None:
+                 continue
+
+             filepath = self.path / filename
+             # Delete if document type is in our drop list
+             if doc['type'] in document_types and filepath.exists():
+                 filepath.unlink()
+
+     def document_type(self, document_type):
+         # Convert single document type to list for consistent handling
+         if isinstance(document_type, str):
+             document_types = [document_type]
+         else:
+             document_types = document_type
+
+         for doc in self.metadata['documents']:
+             if doc['type'] in document_types:
+                 filename = doc.get('filename')
+                 if filename is None:
+                     continue
+
+                 document_path = self.path / filename
+                 yield Document(doc['type'], document_path)
+
+     def __iter__(self):
+         for doc in self.metadata['documents']:
+             filename = doc.get('filename')
+             if filename is None:
+                 continue
+
+             document_path = self.path / filename
+             yield Document(doc['type'], document_path)
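Submission is a thin wrapper around a downloaded filing folder and its metadata.json: it can prune files by type or yield Document objects for further parsing. A minimal sketch, assuming a folder produced by one of the downloaders (the folder name and document type are illustrative):

    from datamule.submission import Submission

    submission = Submission("filings/apple/0000320193-23-000106")  # illustrative folder name
    # Keep only the primary 10-K file; every other document listed in metadata.json is deleted
    submission.keep("10-K")

    # Yield Document objects for the remaining 10-K file(s)
    for document in submission.document_type("10-K"):
        print(document)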
datamule-1.0.0.dist-info/METADATA ADDED
@@ -0,0 +1,27 @@
+ Metadata-Version: 2.1
+ Name: datamule
+ Version: 1.0.0
+ Summary: Making it easier to use SEC filings.
+ Home-page: https://github.com/john-friedman/datamule-python
+ Author: John Friedman
+ Requires-Dist: aiohttp
+ Requires-Dist: aiolimiter
+ Requires-Dist: tqdm
+ Requires-Dist: requests
+ Requires-Dist: nest-asyncio
+ Requires-Dist: aiofiles
+ Requires-Dist: polars
+ Requires-Dist: setuptools
+ Requires-Dist: selectolax
+ Requires-Dist: pytz
+ Requires-Dist: zstandard
+ Requires-Dist: doc2dict
+ Requires-Dist: secsgml
+ Provides-Extra: all
+ Requires-Dist: openai; extra == "all"
+ Requires-Dist: flask; extra == "all"
+ Provides-Extra: mulebot
+ Requires-Dist: openai; extra == "mulebot"
+ Provides-Extra: mulebot_server
+ Requires-Dist: flask; extra == "mulebot-server"
+
datamule-1.0.0.dist-info/RECORD ADDED
@@ -0,0 +1,40 @@
+ datamule/__init__.py,sha256=IDVK3i5i5DxLlQJ_71aYkloGNi528JOUx8hU6bDzLXM,1255
+ datamule/book.py,sha256=hzp5Ae_PfTg3c_h_LdRQOc9fU6OifapKtV0bU-SyIOw,775
+ datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
+ datamule/document.py,sha256=P1UeF-GLta08T7Ur77IFOp7vJBIYcQPbz3gfuWJ3fi0,10851
+ datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
+ datamule/monitor.py,sha256=AfhGqC_GFTYWemRKgYE85V7rIGMN_pbcpxW6kORQtpw,9273
+ datamule/packageupdater.py,sha256=vEGqlTj6FudIeVHBVJltPh2eBDEqMG9HYmnyrRVKeSU,9595
+ datamule/portfolio.py,sha256=U_QRNk_CbMmi3nJ0VBIwc9SVEGq6kA8LCZHBj9nOGXs,4032
+ datamule/submission.py,sha256=uioIYJbsoe-87nRPyzlo-LZ8Hp7HG7A4KPGSnw86PKY,2790
+ datamule/data/company_former_names.csv,sha256=HE9cAv-_QKFX6jT-_-D0rHmaDyQuAzL4MJwank5O1U8,706380
+ datamule/data/company_metadata.csv,sha256=yPovrCVjYwLWTU_hBUFJymp8iNO0NBYuq_QwOkRLoN8,3068599
+ datamule/data/company_tickers.csv,sha256=GW6lOP54RiGJCx-d9N5jEBy7tGVgU3zI-5xHJXrZfSI,400363
+ datamule/data/sec-glossary.csv,sha256=-cN7GjiadLw5C1sv4zSeCnfeZZDYeSgJl-0ydarMAo0,251209
+ datamule/data/xbrl_descriptions.csv,sha256=SQ9wUURNqG424rnTiZtopsxV2q-PvU4NMj52LqgDsvg,2621524
+ datamule/downloader/downloader.py,sha256=vnMsw0oWqRa84scu6ZcywxbJxsIn38vLV0tybakx3jQ,15217
+ datamule/downloader/premiumdownloader.py,sha256=YhGFwkYqjLkdc5ex2YKM-L7nBAPm5MMCdTwVVP0JO78,14314
+ datamule/mapping_dicts/txt_mapping_dicts.py,sha256=Eh6qYhseuKjjnxGh0A5blHr7mbq9CigFn6Zv9xcG2zU,5783
+ datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
+ datamule/mulebot/__init__.py,sha256=YvZXV6xQ0iP-oGD8rloufjdwJL6D46P3NNr0CY9PQCA,29
+ datamule/mulebot/helper.py,sha256=olztOwltfELZ-IERM2bRNLBavD04kfB6ueWTisJAleA,1080
+ datamule/mulebot/mulebot.py,sha256=XbtgvXBSFu9OaaLW_k1KDgHVTNQGV8_0ZwNMFad-pPU,5837
+ datamule/mulebot/search.py,sha256=mwvbB6Fex5dEQkfxkCL53ne5pXdVno-5KlZ5vZyGnJQ,2073
+ datamule/mulebot/tools.py,sha256=ctnGc2HItR-Roi-QXkc7GEaAOEYQiFRtfmdmIxNxYXk,2940
+ datamule/mulebot/mulebot_server/__init__.py,sha256=x1QhXys7BWxi2g9_ZHUYA6S6rL3VL2718x4rYtGaaIg,33
+ datamule/mulebot/mulebot_server/server.py,sha256=M7kU4aZUoi8X8DUKZNckLewBiI637Krbeap31qD2jt8,3547
+ datamule/mulebot/mulebot_server/static/css/minimalist.css,sha256=Tz1tz8oF_esbfCvLTJBmTfb-5MIiqjfhU_4A4nto1mo,2974
+ datamule/mulebot/mulebot_server/static/scripts/artifacts.js,sha256=WUAoI3LtEBEt3x-Ri0gwd6YT0JtGNwDZ_b8tuhWWSsg,2258
+ datamule/mulebot/mulebot_server/static/scripts/chat.js,sha256=q8vV_KtzuNCXCfXqavM1HROIkYHItOAmaR8P1OjSqa0,3108
+ datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js,sha256=wxeIM2RzF6Zh_9ivnYuNyTzIgIcEz0-zX8gTCvyACJo,2034
+ datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js,sha256=DZFLe-45mmzWvJPO1be5Ivfqx0BInrXfduQ1IhbHWzk,429
+ datamule/mulebot/mulebot_server/static/scripts/main.js,sha256=NEIVih1WJeQ-qo5k8hnmgFHd7N839Mr6hJur856oXVQ,1882
+ datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js,sha256=mGhAXQnjnSxYqVqg1mE5g_ev0-aDhh849xunQtRchnY,1093
+ datamule/mulebot/mulebot_server/static/scripts/suggestions.js,sha256=TCyz8OYuXeIG9qNRgwU2fhz18YNXpy4Bl9mk66lXefo,1795
+ datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js,sha256=UtkUpLvELNI4Ibpb7VstgVA9Tk-8jbkxXhmXsgufFa4,4437
+ datamule/mulebot/mulebot_server/static/scripts/utils.js,sha256=oGPMtyT9dvuqHqrfZj33t4vLZiF8UJrMXB1hpPXRNu4,1255
+ datamule/mulebot/mulebot_server/templates/chat-minimalist.html,sha256=MsTbgpnLD0JCQiKKP3XeeNJRNsRqKsRa1j_XXW7nBKw,6975
+ datamule-1.0.0.dist-info/METADATA,sha256=EMOJtZUCrYTNu7YOrhfZmzapdumfM0TXsnRhDBs0x6A,732
+ datamule-1.0.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ datamule-1.0.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+ datamule-1.0.0.dist-info/RECORD,,
{datamule-0.380.dist-info → datamule-1.0.0.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.43.0)
+ Generator: bdist_wheel (0.45.1)
  Root-Is-Purelib: true
  Tag: py3-none-any