datamule-1.1.0.tar.gz → datamule-1.1.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {datamule-1.1.0 → datamule-1.1.1}/PKG-INFO +1 -1
  2. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/submissions/eftsquery.py +37 -22
  3. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/submissions/textsearch.py +10 -6
  4. {datamule-1.1.0 → datamule-1.1.1}/datamule.egg-info/PKG-INFO +1 -1
  5. {datamule-1.1.0 → datamule-1.1.1}/setup.py +1 -1
  6. {datamule-1.1.0 → datamule-1.1.1}/datamule/__init__.py +0 -0
  7. {datamule-1.1.0 → datamule-1.1.1}/datamule/book/__init__.py +0 -0
  8. {datamule-1.1.0 → datamule-1.1.1}/datamule/book/book.py +0 -0
  9. {datamule-1.1.0 → datamule-1.1.1}/datamule/config.py +0 -0
  10. {datamule-1.1.0 → datamule-1.1.1}/datamule/document.py +0 -0
  11. {datamule-1.1.0 → datamule-1.1.1}/datamule/helper.py +0 -0
  12. {datamule-1.1.0 → datamule-1.1.1}/datamule/mapping_dicts/__init__.py +0 -0
  13. {datamule-1.1.0 → datamule-1.1.1}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  14. {datamule-1.1.0 → datamule-1.1.1}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  15. {datamule-1.1.0 → datamule-1.1.1}/datamule/portfolio.py +0 -0
  16. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/__init__.py +0 -0
  17. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/infrastructure/__init__.py +0 -0
  18. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  19. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/rss/__init__.py +0 -0
  20. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/rss/monitor.py +0 -0
  21. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/submissions/__init__.py +0 -0
  22. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/submissions/downloader.py +0 -0
  23. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/submissions/monitor.py +0 -0
  24. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/submissions/streamer.py +0 -0
  25. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/utils.py +0 -0
  26. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/xbrl/__init__.py +0 -0
  27. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  28. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  29. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  30. {datamule-1.1.0 → datamule-1.1.1}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  31. {datamule-1.1.0 → datamule-1.1.1}/datamule/seclibrary/__init__.py +0 -0
  32. {datamule-1.1.0 → datamule-1.1.1}/datamule/seclibrary/downloader.py +0 -0
  33. {datamule-1.1.0 → datamule-1.1.1}/datamule/seclibrary/query.py +0 -0
  34. {datamule-1.1.0 → datamule-1.1.1}/datamule/submission.py +0 -0
  35. {datamule-1.1.0 → datamule-1.1.1}/datamule.egg-info/SOURCES.txt +0 -0
  36. {datamule-1.1.0 → datamule-1.1.1}/datamule.egg-info/dependency_links.txt +0 -0
  37. {datamule-1.1.0 → datamule-1.1.1}/datamule.egg-info/requires.txt +0 -0
  38. {datamule-1.1.0 → datamule-1.1.1}/datamule.egg-info/top_level.txt +0 -0
  39. {datamule-1.1.0 → datamule-1.1.1}/setup.cfg +0 -0
--- datamule-1.1.0/PKG-INFO
+++ datamule-1.1.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.1.0
+Version: 1.1.1
 Summary: Making it easier to use SEC filings.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
--- datamule-1.1.0/datamule/sec/submissions/eftsquery.py
+++ datamule-1.1.1/datamule/sec/submissions/eftsquery.py
@@ -6,13 +6,14 @@ from tqdm import tqdm
 from ..utils import RetryException, PreciseRateLimiter, RateMonitor, headers
 
 class EFTSQuery:
-    def __init__(self, requests_per_second=5.0):
+    def __init__(self, requests_per_second=5.0, quiet=False):
         self.base_url = "https://efts.sec.gov/LATEST/search-index"
         self.headers = headers
         self.limiter = PreciseRateLimiter(requests_per_second)
         self.rate_monitor = RateMonitor()
         self.session = None
         self.pbar = None
+        self.quiet = quiet
         self.max_page_size = 100  # EFTS API limit
         self.fetch_queue = asyncio.Queue()
         self.connection_semaphore = asyncio.Semaphore(5)  # Max 5 concurrent connections
@@ -127,6 +128,8 @@ class EFTSQuery:
         return ", ".join(parts)
 
     async def _fetch_json(self, url):
+        if not self.quiet:
+            print(f"Fetching {url}...")
         async with self.connection_semaphore:
             async with self.limiter:
                 try:
@@ -160,18 +163,21 @@ class EFTSQuery:
                         await callback(hits)
                     self.fetch_queue.task_done()
                 except RetryException as e:
-                    print(f"\nRate limited. Sleeping for {e.retry_after} seconds...")
+                    if not self.quiet:
+                        print(f"\nRate limited. Sleeping for {e.retry_after} seconds...")
                     await asyncio.sleep(e.retry_after)
                     # Put back in queue
                     await self.fetch_queue.put((params, from_val, size_val, callback))
                     self.fetch_queue.task_done()
                 except Exception as e:
-                    print(f"\nError fetching {url}: {str(e)}")
+                    if not self.quiet:
+                        print(f"\nError fetching {url}: {str(e)}")
                     self.fetch_queue.task_done()
             except asyncio.CancelledError:
                 break
             except Exception as e:
-                print(f"\nWorker error: {str(e)}")
+                if not self.quiet:
+                    print(f"\nWorker error: {str(e)}")
                 self.fetch_queue.task_done()
 
     def _split_date_range(self, start_date, end_date, num_splits=4):
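The worker hunk above uses a put-back retry pattern: on a RetryException it sleeps for the server-supplied backoff, re-enqueues the same request tuple, and still calls task_done() so that queue.join() stays balanced. A minimal standalone sketch of that pattern (names other than asyncio's own are placeholders, not datamule's API):

import asyncio

class RetryException(Exception):
    """Placeholder for datamule's retry signal; carries the server backoff."""
    def __init__(self, retry_after):
        self.retry_after = retry_after

async def fetch_worker(queue: asyncio.Queue, fetch_one, quiet=False):
    # Each queue item is a request descriptor; fetch_one is any awaitable fetcher.
    while True:
        item = await queue.get()
        try:
            await fetch_one(item)
        except RetryException as e:
            if not quiet:
                print(f"Rate limited. Sleeping for {e.retry_after} seconds...")
            await asyncio.sleep(e.retry_after)
            await queue.put(item)  # put the request back for a later attempt
        finally:
            # Every get() must be matched by task_done(), or queue.join() hangs.
            queue.task_done()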
@@ -322,12 +328,14 @@ class EFTSQuery:
 
         # Skip if no results
         if total_hits == 0:
-            print(f"Skipping negated forms query - no results returned")
+            if not self.quiet:
+                print(f"Skipping negated forms query - no results returned")
             return
 
-        query_desc = self._get_query_description(params)
-        date_range = f"{start_date} to {end_date}"
-        print(f"Planning: Analyzing negated forms query (depth {depth}): {date_range} [{total_hits:,} hits]")
+        if not self.quiet:
+            query_desc = self._get_query_description(params)
+            date_range = f"{start_date} to {end_date}"
+            print(f"Planning: Analyzing negated forms query (depth {depth}): {date_range} [{total_hits:,} hits]")
 
         # If small enough or at max depth, process directly
         if total_hits < self.max_efts_hits or start_date == end_date:
@@ -350,8 +358,9 @@ class EFTSQuery:
 
         total_hits, data = await self._test_query_size(params)
 
-        query_desc = self._get_query_description(params)
-        print(f"Planning: Analyzing {' '*depth}query: {query_desc} [{total_hits:,} hits]")
+        if not self.quiet:
+            query_desc = self._get_query_description(params)
+            print(f"Planning: Analyzing {' '*depth}query: {query_desc} [{total_hits:,} hits]")
 
         # If we're at the maximum recursion depth or hits are under limit, process directly
         if depth >= max_depth or total_hits < self.max_efts_hits:
@@ -396,8 +405,9 @@ class EFTSQuery:
 
     async def _start_query_phase(self, callback):
         """Start the query phase after planning is complete"""
-        print("\n--- Starting query phase ---")
-        self.pbar = tqdm(total=self.total_results_to_fetch, desc="Querying documents [Rate: 0/s | 0 MB/s]")
+        if not self.quiet:
+            print("\n--- Starting query phase ---")
+            self.pbar = tqdm(total=self.total_results_to_fetch, desc="Querying documents [Rate: 0/s | 0 MB/s]")
 
         # Queue all pending page requests
         for params, from_val, size_val, callback in self.pending_page_requests:
@@ -425,18 +435,21 @@ class EFTSQuery:
             self.pbar = None
 
         # First check size
-        print("\n--- Starting query planning phase ---")
-        print("Analyzing request and splitting into manageable chunks...")
+        if not self.quiet:
+            print("\n--- Starting query planning phase ---")
+            print("Analyzing request and splitting into manageable chunks...")
 
         total_hits, data = await self._test_query_size(params)
 
         if total_hits == 0:
-            print("No results found for this query.")
+            if not self.quiet:
+                print("No results found for this query.")
             return []
 
         # Get accurate total from aggregation buckets
         self.true_total_docs = self._get_total_from_buckets(data)
-        print(f"Found {self.true_total_docs:,} total documents to retrieve.")
+        if not self.quiet:
+            print(f"Found {self.true_total_docs:,} total documents to retrieve.")
 
         # Start worker tasks
         workers = [asyncio.create_task(self._fetch_worker()) for _ in range(5)]
@@ -458,7 +471,8 @@ class EFTSQuery:
                 negated_forms.append('-0')  # Keep primary documents constraint
 
                 remaining_docs = self.true_total_docs - self.processed_doc_count
-                print(f"Planning: Analyzing remaining primary document forms using negation (~{remaining_docs:,} hits)")
+                if not self.quiet:
+                    print(f"Planning: Analyzing remaining primary document forms using negation (~{remaining_docs:,} hits)")
 
                 # Process negated forms query with recursive date splitting
                 start_date = params['startdt']
@@ -466,9 +480,9 @@ class EFTSQuery:
                 await self._process_negated_forms_recursive(
                     params, negated_forms, start_date, end_date, 0, collect_hits
                 )
-            else:
+            elif not self.quiet:
                 print("No additional forms to process with negation - not a primary documents query")
-        else:
+        elif not self.quiet:
             print("No additional forms to process with negation")
 
         # Start the download phase
@@ -488,15 +502,16 @@ class EFTSQuery:
             self.pbar.close()
             self.pbar = None
 
-        print(f"\n--- Query complete: {len(all_hits):,} submissions retrieved ---")
+        if not self.quiet:
+            print(f"\n--- Query complete: {len(all_hits):,} submissions retrieved ---")
         return all_hits
 
-def query_efts(cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, callback=None):
+def query_efts(cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, callback=None, quiet=False):
     """
     Convenience function to run a query without managing the async context.
     """
     async def run_query():
-        query = EFTSQuery(requests_per_second=requests_per_second)
+        query = EFTSQuery(requests_per_second=requests_per_second, quiet=quiet)
         return await query.query(cik, submission_type, filing_date, callback)
 
     return asyncio.run(run_query())
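Taken together, the eftsquery.py changes thread a single quiet flag through the planning prints, the per-request logging, and the tqdm progress bar. A hedged usage sketch of the new parameter via the query_efts convenience function (the CIK value and the filing_date tuple format are illustrative assumptions inferred from the startdt/enddt params visible above, not confirmed by this diff):

from datamule.sec.submissions.eftsquery import query_efts

# quiet=True suppresses the planning-phase output, per-URL prints,
# and the progress bar; results are returned either way.
hits = query_efts(
    cik="0000320193",                          # example CIK (assumed format)
    submission_type="10-K",
    filing_date=("2020-01-01", "2023-12-31"),  # assumed (startdt, enddt) pair
    requests_per_second=5.0,
    quiet=True,
)
print(f"Retrieved {len(hits):,} submissions")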
--- datamule-1.1.0/datamule/sec/submissions/textsearch.py
+++ datamule-1.1.1/datamule/sec/submissions/textsearch.py
@@ -9,8 +9,8 @@ class TextSearchEFTSQuery(EFTSQuery):
     """
     Extended EFTSQuery class that adds text search capabilities.
    """
-    def __init__(self, text_query, requests_per_second=5.0):
-        super().__init__(requests_per_second=requests_per_second)
+    def __init__(self, text_query, requests_per_second=5.0, quiet=False):
+        super().__init__(requests_per_second=requests_per_second, quiet=quiet)
         self.text_query = text_query
 
     def _prepare_params(self, cik=None, submission_type=None, filing_date=None):
@@ -46,7 +46,7 @@ async def extract_accession_numbers(hits):
             accession_numbers.append(acc_no)
     return accession_numbers
 
-def query(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0):
+def query(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, quiet=False):
     """
     Search SEC filings for text and return the full search results.
 
@@ -66,6 +66,8 @@ def query(text_query, cik=None, submission_type=None, filing_date=None, requests
     requests_per_second : float, optional
         Maximum number of requests per second to make to the SEC API.
         Default is 5.0.
+    quiet : bool, optional
+        If True, suppresses all output (progress bars and prints). Default is False.
 
     Returns:
     --------
@@ -73,12 +75,12 @@ def query(text_query, cik=None, submission_type=None, filing_date=None, requests
         Complete search results with all hit data.
     """
     async def run_query():
-        query = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second)
+        query = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second, quiet=quiet)
         return await query.query(cik, submission_type, filing_date)
 
     return asyncio.run(run_query())
 
-def filter_text(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0):
+def filter_text(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, quiet=False):
     """
     Search SEC filings for text and return matching accession numbers.
 
@@ -98,6 +100,8 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, re
     requests_per_second : float, optional
         Maximum number of requests per second to make to the SEC API.
         Default is 5.0.
+    quiet : bool, optional
+        If True, suppresses all output (progress bars and prints). Default is False.
 
     Returns:
     --------
@@ -105,7 +109,7 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, re
         List of accession numbers (as strings) for filings that match the text query.
     """
     async def run_query():
-        query_obj = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second)
+        query_obj = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second, quiet=quiet)
 
         # Create a collector for accession numbers
         all_acc_nos = []
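textsearch.py only forwards the flag: both module-level entry points construct a TextSearchEFTSQuery with quiet=quiet and inherit the silenced behavior from the base class. A brief sketch, assuming the import path shown in the file list (the search phrase is illustrative):

from datamule.sec.submissions.textsearch import query, filter_text

# Full hit data, with all console output suppressed.
results = query("climate risk", submission_type="10-K", quiet=True)

# Only the matching accession numbers, also silenced.
acc_nos = filter_text("climate risk", submission_type="10-K", quiet=True)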
--- datamule-1.1.0/datamule.egg-info/PKG-INFO
+++ datamule-1.1.1/datamule.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.1.0
+Version: 1.1.1
 Summary: Making it easier to use SEC filings.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
--- datamule-1.1.0/setup.py
+++ datamule-1.1.1/setup.py
@@ -29,7 +29,7 @@ if not file_path.exists():
 setup(
     name="datamule",
     author="John Friedman",
-    version="1.1.0",
+    version="1.1.1",
     description="Making it easier to use SEC filings.",
     packages=find_packages(include=['datamule', 'datamule.*']),
     url="https://github.com/john-friedman/datamule-python",