datamule 1.1.0__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- datamule/__init__.py +5 -1
- datamule/index.py +62 -0
- datamule/sec/submissions/eftsquery.py +37 -22
- datamule/sec/submissions/textsearch.py +10 -6
- datamule/{book/book.py → sheet.py} +3 -3
- {datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/METADATA +1 -1
- {datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/RECORD +9 -9
- datamule/book/__init__.py +0 -0
- {datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/WHEEL +0 -0
- {datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/top_level.txt +0 -0
datamule/__init__.py
CHANGED
@@ -3,6 +3,8 @@ from .portfolio import Portfolio
 from .document import Document
 from .helper import _load_package_csv, load_package_dataset
 from .config import Config
+from .sheet import Sheet
+from .index import Index
 
 
 # Keep the notebook environment setup
@@ -32,5 +34,7 @@ __all__ = [
     'Portfolio',
     'Submission',
     'Document',
-    'Config'
+    'Config',
+    'Sheet',
+    'Index',
 ]
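In 1.1.5 the Sheet and Index classes join the top-level exports. A minimal sketch of the new surface, assuming the wheel is installed (constructor defaults taken from sheet.py and index.py below):

from datamule import Sheet, Index

index = Index()        # path is optional; results are returned but not saved
sheet = Sheet("data")  # Sheet requires a storage path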
datamule/index.py
ADDED
@@ -0,0 +1,62 @@
+from pathlib import Path
+from .sec.submissions.textsearch import query
+from .helper import _process_cik_and_metadata_filters, load_package_dataset
+
+class Index:
+    def __init__(self, path=None):
+        self.path = Path(path) if path else None
+
+    def search_submissions(
+        self,
+        text_query,
+        start_date=None,
+        end_date=None,
+        submission_type=None,
+        cik=None,
+        ticker=None,
+        requests_per_second=5.0,
+        quiet=True,
+        **kwargs
+    ):
+        """
+        Search SEC filings for the given text query.
+
+        Args:
+            text_query (str): Text to search for in SEC filings.
+            start_date (str or date, optional): Start date for filing search.
+            end_date (str or date, optional): End date for filing search.
+            submission_type (str, optional): Type of SEC submission to search.
+            cik (str, int, or list, optional): CIK(s) to filter by.
+            ticker (str or list, optional): Ticker(s) to filter by.
+            requests_per_second (float, optional): Rate limit for SEC API requests.
+            quiet (bool, optional): Whether to suppress output.
+            **kwargs: Additional filters to apply.
+
+        Returns:
+            dict: Search results from the query function.
+        """
+        # Process CIK and ticker filters if provided
+        if cik is not None or ticker is not None:
+            cik_list = _process_cik_and_metadata_filters(cik, ticker, **kwargs)
+            # Add CIK filter to the query if we have results
+            if cik_list:
+                # Implementation note: Update as needed - this assumes your query function
+                # can accept a cik parameter, otherwise you may need additional logic here
+                kwargs['cik'] = cik_list
+
+        # Execute the search query
+        results = query(
+            f'{text_query}',
+            filing_date=(start_date, end_date),
+            requests_per_second=requests_per_second,
+            quiet=quiet,
+            submission_type=submission_type,
+            **kwargs
+        )
+
+        # Save results to path if specified
+        if self.path:
+            self._save_results(results, text_query)
+
+        return results
+
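A hedged usage sketch based on the signature and docstring above; the query phrase and dates are illustrative. Note that when a path is set, search_submissions calls self._save_results, which is not defined anywhere in the 62 added lines, so constructing Index without a path avoids that branch:

from datamule import Index

index = Index()  # no path, so the undefined _save_results branch is never reached
results = index.search_submissions(
    text_query='"climate risk"',   # illustrative phrase
    submission_type="10-K",
    start_date="2023-01-01",
    end_date="2023-12-31",
    quiet=True,                    # default; suppresses prints
)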
datamule/sec/submissions/eftsquery.py
CHANGED
@@ -6,13 +6,14 @@ from tqdm import tqdm
 from ..utils import RetryException, PreciseRateLimiter, RateMonitor, headers
 
 class EFTSQuery:
-    def __init__(self, requests_per_second=5.0):
+    def __init__(self, requests_per_second=5.0, quiet=False):
         self.base_url = "https://efts.sec.gov/LATEST/search-index"
         self.headers = headers
         self.limiter = PreciseRateLimiter(requests_per_second)
         self.rate_monitor = RateMonitor()
         self.session = None
         self.pbar = None
+        self.quiet = quiet
         self.max_page_size = 100  # EFTS API limit
         self.fetch_queue = asyncio.Queue()
         self.connection_semaphore = asyncio.Semaphore(5)  # Max 5 concurrent connections
@@ -127,6 +128,8 @@ class EFTSQuery:
         return ", ".join(parts)
 
     async def _fetch_json(self, url):
+        if not self.quiet:
+            print(f"Fetching {url}...")
         async with self.connection_semaphore:
             async with self.limiter:
                 try:
@@ -160,18 +163,21 @@
                     await callback(hits)
                     self.fetch_queue.task_done()
                 except RetryException as e:
-                    print(f"\nRate limited. Sleeping for {e.retry_after} seconds...")
+                    if not self.quiet:
+                        print(f"\nRate limited. Sleeping for {e.retry_after} seconds...")
                     await asyncio.sleep(e.retry_after)
                     # Put back in queue
                     await self.fetch_queue.put((params, from_val, size_val, callback))
                     self.fetch_queue.task_done()
                 except Exception as e:
-                    print(f"\nError fetching {url}: {str(e)}")
+                    if not self.quiet:
+                        print(f"\nError fetching {url}: {str(e)}")
                     self.fetch_queue.task_done()
             except asyncio.CancelledError:
                 break
             except Exception as e:
-                print(f"\nWorker error: {str(e)}")
+                if not self.quiet:
+                    print(f"\nWorker error: {str(e)}")
                 self.fetch_queue.task_done()
 
     def _split_date_range(self, start_date, end_date, num_splits=4):
@@ -322,12 +328,14 @@
 
         # Skip if no results
         if total_hits == 0:
-            print(f"Skipping negated forms query - no results returned")
+            if not self.quiet:
+                print(f"Skipping negated forms query - no results returned")
             return
 
-        query_desc = self._get_query_description(params)
-        date_range = f"{start_date} to {end_date}"
-        print(f"Planning: Analyzing negated forms query (depth {depth}): {date_range} [{total_hits:,} hits]")
+        if not self.quiet:
+            query_desc = self._get_query_description(params)
+            date_range = f"{start_date} to {end_date}"
+            print(f"Planning: Analyzing negated forms query (depth {depth}): {date_range} [{total_hits:,} hits]")
 
         # If small enough or at max depth, process directly
         if total_hits < self.max_efts_hits or start_date == end_date:
@@ -350,8 +358,9 @@
 
         total_hits, data = await self._test_query_size(params)
 
-        query_desc = self._get_query_description(params)
-        print(f"Planning: Analyzing {' '*depth}query: {query_desc} [{total_hits:,} hits]")
+        if not self.quiet:
+            query_desc = self._get_query_description(params)
+            print(f"Planning: Analyzing {' '*depth}query: {query_desc} [{total_hits:,} hits]")
 
         # If we're at the maximum recursion depth or hits are under limit, process directly
         if depth >= max_depth or total_hits < self.max_efts_hits:
@@ -396,8 +405,9 @@
 
     async def _start_query_phase(self, callback):
         """Start the query phase after planning is complete"""
-        print("\n--- Starting query phase ---")
-        self.pbar = tqdm(total=self.total_results_to_fetch, desc="Querying documents [Rate: 0/s | 0 MB/s]")
+        if not self.quiet:
+            print("\n--- Starting query phase ---")
+        self.pbar = tqdm(total=self.total_results_to_fetch, desc="Querying documents [Rate: 0/s | 0 MB/s]")
 
         # Queue all pending page requests
         for params, from_val, size_val, callback in self.pending_page_requests:
@@ -425,18 +435,21 @@
             self.pbar = None
 
         # First check size
-        print("\n--- Starting query planning phase ---")
-        print("Analyzing request and splitting into manageable chunks...")
+        if not self.quiet:
+            print("\n--- Starting query planning phase ---")
+            print("Analyzing request and splitting into manageable chunks...")
 
         total_hits, data = await self._test_query_size(params)
 
         if total_hits == 0:
-            print("No results found for this query.")
+            if not self.quiet:
+                print("No results found for this query.")
             return []
 
         # Get accurate total from aggregation buckets
         self.true_total_docs = self._get_total_from_buckets(data)
-        print(f"Found {self.true_total_docs:,} total documents to retrieve.")
+        if not self.quiet:
+            print(f"Found {self.true_total_docs:,} total documents to retrieve.")
 
         # Start worker tasks
         workers = [asyncio.create_task(self._fetch_worker()) for _ in range(5)]
@@ -458,7 +471,8 @@
                 negated_forms.append('-0')  # Keep primary documents constraint
 
                 remaining_docs = self.true_total_docs - self.processed_doc_count
-                print(f"Planning: Analyzing remaining primary document forms using negation (~{remaining_docs:,} hits)")
+                if not self.quiet:
+                    print(f"Planning: Analyzing remaining primary document forms using negation (~{remaining_docs:,} hits)")
 
                 # Process negated forms query with recursive date splitting
                 start_date = params['startdt']
@@ -466,9 +480,9 @@
                 await self._process_negated_forms_recursive(
                     params, negated_forms, start_date, end_date, 0, collect_hits
                 )
-            else:
+            elif not self.quiet:
                print("No additional forms to process with negation - not a primary documents query")
-        else:
+        elif not self.quiet:
            print("No additional forms to process with negation")
 
        # Start the download phase
@@ -488,15 +502,16 @@
             self.pbar.close()
             self.pbar = None
 
-        print(f"\n--- Query complete: {len(all_hits):,} submissions retrieved ---")
+        if not self.quiet:
+            print(f"\n--- Query complete: {len(all_hits):,} submissions retrieved ---")
         return all_hits
 
-def query_efts(cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, callback=None):
+def query_efts(cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, callback=None, quiet=False):
     """
     Convenience function to run a query without managing the async context.
     """
     async def run_query():
-        query = EFTSQuery(requests_per_second=requests_per_second)
+        query = EFTSQuery(requests_per_second=requests_per_second, quiet=quiet)
         return await query.query(cik, submission_type, filing_date, callback)
 
     return asyncio.run(run_query())
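The net effect of these hunks: a quiet flag is threaded from query_efts down through EFTSQuery, guarding every print statement. A sketch of the convenience function with the new parameter (the CIK, form type, and dates are illustrative):

from datamule.sec.submissions.eftsquery import query_efts

hits = query_efts(
    cik="320193",                              # illustrative CIK
    submission_type="8-K",
    filing_date=("2024-01-01", "2024-06-30"),  # (start, end) tuple, as Index passes it
    quiet=True,                                # new in 1.1.5
)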
datamule/sec/submissions/textsearch.py
CHANGED
@@ -9,8 +9,8 @@ class TextSearchEFTSQuery(EFTSQuery):
     """
     Extended EFTSQuery class that adds text search capabilities.
    """
-    def __init__(self, text_query, requests_per_second=5.0):
-        super().__init__(requests_per_second=requests_per_second)
+    def __init__(self, text_query, requests_per_second=5.0, quiet=False):
+        super().__init__(requests_per_second=requests_per_second, quiet=quiet)
         self.text_query = text_query
 
     def _prepare_params(self, cik=None, submission_type=None, filing_date=None):
@@ -46,7 +46,7 @@ async def extract_accession_numbers(hits):
             accession_numbers.append(acc_no)
     return accession_numbers
 
-def query(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0):
+def query(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, quiet=False):
     """
     Search SEC filings for text and return the full search results.
 
@@ -66,6 +66,8 @@ def query(text_query, cik=None, submission_type=None, filing_date=None, requests
     requests_per_second : float, optional
         Maximum number of requests per second to make to the SEC API.
         Default is 5.0.
+    quiet : bool, optional
+        If True, suppresses all output (progress bars and prints). Default is False.
 
     Returns:
     --------
@@ -73,12 +75,12 @@
         Complete search results with all hit data.
     """
     async def run_query():
-        query = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second)
+        query = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second, quiet=quiet)
         return await query.query(cik, submission_type, filing_date)
 
     return asyncio.run(run_query())
 
-def filter_text(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0):
+def filter_text(text_query, cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, quiet=False):
     """
     Search SEC filings for text and return matching accession numbers.
 
@@ -98,6 +100,8 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, requests
     requests_per_second : float, optional
         Maximum number of requests per second to make to the SEC API.
         Default is 5.0.
+    quiet : bool, optional
+        If True, suppresses all output (progress bars and prints). Default is False.
 
     Returns:
     --------
@@ -105,7 +109,7 @@ def filter_text(text_query, cik=None, submission_type=None, filing_date=None, re
         List of accession numbers (as strings) for filings that match the text query.
     """
     async def run_query():
-        query_obj = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second)
+        query_obj = TextSearchEFTSQuery(text_query, requests_per_second=requests_per_second, quiet=quiet)
 
         # Create a collector for accession numbers
         all_acc_nos = []
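Both public entry points gain the same pass-through flag, documented as suppressing progress bars and prints. A sketch under the documented signatures (the search phrase and dates are illustrative):

from datamule.sec.submissions.textsearch import query, filter_text

# Full hit data for filings matching a phrase
results = query('"going concern"', submission_type="10-K", quiet=True)

# Accession numbers only, over a date range
acc_nos = filter_text(
    '"going concern"',
    filing_date=("2024-01-01", "2024-03-31"),
    quiet=True,
)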
datamule/{book/book.py → sheet.py}
RENAMED
@@ -1,8 +1,8 @@
 from pathlib import Path
-from …
-from …
+from .helper import _process_cik_and_metadata_filters, load_package_dataset
+from .sec.xbrl.downloadcompanyfacts import download_company_facts
 
-class …
+class Sheet:
     def __init__(self, path):
         self.path = Path(path)
 
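Only the constructor survives in this hunk, so little of Sheet's API is visible; the download_company_facts import suggests it wraps XBRL company-facts downloads, but no public method is shown here. A minimal construction sketch:

from datamule import Sheet

sheet = Sheet("company_facts")  # path where downloaded data would be stored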
{datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/RECORD
RENAMED
@@ -1,11 +1,11 @@
-datamule/__init__.py,sha256=…
+datamule/__init__.py,sha256=l6YlwT5EeRxPlCtO5Jd4I8l266rSRUJyfFe97cRtSCM,991
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
 datamule/document.py,sha256=BC8jdVy9pMOA9ghIqV5N2XJidmVNThqbBohsuSAnVoY,10813
 datamule/helper.py,sha256=xgOVnea-lUlQ5I-U0vYUp0VeKPNZehNhqjJvegA3lYE,3342
+datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
 datamule/portfolio.py,sha256=JmZlTrom_g7FXKXxWp_CiQTyC7p6_cDP08G0kFUja48,6982
+datamule/sheet.py,sha256=WwumRdniClGU7W3AXVLOpCdMnepLC7KMrRpQlA6_NUY,1022
 datamule/submission.py,sha256=JsxYlEz1Ywu6eC32OS15p4p-p8qB6SWd_rXuf2p5UfY,1247
-datamule/book/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/book/book.py,sha256=QWiowVNqb84o-JcVo0fpKumxnIbBge2ZeKwHxqkVMqw,1023
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
 datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
@@ -17,10 +17,10 @@ datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
 datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/submissions/downloader.py,sha256=HxbSkNotLLW6ROmU30rnXPlCo9gY3SoB1Z4ZWvj9FIY,2669
-datamule/sec/submissions/eftsquery.py,sha256=…
+datamule/sec/submissions/eftsquery.py,sha256=v6YMBZzksqweqHnNIllMFN-frWypAgvZPKx2FH1UrL4,22515
 datamule/sec/submissions/monitor.py,sha256=XkwH5nvzr_dNttmFRQ52m7344IKbOtWDfOZIEdie4H8,5234
 datamule/sec/submissions/streamer.py,sha256=hc61le7gGIIWp1KEaOv_PhriUxf7YYFkQrSKELlZ3pg,9748
-datamule/sec/submissions/textsearch.py,sha256=…
+datamule/sec/submissions/textsearch.py,sha256=oEIUrcO3HW-4dcyPCiOTvM7UUimNEM4HNIb-Juvc1BQ,4642
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
 datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
@@ -29,7 +29,7 @@ datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTq
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.1.0.dist-info/METADATA,sha256=…
-datamule-1.1.0.dist-info/WHEEL,sha256=…
-datamule-1.1.0.dist-info/top_level.txt,sha256=…
-datamule-1.1.0.dist-info/RECORD,,
+datamule-1.1.5.dist-info/METADATA,sha256=9Q8YzsBipVuGYN4eWmH49sF5oyouyZvVdJ6rncDa0VE,512
+datamule-1.1.5.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+datamule-1.1.5.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.1.5.dist-info/RECORD,,
datamule/book/__init__.py
DELETED
File without changes

{datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/WHEEL
RENAMED
File without changes

{datamule-1.1.0.dist-info → datamule-1.1.5.dist-info}/top_level.txt
RENAMED
File without changes