PyPI - datamule - Versions diffs - 1.4.5__py3-none-any.whl → 1.4.9__py3-none-any.whl - Mend

datamule 1.4.5-py3-none-any.whl → 1.4.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

datamule/document/document.py +3 -14
datamule/index.py +1 -1
datamule/sec/submissions/textsearch.py +5 -3
datamule/submission.py +7 -71
{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/METADATA +1 -1
{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/RECORD +8 -8
{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/WHEEL +0 -0
{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/top_level.txt +0 -0

datamule/document/document.py CHANGED Viewed

@@ -11,19 +11,7 @@ from selectolax.parser import HTMLParser
 from .processing import process_tabular_data
 from pathlib import Path
 import webbrowser
-def convert_bytes_keys(obj):
-    if isinstance(obj, dict):
-        return {
-            (k.decode('utf-8').lower() if isinstance(k, bytes) else k): convert_bytes_keys(v)
-            for k, v in obj.items()
-        }
-    elif isinstance(obj, list):
-        return [convert_bytes_keys(item) for item in obj]
-    elif isinstance(obj, bytes):
-        return obj.decode('utf-8').lower()
-    else:
-        return obj
+from secsgml.utils import bytes_to_str
 class Document:
     def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -34,7 +22,8 @@ class Document:
         self.filing_date = filing_date
         if self.type == 'submission_metadata':
-            self.content = convert_bytes_keys(content)
+            # this converts to lower
+            self.content = bytes_to_str(content)
         else:
             self.content = content

datamule/index.py CHANGED Viewed

@@ -9,7 +9,7 @@ class Index:
     def search_submissions(
         self,
-        text_query,
+        text_query=None,
         filing_date=None,
         submission_type=None,
         cik=None,

datamule/sec/submissions/textsearch.py CHANGED Viewed

@@ -7,14 +7,16 @@ class TextSearchEFTSQuery(EFTSQuery):
     """
     def __init__(self, text_query, requests_per_second=5.0, quiet=False):
         super().__init__(requests_per_second=requests_per_second, quiet=quiet)
-        self.text_query = text_query
+        if text_query is not None:
+            self.text_query = text_query
     def _prepare_params(self, cik=None, submission_type=None, filing_date=None, location=None):
         # Get base parameters from parent class
         params = super()._prepare_params(cik, submission_type, filing_date, location)
         # Add text query parameter
-        params['q'] = self.text_query
+        if self.text_query is not None:
+            params['q'] = self.text_query
         return params
@@ -42,7 +44,7 @@ async def extract_accession_numbers(hits):
                 accession_numbers.append(acc_no)
     return accession_numbers
-def query(text_query, cik=None, submission_type=None, filing_date=None, location=None,
+def query(text_query=None, cik=None, submission_type=None, filing_date=None, location=None,
           name=None, requests_per_second=5.0, quiet=False):
     """
     Search SEC filings for text and return the full search results.

datamule/submission.py CHANGED Viewed

@@ -4,72 +4,8 @@ from .document.document import Document
 from secsgml import parse_sgml_content_into_memory
 import os
 import aiofiles
-import tempfile
-# # NEW CODE YAY. probably will remove
-# def save_metadata_atomically(metadata_file_path, metadata_content):
-#     """Save metadata to a JSONL file atomically, works on any filesystem"""
-#     # Create directory if it doesn't exist
-#     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
-#     # Format the JSON with newline
-#     json_str = json.dumps(metadata_content, indent=4) + "\n"
-#     # Write complete content to a temporary file first
-#     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
-#     try:
-#         with os.fdopen(fd, 'w') as temp_file:
-#             temp_file.write(json_str)
-#             temp_file.flush()
-#             os.fsync(temp_file.fileno())  # Force write to disk
-#         # Append the temporary file to the main file
-#         with open(metadata_file_path, 'a') as target_file:
-#             with open(temp_path, 'r') as temp_read:
-#                 content = temp_read.read()
-#                 target_file.write(content)
-#                 target_file.flush()
-#                 os.fsync(target_file.fileno())  # Force write to disk
-#     finally:
-#         # Clean up the temporary file
-#         if os.path.exists(temp_path):
-#             os.unlink(temp_path)
-# async def save_metadata_atomically_async(metadata_file_path, metadata_content):
-#     """Save metadata to a JSONL file atomically in async mode"""
-#     # Create directory if it doesn't exist
-#     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
-#     # Format the JSON with newline
-#     json_str = json.dumps(metadata_content, indent=4) + "\n"
-#     # Write to a temporary file first
-#     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
-#     os.close(fd)  # Close the file descriptor
-#     try:
-#         async with aiofiles.open(temp_path, 'w') as temp_file:
-#             await temp_file.write(json_str)
-#             await temp_file.flush()
-#         # Append the temporary file to the main file
-#         async with aiofiles.open(metadata_file_path, 'a') as target_file:
-#             async with aiofiles.open(temp_path, 'r') as temp_read:
-#                 content = await temp_read.read()
-#                 await target_file.write(content)
-#                 await target_file.flush()
-#     finally:
-#         # Clean up the temporary file
-#         if os.path.exists(temp_path):
-#             os.unlink(temp_path)
-# # END OF NEW CODE
+# TODO add .tar path
 class Submission:
     def __init__(self, path=None,sgml_content=None,keep_document_types=None):
         if path is None and sgml_content is None:
@@ -89,7 +25,7 @@ class Submission:
             filtered_metadata_documents = []
             for idx,doc in enumerate(self.metadata.content['documents']):
-                type = doc.get('type')
+                type = doc.get('type').upper()
                 # Keep only specified types
                 if keep_document_types is not None and type not in keep_document_types:
@@ -98,7 +34,7 @@ class Submission:
                 # write as txt if not declared
                 filename = doc.get('filename','.txt')
                 extension = Path(filename).suffix
-                self.documents.append(Document(type=type, content=raw_documents[idx], extension=extension,filing_date=self.filing_date,accession=self.accession))
+                self.documents.append(Document(type=type.upper(), content=raw_documents[idx], extension=extension,filing_date=self.filing_date,accession=self.accession))
                 filtered_metadata_documents.append(doc)
@@ -121,9 +57,9 @@ class Submission:
     def document_type(self, document_type):
         # Convert single document type to list for consistent handling
         if isinstance(document_type, str):
-            document_types = [document_type]
+            document_types = [document_type.lower()]
         else:
-            document_types = document_type
+            document_types = [item.lower() for item in document_type]
         for idx,doc in enumerate(self.metadata.content['documents']):
             if doc['type'] in document_types:
@@ -144,7 +80,7 @@ class Submission:
                     if extension in ['.htm','.html','.txt','.xml']:
                         content = content.decode('utf-8', errors='replace')
-                    yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
+                    yield Document(type=doc['type'].upper(), content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
                 # if loaded from sgml_content
                 else:
                     yield self.documents[idx]
@@ -171,7 +107,7 @@ class Submission:
                     if extension in ['.htm','.html','.txt','.xml']:
                         content = content.decode('utf-8', errors='replace')
-                    yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
+                    yield Document(type=doc['type'].upper(), content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
                 else:
                     print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")

{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.4.5
+Version: 1.4.9
 Summary: Work with SEC submissions at scale.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman

{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
 datamule/__init__.py,sha256=glzwBeGJEE6-TG7mRule9GH6L59XaIRR9T7ALcdpMus,1067
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
 datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
-datamule/index.py,sha256=_7Ox5hyF_7RWdblVFr5rNyv_ARwBP7VY4f703pk9qQ8,2074
+datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
 datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
 datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
 datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
-datamule/submission.py,sha256=EtWdEnAyWLZdu69Dyzbs4qb5YL41HlExFGMjwEoMhsg,10904
+datamule/submission.py,sha256=UTGIkXOFVrBTHLJxTekw60Nvp92GuSsgQApJWXHNuNg,8493
 datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/document.py,sha256=3vX850H7rZH4H8BysitZDaLhT6WPJuIreoV1PSjACno,14301
+datamule/document/document.py,sha256=VaJWo9HrcODlbifYcXzifW3xBD7nUOWAN8zcVCDWMcs,13958
 datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
 datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
 datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,7 +48,7 @@ datamule/sec/submissions/downloader.py,sha256=izaz559PtBCAWPWGzqUReloawJtXwnracl
 datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
 datamule/sec/submissions/monitor.py,sha256=dZYuVCi_X82eYA8l_9cbnkRjiawz3K4U-FnCAyJcgk4,7892
 datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
-datamule/sec/submissions/textsearch.py,sha256=zEr3NXdhVFL8eMh2jruVXIt7taUZTMdNy2hOAyRM2pA,5706
+datamule/sec/submissions/textsearch.py,sha256=MKDXEz_VI_0ljl73_aw2lx4MVzJW5uDt8KxjvJBwPwM,5794
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
 datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
@@ -58,7 +58,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
 datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
 datamule/seclibrary/downloader.py,sha256=VIdaQq5wDcYWnqrv9t8J7z0KtdNRGK8ahfBsgvTfdQQ,13675
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.4.5.dist-info/METADATA,sha256=oKn8LWnuOVozqGyVe7ggRcCmoyjy7vwZbgZG7AKYWGY,469
-datamule-1.4.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-datamule-1.4.5.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
-datamule-1.4.5.dist-info/RECORD,,
+datamule-1.4.9.dist-info/METADATA,sha256=ojII-iQoWPNIJXs28FNsQjExKxOdQROgYsIHdOfMw6I,469
+datamule-1.4.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-1.4.9.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.4.9.dist-info/RECORD,,

{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{datamule-1.4.5.dist-info → datamule-1.4.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

datamule 1.4.5__py3-none-any.whl → 1.4.9__py3-none-any.whl

datamule 1.4.5-py3-none-any.whl → 1.4.9-py3-none-any.whl