datamule 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/portfolio.py +8 -4
- datamule/sec/submissions/downloader.py +3 -2
- datamule/sec/submissions/monitor.py +42 -22
- datamule/seclibrary/downloader.py +97 -8
- datamule/submission.py +123 -45
- {datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/METADATA +1 -1
- {datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/RECORD +9 -9
- {datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/WHEEL +0 -0
- {datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/top_level.txt +0 -0
datamule/portfolio.py
CHANGED
@@ -34,7 +34,6 @@ class Portfolio:
 
     def _load_submissions(self):
         folders = [f for f in self.path.iterdir() if f.is_dir() or f.suffix=='.tar']
-        print(folders)
         print(f"Loading {len(folders)} submissions")
 
         def load_submission(folder):
@@ -126,7 +125,8 @@ class Portfolio:
             # First query, just set the accession numbers
             self.accession_numbers = new_accession_numbers
 
-    def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,document_type=[],
+    def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,document_type=[],
+                             requests_per_second=5,keep_filtered_metadata=False,standardize_metadata=True, **kwargs):
         if provider is None:
             config = Config()
             provider = config.get_default_source()
@@ -143,7 +143,9 @@ class Portfolio:
                 submission_type=submission_type,
                 filing_date=filing_date,
                 accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None,
-                keep_document_types=document_type
+                keep_document_types=document_type,
+                keep_filtered_metadata=keep_filtered_metadata,
+                standardize_metadata=standardize_metadata,
             )
         else:
             sec_download(
@@ -153,7 +155,9 @@ class Portfolio:
                 filing_date=filing_date,
                 requests_per_second=requests_per_second,
                 accession_numbers=self.accession_numbers if hasattr(self, 'accession_numbers') else None,
-                keep_document_types=document_type
+                keep_document_types=document_type,
+                keep_filtered_metadata=keep_filtered_metadata,
+                standardize_metadata=standardize_metadata,
             )
 
         self.submissions_loaded = False
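For context, a minimal usage sketch of the new keyword arguments on Portfolio.download_submissions. The folder path, CIK, and form type are illustrative, and the import assumes Portfolio is exported from the package root:

from datamule import Portfolio

portfolio = Portfolio("filings")  # hypothetical local output folder

portfolio.download_submissions(
    cik="0000320193",              # example CIK
    submission_type="10-K",
    document_type=["10-K"],
    keep_filtered_metadata=False,  # per the docstring below: keep metadata for filtered documents
    standardize_metadata=True,     # per the docstring below: standardize metadata format
)

Both flags default to the values shown, so existing 1.5.2-style calls keep working; the flags are simply forwarded to whichever provider (seclibrary or SEC) performs the download.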
datamule/sec/submissions/downloader.py
CHANGED
@@ -5,7 +5,7 @@ from tqdm import tqdm
 
 def download(cik=None, submission_type=None, filing_date=None, location=None, name=None,
              requests_per_second=5, output_dir="filings", accession_numbers=None,
-             quiet=False, keep_document_types=[]):
+             quiet=False, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True):
     # Make sure output directory exists
     os.makedirs(output_dir, exist_ok=True)
 
@@ -14,7 +14,8 @@ def download(cik=None, submission_type=None, filing_date=None, location=None, na
     # Create a wrapper for the download_callback that includes the output_dir
     async def callback_wrapper(hit, content, cik, accno, url):
         output_path = os.path.join(output_dir, accno.replace('-','') + '.tar')
-        write_sgml_file_to_tar(output_path, bytes_content=content, filter_document_types=keep_document_types)
+        write_sgml_file_to_tar(output_path, bytes_content=content, filter_document_types=keep_document_types,keep_filtered_metadata=keep_filtered_metadata,
+                               standardize_metadata=standardize_metadata)
         pbar.update(1)
 
 
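The same pair of flags is accepted by this lower-level SEC downloader and forwarded to write_sgml_file_to_tar inside the download callback. A hedged sketch of a direct call, with the import path taken from this file's location and placeholder query values:

from datamule.sec.submissions.downloader import download

download(
    cik="0000320193",              # example CIK
    submission_type="8-K",
    output_dir="filings",
    keep_document_types=["8-K"],   # keep only documents of type 8-K in each tar
    keep_filtered_metadata=False,
    standardize_metadata=True,
)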
datamule/sec/submissions/monitor.py
CHANGED
@@ -77,12 +77,19 @@ class Monitor():
         )
 
     async def _async_monitor_submissions(self, data_callback=None, interval_callback=None,
-
-
+                                         polling_interval=1000, quiet=True, start_date=None,
+                                         validation_interval=60000):
         """
         Async implementation of monitor_submissions.
+        Either polling_interval or validation_interval (or both) must be specified.
+        If polling_interval is None, only EFTS validation will be performed.
+        If validation_interval is None, only RSS polling will be performed.
         """
 
+        # Validate that at least one interval is specified
+        if polling_interval is None and validation_interval is None:
+            raise ValueError("At least one of polling_interval or validation_interval must be specified")
+
         # Backfill if start_date is provided
         if start_date is not None:
             today_date = datetime.now().date().strftime('%Y-%m-%d')
@@ -100,24 +107,33 @@ class Monitor():
             if new_hits and data_callback:
                 data_callback(new_hits)
 
-
-
-
-
+        # Initialize timing variables
+        current_time = time.time()
+        last_polling_time = current_time
+        last_validation_time = current_time
+
+        # Determine which operations to perform
+        do_polling = polling_interval is not None
+        do_validation = validation_interval is not None
+
         while True:
-
-
-
-
-            new_results = self._filter_new_accessions(results)
-            if new_results:
+            current_time = time.time()
+
+            # RSS polling (if enabled)
+            if do_polling and (current_time - last_polling_time) >= polling_interval/1000:
                 if not quiet:
-                    print(f"
-
-
+                    print(f"Polling RSS feed")
+                results = await poll_rss(self.ratelimiters['sec.gov'])
+                new_results = self._filter_new_accessions(results)
+                if new_results:
+                    if not quiet:
+                        print(f"Found {len(new_results)} new submissions via RSS")
+                    if data_callback:
+                        data_callback(new_results)
+                last_polling_time = current_time
 
-            # EFTS validation
-            if
+            # EFTS validation (if enabled)
+            if do_validation and (current_time - last_validation_time) >= validation_interval/1000:
                 # Get submissions from the last 24 hours for validation
                 today_date = datetime.now().strftime('%Y-%m-%d')
                 if not quiet:
@@ -134,19 +150,23 @@ class Monitor():
                         print(f"Found {len(new_hits)} new submissions via EFTS validation")
                     if data_callback:
                         data_callback(new_hits)
-                last_polling_time = time.time()
                 last_validation_time = current_time
 
             # Interval callback
             if interval_callback:
                 interval_callback()
 
-
+            # Calculate next wake-up time
+            next_times = []
+            if do_polling:
+                next_times.append(last_polling_time + (polling_interval / 1000))
+            if do_validation:
+                next_times.append(last_validation_time + (validation_interval / 1000))
+
+            next_wake_time = min(next_times)
             current_time = time.time()
-            time_to_sleep = max(0,
+            time_to_sleep = max(0, next_wake_time - current_time)
             await asyncio.sleep(time_to_sleep)
-            last_polling_time = next_poll_time
-
 
     def monitor_submissions(self, data_callback=None, interval_callback=None,
                             polling_interval=1000, quiet=True, start_date=None,
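The rewritten loop decouples RSS polling from EFTS validation: each source keeps its own last-run timestamp, and the sleep is sized to the earliest of the two next due times instead of a fixed poll interval. A self-contained sketch of that wake-up arithmetic (the function name is illustrative, not part of the package):

import time

def next_sleep(last_polling_time, last_validation_time,
               polling_interval=1000, validation_interval=60000):
    # Intervals are in milliseconds, matching the monitor's signature;
    # either may be None to disable that source entirely.
    next_times = []
    if polling_interval is not None:
        next_times.append(last_polling_time + polling_interval / 1000)
    if validation_interval is not None:
        next_times.append(last_validation_time + validation_interval / 1000)
    return max(0, min(next_times) - time.time())

now = time.time()
print(next_sleep(now, now))  # ~1.0: the next RSS poll is due before the next validation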
datamule/seclibrary/downloader.py
CHANGED
@@ -74,7 +74,7 @@ class Downloader:
                 print(f"Failed to log error to {error_file}: {str(e)}")
 
     class FileProcessor:
-        def __init__(self, output_dir, max_workers, queue_size, pbar, downloader, keep_document_types=[]):
+        def __init__(self, output_dir, max_workers, queue_size, pbar, downloader, keep_document_types=[], keep_filtered_metadata=False,standardize_metadata=True):
             self.processing_queue = Queue(maxsize=queue_size)
             self.should_stop = False
             self.processing_workers = []
@@ -84,6 +84,8 @@ class Downloader:
             self.pbar = pbar
             self.downloader = downloader
             self.keep_document_types = keep_document_types
+            self.keep_filtered_metadata = keep_filtered_metadata
+            self.standardize_metadata = standardize_metadata
 
         def start_processing_workers(self):
             for _ in range(self.max_workers):
@@ -95,7 +97,8 @@ class Downloader:
         def _process_file(self, item):
             filename, content = item
             output_path = os.path.join(self.output_dir, filename.split('.')[0] + '.tar')
-            write_sgml_file_to_tar(output_path, bytes_content=content, filter_document_types=self.keep_document_types)
+            write_sgml_file_to_tar(output_path, bytes_content=content, filter_document_types=self.keep_document_types, keep_filtered_metadata=self.keep_filtered_metadata,standardize_metadata=self.standardize_metadata)
+
             self.pbar.update(1)
 
         def _processing_worker(self):
@@ -204,11 +207,12 @@ class Downloader:
             except Exception as e:
                 self._log_error(output_dir, filename, str(e))
 
-    async def process_batch(self, urls, output_dir, keep_document_types=[]):
+    async def process_batch(self, urls, output_dir, keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True):
         os.makedirs(output_dir, exist_ok=True)
 
         with tqdm(total=len(urls), desc="Processing files") as pbar:
-            processor = self.FileProcessor(output_dir, self.MAX_PROCESSING_WORKERS, self.QUEUE_SIZE, pbar, self, keep_document_types=keep_document_types)
+            processor = self.FileProcessor(output_dir, self.MAX_PROCESSING_WORKERS, self.QUEUE_SIZE, pbar, self, keep_document_types=keep_document_types,
+                                           keep_filtered_metadata=keep_filtered_metadata,standardize_metadata=standardize_metadata)
             processor.start_processing_workers()
 
             semaphore = asyncio.Semaphore(self.MAX_CONCURRENT_DOWNLOADS)
@@ -231,7 +235,7 @@ class Downloader:
             processor.stop_workers()
             decompression_pool.shutdown()
 
-    def download(self, submission_type=None, cik=None, filing_date=None, output_dir="downloads", accession_numbers=None, keep_document_types=[]):
+    def download(self, submission_type=None, cik=None, filing_date=None, output_dir="downloads", accession_numbers=None, keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True):
         """
         Query SEC filings and download/process them.
 
@@ -242,6 +246,7 @@ class Downloader:
         - output_dir: Directory to save downloaded files
         - accession_numbers: List of specific accession numbers to download
         - keep_document_types: List of document types to keep (e.g., ['10-K', 'EX-10.1'])
+        - keep_filtered_metadata: Whether to keep metadata for filtered documents
         """
         if self.api_key is None:
             raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
@@ -279,7 +284,7 @@ class Downloader:
         start_time = time.time()
 
         # Process the batch asynchronously
-        asyncio.run(self.process_batch(urls, output_dir, keep_document_types=keep_document_types))
+        asyncio.run(self.process_batch(urls, output_dir, keep_document_types=keep_document_types, keep_filtered_metadata=keep_filtered_metadata, standardize_metadata=standardize_metadata))
 
         # Calculate and display performance metrics
         elapsed_time = time.time() - start_time
@@ -292,7 +297,65 @@ class Downloader:
         self.loop.call_soon_threadsafe(self.loop.stop)
 
 
-
+
+    def download_files_using_filename(self, filenames, output_dir="downloads", keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True):
+        """
+        Download and process SEC filings using specific filenames.
+
+        Parameters:
+        - filenames: List of specific filenames to download (e.g., ['000091205797006494.sgml', '000100704297000007.sgml.zst'])
+        - output_dir: Directory to save downloaded files
+        - keep_document_types: List of document types to keep (e.g., ['10-K', 'EX-10.1'])
+        - keep_filtered_metadata: Whether to keep metadata for filtered documents
+        - standardize_metadata: Whether to standardize metadata format
+        """
+        if self.api_key is None:
+            raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
+
+        if not filenames:
+            raise ValueError("No filenames provided")
+
+        if not isinstance(filenames, (list, tuple)):
+            filenames = [filenames]
+
+        # Validate filenames format
+        for filename in filenames:
+            if not isinstance(filename, str):
+                raise ValueError(f"Invalid filename type: {type(filename)}. Expected string.")
+            if not (filename.endswith('.sgml') or filename.endswith('.sgml.zst')):
+                raise ValueError(f"Invalid filename format: {filename}. Expected .sgml or .sgml.zst extension.")
+
+        # Generate URLs directly from filenames
+        print(f"Generating URLs for {len(filenames)} files...")
+        urls = []
+        for filename in filenames:
+            url = f"{self.BASE_URL}{filename}"
+            urls.append(url)
+
+        # Remove duplicates while preserving order
+        seen = set()
+        urls = [url for url in urls if not (url in seen or seen.add(url))]
+
+        print(f"Downloading {len(urls)} files...")
+
+        # Process the batch asynchronously using existing infrastructure
+        start_time = time.time()
+
+        asyncio.run(self.process_batch(
+            urls,
+            output_dir,
+            keep_document_types=keep_document_types,
+            keep_filtered_metadata=keep_filtered_metadata,
+            standardize_metadata=standardize_metadata
+        ))
+
+        # Calculate and display performance metrics
+        elapsed_time = time.time() - start_time
+        print(f"\nProcessing completed in {elapsed_time:.2f} seconds")
+        print(f"Processing speed: {len(urls)/elapsed_time:.2f} files/second")
+
+
+def download(submission_type=None, cik=None, filing_date=None, api_key=None, output_dir="downloads", accession_numbers=None, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True):
     """
     Query SEC filings and download/process them.
 
@@ -304,6 +367,7 @@ def download(submission_type=None, cik=None, filing_date=None, api_key=None, out
     - output_dir: Directory to save downloaded files
     - accession_numbers: List of specific accession numbers to download
     - keep_document_types: List of document types to keep (e.g., ['10-K', 'EX-10.1'])
+    - keep_filtered_metadata: Whether to keep metadata for filtered documents
     """
     if accession_numbers:
         accession_numbers = [int(str(x).replace('-', '')) for x in accession_numbers]
@@ -317,5 +381,30 @@ def download(submission_type=None, cik=None, filing_date=None, api_key=None, out
         filing_date=filing_date,
         output_dir=output_dir,
         accession_numbers=accession_numbers,
-        keep_document_types=keep_document_types
+        keep_document_types=keep_document_types,
+        keep_filtered_metadata=keep_filtered_metadata,
+        standardize_metadata=standardize_metadata
+    )
+
+def download_files_using_filename(filenames, api_key=None, output_dir="downloads", keep_document_types=[], keep_filtered_metadata=False, standardize_metadata=True):
+    """
+    Download and process SEC filings using specific filenames.
+
+    Parameters:
+    - filenames: List of specific filenames to download (e.g., ['000091205797006494.sgml', '000100704297000007.sgml.zst'])
+    - api_key: API key for datamule service (optional if DATAMULE_API_KEY env var is set)
+    - output_dir: Directory to save downloaded files
+    - keep_document_types: List of document types to keep (e.g., ['10-K', 'EX-10.1'])
+    - keep_filtered_metadata: Whether to keep metadata for filtered documents
+    - standardize_metadata: Whether to standardize metadata format
+    """
+    downloader = Downloader(api_key=api_key)
+    downloader.QUEUE_SIZE = 1
+    downloader.MAX_CONCURRENT_DOWNLOADS = 1
+    downloader.download_files_using_filename(
+        filenames=filenames,
+        output_dir=output_dir,
+        keep_document_types=keep_document_types,
+        keep_filtered_metadata=keep_filtered_metadata,
+        standardize_metadata=standardize_metadata
+    )
datamule/submission.py
CHANGED
@@ -2,11 +2,80 @@ from pathlib import Path
 import json
 from .document.document import Document
 from secsgml import parse_sgml_content_into_memory
+from secsgml.utils import bytes_to_str
+from secsgml.parse_sgml import transform_metadata_string
 import tarfile
 import shutil
 import zstandard as zstd
-from io import BytesIO
 import gzip
+import io
+import copy
+
+
+def calculate_documents_locations_in_tar(metadata, documents):
+    # Step 1: Add placeholder byte positions to get accurate size (10-digit padded)
+    placeholder_metadata = copy.deepcopy(metadata)
+
+    for file_num in range(len(documents)):
+        if 'documents' in placeholder_metadata:
+            placeholder_metadata['documents'][file_num]['secsgml_start_byte'] = "9999999999"  # 10 digits
+            placeholder_metadata['documents'][file_num]['secsgml_end_byte'] = "9999999999"  # 10 digits
+
+    # Step 2: Calculate size with placeholders
+    placeholder_str = bytes_to_str(placeholder_metadata, lower=False)
+    placeholder_json = json.dumps(placeholder_str).encode('utf-8')
+    metadata_size = len(placeholder_json)
+
+    # Step 3: Now calculate actual positions using this size
+    current_pos = 512 + metadata_size
+    current_pos += (512 - (current_pos % 512)) % 512
+
+    # Step 4: Calculate real positions and update original metadata (10-digit padded)
+    for file_num, content in enumerate(documents):
+        start_byte = current_pos + 512
+        end_byte = start_byte + len(content)
+
+        if 'documents' in metadata:
+            metadata['documents'][file_num]['secsgml_start_byte'] = f"{start_byte:010d}"  # 10-digit padding
+            metadata['documents'][file_num]['secsgml_end_byte'] = f"{end_byte:010d}"  # 10-digit padding
+
+
+        file_total_size = 512 + len(content)
+        padded_size = file_total_size + (512 - (file_total_size % 512)) % 512
+        current_pos += padded_size
+
+    return metadata
+
+
+def write_submission_to_tar(output_path,metadata,documents,standardize_metadata,compression_list):
+    # Write tar directly to disk
+    with tarfile.open(output_path, 'w') as tar:
+
+        # calculate document locations in tar
+        metadata = calculate_documents_locations_in_tar(metadata, documents)
+
+        # serialize metadata
+        metadata_str = bytes_to_str(metadata,lower=False)
+        metadata_json = json.dumps(metadata_str).encode('utf-8')
+        # save metadata
+        tarinfo = tarfile.TarInfo(name='metadata.json')
+        tarinfo.size = len(metadata_json)
+        tar.addfile(tarinfo, io.BytesIO(metadata_json))
+
+        for file_num, content in enumerate(documents, 0):
+            if standardize_metadata:
+                document_name = metadata['documents'][file_num]['filename'] if metadata['documents'][file_num].get('filename') else metadata['documents'][file_num]['sequence'] + '.txt'
+
+            compression = compression_list[file_num]
+            if compression == 'gzip':
+                document_name = f'{document_name}.gz'
+            elif compression == 'zstd':
+                document_name = f'{document_name}.zst'
+
+
+            tarinfo = tarfile.TarInfo(name=f'{document_name}')
+            tarinfo.size = len(content)
+            tar.addfile(tarinfo, io.BytesIO(content))
 
 class Submission:
     def __init__(self, path=None,sgml_content=None,keep_document_types=None):
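calculate_documents_locations_in_tar can predict offsets because tar's on-disk layout is fixed: each member is a 512-byte header followed by its content, padded out to the next 512-byte boundary, and metadata.json is always written first. A standalone sketch of that arithmetic (the helper name and sample sizes are illustrative):

def tar_entry_span(start, content_len):
    # Return (data_start, data_end, next_entry_start) for one tar member
    # beginning at byte offset `start`.
    data_start = start + 512                      # skip the 512-byte member header
    data_end = data_start + content_len
    total = 512 + content_len
    padded = total + (512 - (total % 512)) % 512  # round up to a full 512-byte block
    return data_start, data_end, start + padded

# A 10-byte metadata.json written at offset 0 occupies bytes 512-522,
# and the next member's header begins at offset 1024.
print(tar_entry_span(0, 10))  # (512, 522, 1024)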
@@ -18,6 +87,10 @@ class Submission:
         if sgml_content is not None:
             self.path = None
             metadata, raw_documents = parse_sgml_content_into_memory(sgml_content)
+
+            # standardize metadata
+            metadata = transform_metadata_string(metadata)
+
             self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=None)
             # code dupe
             self.accession = self.metadata.content['accession-number']
@@ -55,6 +128,9 @@ class Submission:
             metadata_path = self.path / 'metadata.json'
             with metadata_path.open('r') as f:
                 metadata = json.load(f)
+
+            # standardize metadata
+            metadata = transform_metadata_string(metadata)
             self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
             self.accession = self.metadata.content['accession-number']
             self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
@@ -68,51 +144,34 @@ class Submission:
         if compression is not None and compression not in ['gzip', 'zstd']:
             raise ValueError("compression must be 'gzip' or 'zstd'")
 
+        # check if we're loading from a dir or a tar file
+        is_dir_not_tar = True
+        if self.path.suffix == '.tar':
+            is_dir_not_tar = False
+        elif not self.path.is_dir():
+            raise ValueError("Path must be a directory to compress")
         # Create tar file (replace directory with .tar file)
         tar_path = self.path.with_suffix('.tar')
+
+        # load all files in the directory or tar file
+        documents = [doc.content.encode('utf-8') if isinstance(doc.content, str) else doc.content for doc in self]
 
-        with tarfile.open(tar_path, 'w') as tar:
-            # Add metadata.json first
-            metadata_path = self.path / 'metadata.json'
-            if metadata_path.exists():
-                tar.add(metadata_path, arcname='metadata.json')
-
-            # Add documents in order
-            for doc in self.metadata.content['documents']:
-                filename = doc.get('filename')
-                if filename is None:
-                    filename = doc['sequence'] + '.txt'
-
-                file_path = self.path / filename
-                if file_path.exists():
-                    file_size = file_path.stat().st_size
 
-
-
-
-
-
-                        if compression == 'gzip':
-                            compressed_content = gzip.compress(content, compresslevel=level or 6)
-                            compressed_filename = filename + '.gz'
-                        else: # zstd
-                            cctx = zstd.ZstdCompressor(level=level or 3)
-                            compressed_content = cctx.compress(content)
-                            compressed_filename = filename + '.zst'
-
-                        # Add compressed file to tar
-                        tarinfo = tarfile.TarInfo(name=compressed_filename)
-                        tarinfo.size = len(compressed_content)
-                        tar.addfile(tarinfo, BytesIO(compressed_content))
-                    else:
-                        # Add uncompressed file
-                        tar.add(file_path, arcname=filename)
+        # we should compress everything here first.
+        compression_list = [compression if len(doc) >= threshold else '' for doc in documents]
+        documents = [gzip.compress(doc, compresslevel=level or 6) if compression == 'gzip' and
+                     len(doc) >= threshold else zstd.ZstdCompressor(level=level or 3).compress(doc) if compression == 'zstd' and
+                     len(doc) >= threshold else doc for doc in documents]
 
+        metadata = self.metadata.content.copy()
+        write_submission_to_tar(tar_path,metadata,documents,compression_list=compression_list,standardize_metadata=True)
+
         # Delete original folder
-
-
-
-
+        if is_dir_not_tar:
+            shutil.rmtree(self.path)
+        # otherwise, we already replaced the tar file
+        # Update path to point to new tar file
+        self.path = tar_path
 
     def decompress(self):
         if self.path is None:
@@ -129,17 +188,36 @@ class Submission:
                 if member.isfile():
                     content = tar.extractfile(member).read()
 
-                    # Decompress
+                    # Decompress based on file extension
                     if member.name.endswith('.gz'):
                         content = gzip.decompress(content)
                         output_path = output_dir / member.name[:-3]  # Remove .gz extension
+                    elif member.name.endswith('.zst'):
+                        dctx = zstd.ZstdDecompressor()
+                        content = dctx.decompress(content)
+                        output_path = output_dir / member.name[:-4]  # Remove .zst extension
                     else:
                         output_path = output_dir / member.name
 
-                    #
-                    output_path.
-
-
+                    # check if it is metadata.json
+                    if output_path.name == 'metadata.json':
+                        # load as json
+                        metadata = json.loads(content.decode('utf-8'))
+                        # remove SECSGML_START_BYTE and SECSGML_END_BYTE from documents
+                        for doc in metadata['documents']:
+                            if 'secsgml_start_byte' in doc:
+                                del doc['secsgml_start_byte']
+
+                            if 'secsgml_end_byte' in doc:
+                                del doc['secsgml_end_byte']
+
+                        with output_path.open('w', encoding='utf-8') as f:
+                            json.dump(metadata, f)
+                    else:
+                        # Write to output directory
+                        output_path.parent.mkdir(parents=True, exist_ok=True)
+                        with output_path.open('wb') as f:
+                            f.write(content)
 
         # delete original file
         self.path.unlink()
{datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/RECORD
CHANGED
@@ -3,9 +3,9 @@ datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
 datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
 datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
 datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
-datamule/portfolio.py,sha256=
+datamule/portfolio.py,sha256=iW54frGfoCQb-6aYfocDqQQPe0gc_22voedv0It_1q0,7517
 datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
-datamule/submission.py,sha256=
+datamule/submission.py,sha256=6JIi-ayLL-jENVj6Q4IhmrYlAreJI7xBAHP_NYaDB6k,12918
 datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/document.py,sha256=04Rivdphq0D1HEGIBjtl1LelJr-IyQU1qCMi8yNJajw,14038
@@ -44,9 +44,9 @@ datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
 datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
 datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/sec/submissions/downloader.py,sha256=
+datamule/sec/submissions/downloader.py,sha256=tDWn8bsK9XabQo2pBGYSiqTw37MmqM8rEma8Ph7zp-o,1391
 datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
-datamule/sec/submissions/monitor.py,sha256=
+datamule/sec/submissions/monitor.py,sha256=CvpHywnrn4Lwk_3rWRE5K5UNYrdJ9Gyon97Uo0Ocq-4,8985
 datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
 datamule/sec/submissions/textsearch.py,sha256=MKDXEz_VI_0ljl73_aw2lx4MVzJW5uDt8KxjvJBwPwM,5794
 datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,9 +56,9 @@ datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H
 datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
-datamule/seclibrary/downloader.py,sha256=
+datamule/seclibrary/downloader.py,sha256=wNRURTGb3eqg12Ltt4578L0WcAm7DmCWg0Rm0Om6Z4U,17959
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
-datamule-1.5.
-datamule-1.5.
-datamule-1.5.
-datamule-1.5.
+datamule-1.5.4.dist-info/METADATA,sha256=jl-zXUtvVrWz4Etn1BW8zsZ2AQ7CaE-zDF18sS0Lf7E,469
+datamule-1.5.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-1.5.4.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.5.4.dist-info/RECORD,,
{datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/WHEEL
File without changes

{datamule-1.5.2.dist-info → datamule-1.5.4.dist-info}/top_level.txt
File without changes