PyPI - datamule - Versions diffs - 2.0.3__py3-none-any.whl → 2.0.4__py3-none-any.whl - Mend

datamule 2.0.3__py3-none-any.whl → 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

datamule/document/document.py CHANGED Viewed

@@ -12,8 +12,7 @@ from .processing import process_tabular_data
 from pathlib import Path
 import webbrowser
 from secsgml.utils import bytes_to_str
-from secxbrl import parse_inline_xbrl
-from company_fundamentals import construct_fundamentals
 class Document:
     def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -35,8 +34,7 @@ class Document:
         self.extension = extension
         # this will be filled by parsed
         self.data = None
-        self.xbrl = None
-        self.fundamentals = None
     #_load_text_content
     def _preprocess_txt_content(self):
@@ -106,70 +104,6 @@ class Document:
             return bool(re.search(pattern, self.content))
         return False
-    # slated for removal
-    def parse_xbrl(self,type='inline'):
-        if self.xbrl:
-            return
-        if type =='inline':
-            if self.extension not in ['.htm','.html']:
-                return
-            self.xbrl = parse_inline_xbrl(self.content)
-        else:
-            raise ValueError("Only inline has been implemented so far.")
-    def parse_fundamentals(self,categories=None):
-        self.parse_xbrl()
-        # Transform XBRL records into the format needed by construct_fundamentals
-        xbrl = []
-        for xbrl_record in self.xbrl:
-            try:
-                # Extract basic fields
-                value = xbrl_record.get('_val', None)
-                taxonomy, name = xbrl_record['_attributes']['name'].split(':')
-                # Handle scaling if present
-                if xbrl_record.get('_attributes', {}).get('scale') is not None:
-                    scale = int(xbrl_record['_attributes']['scale'])
-                    try:
-                        value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
-                    except:
-                        pass
-                # Extract period dates
-                period_start_date = None
-                period_end_date = None
-                if xbrl_record.get('_context'):
-                    context = xbrl_record['_context']
-                    period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
-                    period_end_date = context.get('context_period_enddate')
-                # Create record in the format expected by construct_fundamentals
-                record = {
-                    'taxonomy': taxonomy,
-                    'name': name,
-                    'value': value,
-                    'period_start_date': period_start_date,
-                    'period_end_date': period_end_date
-                }
-                xbrl.append(record)
-            except Exception as e:
-                # Skip malformed records
-                continue
-        # Call construct_fundamentals with the transformed data
-        fundamentals = construct_fundamentals(xbrl,
-                            taxonomy_key='taxonomy',
-                            concept_key='name',
-                            start_date_key='period_start_date',
-                            end_date_key='period_end_date',
-                            categories=categories)
-        self.fundamentals = fundamentals
     # Note: this method will be heavily modified in the future
     def parse(self):
         # check if we have already parsed the content

datamule/submission.py CHANGED Viewed

@@ -9,6 +9,10 @@ import tarfile
 import zstandard as zstd
 import gzip
 import urllib.request
+from secxbrl import parse_inline_xbrl
+from company_fundamentals import construct_fundamentals
+from decimal import Decimal
 class Submission:
     def __init__(self, path=None, sgml_content=None, keep_document_types=None,
@@ -17,6 +21,7 @@ class Submission:
         # declare vars to be filled later
         self.xbrl = None
+        self.fundamentals = None
         # Validate parameters
         param_count = sum(x is not None for x in [path, sgml_content, batch_tar_path,url])
@@ -242,18 +247,81 @@ class Submission:
             if doc['type'] in document_types:
                 yield self._load_document_by_index(idx)
-    # def parse_xbrl(self):
-    #     for idx, doc in enumerate(self.metadata.content['documents']):
-    #         if doc['type'] in ['EX-100.INS','EX-101.INS']:
-    #             document = self._load_document_by_index(idx)
-    #             break
+    def parse_xbrl(self):
+        if self.xbrl:
+            return
+        for idx, doc in enumerate(self.metadata.content['documents']):
+            if doc['type'] in ['EX-100.INS','EX-101.INS']:
+                document = self._load_document_by_index(idx)
+                self.xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
+                return
+            if doc['filename'].endswith('_htm.xml'):
+                document = self._load_document_by_index(idx)
+                self.xbrl = parse_inline_xbrl(content=document.content,file_type='extracted_inline')
+                return
+    def parse_fundamentals(self,categories=None):
+        self.parse_xbrl()
+        # if no xbrl return
+        if not self.xbrl:
+            return
+        # Transform XBRL records into the format needed by construct_fundamentals
+        xbrl = []
+        for xbrl_record in self.xbrl:
+            try:
+                # Extract basic fields
+                value = xbrl_record.get('_val', None)
+                taxonomy, name = xbrl_record['_attributes']['name'].split(':')
+                # Handle scaling if present
+                if xbrl_record.get('_attributes', {}).get('scale') is not None:
+                    scale = int(xbrl_record['_attributes']['scale'])
+                    try:
+                        value = str(Decimal(value.replace(',', '')) * (Decimal(10) ** scale))
+                    except:
+                        pass
-    #         if doc['filename'].endswith('_htm.xml'):
-    #             document = self._load_document_by_index(idx)
-    #             break
+                # Extract period dates
+                period_start_date = None
+                period_end_date = None
+                if xbrl_record.get('_context'):
+                    context = xbrl_record['_context']
+                    period_start_date = context.get('context_period_instant') or context.get('context_period_startdate')
+                    period_end_date = context.get('context_period_enddate')
+                # Create record in the format expected by construct_fundamentals
+                record = {
+                    'taxonomy': taxonomy,
+                    'name': name,
+                    'value': value,
+                    'period_start_date': period_start_date,
+                    'period_end_date': period_end_date
+                }
+                xbrl.append(record)
+            except Exception as e:
+                # Skip malformed records
+                continue
+        # Call construct_fundamentals with the transformed data
+        fundamentals = construct_fundamentals(xbrl,
+                            taxonomy_key='taxonomy',
+                            concept_key='name',
+                            start_date_key='period_start_date',
+                            end_date_key='period_end_date',
+                            categories=categories)
+        self.fundamentals = fundamentals
-    #     print(doc['type'])
-    #     if not document:
-    #         return
-    #     self.xbrl = document.parse_xbrl()

{datamule-2.0.3.dist-info → datamule-2.0.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 2.0.3
+Version: 2.0.4
 Summary: Work with SEC submissions at scale.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman

{datamule-2.0.3.dist-info → datamule-2.0.4.dist-info}/RECORD RENAMED Viewed

@@ -6,7 +6,7 @@ datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,9
 datamule/portfolio.py,sha256=YViG1JgJ9SFhg8N3tOOhBI8oc6Pmi2vwnHeHmlkC_5U,12119
 datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
 datamule/sheet.py,sha256=Ws_YRtpvewLVioarngVMe8cgG_sp11MP9_goGbRaiWE,23952
-datamule/submission.py,sha256=DtLoiwRE7JJW2R0NvJNyQfwstWIlU2N9Z6yOgpnH1LU,11812
+datamule/submission.py,sha256=qcb5TogrB2q6x4zcGPKFf4dkrAy0bAPzY71Ops_xW44,14437
 datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
 datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
@@ -14,7 +14,7 @@ datamule/datamule/datamule_mysql_rds.py,sha256=Oj_xPTBKkzWsuRlb_tphjJrBW1eua1cOu
 datamule/datamule/downloader.py,sha256=aTyVUuIwynPtHB0Z9BvCasy9Ao5wfHptNAsjN-7yDTk,18525
 datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/document.py,sha256=8UC5QfiMDufkA7v3o76mlfftqsUjNUFWKB3j894tsKw,16795
+datamule/document/document.py,sha256=U9hSXT2Y06prM6sPcUU6uziV1f4_BhaaGz3QXE5zveg,14034
 datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
 datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
 datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -65,7 +65,7 @@ datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,180
 datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
 datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
-datamule-2.0.3.dist-info/METADATA,sha256=xILAQeqGiaZbF19rqWWRoFqBLv1to0a3RdRFKdhlu0Q,560
-datamule-2.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-datamule-2.0.3.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
-datamule-2.0.3.dist-info/RECORD,,
+datamule-2.0.4.dist-info/METADATA,sha256=CNSfwZgqLh3WR4TObVkP8Y1p2wWx4To_NMthG4EvhEQ,560
+datamule-2.0.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-2.0.4.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-2.0.4.dist-info/RECORD,,

{datamule-2.0.3.dist-info → datamule-2.0.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{datamule-2.0.3.dist-info → datamule-2.0.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

datamule 2.0.3__py3-none-any.whl → 2.0.4__py3-none-any.whl

datamule 2.0.3__py3-none-any.whl → 2.0.4__py3-none-any.whl