PyPI - datamule - Versions diffs - 1.3.0__tar.gz → 1.4.0__tar.gz - Mend

datamule 1.3.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{datamule-1.3.0 → datamule-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.3.0
+Version: 1.4.0
 Summary: Work with SEC submissions at scale.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/document.py RENAMED Viewed

@@ -3,11 +3,14 @@ import csv
 import re
 from doc2dict import xml2dict, txt2dict, dict2dict
 from doc2dict.mapping import flatten_hierarchy
+from doc2dict import html2dict, visualize_dict, get_title, unnest_dict
 from ..mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_13d, dict_13g
 from ..mapping_dicts.xml_mapping_dicts import dict_345
+from ..mapping_dicts.html_mapping_dicts import dict_10k_html, dict_10q_html, dict_8k_html
 from selectolax.parser import HTMLParser
 from .processing import process_tabular_data
 from pathlib import Path
+import webbrowser
 class Document:
     def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -99,26 +102,10 @@ class Document:
         if self.data:
             return self.data
-        # preprocess content
-        if self.extension == '.txt':
-            self.content = self._preprocess_txt_content()
-        elif self.extension in ['.htm', '.html']:
-            self.content = self._preprocess_html_content()
         mapping_dict = None
-        if self.extension == '.xml':
-            if self.type in ['3', '4', '5', '3/A', '4/A', '5/A']:
-                mapping_dict = dict_345
-            self.data = xml2dict(content=self.content, mapping_dict=mapping_dict)
-        # will deprecate this when we add html2dict
-        elif self.extension in ['.htm', '.html','.txt']:
+        if self.extension == '.txt':
+            content = self._preprocess_txt_content()
             if self.type == '10-Q':
                 mapping_dict = dict_10q
             elif self.type == '10-K':
@@ -131,8 +118,24 @@ class Document:
                 mapping_dict = dict_13g
             self.data = {}
-            self.data['document'] = dict2dict(txt2dict(content=self.content, mapping_dict=mapping_dict))
-        return self.data
+            self.data['document'] = dict2dict(txt2dict(content=content, mapping_dict=mapping_dict))
+        elif self.extension in ['.htm', '.html']:
+            if self.type == '10-K':
+                mapping_dict = dict_10k_html
+            elif self.type == '10-Q':
+                mapping_dict = dict_10q_html
+            elif self.type == '8-K':
+                mapping_dict = dict_8k_html
+            dct = html2dict(content=self.content, mapping_dict=mapping_dict)
+            self.data = dct
+        elif self.extension == '.xml':
+            if self.type in ['3', '4', '5', '3/A', '4/A', '5/A']:
+                mapping_dict = dict_345
+            self.data = xml2dict(content=self.content, mapping_dict=mapping_dict)
+        else:
+            pass
     def write_json(self, output_filename=None):
         if not self.data:
@@ -206,6 +209,31 @@ class Document:
                     })
         return items
+    def visualize(self):
+        if not self.data:
+            self.parse()
+        if not self.data:
+            if self.extension in ['.jpg', '.png', '.pdf']:
+                webbrowser.open('file://' + str(self.path))
+            else:
+                pass
+        else:
+            visualize_dict(self.data)
+    def get_section(self, title, format='dict'):
+        if not self.data:
+            self.parse()
+        result = get_title(self.data,title)
+        if format == 'text':
+            result = [item[1] for item in result]
+            result = [unnest_dict(item) for item in result]
+        return result
    # this will all have to be changed. default will be to flatten everything
    # candidate for deletion

{datamule-1.3.0 → datamule-1.4.0}/datamule/helper.py RENAMED Viewed

@@ -5,28 +5,17 @@ import os
 def _load_package_csv(name):
     """Load CSV files from package data directory"""
-    # First try to load from the package data directory
-    try:
-        package_dir = os.path.dirname(os.path.dirname(__file__))
-        csv_path = os.path.join(package_dir, "data", f"{name}.csv")
-        # Fallback to the legacy location
-        if not os.path.exists(csv_path):
-            csv_path = Path.home() / ".datamule" / f"{name}.csv"
-        data = []
-        with open(csv_path, 'r') as csvfile:
-            csv_reader = csv.DictReader(csvfile)
-            for row in csv_reader:
-                data.append(row)
-        return data
+    package_dir = os.path.dirname(os.path.dirname(__file__))
+    csv_path = os.path.join(package_dir,"datamule", "data", f"{name}.csv")
+    data = []
+    with open(csv_path, 'r') as csvfile:
+        csv_reader = csv.DictReader(csvfile)
+        for row in csv_reader:
+            data.append(row)
+    return data
-    except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Required data file '{name}.csv' not found. "
-            f"This file should be in the datamule package directory or in ~/.datamule/"
-        )
 def load_package_dataset(dataset):
     if dataset =='listed_filer_metadata':

datamule-1.4.0/datamule/mapping_dicts/html_mapping_dicts.py ADDED Viewed

@@ -0,0 +1,11 @@
+dict_10k_html = {
+    ('part',r'^part\s*([ivx]+)$') : 0,
+    ('signatures',r'^signatures?\.*$') : 0,
+    ('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
+}
+dict_10q_html = dict_10k_html
+dict_8k_html = {
+    ('signatures',r'^signatures?\.*$') : 0,
+    ('item',r'^item\s*(\d+\.\d+)') : 0,
+}

{datamule-1.3.0 → datamule-1.4.0}/datamule.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 1.3.0
+Version: 1.4.0
 Summary: Work with SEC submissions at scale.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman

{datamule-1.3.0 → datamule-1.4.0}/datamule.egg-info/SOURCES.txt RENAMED Viewed

@@ -42,6 +42,7 @@ datamule/document/mappings/thirteenfhr.py
 datamule/document/mappings/twentyfivense.py
 datamule/document/mappings/twentyfourf2nt.py
 datamule/mapping_dicts/__init__.py
+datamule/mapping_dicts/html_mapping_dicts.py
 datamule/mapping_dicts/txt_mapping_dicts.py
 datamule/mapping_dicts/xml_mapping_dicts.py
 datamule/sec/__init__.py

{datamule-1.3.0 → datamule-1.4.0}/setup.py RENAMED Viewed

@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
 setup(
     name="datamule",
     author="John Friedman",
-    version="1.3.0",
+    version="1.4.0",
     description="Work with SEC submissions at scale.",
     packages=find_packages(include=['datamule', 'datamule.*']),
     url="https://github.com/john-friedman/datamule-python",

{datamule-1.3.0 → datamule-1.4.0}/datamule/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/config.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/data/listed_filer_metadata.csv RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/atsn.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/cfportal.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/d.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ex102_abs.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ex99a_sdr.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ex99c_sdr.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ex99g_sdr.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ex99i_sdr.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/information_table.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/nmfp.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/npx.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/onefourtyfour.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ownership.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/proxy_voting_record.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/sbs.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/sbsef.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/schedule13.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/sdr.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/submission_metadata.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/ta.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/thirteenfhr.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/twentyfivense.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/mappings/twentyfourf2nt.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/processing.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/document/table.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/index.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/mapping_dicts/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/mapping_dicts/txt_mapping_dicts.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/mapping_dicts/xml_mapping_dicts.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/package_updater.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/portfolio.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/infrastructure/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/infrastructure/submissions_metadata.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/submissions/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/submissions/downloader.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/submissions/eftsquery.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/submissions/monitor.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/submissions/streamer.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/submissions/textsearch.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/utils.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/xbrl/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/xbrl/downloadcompanyfacts.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/xbrl/filter_xbrl.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/xbrl/streamcompanyfacts.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sec/xbrl/xbrlmonitor.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/seclibrary/__init__.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/seclibrary/bq.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/seclibrary/downloader.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/seclibrary/query.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/sheet.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule/submission.py RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule.egg-info/requires.txt RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/datamule.egg-info/top_level.txt RENAMED Viewed

File without changes

{datamule-1.3.0 → datamule-1.4.0}/setup.cfg RENAMED Viewed

File without changes

datamule 1.3.0__tar.gz → 1.4.0__tar.gz

datamule 1.3.0__tar.gz → 1.4.0__tar.gz