datamule 1.0.7__tar.gz → 1.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.0.7 → datamule-1.0.9}/PKG-INFO +1 -1
- datamule-1.0.9/datamule/book/__init__.py +0 -0
- datamule-1.0.9/datamule/book/book.py +34 -0
- datamule-1.0.9/datamule/mapping_dicts/__init__.py +0 -0
- datamule-1.0.9/datamule/mapping_dicts/txt_mapping_dicts.py +234 -0
- datamule-1.0.9/datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
- datamule-1.0.9/datamule/sec/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/infrastructure/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/infrastructure/submissions_metadata.py +386 -0
- datamule-1.0.9/datamule/sec/rss/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/rss/monitor.py +416 -0
- datamule-1.0.9/datamule/sec/submissions/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/submissions/downloader.py +70 -0
- datamule-1.0.9/datamule/sec/submissions/eftsquery.py +502 -0
- datamule-1.0.9/datamule/sec/submissions/monitor.py +126 -0
- datamule-1.0.9/datamule/sec/submissions/streamer.py +228 -0
- datamule-1.0.9/datamule/sec/submissions/textsearch.py +122 -0
- datamule-1.0.9/datamule/sec/utils.py +64 -0
- datamule-1.0.9/datamule/sec/xbrl/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/xbrl/downloadcompanyfacts.py +83 -0
- datamule-1.0.9/datamule/sec/xbrl/filter_xbrl.py +39 -0
- datamule-1.0.9/datamule/sec/xbrl/streamcompanyfacts.py +93 -0
- datamule-1.0.9/datamule/sec/xbrl/xbrlmonitor.py +143 -0
- datamule-1.0.9/datamule/seclibrary/__init__.py +0 -0
- datamule-1.0.9/datamule/seclibrary/downloader.py +286 -0
- datamule-1.0.9/datamule/seclibrary/query.py +181 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/PKG-INFO +1 -1
- datamule-1.0.9/datamule.egg-info/SOURCES.txt +37 -0
- {datamule-1.0.7 → datamule-1.0.9}/setup.py +3 -3
- datamule-1.0.7/datamule.egg-info/SOURCES.txt +0 -12
- {datamule-1.0.7 → datamule-1.0.9}/datamule/__init__.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/config.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/document.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/helper.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/portfolio.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/submission.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/requires.txt +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/setup.cfg +0 -0
File without changes
|
@@ -0,0 +1,34 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from ..helper import _process_cik_and_metadata_filters, load_package_dataset
|
3
|
+
from ..sec.xbrl.downloadcompanyfacts import download_company_facts
|
4
|
+
|
5
|
+
class Book:
    """Facade over a local output directory for bulk SEC data downloads.

    Wraps a filesystem path and exposes download/query entry points that
    write into it.
    """

    def __init__(self, path):
        # Root directory where downloaded data is written.
        self.path = Path(path)

    def download_xbrl(
        self,
        cik=None,
        ticker=None,
        **kwargs
    ):
        """Download XBRL company facts into ``self.path``.

        Parameters
        ----------
        cik : str | int | list | None
            One or more CIK identifiers. When both ``cik`` and ``ticker``
            are None, every company in the bundled ``company_tickers``
            dataset is used.
        ticker : str | list | None
            Ticker symbol filter, forwarded to the CIK/metadata resolver.
        **kwargs
            Additional metadata filters forwarded to
            ``_process_cik_and_metadata_filters``.
        """
        # If no CIK or ticker specified, get all companies with tickers
        if cik is None and ticker is None:
            cik = [row['cik'] for row in load_package_dataset('company_tickers')]

        # Normalize cik to list format
        if isinstance(cik, (str, int)):
            cik = [cik]

        # Process CIK and metadata filters
        cik_list = _process_cik_and_metadata_filters(cik, ticker, **kwargs)

        # Download facts for all CIKs in parallel
        download_company_facts(cik=cik_list, output_dir=self.path)

    # The three stubs below were declared without `self`, so calling them on
    # an instance raised TypeError. `self` is added; bodies remain placeholders.
    def query_345(self):
        # TODO: implement querying of Forms 3/4/5 (insider transactions).
        pass

    def query_xbrl(self):
        # TODO: implement querying of downloaded XBRL facts.
        pass

    def query_13fhr(self):
        # TODO: implement querying of Form 13F-HR holdings.
        pass
|
File without changes
|
@@ -0,0 +1,234 @@
|
|
1
|
+
import copy
|
2
|
+
|
3
|
+
dict_sgml = {
|
4
|
+
"rules": {
|
5
|
+
"join_text": "\n",
|
6
|
+
"remove": [
|
7
|
+
{
|
8
|
+
"pattern": r"^<PAGE>",
|
9
|
+
}
|
10
|
+
],
|
11
|
+
"mappings": [
|
12
|
+
{
|
13
|
+
"name": "table",
|
14
|
+
"pattern": r"^<TABLE>",
|
15
|
+
"end": r"^</TABLE>"
|
16
|
+
},
|
17
|
+
{
|
18
|
+
"name": "caption",
|
19
|
+
"pattern": r"^<CAPTION>",
|
20
|
+
"end": r"^<S>",
|
21
|
+
"keep_end": True
|
22
|
+
},
|
23
|
+
{
|
24
|
+
"name": "footnote",
|
25
|
+
"pattern": r"^<FN>",
|
26
|
+
"end": r"^</FN>"
|
27
|
+
}
|
28
|
+
]
|
29
|
+
}
|
30
|
+
}
|
31
|
+
|
32
|
+
item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
33
|
+
item_pattern_mapping_8k = r"^\n\n\s*(ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
34
|
+
part_pattern_mapping = r"^\n\n\s*(PART|Part)\s+(?:I{1,3}|IV)\.?"
|
35
|
+
|
36
|
+
item_pattern_standardization = r"^\s*(?:ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
37
|
+
item_pattern_standardization_8k = r"^\s*(?:ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN)\.?"
|
38
|
+
part_pattern_standardization = r"^\s*(?:PART|Part)\s+([IVX]+)"
|
39
|
+
|
40
|
+
|
41
|
+
dict_10k = copy.deepcopy(dict_sgml)
|
42
|
+
dict_10k["rules"]["mappings"].extend([
|
43
|
+
{
|
44
|
+
"type": "hierarchy",
|
45
|
+
"name": "part",
|
46
|
+
"pattern": part_pattern_mapping,
|
47
|
+
"hierarchy": 0
|
48
|
+
},
|
49
|
+
{
|
50
|
+
"type": "hierarchy",
|
51
|
+
"name": "item",
|
52
|
+
"pattern": item_pattern_mapping,
|
53
|
+
"hierarchy": 1
|
54
|
+
},
|
55
|
+
])
|
56
|
+
|
57
|
+
# In the mapping dict:
|
58
|
+
dict_10k['transformations'] = [
|
59
|
+
{
|
60
|
+
"type": "standardize",
|
61
|
+
"match": {
|
62
|
+
"type": "part",
|
63
|
+
"text_pattern": part_pattern_standardization
|
64
|
+
},
|
65
|
+
"output": {
|
66
|
+
"format": "part{}",
|
67
|
+
"field": "text" # Where to store the standardized value
|
68
|
+
}
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"type": "standardize",
|
72
|
+
"match": {
|
73
|
+
"type": "item",
|
74
|
+
"text_pattern": item_pattern_standardization
|
75
|
+
},
|
76
|
+
"output": {
|
77
|
+
"format": "item{}",
|
78
|
+
"field": "text" # Could also be "text" or any other field name
|
79
|
+
}
|
80
|
+
},
|
81
|
+
{
|
82
|
+
"type": "merge_consecutive",
|
83
|
+
"match": {
|
84
|
+
"types": ["part", "item"] # sections types to check for merging
|
85
|
+
}
|
86
|
+
},
|
87
|
+
{
|
88
|
+
"type": "trim",
|
89
|
+
"match": {
|
90
|
+
"type": "item", # or "item"
|
91
|
+
"expected": 1
|
92
|
+
},
|
93
|
+
"output": {
|
94
|
+
"type": "introduction",
|
95
|
+
"separator": "\n"
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
]
|
100
|
+
|
101
|
+
dict_10q = copy.deepcopy(dict_sgml)
|
102
|
+
dict_10q["rules"]["mappings"].extend([
|
103
|
+
{
|
104
|
+
"type": "hierarchy",
|
105
|
+
"name": "part",
|
106
|
+
"pattern": part_pattern_mapping,
|
107
|
+
"hierarchy": 0
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"type": "hierarchy",
|
111
|
+
"name": "item",
|
112
|
+
"pattern": item_pattern_mapping,
|
113
|
+
"hierarchy": 1
|
114
|
+
},
|
115
|
+
])
|
116
|
+
|
117
|
+
# In the mapping dict:
|
118
|
+
dict_10q['transformations'] = [
|
119
|
+
{
|
120
|
+
"type": "standardize",
|
121
|
+
"match": {
|
122
|
+
"type": "part",
|
123
|
+
"text_pattern": part_pattern_standardization
|
124
|
+
},
|
125
|
+
"output": {
|
126
|
+
"format": "part{}",
|
127
|
+
"field": "text" # Where to store the standardized value
|
128
|
+
}
|
129
|
+
},
|
130
|
+
{
|
131
|
+
"type": "standardize",
|
132
|
+
"match": {
|
133
|
+
"type": "item",
|
134
|
+
"text_pattern": item_pattern_standardization
|
135
|
+
},
|
136
|
+
"output": {
|
137
|
+
"format": "item{}",
|
138
|
+
"field": "text" # Could also be "text" or any other field name
|
139
|
+
}
|
140
|
+
},
|
141
|
+
{
|
142
|
+
"type": "merge_consecutive",
|
143
|
+
"match": {
|
144
|
+
"types": ["part", "item"] # sections types to check for merging
|
145
|
+
}
|
146
|
+
},
|
147
|
+
{
|
148
|
+
"type": "trim",
|
149
|
+
"match": {
|
150
|
+
"type": "item", # or "item"
|
151
|
+
"expected": 2
|
152
|
+
},
|
153
|
+
"output": {
|
154
|
+
"type": "introduction",
|
155
|
+
"separator": "\n"
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
]
|
160
|
+
|
161
|
+
dict_13d = copy.deepcopy(dict_sgml)
|
162
|
+
dict_13d["rules"]["mappings"].extend([
|
163
|
+
{
|
164
|
+
"type": "hierarchy",
|
165
|
+
"name": "item",
|
166
|
+
"pattern": item_pattern_mapping,
|
167
|
+
"hierarchy": 0
|
168
|
+
},
|
169
|
+
])
|
170
|
+
|
171
|
+
dict_13d['transformations'] = [
|
172
|
+
{
|
173
|
+
"type": "standardize",
|
174
|
+
"match": {
|
175
|
+
"type": "item",
|
176
|
+
"text_pattern": item_pattern_standardization
|
177
|
+
},
|
178
|
+
"output": {
|
179
|
+
"format": "item{}",
|
180
|
+
"field": "text" # Could also be "text" or any other field name
|
181
|
+
}
|
182
|
+
},
|
183
|
+
{
|
184
|
+
"type": "merge_consecutive",
|
185
|
+
"match": {
|
186
|
+
"types": ["item"] # sections types to check for merging
|
187
|
+
}
|
188
|
+
}
|
189
|
+
|
190
|
+
]
|
191
|
+
|
192
|
+
dict_13g = copy.deepcopy(dict_13d)
|
193
|
+
|
194
|
+
dict_8k = copy.deepcopy(dict_sgml)
|
195
|
+
dict_8k["rules"]["mappings"].extend([
|
196
|
+
{
|
197
|
+
"type": "hierarchy",
|
198
|
+
"name": "item",
|
199
|
+
"pattern": item_pattern_mapping_8k,
|
200
|
+
"hierarchy": 0
|
201
|
+
},
|
202
|
+
])
|
203
|
+
|
204
|
+
dict_8k['transformations'] = [
|
205
|
+
{
|
206
|
+
"type": "standardize",
|
207
|
+
"match": {
|
208
|
+
"type": "item",
|
209
|
+
"text_pattern": item_pattern_standardization_8k
|
210
|
+
},
|
211
|
+
"output": {
|
212
|
+
"format": "item{}",
|
213
|
+
"field": "text" # Could also be "text" or any other field name
|
214
|
+
}
|
215
|
+
},
|
216
|
+
{
|
217
|
+
"type": "merge_consecutive",
|
218
|
+
"match": {
|
219
|
+
"types": ["item"] # sections types to check for merging
|
220
|
+
}
|
221
|
+
},
|
222
|
+
{
|
223
|
+
"type": "trim",
|
224
|
+
"match": {
|
225
|
+
"type": "item", # or "item"
|
226
|
+
"expected": 1
|
227
|
+
},
|
228
|
+
"output": {
|
229
|
+
"type": "introduction",
|
230
|
+
"separator": "\n"
|
231
|
+
}
|
232
|
+
}
|
233
|
+
|
234
|
+
]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Transformation config for Forms 3/4/5 XML: footnote references
# (``footnoteId`` elements carrying an ``@id``) are resolved against the
# footnote bodies and inlined as ``"footnote"`` entries; a body is dropped
# once consumed (``remove_after_use``).
_footnote_inlining = {
    "search": {
        "key": "footnoteId",    # element holding the reference
        "identifier": "@id",    # attribute naming the footnote it points at
    },
    "match": {
        "identifier": "@id",    # attribute on the footnote body
        "content": "#text",     # where the footnote text lives
        "remove_after_use": True,
    },
    "output": {
        "key": "footnote",      # key written into the referencing element
        "value": "content",
    },
}

dict_345 = {"transformations": [_footnote_inlining]}
|
File without changes
|
File without changes
|