mergeron 2025.739439.15.tar.gz → 2025.739439.19.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mergeron might be problematic.
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/PKG-INFO +1 -2
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/pyproject.toml +9 -4
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/__init__.py +1 -1
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/__init__.py +61 -0
- mergeron-2025.739439.19/src/mergeron/core/_process_ftc_merger_investigations_data.py +379 -0
- mergeron-2025.739439.19/src/mergeron/core/ftc_merger_investigations_data.py +364 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/guidelines_boundary_functions.py +4 -1
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/enforcement_stats.py +4 -5
- mergeron-2025.739439.15/src/mergeron/core/ftc_merger_investigations_data.py +0 -764
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/README.rst +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/empirical_margin_distribution.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/guidelines_boundaries.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/pseudorandom_numbers.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/__init__.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/ftc_merger_investigations_data.zip +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/__init__.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/data_generation.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/data_generation_functions.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/upp_tests.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/perks/__init__.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/perks/guidelines_boundary_functions_extra.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/py.typed +0 -0
--- mergeron-2025.739439.15/PKG-INFO
+++ mergeron-2025.739439.19/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739439.15
+Version: 2025.739439.19
 Summary: Python for analyzing merger enforcement policy
 License: MIT
 Keywords: merger enforcement policy,merger guidelines,merger screening,enforcement presumptions,concentration standards,diversion ratio,upward pricing pressure,GUPPI
@@ -11,7 +11,6 @@ Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: End Users/Desktop
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: Implementation :: CPython
--- mergeron-2025.739439.15/pyproject.toml
+++ mergeron-2025.739439.19/pyproject.toml
@@ -15,7 +15,7 @@ keywords = [
   "upward pricing pressure",
   "GUPPI",
 ]
-version = "2025.739439.15"
+version = "2025.739439.19"
 requires-python = ">=3.13,<4.0" # need math.fma
 
 # Classifiers list: https://pypi.org/classifiers/
@@ -24,7 +24,6 @@ classifiers = [
   "Environment :: Console",
   "Intended Audience :: End Users/Desktop",
   "Intended Audience :: Science/Research",
-  "License :: OSI Approved :: MIT License",
   "Operating System :: OS Independent",
   "Programming Language :: Python",
   "Programming Language :: Python :: Implementation :: CPython",
@@ -189,6 +188,8 @@ preview = true
 
 cache_fine_grained = true
 ignore_missing_imports = false
+warn_unreachable = false
+warn_redundant_casts = true
 strict = true
 local_partial_types = true
 allow_redefinition_new = true
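The two mypy flags added here adjust strictness: warn_redundant_casts reports typing.cast calls that do not change the inferred type. A minimal, hypothetical snippet (not from mergeron) of the kind of code the new setting would flag:

    # Hypothetical example illustrating warn_redundant_casts = true:
    # the expression is already typed `int`, so the cast changes nothing
    # and mypy reports: Redundant cast to "int"
    from typing import cast

    def double(n: int) -> int:
        return cast(int, n * 2)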
@@ -240,8 +241,12 @@ commands = [
     "poetry",
     "install",
     "--without",
-    "doc
+    "doc",
     "--no-root",
+  ],[
+    "ruff", "check", "./src",
+  ],[
+    "mypy", "./src",
   ],
   [
     "pytest",
@@ -249,5 +254,5 @@ commands = [
   ],
 ]
 
-description = "install pytest in a virtual environment and invoke
+description = "install ruff, mpypy, pytest in a virtual environment and invoke each, in turn"
 deps = ["poetry"]
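Net effect of the two hunks above: the tox-style commands list now runs ruff check ./src and mypy ./src before pytest. As a rough sketch (assuming fail-fast semantics, which the actual runner may or may not use), the sequence amounts to:

    # Rough equivalent of the patched command sequence; check=True stops
    # at the first tool that exits non-zero.
    import subprocess

    for cmd in (["ruff", "check", "./src"], ["mypy", "./src"], ["pytest"]):
        subprocess.run(cmd, check=True)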
--- mergeron-2025.739439.15/src/mergeron/core/__init__.py
+++ mergeron-2025.739439.19/src/mergeron/core/__init__.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import shutil
 from collections.abc import Mapping
 from decimal import Decimal
 from types import MappingProxyType
@@ -20,6 +21,8 @@ from .. import ( # noqa: TID252
     yamelize_attrs,
     yaml_rt_mapper,
 )
+from .. import WORK_DIR as PKG_WORK_DIR # noqa: TID252
+from .. import data as mdat # noqa: TID252
 
 __version__ = VERSION
 
@@ -40,6 +43,64 @@ class GuidelinesBoundary:
     """Area under the boundary."""
 
 
+WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+"""Redefined, in case the user defines WORK_DIR between module imports."""
+
+FID_WORK_DIR = WORK_DIR / "FTCData"
+if not FID_WORK_DIR.is_dir():
+    FID_WORK_DIR.mkdir(parents=True)
+
+INVDATA_ARCHIVE_PATH = WORK_DIR / mdat.FTC_MERGER_INVESTIGATIONS_DATA.name
+if not INVDATA_ARCHIVE_PATH.is_file():
+    shutil.copy2(mdat.FTC_MERGER_INVESTIGATIONS_DATA, INVDATA_ARCHIVE_PATH) # type: ignore
+
+TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
+CONC_TABLE_ALL = "Table 3.1"
+CNT_TABLE_ALL = "Table 4.1"
+
+TTL_KEY = 86825
+CONC_HHI_DICT = {
+    "0 - 1,799": 0,
+    "1,800 - 1,999": 1800,
+    "2,000 - 2,399": 2000,
+    "2,400 - 2,999": 2400,
+    "3,000 - 3,999": 3000,
+    "4,000 - 4,999": 4000,
+    "5,000 - 6,999": 5000,
+    "7,000 - 10,000": 7000,
+    "TOTAL": TTL_KEY,
+}
+CONC_DELTA_DICT = {
+    "0 - 100": 0,
+    "100 - 200": 100,
+    "200 - 300": 200,
+    "300 - 500": 300,
+    "500 - 800": 500,
+    "800 - 1,200": 800,
+    "1,200 - 2,500": 1200,
+    "2,500 - 5,000": 2500,
+    "TOTAL": TTL_KEY,
+}
+CNT_FCOUNT_DICT = {
+    "2 to 1": 2,
+    "3 to 2": 3,
+    "4 to 3": 4,
+    "5 to 4": 5,
+    "6 to 5": 6,
+    "7 to 6": 7,
+    "8 to 7": 8,
+    "9 to 8": 9,
+    "10 to 9": 10,
+    "10 +": 11,
+    "TOTAL": TTL_KEY,
+}
+
+
+def invert_map(_dict: Mapping[Any, Any]) -> Mapping[Any, Any]:
+    """Invert mapping, mapping values to keys of the original mapping."""
+    return {_v: _k for _k, _v in _dict.items()}
+
+
 @frozen
 class INVTableData:
     """Represents individual table of FTC merger investigations data."""
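The dictionaries added above translate the row and column labels printed in the FTC report tables into integer keys, with TTL_KEY serving as a sentinel for the TOTAL row or column, and invert_map recovers labels from keys. A self-contained sketch of the round trip, restating just the relevant definitions so it runs standalone:

    # Round trip between printed report labels and integer keys, using the
    # definitions added in this hunk (abridged).
    TTL_KEY = 86825
    CONC_HHI_DICT = {"0 - 1,799": 0, "1,800 - 1,999": 1800, "TOTAL": TTL_KEY}

    def invert_map(_dict):
        """Invert mapping, mapping values to keys of the original mapping."""
        return {_v: _k for _k, _v in _dict.items()}

    hhi_labels = invert_map(CONC_HHI_DICT)
    assert hhi_labels[1800] == "1,800 - 1,999"  # key -> printed row label
    assert hhi_labels[TTL_KEY] == "TOTAL"       # sentinel -> TOTAL row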
--- /dev/null
+++ mergeron-2025.739439.19/src/mergeron/core/_process_ftc_merger_investigations_data.py
@@ -0,0 +1,379 @@
+"""Download and parse FTC Merger Investigations Data.
+
+This module provided as documentation only. The package
+:code:`pymupdf` is a requirement of this module but is
+distributed under a license that may be incompatible with
+the MIT license under which this package is distributed.
+
+"""
+
+import re
+from collections.abc import Sequence
+from operator import itemgetter
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+# import pymupdf # type: ignore
+import urllib3
+from bs4 import BeautifulSoup
+from numpy.testing import assert_array_equal
+
+from .. import ArrayBIGINT # noqa: TID252
+from . import (
+    CNT_FCOUNT_DICT,
+    CONC_DELTA_DICT,
+    CONC_HHI_DICT,
+    CONC_TABLE_ALL,
+    FID_WORK_DIR,
+    TABLE_TYPES,
+    TTL_KEY,
+    INVData,
+    INVData_in,
+    INVTableData,
+    _mappingproxy_from_mapping,
+)
+
+TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
+
+
+def _parse_invdata() -> INVData:
+    """Parse FTC merger investigations data reports to structured data.
+
+    Returns
+    -------
+    Immutable dictionary of merger investigations data, keyed to
+    reporting period, and including all tables organized by
+    Firm Count (number of remaining competitors) and
+    by range of HHI and ∆HHI.
+    """
+    raise ValueError(
+        "This function is defined here as documentation.\n"
+        "NOTE: License for `pymupdf`, upon which this function depends,"
+        " may be incompatible with the MIT license,"
+        " under which this pacakge is distributed."
+        " Making this fumction operable requires the user to modify"
+        " the source code as well as to install an additional package"
+        " not distributed with this package or identified as a requirement."
+    )
+
+    invdata_docnames = _download_invdata(FID_WORK_DIR)
+
+    invdata: INVData_in = {}
+
+    for invdata_docname in invdata_docnames:
+        invdata_pdf_path = FID_WORK_DIR.joinpath(invdata_docname)
+
+        invdata_doc = pymupdf.open(invdata_pdf_path) # type: ignore # noqa: F821
+        invdata_meta = invdata_doc.metadata
+        if invdata_meta["title"] == " ":
+            invdata_meta["title"] = ", ".join((
+                "Horizontal Merger Investigation Data",
+                "Fiscal Years",
+                "1996-2005",
+            ))
+
+        data_period = "".join( # line-break here for readability
+            re.findall(r"(\d{4}) *(-) *(\d{4})", invdata_meta["title"])[0]
+        )
+
+        # Initialize containers for parsed data
+        invdata[data_period] = {k: {} for k in TABLE_TYPES}
+
+        for pdf_pg in invdata_doc.pages():
+            doc_pg_blocks = pdf_pg.get_text("blocks", sort=False)
+            # Across all published reports of FTC investigations data,
+            # sorting lines (PDF page blocks) by the lower coordinates
+            # and then the left coordinates is most effective for
+            # ordering table rows in top-to-bottom order; this doesn't
+            # work for the 1996-2005 data, however, so we resort later
+            doc_pg_blocks = sorted([
+                (f"{_f[3]:03.0f}{_f[0]:03.0f}{_f[1]:03.0f}{_f[2]:03.0f}", *_f)
+                for _f in doc_pg_blocks
+                if _f[-1] == 0
+            ])
+
+            data_blocks: list[tuple[str]] = [("",)]
+            # Pages layouts not the same in all reports
+            pg_hdr_strings = (
+                "FEDERAL TRADE COMMISSION",
+                "HORIZONTAL MERGER INVESTIGATION DATA: FISCAL YEARS 1996 - 2011",
+            )
+            if len(doc_pg_blocks) > 4:
+                tnum = None
+                for _pg_blk in doc_pg_blocks:
+                    if tnum := TABLE_NO_RE.fullmatch(_pg_blk[-3].strip()):
+                        data_blocks = [
+                            b_
+                            for b_ in doc_pg_blocks
+                            if not b_[-3].startswith(pg_hdr_strings)
+                            and (
+                                b_[-3].strip()
+                                not in {"Significant Competitors", "Post Merger HHI"}
+                            )
+                            and not re.fullmatch(r"\d+", b_[-3].strip())
+                        ]
+                        break
+                if not tnum:
+                    continue
+                del tnum
+            else:
+                continue
+
+            _parse_page_blocks(invdata, data_period, data_blocks)
+
+        invdata_doc.close()
+
+    return _mappingproxy_from_mapping(invdata)
+
+
+def _parse_page_blocks(
+    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+) -> None:
+    if _data_period != "1996-2011":
+        _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
+    else:
+        test_list = [
+            (g, f[-3].strip())
+            for g, f in enumerate(_doc_pg_blocks)
+            if TABLE_NO_RE.fullmatch(f[-3].strip())
+        ]
+        # In the 1996-2011 report, there are 2 tables per page
+        if len(test_list) == 1:
+            table_a_blocks = _doc_pg_blocks
+            table_b_blocks: Sequence[Sequence[Any]] = []
+        else:
+            table_a_blocks, table_b_blocks = (
+                _doc_pg_blocks[test_list[0][0] : test_list[1][0]],
+                _doc_pg_blocks[test_list[1][0] :],
+            )
+
+        for table_i_blocks in table_a_blocks, table_b_blocks:
+            if not table_i_blocks:
+                continue
+            _parse_table_blocks(_invdata, _data_period, table_i_blocks)
+
+
+def _parse_table_blocks(
+    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+) -> None:
+    invdata_evid_cond = "Unrestricted on additional evidence"
+    table_num, table_ser, table_type = _identify_table_type(
+        _table_blocks[0][-3].strip()
+    )
+
+    if _data_period == "1996-2011":
+        invdata_ind_group = (
+            _table_blocks[1][-3].split("\n")[1]
+            if table_num == "Table 4.8"
+            else _table_blocks[2][-3].split("\n", maxsplit=1)[0]
+        )
+
+        if table_ser > 4:
+            invdata_evid_cond = (
+                _table_blocks[2][-3].split("\n")[1]
+                if table_ser in {9, 10}
+                else _table_blocks[3][-3].strip()
+            )
+
+    elif _data_period == "1996-2005":
+        _table_blocks = sorted(_table_blocks, key=itemgetter(6))
+
+        invdata_ind_group = _table_blocks[3][-3].strip()
+        if table_ser > 4:
+            invdata_evid_cond = _table_blocks[5][-3].strip()
+
+    elif table_ser % 2 == 0:
+        invdata_ind_group = _table_blocks[1][-3].split("\n")[2]
+        if (evid_cond_teststr := _table_blocks[2][-3].strip()) == "Outcome":
+            invdata_evid_cond = "Unrestricted on additional evidence"
+        else:
+            invdata_evid_cond = evid_cond_teststr
+
+    elif _table_blocks[3][-3].startswith("FTC Horizontal Merger Investigations"):
+        invdata_ind_group = _table_blocks[3][-3].split("\n")[2]
+        invdata_evid_cond = "Unrestricted on additional evidence"
+
+    else:
+        # print(_table_blocks)
+        invdata_evid_cond = (
+            _table_blocks[1][-3].strip()
+            if table_ser == 9
+            else _table_blocks[3][-3].strip()
+        )
+        invdata_ind_group = _table_blocks[4][-3].split("\n")[2]
+
+    if invdata_ind_group == "Pharmaceutical Markets":
+        invdata_ind_group = "Pharmaceuticals Markets"
+
+    process_table_func = (
+        _process_table_blks_conc_type
+        if table_type == TABLE_TYPES[0]
+        else _process_table_blks_cnt_type
+    )
+
+    table_array = process_table_func(_table_blocks)
+    if not isinstance(table_array, np.ndarray) or table_array.dtype != int:
+        print(table_num)
+        print(_table_blocks)
+        raise ValueError
+
+    table_data = INVTableData(invdata_ind_group, invdata_evid_cond, table_array)
+    _invdata[_data_period][table_type] |= {table_num: table_data}
+
+
+def _identify_table_type(_tnstr: str = CONC_TABLE_ALL, /) -> tuple[str, int, str]:
+    tnum = _tnstr.split(" ")[1]
+    tsub = int(tnum.split(".")[0])
+    return _tnstr, tsub, TABLE_TYPES[(tsub + 1) % 2]
+
+
+def _process_table_blks_conc_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    conc_row_pat = re.compile(r"((?:0|\d,\d{3}) (?:- \d+,\d{3}|\+)|TOTAL)")
+
+    col_titles = tuple(CONC_DELTA_DICT.values())
+    col_totals: ArrayBIGINT = np.zeros(len(col_titles), int)
+    invdata_array: ArrayBIGINT = np.array(None)
+
+    for tbl_blk in _table_blocks:
+        if conc_row_pat.match(_blk_str := tbl_blk[-3]):
+            row_list: list[str] = _blk_str.strip().split("\n")
+            row_title: str = row_list.pop(0)
+            row_key: int = (
+                7000 if row_title.startswith("7,000") else CONC_HHI_DICT[row_title]
+            )
+            row_total = np.array(row_list.pop().replace(",", "").split("/"), int)
+            data_row_list: list[list[int]] = []
+            while row_list:
+                enfd_val, clsd_val = row_list.pop(0).split("/")
+                data_row_list += [
+                    [
+                        row_key,
+                        col_titles[len(data_row_list)],
+                        int(enfd_val),
+                        int(clsd_val),
+                        int(enfd_val) + int(clsd_val),
+                    ]
+                ]
+            data_row_array = np.array(data_row_list, int)
+            del data_row_list
+            # Check row totals
+            assert_array_equal(row_total, np.einsum("ij->j", data_row_array[:, 2:4]))
+
+            if row_key == TTL_KEY:
+                col_totals = data_row_array
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, data_row_array))
+                    if invdata_array.shape
+                    else data_row_array
+                )
+                del data_row_array
+        else:
+            continue
+
+    # Check column totals
+    for _col_tot in col_totals:
+        assert_array_equal(
+            _col_tot[2:],
+            np.einsum(
+                "ij->j", invdata_array[invdata_array[:, 1] == _col_tot[1]][:, 2:]
+            ),
+        )
+
+    return invdata_array[
+        np.argsort(np.einsum("ij,ij->i", [[100, 1]], invdata_array[:, :2]))
+    ]
+
+
+def _process_table_blks_cnt_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    cnt_row_pat = re.compile(r"(\d+ (?:to \d+|\+)|TOTAL)")
+
+    invdata_array: ArrayBIGINT = np.array(None)
+    col_totals: ArrayBIGINT = np.zeros(3, int) # "enforced", "closed", "total"
+
+    for _tbl_blk in _table_blocks:
+        if cnt_row_pat.match(_blk_str := _tbl_blk[-3]):
+            row_list_s = _blk_str.strip().replace(",", "").split("\n")
+            row_list = np.array([CNT_FCOUNT_DICT[row_list_s[0]], *row_list_s[1:]], int)
+            del row_list_s
+            if row_list[3] != row_list[1] + row_list[2]:
+                raise ValueError(
+                    "Total number of investigations does not equal #enforced plus #closed."
+                )
+            if row_list[0] == TTL_KEY:
+                col_totals = row_list
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, row_list))
+                    if invdata_array.shape
+                    else row_list
+                )
+        else:
+            continue
+
+    if not np.array_equal(
+        np.array(list(col_totals[1:]), int), np.einsum("ij->j", invdata_array[:, 1:])
+    ):
+        raise ValueError("Column totals don't compute.")
+
+    return invdata_array[np.argsort(invdata_array[:, 0])]
+
+
+def _download_invdata(_dl_path: Path = FID_WORK_DIR) -> tuple[str, ...]:
+    if not _dl_path.is_dir():
+        _dl_path.mkdir(parents=True)
+
+    invdata_homepage_urls = (
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
+    )
+    invdata_docnames = (
+        "040831horizmergersdata96-03.pdf",
+        "p035603horizmergerinvestigationdata1996-2005.pdf",
+        "081201hsrmergerdata.pdf",
+        "130104horizontalmergerreport.pdf",
+    )
+
+    if all(
+        _dl_path.joinpath(invdata_docname).is_file()
+        for invdata_docname in invdata_docnames
+    ):
+        return invdata_docnames
+
+    invdata_docnames_dl: tuple[str, ...] = ()
+    u3pm = urllib3.PoolManager()
+    chunk_size_ = 1024 * 1024
+    for invdata_homepage_url in invdata_homepage_urls:
+        with u3pm.request(
+            "GET", invdata_homepage_url, preload_content=False
+        ) as _u3handle:
+            invdata_soup = BeautifulSoup(_u3handle.data, "html.parser")
+        invdata_attrs = [
+            (_g.get("title", ""), _g.get("href", ""))
+            for _g in invdata_soup.find_all("a")
+            if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
+        ]
+        for invdata_attr in invdata_attrs:
+            invdata_docname, invdata_link = invdata_attr
+            invdata_docnames_dl += (invdata_docname,)
+            with (
+                u3pm.request(
+                    "GET", f"https://www.ftc.gov/{invdata_link}", preload_content=False
+                ) as _urlopen_handle,
+                _dl_path.joinpath(invdata_docname).open("wb") as invdata_fh,
+            ):
+                while True:
+                    data = _urlopen_handle.read(chunk_size_)
+                    if not data:
+                        break
+                    invdata_fh.write(data)
+
+    return invdata_docnames_dl
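Both table parsers in the new module cross-check the published TOTAL rows and columns with np.einsum("ij->j", ...), which sums a 2-D array over its first axis, i.e. produces column totals. A standalone illustration of that identity as it is used in the row and column checks:

    # np.einsum("ij->j", a) sums over rows (axis 0), yielding column totals;
    # the parsers compare these against the TOTAL row printed in each table.
    import numpy as np
    from numpy.testing import assert_array_equal

    table = np.array([[3, 1], [2, 4], [5, 0]])  # e.g. enforced/closed counts
    published_totals = np.array([10, 5])        # as read from the TOTAL row
    assert_array_equal(published_totals, np.einsum("ij->j", table))
    assert_array_equal(published_totals, table.sum(axis=0))  # same computation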