mergeron 2024.738953.1-py3-none-any.whl → 2025.739265.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic.

Files changed (39)
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +147 -101
  5. mergeron/core/guidelines_boundaries.py +290 -1078
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +258 -246
  15. mergeron/gen/data_generation.py +473 -224
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +171 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -257
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738953.1.dist-info/METADATA +0 -93
  38. mergeron-2024.738953.1.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/__init__.py CHANGED
@@ -1,12 +1,17 @@
  from __future__ import annotations

  import enum
- from importlib.metadata import version
  from pathlib import Path
+ from typing import Literal
+
+ import numpy as np
+ from numpy.typing import NDArray

  _PKG_NAME: str = Path(__file__).parent.stem

- __version__ = version(_PKG_NAME)
+ VERSION = "2025.739265.0"
+
+ __version__ = VERSION

  DATA_DIR: Path = Path.home() / _PKG_NAME
  """
@@ -14,14 +19,26 @@ Defines a subdirectory named for this package in the user's home path.

  If the subdirectory doesn't exist, it is created on package invocation.
  """
-
  if not DATA_DIR.is_dir():
      DATA_DIR.mkdir(parents=False)

+ np.set_printoptions(precision=24, floatmode="fixed")
+
+ type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
+
+ type ArrayBoolean = NDArray[np.bool_]
+ type ArrayFloat = NDArray[np.float16 | np.float32 | np.float64 | np.float128]
+ type ArrayINT = NDArray[np.intp]
+
+ type ArrayDouble = NDArray[np.float64]
+ type ArrayBIGINT = NDArray[np.int64]
+
+ DEFAULT_REC_RATIO = 0.85
+

  @enum.unique
- class RECConstants(enum.StrEnum):
-     """Recapture rate - derivation methods."""
+ class RECForm(enum.StrEnum):
+     """For derivation of recapture ratio from market shares."""

      INOUT = "inside-out"
      OUTIN = "outside-in"
@@ -31,15 +48,18 @@ class RECConstants(enum.StrEnum):
  @enum.unique
  class UPPAggrSelector(enum.StrEnum):
      """
-     Aggregator selection for GUPPI and diversion ratio
+     Aggregator for GUPPI and diversion ratio estimates.

      """

      AVG = "average"
      CPA = "cross-product-share weighted average"
      CPD = "cross-product-share weighted distance"
+     CPG = "cross-product-share weighted geometric mean"
      DIS = "symmetrically-weighted distance"
+     GMN = "geometric mean"
      MAX = "max"
      MIN = "min"
      OSA = "own-share weighted average"
      OSD = "own-share weighted distance"
+     OSG = "own-share weighted geometric mean"
mergeron/core/__init__.py CHANGED
@@ -1,68 +1,8 @@
- from __future__ import annotations
+ from mpmath import mp  # type: ignore

- from importlib.metadata import version
+ from .. import VERSION  # noqa: TID252

- from attrs import Attribute, define, field, validators
+ __version__ = VERSION

- from .. import _PKG_NAME, RECConstants, UPPAggrSelector  # noqa: TID252
-
- __version__ = version(_PKG_NAME)
-
-
- def _delta_value_validator(
-     _instance: UPPBoundarySpec, _attribute: Attribute[float], _value: float, /
- ) -> None:
-     if not 0 <= _value <= 1:
-         raise ValueError(
-             "Margin-adjusted benchmark share ratio must lie between 0 and 1."
-         )
-
-
- def _rec_spec_validator(
-     _instance: UPPBoundarySpec,
-     _attribute: Attribute[RECConstants],
-     _value: RECConstants,
-     /,
- ) -> None:
-     if _value == RECConstants.OUTIN:
-         raise ValueError(
-             f"Invalid recapture specification, {_value!r}. "
-             "You may consider specifying RECConstants.INOUT here, and "
-             "assigning the recapture rate for the merging-firm with "
-             'the smaller market-share to the attribue, "rec" of '
-             "the UPPBoundarySpec object you pass."
-         )
-     if _value is None and _instance.agg_method != UPPAggrSelector.MAX:
-         raise ValueError(
-             f"Specified aggregation method, {_instance.agg_method} requires a recapture specification."
-         )
-
-
- @define(slots=True, frozen=True)
- class UPPBoundarySpec:
-     share_ratio: float = field(
-         kw_only=False,
-         default=0.075,
-         validator=(validators.instance_of(float), _delta_value_validator),
-     )
-     rec: float = field(
-         kw_only=False, default=0.80, validator=validators.instance_of(float)
-     )
-
-     agg_method: UPPAggrSelector = field(
-         kw_only=True,
-         default=UPPAggrSelector.MAX,
-         validator=validators.instance_of(UPPAggrSelector),
-     )
-     recapture_spec: RECConstants | None = field(
-         kw_only=True,
-         default=RECConstants.INOUT,
-         validator=(
-             validators.optional(validators.instance_of(RECConstants)),  # type: ignore
-             _rec_spec_validator,
-         ),
-     )
-
-     precision: int = field(
-         kw_only=False, default=5, validator=validators.instance_of(int)
-     )
+ type MPFloat = mp.mpf  # pyright: ignore
+ type MPMatrix = mp.matrix  # pyright: ignore
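
The attrs-based UPPBoundarySpec and its validators are removed from this module; what remains are aliases over mpmath's arbitrary-precision types. An illustration of what the new aliases name (assumption: MPFloat and MPMatrix are plain aliases for mp.mpf and mp.matrix, exactly as in the hunk above):

    from mpmath import mp

    mp.dps = 30                       # decimal digits of working precision
    share_ratio = mp.mpf("0.075")     # an MPFloat in the new aliases' terms
    shares = mp.matrix([[share_ratio, 1 - share_ratio]])  # an MPMatrix
    print(shares)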
mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} CHANGED
@@ -2,12 +2,16 @@
  Functions to parse margin data compiled by
  Prof. Aswath Damodaran, Stern School of Business, NYU.

+ Provides :func:`mgn_data_resampler` for generating margin data
+ from an estimated Gaussian KDE from the source (margin) data.
+
  Data are downloaded or reused from a local copy, on demand.

  For terms of use of Prof. Damodaran's data, please see:
  https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datahistory.html

- Important caveats:
+ NOTES
+ -----

  Prof. Damodaran notes that the data construction may not be
  consistent from iteration to iteration. He also notes that,
@@ -32,29 +36,29 @@ price-cost margins fall in the interval :math:`[0, 1]`.

  """

+ import shutil
  from collections.abc import Mapping
- from importlib.metadata import version
+ from importlib import resources
  from pathlib import Path
  from types import MappingProxyType

  import msgpack  # type:ignore
  import numpy as np
- import requests
+ import urllib3
  from numpy.random import PCG64DXSM, Generator, SeedSequence
- from numpy.typing import NDArray
- from requests_toolbelt.downloadutils import stream  # type: ignore
  from scipy import stats  # type: ignore
  from xlrd import open_workbook  # type: ignore

- from .. import _PKG_NAME, DATA_DIR  # noqa: TID252
-
- __version__ = version(_PKG_NAME)
+ from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble  # noqa: TID252

+ __version__ = VERSION

  MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_dict.msgpack"

+ u3pm = urllib3.PoolManager()

- def scrape_data_table(
+
+ def mgn_data_getter(  # noqa: PLR0912
      _table_name: str = "margin",
      *,
      data_archive_path: Path | None = None,
@@ -68,32 +72,46 @@ def scrape_data_table(
      _data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH

      _mgn_urlstr = f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
-     _mgn_path = _data_archive_path.parent.joinpath(f"damodaran_{_table_name}_data.xls")
+     _mgn_path = _data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
      if _data_archive_path.is_file() and not data_download_flag:
          return MappingProxyType(msgpack.unpackb(_data_archive_path.read_bytes()))
      elif _mgn_path.is_file():
          _mgn_path.unlink()
-         _data_archive_path.unlink()
-
-     _REQ_TIMEOUT = (9.05, 27)
-     # NYU will eventually updates its server certificate, to one signed with
-     # "InCommon RSA Server CA 2.pem", the step below will be obsolete. In
-     # the interim, it is necessary to provide the certificate chain to the
-     # root CA, so that the obsolete CA certificate is validated.
-     _INCOMMON_2014_CERT_CHAIN_PATH = (
-         Path(__file__).parent / "InCommon RSA Server CA cert chain.pem"
-     )
-     try:
-         _urlopen_handle = requests.get(_mgn_urlstr, timeout=_REQ_TIMEOUT, stream=True)
-     except requests.exceptions.SSLError:
-         _urlopen_handle = requests.get(
-             _mgn_urlstr,
-             timeout=_REQ_TIMEOUT,
-             stream=True,
-             verify=str(_INCOMMON_2014_CERT_CHAIN_PATH),
-         )
+         if _data_archive_path.is_file():
+             _data_archive_path.unlink()

-     _mgn_filename = stream.stream_response_to_file(_urlopen_handle, path=_mgn_path)
+     try:
+         _chunk_size = 1024 * 1024
+         with (
+             u3pm.request("GET", _mgn_urlstr, preload_content=False) as _urlopen_handle,
+             _mgn_path.open("wb") as _mgn_file,
+         ):
+             while True:
+                 _data = _urlopen_handle.read(_chunk_size)
+                 if not _data:
+                     break
+                 _mgn_file.write(_data)
+
+         print(f"Downloaded {_mgn_urlstr} to {_mgn_path}.")
+
+     except urllib3.exceptions.MaxRetryError as _err:
+         if isinstance(_err.__cause__, urllib3.exceptions.SSLError):
+             # Works fine with other sites secured with certificates
+             # from the Internet2 CA, such as,
+             # https://snap.stanford.edu/data/web-Stanford.txt.gz
+             print(
+                 f"WARNING: Could not establish secure connection to, {_mgn_urlstr}."
+                 "Using bundled copy."
+             )
+             if not _mgn_path.is_file():
+                 with resources.as_file(
+                     resources.files(f"{_PKG_NAME}.data").joinpath(
+                         "empirical_margin_distribution.xls"
+                     )
+                 ) as _mgn_data_archive_path:
+                     shutil.copy2(_mgn_data_archive_path, _mgn_path)
+         else:
+             raise _err

      _xl_book = open_workbook(_mgn_path, ragged_rows=True, on_demand=True)
      _xl_sheet = _xl_book.sheet_by_name("Industry Averages")
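
The rewritten download path streams the workbook via urllib3 and, if the TLS handshake with the NYU server fails, falls back to a copy bundled under mergeron/data. A hypothetical call, inferred from the hunk above (data_download_flag appears in the function body and is assumed here to be a keyword argument that forces a fresh download rather than reusing the msgpack cache):

    from mergeron.core.empirical_margin_distribution import mgn_data_getter

    margin_table = mgn_data_getter("margin", data_download_flag=True)
    print(len(margin_table), "industry rows")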
@@ -114,16 +132,16 @@ def scrape_data_table(
          _xl_row[1] = int(_xl_row[1])
          _mgn_dict[_xl_row[0]] = dict(zip(_mgn_row_keys[1:], _xl_row[1:], strict=True))

-     _ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict))
+     _ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict))  # pyright: ignore

      return MappingProxyType(_mgn_dict)


  def mgn_data_builder(
      _mgn_tbl_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
- ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
+ ) -> tuple[ArrayDouble, ArrayDouble, ArrayDouble]:
      if _mgn_tbl_dict is None:
-         _mgn_tbl_dict = scrape_data_table()
+         _mgn_tbl_dict = mgn_data_getter()

      _mgn_data_wts, _mgn_data_obs = (
          _f.flatten()
@@ -169,22 +187,24 @@ def mgn_data_builder(
      )


- def resample_mgn_data(
-     _sample_size: int | tuple[int, int] = (10**6, 2),
+ def mgn_data_resampler(
+     _sample_size: int | tuple[int, ...] = (10**6, 2),
      /,
      *,
      seed_sequence: SeedSequence | None = None,
- ) -> NDArray[np.float64]:
+ ) -> ArrayDouble:
      """
-     Generate the specified number of draws from the empirical distribution
-     for Prof. Damodaran's margin data using the estimated Gaussian KDE.
-     Margins for firms in finance, investment, insurance, reinsurance, and REITs
-     are excluded from the sample used to estimate the Gaussian KDE.
+     Generate draws from the empirical distribution bassed on Prof. Damodaran's margin data.
+
+     The empirical distribution is estimated using a Gaussian KDE; the bandwidth
+     selected using Silverman's rule is narrowed to reflect that the margin data
+     are multimodal. Margins for firms in finance, investment, insurance, reinsurance, and
+     REITs are excluded from the sample used to estimate the empirical distribution.

      Parameters
      ----------
      _sample_size
-         Number of draws
+         Number of draws; if tuple, (number of draws, number of columns)

      seed_sequence
          SeedSequence for seeding random-number generator when results
@@ -198,28 +218,24 @@ def resample_mgn_data(

      _seed_sequence = seed_sequence or SeedSequence(pool_size=8)

-     _x, _w, _ = mgn_data_builder(scrape_data_table())
-
-     _mgn_kde = stats.gaussian_kde(_x, weights=_w)
+     _x, _w, _ = mgn_data_builder(mgn_data_getter())

-     def _generate_draws(
-         _mgn_kde: stats.gaussian_kde, _ssz: int, _seed_seq: SeedSequence
-     ) -> NDArray[np.float64]:
-         _seed = Generator(PCG64DXSM(_seed_sequence))
+     _mgn_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
+     _mgn_kde.set_bandwidth(bw_method=_mgn_kde.factor / 3.0)  # pyright: ignore

-         # We enlarge the sample, then truncate to
-         # the range between [0.0, 1.0)
-         ssz_up = int(_ssz / (_mgn_kde.integrate_box_1d(0.0, 1.0) ** 2))
-         sample_1 = _mgn_kde.resample(ssz_up, seed=_seed)[0]
+     if isinstance(_sample_size, int):
          return np.array(
-             sample_1[(sample_1 >= 0.0) & (sample_1 <= 1)][:_ssz], np.float64
+             _mgn_kde.resample(_sample_size, seed=Generator(PCG64DXSM(_seed_sequence)))[
+                 0
+             ]
          )
-
-     if isinstance(_sample_size, int):
-         return _generate_draws(_mgn_kde, _sample_size, _seed_sequence)
-     else:
+     elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
          _ssz, _num_cols = _sample_size
          _ret_array = np.empty(_sample_size, np.float64)
          for _idx, _seed_seq in enumerate(_seed_sequence.spawn(_num_cols)):
-             _ret_array[:, _idx] = _generate_draws(_mgn_kde, _ssz, _seed_seq)
+             _ret_array[:, _idx] = _mgn_kde.resample(
+                 _ssz, seed=Generator(PCG64DXSM(_seed_seq))
+             )[0]
          return _ret_array
+     else:
+         raise ValueError(f"Invalid sample size: {_sample_size!r}")