mergeron 2025.739290.4.tar.gz → 2025.739290.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mergeron might be problematic.

Files changed (24):
  1. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/PKG-INFO +1 -1
  2. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/pyproject.toml +1 -1
  3. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/__init__.py +60 -31
  4. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/__init__.py +1 -1
  5. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/empirical_margin_distribution.py +24 -24
  6. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/ftc_merger_investigations_data.py +8 -8
  7. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/guidelines_boundaries.py +13 -25
  8. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/guidelines_boundary_functions.py +6 -6
  9. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/guidelines_boundary_functions_extra.py +2 -4
  10. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/pseudorandom_numbers.py +16 -16
  11. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/__init__.py +34 -19
  12. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/data_generation.py +14 -16
  13. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/enforcement_stats.py +24 -23
  14. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/upp_tests.py +1 -1
  15. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/README.rst +0 -0
  16. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/__init__.py +0 -0
  17. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/damodaran_margin_data.xls +0 -0
  18. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
  19. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/ftc_invdata.msgpack +0 -0
  20. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/ftc_invdata.zip +0 -0
  21. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/demo/__init__.py +0 -0
  22. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/demo/visualize_empirical_margin_distribution.py +0 -0
  23. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/data_generation_functions.py +0 -0
  24. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/py.typed +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739290.4
+Version: 2025.739290.5
 Summary: Analyze merger enforcement policy using Python
 License: MIT
 Keywords: merger policy analysis,merger guidelines,merger screening,policy presumptions,concentration standards,upward pricing pressure,GUPPI

pyproject.toml
@@ -13,7 +13,7 @@ keywords = [
   "upward pricing pressure",
   "GUPPI",
 ]
-version = "2025.739290.4"
+version = "2025.739290.5"

 # Classifiers list: https://pypi.org/classifiers/
 classifiers = [

src/mergeron/__init__.py
@@ -12,7 +12,7 @@ from ruamel import yaml

 _PKG_NAME: str = Path(__file__).parent.stem

-VERSION = "2025.739290.4"
+VERSION = "2025.739290.5"

 __version__ = VERSION

@@ -32,7 +32,6 @@ EMPTY_ARRAYINT = np.array([], int)

 NTHREADS = 2 * cpu_count()

-PKG_ENUMS_MAP: dict[str, object] = {}
 PKG_ATTRS_MAP: dict[str, object] = {}

 np.set_printoptions(precision=24, floatmode="fixed")
@@ -71,38 +70,27 @@ this_yaml.indent(mapping=2, sequence=4, offset=2)
 )


-@this_yaml.register_class
-class EnumYAMLized(enum.Enum):
-    @classmethod
-    def to_yaml(
-        cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
-    ) -> yaml.ScalarNode:
-        return _r.represent_scalar(
-            f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
-        )
-
-    @classmethod
-    def from_yaml(
-        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
-    ) -> object[enum.EnumType]:
-        return super().__getattribute__(cls, _n.value)
-
-
 def yaml_rt_mapper(
     _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
 ) -> Mapping[str, Any]:
+    """
+    Constructs a mapping from a mapping node with the RoundTripConstructor
+
+    """
     data_: Mapping[str, Any] = yaml.constructor.CommentedMap()
     _c.construct_mapping(_n, maptyp=data_, deep=True)
     return data_


 def yamelize_attrs(
-    _typ: object,
-    excluded_attributes: set | None = None,
-    /,
-    *,
-    attr_map: Mapping[str, object] = PKG_ATTRS_MAP,
+    _typ: object, /, *, attr_map: Mapping[str, object] = PKG_ATTRS_MAP
 ) -> None:
+    """Add yaml representer, constructor for attrs-defined class.
+
+    Applying this function, attributes with property, `init=False` are
+    not serialized to YAML.
+    """
+
     attr_map |= {_typ.__name__: _typ}

     _ = this_yaml.representer.add_representer(
@@ -112,11 +100,7 @@ def yamelize_attrs(
         # construct mapping, rather than calling attrs.asdict(),
         # to use yaml representers defined in this package for
         # "upstream" objects
-        {
-            _a.name: getattr(_d, _a.name)
-            for _a in _d.__attrs_attrs__
-            if excluded_attributes is None or _a.name not in excluded_attributes
-        },
+        {_a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init},
         ),
     )
     _ = this_yaml.constructor.add_constructor(
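
Note on this change: dropping the `excluded_attributes` parameter works because the new `if _a.init` filter skips any attrs field declared with `init=False`, so derived fields are excluded from YAML automatically instead of by name. A minimal sketch of the mechanism, using a hypothetical class that is not part of the package:

    from attrs import field, frozen

    @frozen
    class Sketch:
        threshold: float = 0.01
        area: float = field(init=False, default=0.0)  # init=False, so not serialized

    # The representer's dict comprehension keeps only init=True fields:
    {_a.name: getattr(Sketch(), _a.name) for _a in Sketch.__attrs_attrs__ if _a.init}
    # -> {'threshold': 0.01}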
@@ -125,19 +109,64 @@
     )


+@this_yaml.register_class
+class Enameled(enum.Enum):
+    """Add YAML representer, constructor for enum.Enum"""
+
+    @classmethod
+    def to_yaml(
+        cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
+    ) -> yaml.ScalarNode:
+        return _r.represent_scalar(
+            f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
+        )
+
+    @classmethod
+    def from_yaml(
+        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
+    ) -> object[enum.EnumType]:
+        return super().__getattribute__(cls, _n.value)
+
+
 @this_yaml.register_class
 @enum.unique
-class RECForm(str, EnumYAMLized):
+class RECForm(str, Enameled):
     """For derivation of recapture ratio from market shares."""

     INOUT = "inside-out"
+    R"""
+    Given, :math:`\overline{r}, s_i {\ } \forall {\ } i \in \set{1, 2, \ldots, m}`, with
+    :math:`s_{min} = \min(s_1, s_2)`,
+
+    .. math::
+
+        REC_i = \frac{(1 - s_i) \overline{r}}{(1 - s_{min}) - (s_i - s_{min}) \overline{r}}
+
+    """
+
     OUTIN = "outside-in"
+    R"""
+    Given, :math:`\pi_i {\ } \forall {\ } i \in N`,
+
+    .. math::
+
+        REC_i = \frac{\sum_{i \in M} \pi_i}{\sum_{j \in N} \pi_j}
+
+    """
+
     FIXED = "proportional"
+    R"""Given, :math:`\overline{r}`,
+
+    .. math::
+
+        REC_i = \overline{r} {\ } \forall {\ } i \in M
+
+    """


 @this_yaml.register_class
 @enum.unique
-class UPPAggrSelector(str, EnumYAMLized):
+class UPPAggrSelector(str, Enameled):
     """
     Aggregator for GUPPI and diversion ratio estimates.

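
Usage note: with `register_class`, ruamel.yaml invokes `to_yaml`/`from_yaml` during dump and load, so each member of an `Enameled` subclass round-trips as a tagged scalar. A minimal sketch, assuming the package's exported `this_yaml` instance and `RECForm` as defined above:

    import io
    from mergeron import RECForm, this_yaml

    buf = io.StringIO()
    this_yaml.dump({"recapture_form": RECForm.INOUT}, buf)
    # buf now holds: recapture_form: !RECForm INOUT
    restored = this_yaml.load(buf.getvalue())
    assert restored["recapture_form"] is RECForm.INOUT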

src/mergeron/core/__init__.py
@@ -27,7 +27,7 @@ class INVTableData:
 type INVData = MappingProxyType[
     str, MappingProxyType[str, MappingProxyType[str, INVTableData]]
 ]
-type INVData_in_ = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]
+type INVData_in = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]


 (_, _) = (

src/mergeron/core/empirical_margin_distribution.py
@@ -71,21 +71,21 @@ def margin_data_getter(  # noqa: PLR0912
         "This code is designed for parsing Prof. Damodaran's margin tables."
     )

-    data_archive_path_ = data_archive_path or MGNDATA_ARCHIVE_PATH
-    workbook_path_ = data_archive_path_.parent / f"damodaran_{_table_name}_data.xls"
-    if data_archive_path_.is_file() and not data_download_flag:
+    data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
+    workbook_path = data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
+    if data_archive_path.is_file() and not data_download_flag:
        # with data_archive_path_.open("r") as _yfh:
        #     margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
        with (
-            zipfile.ZipFile(data_archive_path_) as _yzip,
-            _yzip.open(f"{data_archive_path_.stem}.yaml") as _yfh,
+            zipfile.ZipFile(data_archive_path) as _yzip,
+            _yzip.open(f"{data_archive_path.stem}.yaml") as _yfh,
         ):
             margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
         return _mappingproxy_from_mapping(margin_data_dict)
-    elif workbook_path_.is_file():
-        workbook_path_.unlink()
-        if data_archive_path_.is_file():
-            data_archive_path_.unlink()
+    elif workbook_path.is_file():
+        workbook_path.unlink()
+        if data_archive_path.is_file():
+            data_archive_path.unlink()

     margin_urlstr = (
         f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
@@ -96,7 +96,7 @@ def margin_data_getter(  # noqa: PLR0912
             u3pm.request(
                 "GET", margin_urlstr, preload_content=False
             ) as _urlopen_handle,
-            workbook_path_.open("wb") as margin_file,
+            workbook_path.open("wb") as margin_file,
         ):
             while True:
                 data_ = _urlopen_handle.read(chunk_size_)
@@ -104,7 +104,7 @@ def margin_data_getter(  # noqa: PLR0912
                     break
                 margin_file.write(data_)

-        print(f"Downloaded {margin_urlstr} to {workbook_path_}.")
+        print(f"Downloaded {margin_urlstr} to {workbook_path}.")

     except urllib3.exceptions.MaxRetryError as error_:
         if isinstance(error_.__cause__, urllib3.exceptions.SSLError):
@@ -115,38 +115,38 @@ def margin_data_getter(  # noqa: PLR0912
                 f"WARNING: Could not establish secure connection to, {margin_urlstr}."
                 "Using bundled copy."
             )
-            if not workbook_path_.is_file():
+            if not workbook_path.is_file():
                 with resources.as_file(
                     resources.files(f"{_PKG_NAME}.data").joinpath(
                         "empirical_margin_distribution.xls"
                     )
                 ) as margin_data_archive_path:
-                    shutil.copy2(margin_data_archive_path, workbook_path_)
+                    shutil.copy2(margin_data_archive_path, workbook_path)
         else:
             raise error_

-    xl_book_ = open_workbook(workbook_path_, ragged_rows=True, on_demand=True)
-    xl_sheet_ = xl_book_.sheet_by_name("Industry Averages")
+    xl_book = open_workbook(workbook_path, ragged_rows=True, on_demand=True)
+    xl_sheet = xl_book.sheet_by_name("Industry Averages")

     margin_dict: dict[str, dict[str, float | int]] = {}
-    row_keys_: list[str] = []
+    row_keys: list[str] = []
     read_row_flag = False
-    for _ridx in range(xl_sheet_.nrows):
-        xl_row = xl_sheet_.row_values(_ridx)
+    for _ridx in range(xl_sheet.nrows):
+        xl_row = xl_sheet.row_values(_ridx)
         if xl_row[0] == "Industry Name":
             read_row_flag = True
-            row_keys_ = xl_row
+            row_keys = xl_row
             continue

         if not xl_row[0] or not read_row_flag:
             continue

         xl_row[1] = int(xl_row[1])
-        margin_dict[xl_row[0]] = dict(zip(row_keys_[1:], xl_row[1:], strict=True))
+        margin_dict[xl_row[0]] = dict(zip(row_keys[1:], xl_row[1:], strict=True))

     with (
-        zipfile.ZipFile(data_archive_path_, "w") as _yzip,
-        _yzip.open(f"{data_archive_path_.stem}.yaml", "w") as _yfh,
+        zipfile.ZipFile(data_archive_path, "w") as _yzip,
+        _yzip.open(f"{data_archive_path.stem}.yaml", "w") as _yfh,
     ):
         this_yaml.dump(margin_dict, _yfh)

@@ -240,9 +240,9 @@ def margin_data_resampler(

     seed_sequence_ = seed_sequence or SeedSequence(pool_size=8)

-    x_, w_, _ = margin_data_builder(margin_data_getter())
+    _x, _w, _ = margin_data_builder(margin_data_getter())

-    margin_kde = stats.gaussian_kde(x_, weights=w_, bw_method="silverman")
+    margin_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
     margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)

     if isinstance(_sample_size, int):
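
For context, the resampler above fits a weighted Gaussian kernel density estimate with Silverman's rule and then narrows the bandwidth to a third of the fitted factor before drawing synthetic margins. The same SciPy pattern in isolation, with illustrative stand-in data rather than the packaged margin observations:

    import numpy as np
    from scipy import stats

    x = np.random.default_rng(0).normal(0.3, 0.1, 500)  # stand-in margin data
    kde = stats.gaussian_kde(x, bw_method="silverman")
    kde.set_bandwidth(bw_method=kde.factor / 3.0)       # tighten the bandwidth
    sample = kde.resample(1_000, seed=42)               # shape (1, 1000)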

src/mergeron/core/ftc_merger_investigations_data.py
@@ -36,7 +36,7 @@ from .. import (  # noqa: TID252
 )
 from . import (
     INVData,
-    INVData_in_,
+    INVData_in,
     INVTableData,
     _dict_from_mapping,
     _mappingproxy_from_mapping,
@@ -147,8 +147,8 @@ def construct_data(
         ZipFile(_archive_path, "r") as _yzh,
         _yzh.open(f"{_archive_path.stem}.yaml", "r") as _yfh,
     ):
-        invdata_ = this_yaml.load(_yfh)
-        if isinstance(invdata_, MappingProxyType):
+        invdata_: INVData = this_yaml.load(_yfh)
+        if not isinstance(invdata_, MappingProxyType):
             invdata_ = _mappingproxy_from_mapping(invdata_)
     with (
         ZipFile(_archive_path, "w", compression=ZIP_DEFLATED) as _yzh,
@@ -157,7 +157,7 @@ def construct_data(
         this_yaml.dump(invdata_, _yfh)
         return invdata_

-    invdata: INVData_in_ = _dict_from_mapping(_parse_invdata())
+    invdata: INVData_in = _dict_from_mapping(_parse_invdata())

     # Add some data periods (
     #   only periods ending in 2011, others have few observations and
@@ -217,7 +217,7 @@ def construct_data(
     return retval


-def _construct_no_evidence_data(_invdata: INVData_in_, _data_period: str, /) -> None:
+def _construct_no_evidence_data(_invdata: INVData_in, _data_period: str, /) -> None:
     invdata_ind_grp = "All Markets"
     table_nos_map = dict(
         zip(
@@ -444,7 +444,7 @@ def _parse_invdata() -> INVData:

     invdata_docnames = _download_invdata(FTCDATA_DIR)

-    invdata: INVData_in_ = {}
+    invdata: INVData_in = {}

     for invdata_docname in invdata_docnames:
         invdata_pdf_path = FTCDATA_DIR.joinpath(invdata_docname)
@@ -513,7 +513,7 @@ def _parse_invdata() -> INVData:


 def _parse_page_blocks(
-    _invdata: INVData_in_, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
 ) -> None:
     if _data_period != "1996-2011":
         _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
@@ -540,7 +540,7 @@ def _parse_page_blocks(


 def _parse_table_blocks(
-    _invdata: INVData_in_, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
 ) -> None:
     invdata_evid_cond = "Unrestricted on additional evidence"
     table_num, table_ser, table_type = _identify_table_type(

src/mergeron/core/guidelines_boundaries.py
@@ -12,7 +12,6 @@ from typing import Literal
 import numpy as np
 from attrs import Attribute, field, frozen, validators
 from mpmath import mp  # type: ignore
-from ruamel import yaml

 from .. import (  # noqa: TID252
     DEFAULT_REC_RATIO,
@@ -23,7 +22,6 @@ from .. import (  # noqa: TID252
     UPPAggrSelector,
     this_yaml,
     yamelize_attrs,
-    yaml_rt_mapper,
 )
 from . import guidelines_boundary_functions as gbfn

@@ -147,21 +145,6 @@ class GuidelinesThresholds:
         ),
     )

-    @classmethod
-    def to_yaml(
-        cls, _r: yaml.representer.RoundTripRepresenter, _d: GuidelinesThresholds
-    ) -> yaml.MappingNode:
-        ret: yaml.MappingNode = _r.represent_mapping(
-            f"!{cls.__name__}", {"pub_year": _d.pub_year}
-        )
-        return ret
-
-    @classmethod
-    def from_yaml(
-        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
-    ) -> GuidelinesThresholds:
-        return cls(**yaml_rt_mapper(_c, _n))
-

 @frozen
 class ConcentrationBoundary:
@@ -170,8 +153,8 @@ class ConcentrationBoundary:
     measure_name: Literal[
         "ΔHHI",
         "Combined share",
-        "Pre-merger HHI Contribution",
-        "Post-merger HHI Contribution",
+        "HHI contribution, pre-merger",
+        "HHI contribution, post-merger",
     ] = field(kw_only=False, default="ΔHHI")

     @measure_name.validator
@@ -181,8 +164,8 @@ class ConcentrationBoundary:
         if _value not in {
             "ΔHHI",
             "Combined share",
-            "Pre-merger HHI Contribution",
-            "Post-merger HHI Contribution",
+            "HHI contribution, pre-merger",
+            "HHI contribution, post-merger",
         }:
             raise ValueError(f"Invalid name for a concentration measure, {_value!r}.")

@@ -211,9 +194,9 @@ class ConcentrationBoundary:
                 conc_fn = gbfn.hhi_delta_boundary
             case "Combined share":
                 conc_fn = gbfn.combined_share_boundary
-            case "Pre-merger HHI Contribution":
+            case "HHI contribution, pre-merger":
                 conc_fn = gbfn.hhi_pre_contrib_boundary
-            case "Post-merger HHI Contribution":
+            case "HHI contribution, post-merger":
                 conc_fn = gbfn.hhi_post_contrib_boundary

         boundary_ = conc_fn(self.threshold, dps=self.precision)
@@ -477,5 +460,10 @@ if __name__ == "__main__":
     )


-for _typ in (HMGThresholds, ConcentrationBoundary, DiversionRatioBoundary):
-    yamelize_attrs(_typ, {"coordinates", "area"})
+for _typ in (
+    ConcentrationBoundary,
+    DiversionRatioBoundary,
+    GuidelinesThresholds,
+    HMGThresholds,
+):
+    yamelize_attrs(_typ)

src/mergeron/core/guidelines_boundary_functions.py
@@ -699,7 +699,7 @@ def _shrratio_boundary_intcpt(


 def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](
-    _x1: LerpT, _x2: LerpT, r_: float | MPFloat = 0.25, /
+    _x1: LerpT, _x2: LerpT, _r: float | MPFloat = 0.25, /
 ) -> LerpT:
     """
     From the function of the same name in the C++ standard [2]_
@@ -711,7 +711,7 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](
     ----------
     _x1, _x2
         bounds :math:`x_1, x_2` to interpolate between.
-    r_
+    _r
         interpolation weight :math:`r` assigned to :math:`x_2`

     Returns
@@ -731,14 +731,14 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](

     """

-    if not 0 <= r_ <= 1:
+    if not 0 <= _r <= 1:
         raise ValueError("Specified interpolation weight must lie in [0, 1].")
-    elif r_ == 0:
+    elif _r == 0:
         return _x1
-    elif r_ == 1:
+    elif _r == 1:
         return _x2
     else:
-        return r_ * _x2 + (1 - r_) * _x1
+        return _r * _x2 + (1 - _r) * _x1


 def round_cust(
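
The rename to `_r` does not alter the documented behavior: `lerp` returns the convex combination `_r * _x2 + (1 - _r) * _x1`, with the endpoints returned exactly. For example:

    lerp(0.0, 10.0)        # default _r = 0.25 -> 2.5
    lerp(0.0, 10.0, 0.5)   # -> 5.0
    lerp(0.0, 10.0, 1.0)   # -> 10.0, _x2 returned exactly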

src/mergeron/core/guidelines_boundary_functions_extra.py
@@ -45,8 +45,6 @@ def dh_area_quad(_dh_val: float = 0.01, /) -> float:
     ----------
     _dh_val
         Merging-firms' ΔHHI bound.
-    dps
-        Specified precision in decimal places.

     Returns
     -------
@@ -297,10 +295,10 @@ def shrratio_boundary_distance(  # noqa: PLR0914

     weights_i = (
         (
-            w_ := mp.fdiv(
+            _w := mp.fdiv(
                 s_2 if weighting == "cross-product-share" else s_1, s_1 + s_2
             ),
-            1 - w_,
+            1 - _w,
         )
         if weighting
         else _weights_base

src/mergeron/core/pseudorandom_numbers.py
@@ -112,11 +112,11 @@ def gen_seed_seq_list_default(
         63206306147411023146090085885772240748399174641427012462446714431253444120718,
     ]

-    if _len > (lge_ := len(generated_entropy)):
+    if _len > (_lge := len(generated_entropy)):
         e_str_segs = (
             "This function can presently create SeedSequences for generating up to ",
-            f"{lge_:,d} independent random variates. If you really need to generate ",
-            f"more than {lge_:,d} seeded independent random variates, please pass a ",
+            f"{_lge:,d} independent random variates. If you really need to generate ",
+            f"more than {_lge:,d} seeded independent random variates, please pass a ",
             "sufficiently large list of seeds as generated_entropy. See,",
             "{}/{}.".format(
                 "https://numpy.org/doc/stable/reference/random",
@@ -219,13 +219,13 @@ class MultithreadedRNG:
             self.dist_parms, DEFAULT_DIST_PARMS
         ):
             if self.dist_type == "Uniform":
-                dist_type_ = "Random"
+                dist_type = "Random"
             elif self.dist_type == "Normal":
-                dist_type_ = "Gaussian"
+                dist_type = "Gaussian"
             else:
-                dist_type_ = self.dist_type
+                dist_type = self.dist_type

-        step_size = (len(self.values) / self.nthreads).__ceil__()  # noqa: PLC2801
+        step_size = (len(self.values) / self.nthreads).__ceil__()

         seed_ = (
             SeedSequence(pool_size=8)
@@ -233,7 +233,7 @@
             else self.seed_sequence
         )

-        random_generators_ = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))
+        random_generators = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))

         def _fill(
             _rng: np.random.Generator,
@@ -244,23 +244,23 @@
             _last: int,
             /,
         ) -> None:
-            sz_: tuple[int, ...] = out_[_first:_last].shape
+            _sz: tuple[int, ...] = out_[_first:_last].shape
             match _dist_type:
                 case "Beta":
                     shape_a, shape_b = _dist_parms
-                    out_[_first:_last] = _rng.beta(shape_a, shape_b, size=sz_)
+                    out_[_first:_last] = _rng.beta(shape_a, shape_b, size=_sz)
                 case "Dirichlet":
-                    out_[_first:_last] = _rng.dirichlet(_dist_parms, size=sz_[:-1])
+                    out_[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
                 case "Gaussian":
                     _rng.standard_normal(out=out_[_first:_last])
                 case "Normal":
-                    mu_, sigma_ = _dist_parms
-                    out_[_first:_last] = _rng.normal(mu_, sigma_, size=sz_)
+                    _mu, _sigma = _dist_parms
+                    out_[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
                 case "Random":
                     _rng.random(out=out_[_first:_last])
                 case "Uniform":
                     uni_l, uni_h = _dist_parms
-                    out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=sz_)
+                    out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=_sz)
                 case _:
                     "Unreachable. The validator would have rejected this as invalid."

@@ -271,8 +271,8 @@

                 executor_.submit(
                     _fill,
-                    random_generators_[_i],
-                    dist_type_,
+                    random_generators[_i],
+                    dist_type,
                     self.dist_parms,
                     self.values,
                     range_first,
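
The submit loop above is the core of the multithreaded-fill pattern: generators spawned from one `SeedSequence` each write a disjoint slice of a shared output array. A self-contained sketch of the idea (simplified; the package adds distribution dispatch and parameter validation):

    from concurrent.futures import ThreadPoolExecutor

    import numpy as np
    from numpy.random import SeedSequence, default_rng

    values = np.empty(1_000_000)
    nthreads = 4
    step = -(-len(values) // nthreads)  # ceiling division, like step_size above
    rngs = [default_rng(s) for s in SeedSequence().spawn(nthreads)]
    with ThreadPoolExecutor(nthreads) as pool:
        for i, rng in enumerate(rngs):
            pool.submit(rng.random, out=values[i * step : (i + 1) * step])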

src/mergeron/gen/__init__.py
@@ -7,9 +7,11 @@ containers for industry data generation and testing.
 from __future__ import annotations

 import enum
+import io
 from collections.abc import Sequence
 from operator import attrgetter

+import h5py
 import numpy as np
 from attrs import Attribute, Converter, cmp_using, field, frozen, validators
 from numpy.random import SeedSequence
@@ -22,7 +24,7 @@ from .. import (  # noqa: TID252
     ArrayDouble,
     ArrayFloat,
     ArrayINT,
-    EnumYAMLized,
+    Enameled,
     RECForm,
     UPPAggrSelector,
     this_yaml,
@@ -50,7 +52,7 @@ class SeedSequenceData:

 @this_yaml.register_class
 @enum.unique
-class PriceSpec(tuple[bool, str | None], EnumYAMLized):
+class PriceSpec(tuple[bool, str | None], Enameled):
     """Price specification.

     Whether prices are symmetric and, if not, the direction of correlation, if any.
@@ -65,7 +67,7 @@ class PriceSpec(tuple[bool, str | None], EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class SHRDistribution(str, EnumYAMLized):
+class SHRDistribution(str, Enameled):
     """Market share distributions."""

     UNI = "Uniform"
@@ -285,7 +287,7 @@ class ShareSpec:

 @this_yaml.register_class
 @enum.unique
-class PCMDistribution(str, EnumYAMLized):
+class PCMDistribution(str, Enameled):
     """Margin distributions."""

     UNI = "Uniform"
@@ -296,7 +298,7 @@ class PCMDistribution(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class FM2Constraint(str, EnumYAMLized):
+class FM2Constraint(str, Enameled):
     """Firm 2 margins - derivation methods."""

     IID = "i.i.d"
@@ -401,7 +403,7 @@ class PCMSpec:

 @this_yaml.register_class
 @enum.unique
-class SSZConstant(float, EnumYAMLized):
+class SSZConstant(float, Enameled):
     """
     Scale factors to offset sample size reduction.

@@ -467,10 +469,8 @@ class MarketSampleData:
     """

     @aggregate_purchase_prob.default
-    def __appd(_i: MarketSampleData) -> ArrayINT:
-        e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-        e_.fill(np.nan)
-        return e_
+    def __appd(_i: MarketSampleData) -> ArrayDouble:
+        return np.nan * np.empty_like(_i.frmshr_array[:, :1], float)

     fcounts: ArrayINT = field(eq=cmp_using(np.array_equal))
     """Number of firms in market"""
@@ -487,19 +487,34 @@ class MarketSampleData:
     """

     @nth_firm_share.default
-    def __nfsd(_i: MarketSampleData) -> ArrayINT:
-        e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-        e_.fill(np.nan)
-        return e_
+    def __nfsd(_i: MarketSampleData) -> ArrayDouble:
+        return np.nan * np.empty_like(_i.frmshr_array[:, :1], float)

     hhi_post: ArrayDouble = field(eq=cmp_using(np.array_equal))
     """Post-merger change in Herfindahl-Hirschmann Index (HHI)"""

     @hhi_post.default
-    def __hpd(_i: MarketSampleData) -> ArrayINT:
-        e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-        e_.fill(np.nan)
-        return e_
+    def __hpd(_i: MarketSampleData) -> ArrayDouble:
+        return np.nan * np.empty_like(_i.frmshr_array[:, :1], float)
+
+    def to_h5bin(self) -> bytes:
+        """Save market sample data to HDF5 file."""
+        byte_stream = io.BytesIO()
+        with h5py.File(byte_stream, "w") as _h5f:
+            for _a in self.__attrs_attrs__:
+                if all((
+                    (_arr := getattr(self, _a.name)).any(),
+                    not np.isnan(_arr).all(),
+                )):
+                    _h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
+        return byte_stream.getvalue()
+
+    @classmethod
+    def from_h5f(cls, _hfh: io.BufferedReader) -> MarketSampleData:
+        """Load market sample data from HDF5 file."""
+        with h5py.File(_hfh, "r") as _h5f:
+            _retval = cls(**{_a: _h5f[_a][:] for _a in _h5f})
+        return _retval


 @frozen
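
These methods relocate the HDF5 logic that version .4 kept inline in `data_generation.py` (see that file's hunks below). A hypothetical round trip, assuming a populated instance `mkt`; note that `h5py.File` also accepts in-memory file-like objects such as `io.BytesIO`, and that fields that are all-zero or all-NaN are skipped on write and so fall back to their defaults on load:

    import io

    h5_bytes = mkt.to_h5bin()
    restored = MarketSampleData.from_h5f(io.BytesIO(h5_bytes))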
@@ -557,7 +572,7 @@ class MarginDataSample:

 @this_yaml.register_class
 @enum.unique
-class INVResolution(str, EnumYAMLized):
+class INVResolution(str, Enameled):
     CLRN = "clearance"
     ENFT = "enforcement"
     BOTH = "investigation"

src/mergeron/gen/data_generation.py
@@ -5,12 +5,10 @@ Methods to generate data for analyzing merger enforcement policy.

 from __future__ import annotations

-import io
 import zipfile
 from itertools import starmap
 from typing import TypedDict

-import h5py  # type: ignore
 import numpy as np
 from attrs import Attribute, Converter, define, field, validators
 from joblib import Parallel, cpu_count, delayed  # type: ignore
@@ -448,26 +446,26 @@ class MarketSample:
                 this_yaml.dump(self, _yfh)

         if save_dataset:
-            if all((_dt := self.dataset is None, _et := self.enf_counts is None)):
+            if all((_ndt := self.dataset is None, _net := self.enf_counts is None)):
                 raise ValueError(
                     "No dataset and/or enforcement counts available for saving. "
                     "Generate some data or set save_dataset to False to poceed."
                 )

-            if not _dt:
-                byte_stream = io.BytesIO()
-                with h5py.File(byte_stream, "w") as h5f:
-                    for _a in self.dataset.__attrs_attrs__:
-                        if all((
-                            (_arr := getattr(self.dataset, _a.name)).any(),
-                            not np.isnan(_arr).all(),
-                        )):
-                            h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
+            if not _ndt:
+                # byte_stream = io.BytesIO()
+                # with h5py.File(byte_stream, "w") as h5f:
+                #     for _a in self.dataset.__attrs_attrs__:
+                #         if all((
+                #             (_arr := getattr(self.dataset, _a.name)).any(),
+                #             not np.isnan(_arr).all(),
+                #         )):
+                #             h5f.create_dataset(_a.name, data=_arr, fletcher32=True)

                 with (zpath / f"{name_root}_dataset.h5").open("wb") as _hfh:
-                    _hfh.write(byte_stream.getvalue())
+                    _hfh.write(self.dataset.to_h5bin())

-            if not _et:
+            if not _net:
                 with (zpath / f"{name_root}_enf_counts.yaml").open("w") as _yfh:
                     this_yaml.dump(self.enf_counts, _yfh)

@@ -491,11 +489,11 @@ class MarketSample:

         if _dt:
             with _dp.open("rb") as _hfh:
-                h5f = h5py.File(_hfh)
                 object.__setattr__(  # noqa: PLC2801
                     market_sample_,
                     "dataset",
-                    MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
+                    # MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
+                    MarketSampleData.from_h5f(_hfh),
                 )
         if _et:
             object.__setattr__(  # noqa: PLC2801

src/mergeron/gen/enforcement_stats.py
@@ -9,7 +9,7 @@ from collections.abc import Mapping
 import numpy as np
 from scipy.interpolate import interp1d  # type: ignore

-from .. import VERSION, ArrayBIGINT, EnumYAMLized, this_yaml  # noqa: TID252
+from .. import VERSION, ArrayBIGINT, Enameled, this_yaml  # noqa: TID252
 from ..core import ftc_merger_investigations_data as fid  # noqa: TID252
 from . import INVResolution

@@ -18,7 +18,7 @@ __version__ = VERSION

 @this_yaml.register_class
 @enum.unique
-class IndustryGroup(str, EnumYAMLized):
+class IndustryGroup(str, Enameled):
     ALL = "All Markets"
     GRO = "Grocery Markets"
     OIL = "Oil Markets"
@@ -33,7 +33,7 @@ class IndustryGroup(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class OtherEvidence(str, EnumYAMLized):
+class OtherEvidence(str, Enameled):
     UR = "Unrestricted on additional evidence"
     HD = "Hot Documents Identified"
     HN = "No Hot Documents Identified"
@@ -48,7 +48,7 @@ class OtherEvidence(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class StatsGrpSelector(str, EnumYAMLized):
+class StatsGrpSelector(str, Enameled):
     FC = "ByFirmCount"
     HD = "ByHHIandDelta"
     DL = "ByDelta"
@@ -57,7 +57,7 @@ class StatsGrpSelector(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class StatsReturnSelector(str, EnumYAMLized):
+class StatsReturnSelector(str, Enameled):
     CNT = "count"
     RPT = "rate, point"
     RIN = "rate, interval"
@@ -65,7 +65,7 @@ class StatsReturnSelector(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class SortSelector(str, EnumYAMLized):
+class SortSelector(str, Enameled):
     UCH = "unchanged"
     REV = "reversed"

@@ -236,19 +236,19 @@ def table_no_lku(
     /,
 ) -> str:
     if _table_ind_group not in (
-        igl_ := [_data_array_dict_sub[_v].industry_group for _v in _data_array_dict_sub]
+        _igl := [_data_array_dict_sub[_v].industry_group for _v in _data_array_dict_sub]
     ):
         raise ValueError(
             f"Invalid value for industry group, {f'"{_table_ind_group}"'}."
-            f"Must be one of {igl_!r}"
+            f"Must be one of {_igl!r}"
         )

     tno_ = next(
-        t_
-        for t_ in _data_array_dict_sub
+        _t
+        for _t in _data_array_dict_sub
         if all((
-            _data_array_dict_sub[t_].industry_group == _table_ind_group,
-            _data_array_dict_sub[t_].additional_evidence == _table_evid_cond,
+            _data_array_dict_sub[_t].industry_group == _table_ind_group,
+            _data_array_dict_sub[_t].additional_evidence == _table_evid_cond,
         ))
     )

@@ -259,10 +259,10 @@ def enf_cnts_byfirmcount(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     ndim_in = 1
     return np.vstack([
         np.concatenate([
-            (f,),
-            np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == f][:, ndim_in:]),
+            (_i,),
+            np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == _i][:, ndim_in:]),
         ])
-        for f in np.unique(_cnts_array[:, 0])
+        for _i in np.unique(_cnts_array[:, 0])
     ])

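
In these tallies, `np.einsum("ij->j", a)` sums over rows, i.e. a column-wise total equivalent to `a.sum(axis=0)`:

    import numpy as np

    a = np.array([[1, 2], [3, 4]])
    np.einsum("ij->j", a)   # -> array([4, 6])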
@@ -270,10 +270,10 @@ def enf_cnts_bydelta(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     ndim_in = 2
     return np.vstack([
         np.concatenate([
-            (f_,),
-            np.einsum("ij->j", _cnts_array[_cnts_array[:, 1] == f_][:, ndim_in:]),
+            (_k,),
+            np.einsum("ij->j", _cnts_array[_cnts_array[:, 1] == _k][:, ndim_in:]),
         ])
-        for f_ in HHI_DELTA_KNOTS[:-1]
+        for _k in HHI_DELTA_KNOTS[:-1]
     ])

@@ -286,10 +286,11 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     # aggregation reduces the footprint of this step in memory. Although this point
     # is more relevant for generated than observed data, using the same coding pattern
     # in both cases does make life easier
-    ndim_in = 2
-    nkeys_ = 3
+    _ndim_in = 2
+    _nkeys = 3
     cnts_byhhipostanddelta, cnts_byconczone = (
-        np.zeros(nkeys_ + _cnts_array.shape[1] - ndim_in, dtype=int) for _ in range(2)
+        np.zeros((1, _nkeys + _cnts_array.shape[1] - _ndim_in), dtype=int)
+        for _ in range(2)
     )

     # Prepare to tag clearance stats by presumption zone
@@ -314,7 +315,7 @@
             np.array(
                 (
                     *zone_val,
-                    *np.einsum("ij->j", _cnts_array[:, ndim_in:][conc_test]),
+                    *np.einsum("ij->j", _cnts_array[:, _ndim_in:][conc_test]),
                 ),
                 dtype=int,
             ),
@@ -337,7 +338,7 @@
                 (
                     zone_val,
                     np.einsum(
-                        "ij->j", cnts_byhhipostanddelta[hhi_zone_test][:, nkeys_:]
+                        "ij->j", cnts_byhhipostanddelta[hhi_zone_test][:, _nkeys:]
                     ),
                 ),
                 dtype=int,

src/mergeron/gen/upp_tests.py
@@ -191,7 +191,7 @@ def compute_upp_test_arrays(

     Parameters
     ----------
-    _market_data
+    _market_data_sample
         market data sample
     _upp_test_parms
         guidelines thresholds for testing UPP and related statistics