PyPI - mergeron - Versions diffs - 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl - Mend

mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mergeron might be problematic. Click here for more details.

Files changed (11) hide show

mergeron/__init__.py +51 -2
mergeron/core/guidelines_boundaries.py +16 -13
mergeron/core/pseudorandom_numbers.py +77 -51
mergeron/gen/__init__.py +222 -84
mergeron/gen/data_generation.py +143 -176
mergeron/gen/data_generation_functions.py +68 -118
mergeron/gen/enforcement_stats.py +30 -6
mergeron/gen/upp_tests.py +6 -7
{mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/METADATA +2 -1
{mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/RECORD +11 -11
{mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/WHEEL +0 -0

mergeron/gen/data_generation.py CHANGED Viewed

@@ -5,23 +5,24 @@ Methods to generate data for analyzing merger enforcement policy.
 from __future__ import annotations
-from collections.abc import Sequence
 from typing import TypedDict
 import numpy as np
-from attrs import Attribute, define, field, validators
+from attrs import Attribute, Converter, define, field, validators
 from joblib import Parallel, cpu_count, delayed  # type: ignore
 from numpy.random import SeedSequence
+from ruamel import yaml
-from .. import DEFAULT_REC_RATIO, VERSION, RECForm  # noqa: TID252  # noqa
+from .. import NTHREADS, VERSION, RECForm, this_yaml  # noqa: TID252  # noqa
 from ..core import guidelines_boundaries as gbl  # noqa: TID252
 from ..core.guidelines_boundaries import HMGThresholds  # noqa: TID252
 from . import (
     FM2Constraint,
-    MarketDataSample,
+    MarketSampleData,
     PCMDistribution,
     PCMSpec,
     PriceSpec,
+    SeedSequenceData,
     ShareSpec,
     SHRDistribution,
     SSZConstant,
@@ -32,9 +33,8 @@ from .data_generation_functions import (
     gen_divr_array,
     gen_margin_price_data,
     gen_share_data,
-    parse_seed_seq_list,
 )
-from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
+from .upp_tests import SaveData, compute_upp_test_counts
 __version__ = VERSION
@@ -45,17 +45,8 @@ class SamplingFunctionKWArgs(TypedDict, total=False):
     sample_size: int
     """number of draws to generate"""
-    seed_seq_list: Sequence[SeedSequence] | None
-    """sequence of SeedSequences to ensure replicable data generation with
-    appropriately independent random streams
-    NOTES
-    -----
-    See, :func:`.data_generation_functions.parse_seed_seq_list` for more on
-    specification of this parameter.
-    """
+    seed_data: SeedSequenceData | None
+    """seed data to ensure independedent and replicable draws"""
     nthreads: int
     """number of parallel threads to use"""
@@ -67,26 +58,47 @@ class SamplingFunctionKWArgs(TypedDict, total=False):
     """optionally specify a suffix for the HDF5 array names"""
-@define
+def _seed_data_conv(_v: SeedSequenceData | None, _i: MarketSample) -> SeedSequenceData:
+    if isinstance(_v, SeedSequenceData):
+        return _v
+    _mktshr_dist_type = _i.share_spec.dist_type
+    _price_spec = _i.price_spec
+    _seed_count = 2 if _mktshr_dist_type == SHRDistribution.UNI else 3
+    _seed_count += 1 if _price_spec == PriceSpec.ZERO else 0
+    _sseq_list = tuple(SeedSequence(pool_size=8) for _ in range(_seed_count))
+    _mktshr_rng_seed_seq, _pcm_rng_seed_seq = _sseq_list[:2]
+    _fcount_rng_seed_seq = (
+        None if _mktshr_dist_type == SHRDistribution.UNI else _sseq_list[2]
+    )
+    _pr_rng_seed_seq = _sseq_list[-1] if _price_spec == PriceSpec.ZERO else None
+    return SeedSequenceData(
+        _mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq, _pr_rng_seed_seq
+    )
+@this_yaml.register_class
+@define(kw_only=True)
 class MarketSample:
     """Parameter specification for market data generation."""
     share_spec: ShareSpec = field(
-        kw_only=True,
-        default=ShareSpec(
-            SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
-        ),
+        default=ShareSpec(SHRDistribution.UNI),
         validator=validators.instance_of(ShareSpec),
     )
     """Market-share specification, see :class:`ShareSpec`"""
     pcm_spec: PCMSpec = field(
-        kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
+        default=PCMSpec(PCMDistribution.UNI), validator=validators.instance_of(PCMSpec)
     )
     """Margin specification, see :class:`PCMSpec`"""
     @pcm_spec.validator
-    def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
+    def _psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
         if (
             self.share_spec.recapture_form == RECForm.FIXED
             and _v.firm2_pcm_constraint == FM2Constraint.MNL
@@ -98,29 +110,60 @@ class MarketSample:
             )
     price_spec: PriceSpec = field(
-        kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
+        default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
     )
     """Price specification, see :class:`PriceSpec`"""
     hsr_filing_test_type: SSZConstant = field(
-        kw_only=True,
-        default=SSZConstant.ONE,
-        validator=validators.instance_of(SSZConstant),
+        default=SSZConstant.ONE, validator=validators.instance_of(SSZConstant)
     )
     """Method for modeling HSR filing threholds, see :class:`SSZConstant`"""
-    data: MarketDataSample = field(default=None)
+    sample_size: int = field(default=10**6, validator=validators.instance_of(int))
+    """number of draws to simulate"""
-    enf_counts: UPPTestsCounts = field(default=None)
+    seed_data: SeedSequenceData = field(
+        converter=Converter(_seed_data_conv, takes_self=True)  # type: ignore
+    )
+    """sequence of SeedSequences to ensure replicable data generation with
+    appropriately independent random streams
+    """
-    def __gen_market_sample(
-        self,
-        /,
-        *,
-        sample_size: int,
-        seed_seq_list: Sequence[SeedSequence] | None,
-        nthreads: int,
-    ) -> MarketDataSample:
+    @seed_data.default
+    def __dsd(self) -> SeedSequenceData | None:
+        return _seed_data_conv(None, self)
+    @seed_data.validator
+    def _sdv(
+        _i: MarketSample, _a: Attribute[SeedSequenceData], _v: SeedSequenceData, /
+    ) -> None:
+        if _i.share_spec.dist_type == SHRDistribution.UNI and any((
+            _v.fcounts,
+            _v.price,
+        )):
+            raise ValueError(
+                "Attribute, seed_data.fcounts is ignored as irrelevant when "
+                "market shares are drawn with Uniform distribution. "
+                "Set seed_data.fcounts to None and retry."
+            )
+        if _i.price_spec != PriceSpec.ZERO and _v.price is not None:
+            raise ValueError(
+                "Attribute, seed_data.price is ignored as irrelevant unless "
+                "prices are asymmetric and uncorrelated and price-cost margins "
+                "are also not symmetric. Set seed_data.price to None and retry."
+            )
+    nthreads: int = field(default=NTHREADS, validator=validators.instance_of(int))
+    """number of parallel threads to use"""
+    data: MarketSampleData | None = field(default=None)
+    enf_counts: UPPTestsCounts | None = field(default=None)
+    def _gen_market_sample(
+        self, /, *, sample_size: int, seed_data: SeedSequenceData | None, nthreads: int
+    ) -> MarketSampleData:
         """
         Generate share, diversion ratio, price, and margin data for MarketSpec.
@@ -139,14 +182,15 @@ class MarketSample:
         _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
         _hsr_filing_test_type = self.hsr_filing_test_type
+        _seed_data = seed_data or self.seed_data
         (
             _mktshr_rng_seed_seq,
             _pcm_rng_seed_seq,
             _fcount_rng_seed_seq,
             _pr_rng_seed_seq,
-        ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
+        ) = (getattr(_seed_data, _a) for _a in _seed_data.__dataclass_fields__)
+        _shr_sample_size = 1.0 * (sample_size or self.sample_size)
-        _shr_sample_size = 1.0 * sample_size
         # Scale up sample size to offset discards based on specified criteria
         _shr_sample_size *= _hsr_filing_test_type
         if _dist_firm2_pcm == FM2Constraint.MNL:
@@ -159,7 +203,7 @@ class MarketSample:
             self.share_spec,
             _fcount_rng_seed_seq,
             _mktshr_rng_seed_seq,
-            nthreads,
+            nthreads or self.nthreads,
         )
         _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
@@ -224,7 +268,7 @@ class MarketSample:
             _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
         )
-        return MarketDataSample(
+        return MarketSampleData(
             _frmshr_array,
             _pcm_array,
             _price_array,
@@ -236,41 +280,19 @@ class MarketSample:
             _hhi_delta,
         )
-    def generate_sample(
-        self,
-        /,
-        *,
-        sample_size: int = 10**6,
-        seed_seq_list: Sequence[SeedSequence] | None = None,
-        nthreads: int = 16,
-        save_data_to_file: SaveData = False,
-        saved_array_name_suffix: str = "",
-    ) -> None:
+    def generate_sample(self, /) -> None:
         """Populate :attr:`data` with generated data
-        see :attr:`SamplingFunctionKWArgs` for description of keyord parameters
         Returns
         -------
         None
         """
-        self.data = self.__gen_market_sample(
-            sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
-        )
-        _invalid_array_names = (
-            ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
-            if self.share_spec.dist_type == "Uniform"
-            else ()
-        )
-        save_data_to_hdf5(
-            self.data,
-            saved_array_name_suffix=saved_array_name_suffix,
-            excluded_attrs=_invalid_array_names,
-            save_data_to_file=save_data_to_file,
+        self.data = self._gen_market_sample(
+            seed_data=self.seed_data,
+            sample_size=self.sample_size,
+            nthreads=self.nthreads,
         )
     def __sim_enf_cnts(
@@ -279,11 +301,9 @@ class MarketSample:
         _sim_test_regime: UPPTestRegime,
         /,
         *,
+        seed_data: SeedSequenceData,
         sample_size: int = 10**6,
-        seed_seq_list: Sequence[SeedSequence] | None = None,
-        nthreads: int = 16,
-        save_data_to_file: SaveData = False,
-        saved_array_name_suffix: str = "",
+        nthreads: int = NTHREADS,
     ) -> UPPTestsCounts:
         """Generate market data and etstimate UPP test counts on same.
@@ -302,26 +322,20 @@ class MarketSample:
         sample_size
             Number of draws to generate
-        seed_seq_list
+        seed_data
             List of seed sequences, to assure independent samples in each thread
         nthreads
             Number of parallel processes to use
-        save_data_to_file
-            Whether to save data to an HDF5 file, and where to save it
-        saved_array_name_suffix
-            Suffix to add to the array names in the HDF5 file
         Returns
         -------
             UPPTestCounts ojbect with  of test counts by firm count, ΔHHI and concentration zone
         """
-        _market_data_sample = self.__gen_market_sample(
-            sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
+        _market_data_sample = self._gen_market_sample(
+            sample_size=sample_size, seed_data=seed_data, nthreads=nthreads
         )
         _invalid_array_names = (
@@ -330,42 +344,20 @@ class MarketSample:
             else ()
         )
-        save_data_to_hdf5(
-            _market_data_sample,
-            saved_array_name_suffix=saved_array_name_suffix,
-            excluded_attrs=_invalid_array_names,
-            save_data_to_file=save_data_to_file,
-        )
         _upp_test_arrays = compute_upp_test_counts(
             _market_data_sample, _upp_test_parms, _sim_test_regime
         )
-        save_data_to_hdf5(
-            _upp_test_arrays,
-            saved_array_name_suffix=saved_array_name_suffix,
-            save_data_to_file=save_data_to_file,
-        )
         return _upp_test_arrays
     def __sim_enf_cnts_ll(
-        self,
-        _enf_parm_vec: gbl.HMGThresholds,
-        _sim_test_regime: UPPTestRegime,
-        /,
-        *,
-        sample_size: int = 10**6,
-        seed_seq_list: Sequence[SeedSequence] | None = None,
-        nthreads: int = 16,
-        save_data_to_file: SaveData = False,
-        saved_array_name_suffix: str = "",
+        self, _enf_parm_vec: gbl.HMGThresholds, _sim_test_regime: UPPTestRegime, /
     ) -> UPPTestsCounts:
         """A function to parallelize data-generation and testing
         The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
         the parent function, `sim_enf_cnts()`, except that, if provided,
-        `seed_seq_list` is used to spawn a seed sequence for each thread,
+        `seed_data` is used to spawn a seed sequence for each thread,
         to assure independent samples in each thread, and `nthreads` defines
         the number of parallel processes used. The number of draws in
         each thread may be tuned, by trial and error, to the amount of
@@ -380,33 +372,18 @@ class MarketSample:
         _sim_test_regime
             Configuration to use for testing
-        sample_size
-            Number of draws to simulate
-        seed_seq_list
-            List of seed sequences, to assure independent samples in each thread
-        nthreads
-            Number of parallel processes to use
-        save_data_to_file
-            Whether to save data to an HDF5 file, and where to save it
-        saved_array_name_suffix
-            Suffix to add to the array names in the HDF5 file
         Returns
         -------
             Arrays of enforcement counts or clearance counts by firm count,
             ΔHHI and concentration zone
         """
-        _sample_sz = sample_size
+        _sample_sz = self.sample_size
         _subsample_sz = 10**6
         _iter_count = (
             int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
         )
-        _thread_count = cpu_count()
+        _thread_count = self.nthreads or cpu_count()
         if (
             self.share_spec.recapture_form != RECForm.OUTIN
@@ -420,27 +397,35 @@ class MarketSample:
                 )
             )
-        _rng_seed_seq_list = [None] * _iter_count
-        if seed_seq_list:
-            _rng_seed_seq_list = list(
-                zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True)  # type: ignore
+        _rng_seed_data = [
+            SeedSequenceData(*_z)
+            for _z in (
+                zip(
+                    *[
+                        _s.spawn(_iter_count) if _s else [None] * _iter_count
+                        for _s in (
+                            getattr(self.seed_data, _a)
+                            for _a in self.seed_data.__dataclass_fields__
+                        )
+                    ],
+                    strict=True,
+                )
             )
+        ]
-        _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
+        __sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
             "sample_size": _subsample_sz,
-            "save_data_to_file": save_data_to_file,
-            "nthreads": nthreads,
+            "nthreads": self.nthreads,
         })
         _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
             delayed(self.__sim_enf_cnts)(
                 _enf_parm_vec,
                 _sim_test_regime,
-                **_sim_enf_cnts_kwargs,
-                saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
-                seed_seq_list=_rng_seed_seq_list_ch,
+                **__sim_enf_cnts_kwargs,
+                seed_data=_rng_seed_data_ch,
             )
-            for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
+            for _iter_id, _rng_seed_data_ch in enumerate(_rng_seed_data)
         )
         _res_list_stacks = UPPTestsCounts(*[
@@ -450,10 +435,10 @@ class MarketSample:
         upp_test_results = UPPTestsCounts(*[
             np.column_stack((
                 (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
-                np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
+                np.einsum("ijk->jk", _gv[:, :, _h:], dtype=np.int64),
             ))
             for _g, _h in zip(
-                _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
+                _res_list_stacks.__dataclass_fields__, [1, 1, 3], strict=True
             )
         ])
         del _res_list, _res_list_stacks
@@ -461,16 +446,7 @@ class MarketSample:
         return upp_test_results
     def estimate_enf_counts(
-        self,
-        _enf_parm_vec: HMGThresholds,
-        _upp_test_regime: UPPTestRegime,
-        /,
-        *,
-        sample_size: int = 10**6,
-        seed_seq_list: Sequence[SeedSequence] | None = None,
-        nthreads: int = 16,
-        save_data_to_file: SaveData = False,
-        saved_array_name_suffix: str = "",
+        self, _enf_parm_vec: HMGThresholds, _upp_test_regime: UPPTestRegime, /
     ) -> None:
         """Populate :attr:`enf_counts` with estimated UPP test counts.
@@ -486,21 +462,6 @@ class MarketSample:
             merging-firm GUPPI and maximum diversion ratio between the
             merging firms
-        sample_size
-            Number of draws to simulate
-        seed_seq_list
-            List of seed sequences, to assure independent samples in each thread
-        nthreads
-            Number of parallel processes to use
-        save_data_to_file
-            Whether to save data to an HDF5 file, and where to save it
-        saved_array_name_suffix
-            Suffix to add to the array names in the HDF5 file
         Returns
         -------
         None
@@ -508,22 +469,28 @@ class MarketSample:
         """
         if self.data is None:
-            self.enf_counts = self.__sim_enf_cnts_ll(
-                _enf_parm_vec,
-                _upp_test_regime,
-                sample_size=sample_size,
-                seed_seq_list=seed_seq_list,
-                nthreads=nthreads,
-                save_data_to_file=save_data_to_file,
-                saved_array_name_suffix=saved_array_name_suffix,
-            )
+            self.enf_counts = self.__sim_enf_cnts_ll(_enf_parm_vec, _upp_test_regime)
         else:
             self.enf_counts = compute_upp_test_counts(
                 self.data, _enf_parm_vec, _upp_test_regime
             )
-            if save_data_to_file:
-                save_data_to_hdf5(
-                    self.enf_counts,
-                    save_data_to_file=save_data_to_file,
-                    saved_array_name_suffix=saved_array_name_suffix,
-                )
+    @classmethod
+    def to_yaml(
+        cls, _r: yaml.representer.SafeRepresenter, _d: MarketSample
+    ) -> yaml.MappingNode:
+        _ret: yaml.MappingNode = _r.represent_mapping(
+            f"!{cls.__name__}",
+            {
+                _a.name: getattr(_d, _a.name)
+                for _a in _d.__attrs_attrs__
+                if _a.type not in (MarketSampleData, UPPTestsCounts)
+            },
+        )
+        return _ret
+    @classmethod
+    def from_yaml(
+        cls, _c: yaml.constructor.SafeConstructor, _n: yaml.MappingNode
+    ) -> MarketSample:
+        return cls(**_c.construct_mapping(_n))

mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl

Potentially problematic release.

mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl