PyPI - mergeron - Versions diffs - 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl - Mend

mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mergeron might be problematic. Click here for more details.

Files changed (39) hide show

mergeron/__init__.py +26 -6
mergeron/core/__init__.py +5 -65
mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
mergeron/core/ftc_merger_investigations_data.py +147 -101
mergeron/core/guidelines_boundaries.py +290 -1078
mergeron/core/guidelines_boundary_functions.py +1128 -0
mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
mergeron/core/pseudorandom_numbers.py +16 -22
mergeron/data/__init__.py +3 -0
mergeron/data/damodaran_margin_data.xls +0 -0
mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
mergeron/demo/__init__.py +3 -0
mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
mergeron/gen/__init__.py +258 -246
mergeron/gen/data_generation.py +473 -224
mergeron/gen/data_generation_functions.py +876 -0
mergeron/gen/enforcement_stats.py +355 -0
mergeron/gen/upp_tests.py +171 -259
mergeron-2025.739265.0.dist-info/METADATA +115 -0
mergeron-2025.739265.0.dist-info/RECORD +23 -0
{mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
mergeron/License.txt +0 -16
mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
mergeron/core/excel_helper.py +0 -257
mergeron/core/proportions_tests.py +0 -520
mergeron/ext/__init__.py +0 -5
mergeron/ext/tol_colors.py +0 -851
mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
mergeron/gen/investigations_stats.py +0 -709
mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
mergeron-2024.738953.1.dist-info/METADATA +0 -93
mergeron-2024.738953.1.dist-info/RECORD +0 -30
/mergeron/{core → data}/ftc_invdata.msgpack +0 -0

mergeron/gen/data_generation.py CHANGED Viewed

@@ -1,280 +1,529 @@
 """
-Routines to generate data for analyzing merger enforcement policy.
+Methods to generate data for analyzing merger enforcement policy.
 """
 from __future__ import annotations
-from importlib.metadata import version
+from collections.abc import Sequence
+from typing import TypedDict
-import attrs
 import numpy as np
+from attrs import Attribute, define, field, validators
+from joblib import Parallel, cpu_count, delayed  # type: ignore
 from numpy.random import SeedSequence
-from numpy.typing import NDArray
-from .. import _PKG_NAME, RECConstants  # noqa: TID252
+from .. import DEFAULT_REC_RATIO, VERSION, RECForm  # noqa: TID252  # noqa
+from ..core import guidelines_boundaries as gbl  # noqa: TID252
+from ..core.guidelines_boundaries import HMGThresholds  # noqa: TID252
 from . import (
-    EMPTY_ARRAY_DEFAULT,
-    TF,
-    FM2Constants,
+    FM2Constraint,
     MarketDataSample,
-    MarketSampleSpec,
-    PRIConstants,
-    SHRConstants,
-    SSZConstants,
+    PCMDistribution,
+    PCMSpec,
+    PriceSpec,
+    ShareSpec,
+    SHRDistribution,
+    SSZConstant,
+    UPPTestRegime,
+    UPPTestsCounts,
 )
-from ._data_generation_functions_nonpublic import (
-    _gen_market_shares_dirichlet,  # noqa: F401 easter-egg for external modules
-    _gen_market_shares_uniform,  # noqa: F401 easter-egg for external modules
-    _gen_pcm_data,
-    _gen_pr_data,
-    _gen_share_data,
+from .data_generation_functions import (
+    gen_divr_array,
+    gen_margin_price_data,
+    gen_share_data,
+    parse_seed_seq_list,
 )
+from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
-__version__ = version(_PKG_NAME)
+__version__ = VERSION
-def gen_market_sample(
-    _mkt_sample_spec: MarketSampleSpec,
-    /,
-    *,
-    seed_seq_list: list[SeedSequence] | None = None,
-    nthreads: int = 16,
-) -> MarketDataSample:
-    """
-    Generate share, diversion ratio, price, and margin data based on supplied parameters
-    Diversion ratios generated assuming share-proportionality, unless
-    `recapture_spec` = "proportional", in which case both firms' recapture rate
-    is set to `r_bar`.
-    The tuple of SeedSequences, if specified, is parsed in the following order
-    for generating the relevant random variates:
-    1.) quantity shares
-    2.) price-cost margins
-    3.) firm-counts, from :code:`[2, 2 + len(firm_counts_weights)]`,
-    weighted by :code:`firm_counts_weights`, where relevant
-    4.) prices, if :code:`pr_sym_spec == PRIConstants.ZERO`.
-    Parameters
-    ----------
-    _mkt_sample_spec
-        class specifying parameters for data generation
-    seed_seq_list
-        tuple of SeedSequences to ensure replicable data generation with
-        appropriately independent random streams
-    nthreads
-        optionally specify the number of CPU threads for the PRNG
-    Returns
-    -------
-        Merging firms' shares, margins, etc. for each hypothetical  merger
-        in the sample
+class SamplingFunctionKWArgs(TypedDict, total=False):
+    "Keyword arguments of sampling methods defined below"
+    sample_size: int
+    """number of draws to generate"""
+    seed_seq_list: Sequence[SeedSequence] | None
+    """sequence of SeedSequences to ensure replicable data generation with
+    appropriately independent random streams
+    NOTES
+    -----
+    See, :func:`.data_generation_functions.parse_seed_seq_list` for more on
+    specification of this parameter.
     """
-    _mkt_sample_spec = _mkt_sample_spec or MarketSampleSpec()
+    nthreads: int
+    """number of parallel threads to use"""
-    _recapture_spec = _mkt_sample_spec.share_spec.recapture_spec
-    _dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
-    _dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
-    _hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
+    save_data_to_file: SaveData
+    """optionally save data to HDF5 file"""
-    (
-        _mktshr_rng_seed_seq,
-        _pcm_rng_seed_seq,
-        _fcount_rng_seed_seq,
-        _pr_rng_seed_seq,
-    ) = parse_seed_seq_list(
-        seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.pr_sym_spec
-    )
+    saved_array_name_suffix: str
+    """optionally specify a suffix for the HDF5 array names"""
-    _shr_sample_size = 1.0 * _mkt_sample_spec.sample_size
-    # Scale up sample size to offset discards based on specified criteria
-    _shr_sample_size *= _hsr_filing_test_type
-    if _dist_firm2_pcm == FM2Constants.MNL:
-        _shr_sample_size *= SSZConstants.MNL_DEP
-    _mkt_sample_spec_here = attrs.evolve(
-        _mkt_sample_spec, sample_size=int(_shr_sample_size)
-    )
-    del _shr_sample_size
-    # Generate share data
-    _mktshr_data = _gen_share_data(
-        _mkt_sample_spec_here, _fcount_rng_seed_seq, _mktshr_rng_seed_seq, nthreads
-    )
+@define
+class MarketSample:
+    """Parameter specification for market data generation."""
-    _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
-        getattr(_mktshr_data, _f)
-        for _f in (
-            "mktshr_array",
-            "fcounts",
-            "aggregate_purchase_prob",
-            "nth_firm_share",
-        )
+    share_spec: ShareSpec = field(
+        kw_only=True,
+        default=ShareSpec(
+            SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
+        ),
+        validator=validators.instance_of(ShareSpec),
     )
+    """Market-share specification, see :class:`ShareSpec`"""
-    # Generate merging-firm price data
-    _price_data = _gen_pr_data(
-        _mktshr_array[:, :2], _nth_firm_share, _mkt_sample_spec_here, _pr_rng_seed_seq
+    pcm_spec: PCMSpec = field(
+        kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
     )
+    """Margin specification, see :class:`PCMSpec`"""
+    @pcm_spec.validator  # pyright: ignore
+    def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
+        if (
+            self.share_spec.recapture_form == RECForm.FIXED
+            and _v.firm2_pcm_constraint == FM2Constraint.MNL
+        ):
+            raise ValueError(
+                f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
+                "requires Firm 2 margin must have property, "
+                f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
+            )
-    _price_array, _hsr_filing_test = (
-        getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
+    price_spec: PriceSpec = field(
+        kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
     )
+    """Price specification, see :class:`PriceSpec`"""
-    if _hsr_filing_test_type != SSZConstants.ONE:
-        _mktshr_array = _mktshr_array[_hsr_filing_test]
-        _fcounts = _fcounts[_hsr_filing_test]
-        _aggregate_purchase_prob = _aggregate_purchase_prob[_hsr_filing_test]
-        _nth_firm_share = _nth_firm_share[_hsr_filing_test]
-        _price_array = _price_array[_hsr_filing_test]
-    # Calculate diversion ratios
-    _divr_array = gen_divr_array(
-        _mktshr_array[:, :2],
-        _mkt_sample_spec_here.recapture_rate or 0.8,
-        _recapture_spec,
-        _aggregate_purchase_prob,
+    hsr_filing_test_type: SSZConstant = field(
+        kw_only=True,
+        default=SSZConstant.ONE,
+        validator=validators.instance_of(SSZConstant),
     )
+    """Method for modeling HSR filing threholds, see :class:`SSZConstant`"""
+    data: MarketDataSample = field(default=None)
+    enf_counts: UPPTestsCounts = field(default=None)
+    def __gen_market_sample(
+        self,
+        /,
+        *,
+        sample_size: int,
+        seed_seq_list: Sequence[SeedSequence] | None,
+        nthreads: int,
+    ) -> MarketDataSample:
+        """
+        Generate share, diversion ratio, price, and margin data for MarketSpec.
+        see :attr:`SamplingFunctionKWArgs` for description of keyord parameters
+        Returns
+        -------
+            Merging firms' shares, margins, etc. for each hypothetical  merger
+            in the sample
+        """
+        _recapture_form = self.share_spec.recapture_form
+        _recapture_ratio = self.share_spec.recapture_ratio
+        _dist_type_mktshr = self.share_spec.dist_type
+        _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
+        _hsr_filing_test_type = self.hsr_filing_test_type
+        (
+            _mktshr_rng_seed_seq,
+            _pcm_rng_seed_seq,
+            _fcount_rng_seed_seq,
+            _pr_rng_seed_seq,
+        ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
+        _shr_sample_size = 1.0 * sample_size
+        # Scale up sample size to offset discards based on specified criteria
+        _shr_sample_size *= _hsr_filing_test_type
+        if _dist_firm2_pcm == FM2Constraint.MNL:
+            _shr_sample_size *= SSZConstant.MNL_DEP
+        _shr_sample_size = int(_shr_sample_size)
+        # Generate share data
+        _mktshr_data = gen_share_data(
+            _shr_sample_size,
+            self.share_spec,
+            _fcount_rng_seed_seq,
+            _mktshr_rng_seed_seq,
+            nthreads,
+        )
-    # Generate margin data
-    _pcm_data = _gen_pcm_data(
-        _mktshr_array[:, :2],
-        _mkt_sample_spec_here,
-        _price_array,
-        _aggregate_purchase_prob,
-        _pcm_rng_seed_seq,
-        nthreads,
-    )
-    _pcm_array, _mnl_test_rows = (
-        getattr(_pcm_data, _f) for _f in ("pcm_array", "mnl_test_array")
-    )
+        _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
+            getattr(_mktshr_data, _f)
+            for _f in (
+                "mktshr_array",
+                "fcounts",
+                "aggregate_purchase_prob",
+                "nth_firm_share",
+            )
+        )
-    _s_size = _mkt_sample_spec.sample_size  # originally-specified sample size
-    if _dist_firm2_pcm == FM2Constants.MNL:
-        _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
-        _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
-        _price_array = _price_array[_mnl_test_rows][:_s_size]
-        _fcounts = _fcounts[_mnl_test_rows][:_s_size]
-        _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
-        _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
-        _divr_array = _divr_array[_mnl_test_rows][:_s_size]
+        # Generate merging-firm price and PCM data
+        _margin_data, _price_data = gen_margin_price_data(
+            _mktshr_array[:, :2],
+            _nth_firm_share,
+            _aggregate_purchase_prob,
+            self.pcm_spec,
+            self.price_spec,
+            self.hsr_filing_test_type,
+            _pcm_rng_seed_seq,
+            _pr_rng_seed_seq,
+            nthreads,
+        )
-    del _mnl_test_rows, _s_size
+        _price_array, _hsr_filing_test = (
+            getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
+        )
-    _frmshr_array = _mktshr_array[:, :2]
-    _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[:, None]
+        _pcm_array, _mnl_test_rows = (
+            getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
+        )
-    _hhi_post = (
-        _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
-    )
+        _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
+        _s_size = sample_size  # originally-specified sample size
+        if _dist_firm2_pcm == FM2Constraint.MNL:
+            _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
+            _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
+            _price_array = _price_array[_mnl_test_rows][:_s_size]
+            _fcounts = _fcounts[_mnl_test_rows][:_s_size]
+            _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
+                :_s_size
+            ]
+            _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
+        # Calculate diversion ratios
+        _divr_array = gen_divr_array(
+            _recapture_form,
+            _recapture_ratio,
+            _mktshr_array[:, :2],
+            _aggregate_purchase_prob,
+        )
-    return MarketDataSample(
-        _frmshr_array,
-        _pcm_array,
-        _price_array,
-        _fcounts,
-        _aggregate_purchase_prob,
-        _nth_firm_share,
-        _divr_array,
-        _hhi_post,
-        _hhi_delta,
-    )
+        del _mnl_test_rows, _s_size
+        _frmshr_array = _mktshr_array[:, :2]
+        _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
+            :, None
+        ]
-def parse_seed_seq_list(
-    _sseq_list: list[SeedSequence] | None,
-    _dist_type_mktshr: SHRConstants,
-    _pr_sym_spec: PRIConstants,
-    /,
-) -> tuple[SeedSequence, SeedSequence, SeedSequence | None, SeedSequence | None]:
-    """Initialize RNG seed sequences to ensure independence of distinct random streams."""
-    _fcount_rng_seed_seq: SeedSequence | None = None
-    _pr_rng_seed_seq: SeedSequence | None = None
-    if _pr_sym_spec == PRIConstants.ZERO:
-        _pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
-    if _dist_type_mktshr == SHRConstants.UNI:
-        _fcount_rng_seed_seq = None
-        _seed_count = 2
-        _mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
-            _sseq_list[:_seed_count]
-            if _sseq_list
-            else (SeedSequence(pool_size=8) for _ in range(_seed_count))
+        _hhi_post = (
+            _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
         )
-    else:
-        _seed_count = 3
-        (_mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq) = (
-            _sseq_list[:_seed_count]
-            if _sseq_list
-            else (SeedSequence(pool_size=8) for _ in range(_seed_count))
+        return MarketDataSample(
+            _frmshr_array,
+            _pcm_array,
+            _price_array,
+            _fcounts,
+            _aggregate_purchase_prob,
+            _nth_firm_share,
+            _divr_array,
+            _hhi_post,
+            _hhi_delta,
         )
-    return (
-        _mktshr_rng_seed_seq,
-        _pcm_rng_seed_seq,
-        _fcount_rng_seed_seq,
-        _pr_rng_seed_seq,
-    )
+    def generate_sample(
+        self,
+        /,
+        *,
+        sample_size: int = 10**6,
+        seed_seq_list: Sequence[SeedSequence] | None = None,
+        nthreads: int = 16,
+        save_data_to_file: SaveData = False,
+        saved_array_name_suffix: str = "",
+    ) -> None:
+        """Populate :attr:`data` with generated data
+        see :attr:`SamplingFunctionKWArgs` for description of keyord parameters
-def gen_divr_array(
-    _frmshr_array: NDArray[np.floating[TF]],
-    _r_bar: float,
-    _recapture_spec: RECConstants = RECConstants.INOUT,
-    _aggregate_purchase_prob: NDArray[np.floating[TF]] = EMPTY_ARRAY_DEFAULT,
-    /,
-) -> NDArray[np.float64]:
-    """
-    Given merging-firm shares and related parameters, return diverion ratios.
+        Returns
+        -------
+        None
+        """
+        self.data = self.__gen_market_sample(
+            sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
+        )
-    If recapture is specified as "Outside-in" (RECConstants.OUTIN), then the
-    choice-probability for the outside good must be supplied.
+        _invalid_array_names = (
+            ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
+            if self.share_spec.dist_type == "Uniform"
+            else ()
+        )
+        save_data_to_hdf5(
+            self.data,
+            saved_array_name_suffix=saved_array_name_suffix,
+            excluded_attrs=_invalid_array_names,
+            save_data_to_file=save_data_to_file,
+        )
-    Parameters
-    ----------
-    _frmshr_array
-        Merging-firm shares.
+    def __sim_enf_cnts(
+        self,
+        _upp_test_parms: gbl.HMGThresholds,
+        _sim_test_regime: UPPTestRegime,
+        /,
+        *,
+        sample_size: int = 10**6,
+        seed_seq_list: Sequence[SeedSequence] | None = None,
+        nthreads: int = 16,
+        save_data_to_file: SaveData = False,
+        saved_array_name_suffix: str = "",
+    ) -> UPPTestsCounts:
+        """Generate market data and etstimate UPP test counts on same.
-    _r_bar
-        If recapture is proportional or inside-out, the recapture rate
-        for the firm with the smaller share.
+        Parameters
+        ----------
-    _aggregate_purchase_prob
-        1 minus probability that the outside good is chosen; converts
-        market shares to choice probabilities by multiplication.
+        _upp_test_parms
+            Guidelines thresholds for testing UPP and related statistics
-    _recapture_spec
-        Enum specifying Fixed (proportional), Inside-out, or Outside-in
+        _sim_test_regime
+            Configuration to use for testing; UPPTestsRegime object
+            specifying whether investigation results in enforcement, clearance,
+            or both; and aggregation methods used for GUPPI and diversion ratio
+            measures
-    Returns
-    -------
-        Merging-firm diversion ratios for mergers in the sample.
+        sample_size
+            Number of draws to generate
-    """
+        seed_seq_list
+            List of seed sequences, to assure independent samples in each thread
+        nthreads
+            Number of parallel processes to use
+        save_data_to_file
+            Whether to save data to an HDF5 file, and where to save it
+        saved_array_name_suffix
+            Suffix to add to the array names in the HDF5 file
-    _divr_array: NDArray[np.float64]
-    if _recapture_spec == RECConstants.FIXED:
-        _divr_array = _r_bar * _frmshr_array[:, ::-1] / (1 - _frmshr_array)
-    else:
-        _purchprob_array = _aggregate_purchase_prob * _frmshr_array
-        _divr_array = _purchprob_array[:, ::-1] / (1 - _purchprob_array)
-    _divr_assert_test = (
-        (np.round(np.einsum("ij->i", _frmshr_array), 15) == 1)
-        | (np.argmin(_frmshr_array, axis=1) == np.argmax(_divr_array, axis=1))
-    )[:, None]
-    if not all(_divr_assert_test):
-        raise ValueError(
-            "{} {} {} {}".format(
-                "Data construction fails tests:",
-                "the index of min(s_1, s_2) must equal",
-                "the index of max(d_12, d_21), for all draws.",
-                "unless frmshr_array sums to 1.00.",
+        Returns
+        -------
+            UPPTestCounts ojbect with  of test counts by firm count, ΔHHI and concentration zone
+        """
+        _market_data_sample = self.__gen_market_sample(
+            sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
+        )
+        _invalid_array_names = (
+            ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
+            if self.share_spec.dist_type == "Uniform"
+            else ()
+        )
+        save_data_to_hdf5(
+            _market_data_sample,
+            saved_array_name_suffix=saved_array_name_suffix,
+            excluded_attrs=_invalid_array_names,
+            save_data_to_file=save_data_to_file,
+        )
+        _upp_test_arrays = compute_upp_test_counts(
+            _market_data_sample, _upp_test_parms, _sim_test_regime
+        )
+        save_data_to_hdf5(
+            _upp_test_arrays,
+            saved_array_name_suffix=saved_array_name_suffix,
+            save_data_to_file=save_data_to_file,
+        )
+        return _upp_test_arrays
+    def __sim_enf_cnts_ll(
+        self,
+        _enf_parm_vec: gbl.HMGThresholds,
+        _sim_test_regime: UPPTestRegime,
+        /,
+        *,
+        sample_size: int = 10**6,
+        seed_seq_list: Sequence[SeedSequence] | None = None,
+        nthreads: int = 16,
+        save_data_to_file: SaveData = False,
+        saved_array_name_suffix: str = "",
+    ) -> UPPTestsCounts:
+        """A function to parallelize data-generation and testing
+        The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
+        the parent function, `sim_enf_cnts()`, except that, if provided,
+        `seed_seq_list` is used to spawn a seed sequence for each thread,
+        to assure independent samples in each thread, and `nthreads` defines
+        the number of parallel processes used. The number of draws in
+        each thread may be tuned, by trial and error, to the amount of
+        memory (RAM) available.
+        Parameters
+        ----------
+        _enf_parm_vec
+            Guidelines thresholds to test against
+        _sim_test_regime
+            Configuration to use for testing
+        sample_size
+            Number of draws to simulate
+        seed_seq_list
+            List of seed sequences, to assure independent samples in each thread
+        nthreads
+            Number of parallel processes to use
+        save_data_to_file
+            Whether to save data to an HDF5 file, and where to save it
+        saved_array_name_suffix
+            Suffix to add to the array names in the HDF5 file
+        Returns
+        -------
+            Arrays of enforcement counts or clearance counts by firm count,
+            ΔHHI and concentration zone
+        """
+        _sample_sz = sample_size
+        _subsample_sz = 10**6
+        _iter_count = (
+            int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
+        )
+        _thread_count = cpu_count()
+        if (
+            self.share_spec.recapture_form != RECForm.OUTIN
+            and self.share_spec.recapture_ratio != _enf_parm_vec.rec
+        ):
+            raise ValueError(
+                "{} {} {}".format(
+                    f"Recapture ratio from market sample spec, {self.share_spec.recapture_ratio}",
+                    f"must match the value, {_enf_parm_vec.rec}",
+                    "the guidelines thresholds vector.",
+                )
             )
+        _rng_seed_seq_list = [None] * _iter_count
+        if seed_seq_list:
+            _rng_seed_seq_list = list(
+                zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True)  # type: ignore
+            )
+        _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
+            "sample_size": _subsample_sz,
+            "save_data_to_file": save_data_to_file,
+            "nthreads": nthreads,
+        })
+        _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
+            delayed(self.__sim_enf_cnts)(
+                _enf_parm_vec,
+                _sim_test_regime,
+                **_sim_enf_cnts_kwargs,
+                saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",  # pyright: ignore
+                seed_seq_list=_rng_seed_seq_list_ch,  # pyright: ignore
+            )
+            for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
         )
-    return _divr_array
+        _res_list_stacks = UPPTestsCounts(*[
+            np.stack([getattr(_j, _k) for _j in _res_list])
+            for _k in ("by_firm_count", "by_delta", "by_conczone")
+        ])
+        upp_test_results = UPPTestsCounts(*[
+            np.column_stack((
+                (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
+                np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
+            ))
+            for _g, _h in zip(
+                _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
+            )
+        ])
+        del _res_list, _res_list_stacks
+        return upp_test_results
+    def estimate_enf_counts(
+        self,
+        _enf_parm_vec: HMGThresholds,
+        _upp_test_regime: UPPTestRegime,
+        /,
+        *,
+        sample_size: int = 10**6,
+        seed_seq_list: Sequence[SeedSequence] | None = None,
+        nthreads: int = 16,
+        save_data_to_file: SaveData = False,
+        saved_array_name_suffix: str = "",
+    ) -> None:
+        """Populate :attr:`enf_counts` with estimated UPP test counts.
+        Parameters
+        ----------
+        _enf_parm_vec
+            Threshold values for various Guidelines criteria
+        _upp_test_regime
+            Specifies whether to analyze enforcement, clearance, or both
+            and the GUPPI and diversion ratio aggregators employed, with
+            default being to analyze enforcement based on the maximum
+            merging-firm GUPPI and maximum diversion ratio between the
+            merging firms
+        sample_size
+            Number of draws to simulate
+        seed_seq_list
+            List of seed sequences, to assure independent samples in each thread
+        nthreads
+            Number of parallel processes to use
+        save_data_to_file
+            Whether to save data to an HDF5 file, and where to save it
+        saved_array_name_suffix
+            Suffix to add to the array names in the HDF5 file
+        Returns
+        -------
+        None
+        """
+        if self.data is None:
+            self.enf_counts = self.__sim_enf_cnts_ll(
+                _enf_parm_vec,
+                _upp_test_regime,
+                sample_size=sample_size,
+                seed_seq_list=seed_seq_list,
+                nthreads=nthreads,
+                save_data_to_file=save_data_to_file,
+                saved_array_name_suffix=saved_array_name_suffix,
+            )
+        else:
+            self.enf_counts = compute_upp_test_counts(
+                self.data, _enf_parm_vec, _upp_test_regime
+            )
+            if save_data_to_file:
+                save_data_to_hdf5(
+                    self.enf_counts,
+                    save_data_to_file=save_data_to_file,
+                    saved_array_name_suffix=saved_array_name_suffix,
+                )

mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl

Potentially problematic release.

mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl