PyPI - mergeron - Versions diffs - 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl - Mend

mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mergeron might be problematic. Click here for more details.

Files changed (11) hide show

mergeron/__init__.py +51 -2
mergeron/core/guidelines_boundaries.py +16 -13
mergeron/core/pseudorandom_numbers.py +77 -51
mergeron/gen/__init__.py +222 -84
mergeron/gen/data_generation.py +143 -176
mergeron/gen/data_generation_functions.py +68 -118
mergeron/gen/enforcement_stats.py +30 -6
mergeron/gen/upp_tests.py +6 -7
{mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/METADATA +2 -1
{mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/RECORD +11 -11
{mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/WHEEL +0 -0

mergeron/__init__.py CHANGED Viewed

@@ -1,18 +1,25 @@
 from __future__ import annotations
 import enum
+from multiprocessing import cpu_count
 from pathlib import Path
 from typing import Literal
 import numpy as np
 from numpy.typing import NDArray
+from ruamel import yaml
 _PKG_NAME: str = Path(__file__).parent.stem
-VERSION = "2025.739265.2"
+VERSION = "2025.739290.0"
 __version__ = VERSION
+this_yaml = yaml.YAML(typ="safe", pure=True)
+this_yaml.constructor.deep_construct = True
+this_yaml.indent(mapping=2, sequence=4, offset=2)
 DATA_DIR: Path = Path.home() / _PKG_NAME
 """
 Defines a subdirectory named for this package in the user's home path.
@@ -22,6 +29,13 @@ If the subdirectory doesn't exist, it is created on package invocation.
 if not DATA_DIR.is_dir():
     DATA_DIR.mkdir(parents=False)
+DEFAULT_REC_RATIO = 0.85
+EMPTY_ARRAYDOUBLE = np.array([], float)
+EMPTY_ARRAYINT = np.array([], int)
+NTHREADS = 2 * cpu_count()
 np.set_printoptions(precision=24, floatmode="fixed")
 type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
@@ -33,7 +47,24 @@ type ArrayINT = NDArray[np.intp]
 type ArrayDouble = NDArray[np.float64]
 type ArrayBIGINT = NDArray[np.int64]
-DEFAULT_REC_RATIO = 0.85
+## Add yaml representer, constructor for NoneType
+(_, _) = (
+    this_yaml.representer.add_representer(
+        type(None), lambda _r, _d: _r.represent_scalar("!None", "none")
+    ),
+    this_yaml.constructor.add_constructor("!None", lambda _c, _n, /: None),
+)
+## Add yaml representer, constructor for ndarray
+(_, _) = (
+    this_yaml.representer.add_representer(
+        np.ndarray,
+        lambda _r, _d: _r.represent_sequence("!ndarray", (_d.tolist(), _d.dtype.str)),
+    ),
+    this_yaml.constructor.add_constructor(
+        "!ndarray", lambda _c, _n, /: np.array(*_c.construct_sequence(_n))
+    ),
+)
 @enum.unique
@@ -63,3 +94,21 @@ class UPPAggrSelector(enum.StrEnum):
     OSA = "own-share weighted average"
     OSD = "own-share weighted distance"
     OSG = "own-share weighted geometric mean"
+for _typ in (RECForm, UPPAggrSelector):
+    #  NOTE: If additional enums are defined in this module,
+    #  add them to the list above
+    _, _ = (
+        this_yaml.representer.add_representer(
+            _typ,
+            lambda _r, _d: _r.represent_scalar(f"!{_d.__class__.__name__}", _d.name),
+        ),
+        this_yaml.constructor.add_constructor(
+            f"!{_typ.__name__}",
+            lambda _c, _n, /: getattr(
+                globals().get(_n.tag.lstrip("!")), _c.construct_scalar(_n)
+            ),
+        ),
+    )

mergeron/core/guidelines_boundaries.py CHANGED Viewed

@@ -62,7 +62,7 @@ class GuidelinesThresholds:
     Year of publication of the Guidelines
     """
-    safeharbor: HMGThresholds = field(kw_only=True, default=None)
+    safeharbor: HMGThresholds = field(kw_only=True, default=None, init=False)
     """
     Negative presumption quantified on various measures
@@ -70,7 +70,7 @@ class GuidelinesThresholds:
     diversion ratio limit, CMCR, and IPR
     """
-    presumption: HMGThresholds = field(kw_only=True, default=None)
+    presumption: HMGThresholds = field(kw_only=True, default=None, init=False)
     """
     Presumption of harm defined in HMG
@@ -78,7 +78,7 @@ class GuidelinesThresholds:
     diversion ratio limit, CMCR, and IPR
     """
-    imputed_presumption: HMGThresholds = field(kw_only=True, default=None)
+    imputed_presumption: HMGThresholds = field(kw_only=True, default=None, init=False)
     """
     Presumption of harm imputed from guidelines
@@ -153,25 +153,28 @@ class ConcentrationBoundary:
     """Concentration parameters, boundary coordinates, and area under concentration boundary."""
     measure_name: Literal[
-        "ΔHHI", "Combined share", "Pre-merger HHI", "Post-merger HHI"
+        "ΔHHI",
+        "Combined share",
+        "Pre-merger HHI Contribution",
+        "Post-merger HHI Contribution",
     ] = field(kw_only=False, default="ΔHHI")
     @measure_name.validator
-    def __mnv(
+    def _mnv(
         _instance: ConcentrationBoundary, _attribute: Attribute[str], _value: str, /
     ) -> None:
         if _value not in (
             "ΔHHI",
             "Combined share",
-            "Pre-merger HHI",
-            "Post-merger HHI",
+            "Pre-merger HHI Contribution",
+            "Post-merger HHI Contribution",
         ):
             raise ValueError(f"Invalid name for a concentration measure, {_value!r}.")
     threshold: float = field(kw_only=False, default=0.01)
     @threshold.validator
-    def __tv(
+    def _tv(
         _instance: ConcentrationBoundary, _attribute: Attribute[float], _value: float, /
     ) -> None:
         if not 0 <= _value <= 1:
@@ -193,9 +196,9 @@ class ConcentrationBoundary:
                 _conc_fn = gbfn.hhi_delta_boundary
             case "Combined share":
                 _conc_fn = gbfn.combined_share_boundary
-            case "Pre-merger HHI":
+            case "Pre-merger HHI Contribution":
                 _conc_fn = gbfn.hhi_pre_contrib_boundary
-            case "Post-merger HHI":
+            case "Post-merger HHI Contribution":
                 _conc_fn = gbfn.hhi_post_contrib_boundary
         _boundary = _conc_fn(self.threshold, dps=self.precision)
@@ -221,13 +224,13 @@ class DiversionRatioBoundary:
     diversion_ratio: float = field(kw_only=False, default=0.065)
     @diversion_ratio.validator
-    def __dvv(
+    def _dvv(
         _instance: DiversionRatioBoundary,
         _attribute: Attribute[float],
         _value: float,
         /,
     ) -> None:
-        if not (isinstance(_value, float) and 0 <= _value <= 1):
+        if not (isinstance(_value, decimal.Decimal | float) and 0 <= _value <= 1):
             raise ValueError(
                 "Margin-adjusted benchmark share ratio must lie between 0 and 1."
             )
@@ -260,7 +263,7 @@ class DiversionRatioBoundary:
     """
     @recapture_form.validator
-    def __rsv(
+    def _rsv(
         _instance: DiversionRatioBoundary,
         _attribute: Attribute[RECForm],
         _value: RECForm,

mergeron/core/pseudorandom_numbers.py CHANGED Viewed

@@ -10,20 +10,34 @@ from __future__ import annotations
 import concurrent.futures
 from collections.abc import Sequence
-from multiprocessing import cpu_count
 from typing import Literal
 import numpy as np
-from attrs import Attribute, define, field
+from attrs import Attribute, Converter, define, field
 from numpy.random import PCG64DXSM, Generator, SeedSequence
-from .. import VERSION, ArrayDouble  # noqa: TID252
+from .. import NTHREADS, VERSION, ArrayDouble, ArrayFloat, this_yaml  # noqa: TID252
 __version__ = VERSION
-NTHREADS = 2 * cpu_count()
-DEFAULT_DIST_PARMS: ArrayDouble = np.array([0.0, 1.0], float)
-DEFAULT_BETA_DIST_PARMS: ArrayDouble = np.array([1.0, 1.0], float)
+DEFAULT_DIST_PARMS: ArrayFloat = np.array([0.0, 1.0], float)
+DEFAULT_BETA_DIST_PARMS: ArrayFloat = np.array([1.0, 1.0], float)
+# Add yaml representer, constructor for SeedSequence
+this_yaml.representer.add_representer(
+    SeedSequence,
+    lambda _r, _d: _r.represent_mapping(
+        "!SeedSequence",
+        {
+            _a: getattr(_d, _a)
+            for _a in ("entropy", "spawn_key", "pool_size", "n_children_spawned")
+        },
+    ),
+)
+this_yaml.constructor.add_constructor(
+    "!SeedSequence", lambda _c, _n, /: SeedSequence(**_c.construct_mapping(_n))
+)
 def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
@@ -110,6 +124,20 @@ def gen_seed_seq_list_default(
     return [SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_sseq_list_len]]
+def _dist_parms_conv(_v: ArrayFloat, _i: MultithreadedRNG) -> ArrayFloat:
+    if not len(_v):
+        return {
+            "Beta": DEFAULT_BETA_DIST_PARMS,
+            "Dirichlet": np.ones(_i.values.shape[-1], float),
+        }.get(_i.dist_type, DEFAULT_DIST_PARMS)
+    elif isinstance(_v, Sequence | np.ndarray):
+        return np.asarray(_v, float)
+    else:
+        raise ValueError(
+            "Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
+        )
 @define
 class MultithreadedRNG:
     """Fill given array on demand with pseudo-random numbers as specified.
@@ -121,22 +149,32 @@ class MultithreadedRNG:
     before commencing multithreaded random number generation.
     """
-    values: ArrayDouble = field(kw_only=False, default=None)
+    values: ArrayDouble = field(kw_only=False)
     """Output array to which generated data are over-written
     Array-length defines the number of i.i.d. (vector) draws.
     """
+    @values.validator
+    def _vsv(
+        _instance: MultithreadedRNG,
+        _attribute: Attribute[ArrayDouble],
+        _value: ArrayDouble,
+        /,
+    ) -> None:
+        if not len(_value):
+            raise ValueError("Output array must at least be one dimension")
     dist_type: Literal[
         "Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform"
-    ] = field(kw_only=True, default="Uniform")
+    ] = field(default="Uniform")
     """Distribution for the generated random numbers.
     Default is "Uniform".
      """
     @dist_type.validator
-    def __dtv(
+    def _dtv(
         _instance: MultithreadedRNG, _attribute: Attribute[str], _value: str, /
     ) -> None:
         if _value not in (
@@ -144,60 +182,48 @@ class MultithreadedRNG:
         ):
             raise ValueError(f"Specified distribution must be one of {_rdts}")
-    dist_parms: ArrayDouble | None = field(kw_only=True, default=DEFAULT_DIST_PARMS)
+    dist_parms: ArrayFloat = field(
+        converter=Converter(_dist_parms_conv, takes_self=True)  # type: ignore
+    )
     """Parameters, if any, for tailoring random number generation
     """
+    @dist_parms.default
+    def _dpd(_instance: MultithreadedRNG) -> ArrayFloat:
+        return {
+            "Beta": DEFAULT_BETA_DIST_PARMS,
+            "Dirichlet": np.ones(_instance.values.shape[-1], float),
+        }.get(_instance.dist_type, DEFAULT_DIST_PARMS)
     @dist_parms.validator
-    def __dpv(
-        _instance: MultithreadedRNG, _attribute: Attribute[str], _value: ArrayDouble, /
+    def _dpv(
+        _instance: MultithreadedRNG,
+        _attribute: Attribute[ArrayFloat],
+        _value: ArrayFloat,
+        /,
     ) -> None:
-        if _value is not None:
-            if not isinstance(_value, Sequence | np.ndarray):
-                raise ValueError(
-                    "When specified, distribution parameters must be a list, tuple or Numpy array"
-                )
+        if (
+            _instance.dist_type != "Dirichlet"
+            and (_lrdp := len(_value)) != (_trdp := 2)
+        ) or (
+            _instance.dist_type == "Dirichlet"
+            and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
+        ):
+            raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
-            elif (
-                _instance.dist_type != "Dirichlet"
-                and (_lrdp := len(_value)) != (_trdp := 2)
-            ) or (
-                _instance.dist_type == "Dirichlet"
-                and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
-            ):
-                raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
-            elif (
-                _instance.dist_type in ("Beta", "Dirichlet")
-                and (np.array(_value) <= 0.0).any()
-            ):
-                raise ValueError(
-                    "Shape and location parameters must be strictly positive"
-                )
+        elif _instance.dist_type in ("Beta", "Dirichlet") and (_value <= 0.0).any():
+            raise ValueError("Shape and location parameters must be strictly positive")
-    seed_sequence: SeedSequence | None = field(kw_only=True, default=None)
+    seed_sequence: SeedSequence | None = field(default=None)
     """Seed sequence for generating random numbers."""
-    nthreads: int = field(kw_only=True, default=NTHREADS)
+    nthreads: int = field(default=NTHREADS)
     """Number of threads to spawn for random number generation."""
     def fill(self) -> None:
         """Fill the provided output array with random number draws as specified."""
-        if (
-            self.dist_parms is None
-            or not (
-                _dist_parms := np.array(self.dist_parms)  # one-shot conversion
-            ).any()
-        ):
-            if self.dist_type == "Beta":
-                _dist_parms = DEFAULT_BETA_DIST_PARMS
-            elif self.dist_type == "Dirichlet":
-                _dist_parms = np.ones(self.values.shape[1], float)
-            else:
-                _dist_parms = DEFAULT_DIST_PARMS
-        if self.dist_parms is None or np.array_equal(
+        if not len(self.dist_parms) or np.array_equal(
             self.dist_parms, DEFAULT_DIST_PARMS
         ):
             if self.dist_type == "Uniform":
@@ -219,7 +245,7 @@ class MultithreadedRNG:
         def _fill(
             _rng: np.random.Generator,
             _dist_type: str,
-            _dist_parms: ArrayDouble,
+            _dist_parms: ArrayFloat,
             _out: ArrayDouble,
             _first: int,
             _last: int,
@@ -254,7 +280,7 @@ class MultithreadedRNG:
                     _fill,
                     _random_generators[i],
                     _dist_type,
-                    _dist_parms,
+                    self.dist_parms,
                     self.values,
                     _range_first,
                     _range_last,

mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl

Potentially problematic release.

mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl