PyPI - mergeron - Versions diffs - 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl - Mend

mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mergeron might be problematic. Click here for more details.

Files changed (21) hide show

mergeron/__init__.py +103 -48
mergeron/core/__init__.py +105 -4
mergeron/core/empirical_margin_distribution.py +100 -78
mergeron/core/ftc_merger_investigations_data.py +309 -316
mergeron/core/guidelines_boundaries.py +67 -138
mergeron/core/guidelines_boundary_functions.py +202 -379
mergeron/core/guidelines_boundary_functions_extra.py +264 -106
mergeron/core/pseudorandom_numbers.py +73 -64
mergeron/data/damodaran_margin_data_serialized.zip +0 -0
mergeron/data/ftc_invdata.zip +0 -0
mergeron/demo/visualize_empirical_margin_distribution.py +9 -7
mergeron/gen/__init__.py +138 -161
mergeron/gen/data_generation.py +181 -149
mergeron/gen/data_generation_functions.py +220 -237
mergeron/gen/enforcement_stats.py +78 -109
mergeron/gen/upp_tests.py +119 -194
{mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/METADATA +2 -3
mergeron-2025.739290.5.dist-info/RECORD +24 -0
{mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/WHEEL +1 -1
mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
mergeron-2025.739290.3.dist-info/RECORD +0 -23

mergeron/core/pseudorandom_numbers.py CHANGED Viewed

@@ -16,7 +16,14 @@ import numpy as np
 from attrs import Attribute, Converter, define, field
 from numpy.random import PCG64DXSM, Generator, SeedSequence
-from .. import NTHREADS, VERSION, ArrayDouble, ArrayFloat  # noqa: TID252
+from .. import (  # noqa: TID252
+    NTHREADS,
+    VERSION,
+    ArrayDouble,
+    ArrayFloat,
+    this_yaml,
+    yaml_rt_mapper,
+)
 __version__ = VERSION
@@ -40,9 +47,22 @@ def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
     return Generator(PCG64DXSM(_s))
+# Add yaml representer, constructor for SeedSequence
+(_, _) = (
+    this_yaml.representer.add_representer(
+        SeedSequence, lambda _r, _d: _r.represent_mapping("!SeedSequence", _d.state)
+    ),
+    this_yaml.constructor.add_constructor(
+        "!SeedSequence", lambda _c, _n, /: SeedSequence(**yaml_rt_mapper(_c, _n))
+    ),
+)
 def gen_seed_seq_list_default(
-    _sseq_list_len: int = 3, /, *, generated_entropy: Sequence[int] | None = None
-) -> list[SeedSequence]:
+    _len: int = 3, /, *, generated_entropy: Sequence[int] | None = None
+) -> tuple[SeedSequence, ...]:
     """
     Return specified number of SeedSequences, for generating random variates
@@ -54,7 +74,7 @@ def gen_seed_seq_list_default(
     Parameters
     ----------
-    _sseq_list_len
+    _len
         Number of SeedSequences to initialize
     generated_entropy
@@ -92,8 +112,8 @@ def gen_seed_seq_list_default(
         63206306147411023146090085885772240748399174641427012462446714431253444120718,
     ]
-    if _sseq_list_len > (_lge := len(generated_entropy)):
-        _e_str_segs = (
+    if _len > (_lge := len(generated_entropy)):
+        e_str_segs = (
             "This function can presently create SeedSequences for generating up to ",
             f"{_lge:,d} independent random variates. If you really need to generate ",
             f"more than {_lge:,d} seeded independent random variates, please pass a ",
@@ -103,22 +123,24 @@ def gen_seed_seq_list_default(
                 "bit_generators/generated/numpy.random.SeedSequence.html",
             ),
         )
-        raise ValueError("".join(_e_str_segs))
+        raise ValueError("".join(e_str_segs))
-    return [SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_sseq_list_len]]
+    return tuple(SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_len])
-def _dist_parms_conv(_v: ArrayFloat, _i: MultithreadedRNG) -> ArrayFloat:
-    if not len(_v):
+def _dist_parms_conv(_v: ArrayFloat | None, _i: MultithreadedRNG) -> ArrayFloat:
+    if _v is None or not len(_v):
         return {
             "Beta": DEFAULT_BETA_DIST_PARMS,
             "Dirichlet": np.ones(_i.values.shape[-1], float),
+            "Normal": DEFAULT_DIST_PARMS,
+            "Uniform": DEFAULT_DIST_PARMS,
         }.get(_i.dist_type, DEFAULT_DIST_PARMS)
     elif isinstance(_v, Sequence | np.ndarray):
         return np.asarray(_v, float)
     else:
         raise ValueError(
-            "Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
+            f"Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
         )
@@ -141,12 +163,9 @@ class MultithreadedRNG:
     @values.validator
     def _vsv(
-        _instance: MultithreadedRNG,
-        _attribute: Attribute[ArrayDouble],
-        _value: ArrayDouble,
-        /,
+        _i: MultithreadedRNG, _a: Attribute[ArrayDouble], _v: ArrayDouble, /
     ) -> None:
-        if not len(_value):
+        if not len(_v):
             raise ValueError("Output array must at least be one dimension")
     dist_type: Literal[
@@ -158,10 +177,8 @@ class MultithreadedRNG:
      """
     @dist_type.validator
-    def _dtv(
-        _instance: MultithreadedRNG, _attribute: Attribute[str], _value: str, /
-    ) -> None:
-        if _value not in (
+    def _dtv(_i: MultithreadedRNG, _a: Attribute[str], _v: str, /) -> None:
+        if _v not in (
             _rdts := ("Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform")
         ):
             raise ValueError(f"Specified distribution must be one of {_rdts}")
@@ -173,29 +190,20 @@ class MultithreadedRNG:
     """
     @dist_parms.default
-    def _dpd(_instance: MultithreadedRNG) -> ArrayFloat:
-        return {
-            "Beta": DEFAULT_BETA_DIST_PARMS,
-            "Dirichlet": np.ones(_instance.values.shape[-1], float),
-        }.get(_instance.dist_type, DEFAULT_DIST_PARMS)
+    def _dpd(_i: MultithreadedRNG) -> ArrayFloat:
+        return _dist_parms_conv(None, _i)
     @dist_parms.validator
     def _dpv(
-        _instance: MultithreadedRNG,
-        _attribute: Attribute[ArrayFloat],
-        _value: ArrayFloat,
-        /,
+        _i: MultithreadedRNG, _a: Attribute[ArrayFloat], _v: ArrayFloat, /
     ) -> None:
-        if (
-            _instance.dist_type != "Dirichlet"
-            and (_lrdp := len(_value)) != (_trdp := 2)
-        ) or (
-            _instance.dist_type == "Dirichlet"
-            and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
+        if (_i.dist_type != "Dirichlet" and (_lrdp := len(_v)) != (_trdp := 2)) or (
+            _i.dist_type == "Dirichlet"
+            and (_lrdp := len(_v)) != (_trdp := _i.values.shape[-1])
         ):
             raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
-        elif _instance.dist_type in ("Beta", "Dirichlet") and (_value <= 0.0).any():
+        elif _i.dist_type in {"Beta", "Dirichlet"} and (_v <= 0.0).any():
             raise ValueError("Shape and location parameters must be strictly positive")
     seed_sequence: SeedSequence | None = field(default=None)
@@ -211,61 +219,62 @@ class MultithreadedRNG:
             self.dist_parms, DEFAULT_DIST_PARMS
         ):
             if self.dist_type == "Uniform":
-                _dist_type = "Random"
+                dist_type = "Random"
             elif self.dist_type == "Normal":
-                _dist_type = "Gaussian"
+                dist_type = "Gaussian"
         else:
-            _dist_type = self.dist_type
+            dist_type = self.dist_type
-        _step_size = (len(self.values) / self.nthreads).__ceil__()
-        # int; function gives float unsuitable for slicing
+        step_size = (len(self.values) / self.nthreads).__ceil__()
-        _seed_sequence = self.seed_sequence or SeedSequence(pool_size=8)
-        _random_generators = tuple(
-            prng(_t) for _t in _seed_sequence.spawn(self.nthreads)
+        seed_ = (
+            SeedSequence(pool_size=8)
+            if self.seed_sequence is None
+            else self.seed_sequence
         )
+        random_generators = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))
         def _fill(
             _rng: np.random.Generator,
             _dist_type: str,
             _dist_parms: ArrayFloat,
-            _out: ArrayDouble,
+            out_: ArrayDouble,
             _first: int,
             _last: int,
             /,
         ) -> None:
-            _sz: tuple[int, ...] = _out[_first:_last].shape
+            _sz: tuple[int, ...] = out_[_first:_last].shape
             match _dist_type:
                 case "Beta":
-                    _shape_a, _shape_b = _dist_parms
-                    _out[_first:_last] = _rng.beta(_shape_a, _shape_b, size=_sz)
+                    shape_a, shape_b = _dist_parms
+                    out_[_first:_last] = _rng.beta(shape_a, shape_b, size=_sz)
                 case "Dirichlet":
-                    _out[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
+                    out_[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
                 case "Gaussian":
-                    _rng.standard_normal(out=_out[_first:_last])
+                    _rng.standard_normal(out=out_[_first:_last])
                 case "Normal":
                     _mu, _sigma = _dist_parms
-                    _out[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
+                    out_[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
                 case "Random":
-                    _rng.random(out=_out[_first:_last])
+                    _rng.random(out=out_[_first:_last])
                 case "Uniform":
-                    _uni_l, _uni_h = _dist_parms
-                    _out[_first:_last] = _rng.uniform(_uni_l, _uni_h, size=_sz)
+                    uni_l, uni_h = _dist_parms
+                    out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=_sz)
                 case _:
                     "Unreachable. The validator would have rejected this as invalid."
-        with concurrent.futures.ThreadPoolExecutor(self.nthreads) as _executor:
-            for i in range(self.nthreads):
-                _range_first = i * _step_size
-                _range_last = min(len(self.values), (i + 1) * _step_size)
+        with concurrent.futures.ThreadPoolExecutor(self.nthreads) as executor_:
+            for _i in range(self.nthreads):
+                range_first = _i * step_size
+                range_last = min(len(self.values), (_i + 1) * step_size)
-                _executor.submit(
+                executor_.submit(
                     _fill,
-                    _random_generators[i],
-                    _dist_type,
+                    random_generators[_i],
+                    dist_type,
                     self.dist_parms,
                     self.values,
-                    _range_first,
-                    _range_last,
+                    range_first,
+                    range_last,
                 )

mergeron/data/damodaran_margin_data_serialized.zip ADDED Viewed

Binary file

mergeron/data/ftc_invdata.zip ADDED Viewed

Binary file

mergeron/demo/visualize_empirical_margin_distribution.py CHANGED Viewed

@@ -12,23 +12,23 @@ from matplotlib.ticker import StrMethodFormatter
 from numpy.random import PCG64DXSM, Generator, SeedSequence
 from scipy import stats  # type: ignore
-import mergeron.core.empirical_margin_distribution as dmgn
+import mergeron.core.empirical_margin_distribution as emd
 from mergeron import DATA_DIR
 from mergeron.core.guidelines_boundary_functions import boundary_plot
 SAMPLE_SIZE = 10**6
 BIN_COUNT = 25
-mgn_data_obs, mgn_data_wts, mgn_data_stats = dmgn.mgn_data_builder()
-print(repr(mgn_data_obs))
-print(repr(mgn_data_stats))
+margin_data_obs, margin_data_wts, margin_data_stats = emd.margin_data_builder()
+print(repr(margin_data_obs))
+print(repr(margin_data_stats))
 plt, mgn_fig, mgn_ax, set_axis_def = boundary_plot(mktshares_plot_flag=False)
 mgn_fig.set_figheight(6.5)
 mgn_fig.set_figwidth(9.0)
 _, mgn_bins, _ = mgn_ax.hist(
-    x=mgn_data_obs,
-    weights=mgn_data_wts,
+    x=margin_data_obs,
+    weights=margin_data_wts,
     bins=BIN_COUNT,
     alpha=0.4,
     density=True,
@@ -44,7 +44,9 @@ with warnings.catch_warnings():
         for _g in mgn_ax.get_yticklabels()
     ])
-mgn_kde = stats.gaussian_kde(mgn_data_obs, weights=mgn_data_wts, bw_method="silverman")
+mgn_kde = stats.gaussian_kde(
+    margin_data_obs, weights=margin_data_wts, bw_method="silverman"
+)
 mgn_kde.set_bandwidth(bw_method=mgn_kde.factor / 3.0)
 mgn_ax.plot(

mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl

Potentially problematic release.

mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl