mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +51 -2
- mergeron/core/guidelines_boundaries.py +16 -13
- mergeron/core/pseudorandom_numbers.py +77 -51
- mergeron/gen/__init__.py +222 -84
- mergeron/gen/data_generation.py +143 -176
- mergeron/gen/data_generation_functions.py +68 -118
- mergeron/gen/enforcement_stats.py +30 -6
- mergeron/gen/upp_tests.py +6 -7
- {mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/METADATA +2 -1
- {mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/RECORD +11 -11
- {mergeron-2025.739265.2.dist-info → mergeron-2025.739290.0.dist-info}/WHEEL +0 -0
mergeron/__init__.py
CHANGED
|
@@ -1,18 +1,25 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
+
from multiprocessing import cpu_count
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Literal
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
from numpy.typing import NDArray
|
|
10
|
+
from ruamel import yaml
|
|
9
11
|
|
|
10
12
|
_PKG_NAME: str = Path(__file__).parent.stem
|
|
11
13
|
|
|
12
|
-
VERSION = "2025.
|
|
14
|
+
VERSION = "2025.739290.0"
|
|
13
15
|
|
|
14
16
|
__version__ = VERSION
|
|
15
17
|
|
|
18
|
+
this_yaml = yaml.YAML(typ="safe", pure=True)
|
|
19
|
+
this_yaml.constructor.deep_construct = True
|
|
20
|
+
this_yaml.indent(mapping=2, sequence=4, offset=2)
|
|
21
|
+
|
|
22
|
+
|
|
16
23
|
DATA_DIR: Path = Path.home() / _PKG_NAME
|
|
17
24
|
"""
|
|
18
25
|
Defines a subdirectory named for this package in the user's home path.
|
|
@@ -22,6 +29,13 @@ If the subdirectory doesn't exist, it is created on package invocation.
|
|
|
22
29
|
if not DATA_DIR.is_dir():
|
|
23
30
|
DATA_DIR.mkdir(parents=False)
|
|
24
31
|
|
|
32
|
+
DEFAULT_REC_RATIO = 0.85
|
|
33
|
+
|
|
34
|
+
EMPTY_ARRAYDOUBLE = np.array([], float)
|
|
35
|
+
EMPTY_ARRAYINT = np.array([], int)
|
|
36
|
+
|
|
37
|
+
NTHREADS = 2 * cpu_count()
|
|
38
|
+
|
|
25
39
|
np.set_printoptions(precision=24, floatmode="fixed")
|
|
26
40
|
|
|
27
41
|
type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
|
|
@@ -33,7 +47,24 @@ type ArrayINT = NDArray[np.intp]
|
|
|
33
47
|
type ArrayDouble = NDArray[np.float64]
|
|
34
48
|
type ArrayBIGINT = NDArray[np.int64]
|
|
35
49
|
|
|
36
|
-
|
|
50
|
+
## Add yaml representer, constructor for NoneType
|
|
51
|
+
(_, _) = (
|
|
52
|
+
this_yaml.representer.add_representer(
|
|
53
|
+
type(None), lambda _r, _d: _r.represent_scalar("!None", "none")
|
|
54
|
+
),
|
|
55
|
+
this_yaml.constructor.add_constructor("!None", lambda _c, _n, /: None),
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
## Add yaml representer, constructor for ndarray
|
|
59
|
+
(_, _) = (
|
|
60
|
+
this_yaml.representer.add_representer(
|
|
61
|
+
np.ndarray,
|
|
62
|
+
lambda _r, _d: _r.represent_sequence("!ndarray", (_d.tolist(), _d.dtype.str)),
|
|
63
|
+
),
|
|
64
|
+
this_yaml.constructor.add_constructor(
|
|
65
|
+
"!ndarray", lambda _c, _n, /: np.array(*_c.construct_sequence(_n))
|
|
66
|
+
),
|
|
67
|
+
)
|
|
37
68
|
|
|
38
69
|
|
|
39
70
|
@enum.unique
|
|
@@ -63,3 +94,21 @@ class UPPAggrSelector(enum.StrEnum):
|
|
|
63
94
|
OSA = "own-share weighted average"
|
|
64
95
|
OSD = "own-share weighted distance"
|
|
65
96
|
OSG = "own-share weighted geometric mean"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
for _typ in (RECForm, UPPAggrSelector):
|
|
100
|
+
# NOTE: If additional enums are defined in this module,
|
|
101
|
+
# add them to the tuple above
|
|
102
|
+
|
|
103
|
+
_, _ = (
|
|
104
|
+
this_yaml.representer.add_representer(
|
|
105
|
+
_typ,
|
|
106
|
+
lambda _r, _d: _r.represent_scalar(f"!{_d.__class__.__name__}", _d.name),
|
|
107
|
+
),
|
|
108
|
+
this_yaml.constructor.add_constructor(
|
|
109
|
+
f"!{_typ.__name__}",
|
|
110
|
+
lambda _c, _n, /: getattr(
|
|
111
|
+
globals().get(_n.tag.lstrip("!")), _c.construct_scalar(_n)
|
|
112
|
+
),
|
|
113
|
+
),
|
|
114
|
+
)
|
|
@@ -62,7 +62,7 @@ class GuidelinesThresholds:
|
|
|
62
62
|
Year of publication of the Guidelines
|
|
63
63
|
"""
|
|
64
64
|
|
|
65
|
-
safeharbor: HMGThresholds = field(kw_only=True, default=None)
|
|
65
|
+
safeharbor: HMGThresholds = field(kw_only=True, default=None, init=False)
|
|
66
66
|
"""
|
|
67
67
|
Negative presumption quantified on various measures
|
|
68
68
|
|
|
@@ -70,7 +70,7 @@ class GuidelinesThresholds:
|
|
|
70
70
|
diversion ratio limit, CMCR, and IPR
|
|
71
71
|
"""
|
|
72
72
|
|
|
73
|
-
presumption: HMGThresholds = field(kw_only=True, default=None)
|
|
73
|
+
presumption: HMGThresholds = field(kw_only=True, default=None, init=False)
|
|
74
74
|
"""
|
|
75
75
|
Presumption of harm defined in HMG
|
|
76
76
|
|
|
@@ -78,7 +78,7 @@ class GuidelinesThresholds:
|
|
|
78
78
|
diversion ratio limit, CMCR, and IPR
|
|
79
79
|
"""
|
|
80
80
|
|
|
81
|
-
imputed_presumption: HMGThresholds = field(kw_only=True, default=None)
|
|
81
|
+
imputed_presumption: HMGThresholds = field(kw_only=True, default=None, init=False)
|
|
82
82
|
"""
|
|
83
83
|
Presumption of harm imputed from guidelines
|
|
84
84
|
|
|
@@ -153,25 +153,28 @@ class ConcentrationBoundary:
|
|
|
153
153
|
"""Concentration parameters, boundary coordinates, and area under concentration boundary."""
|
|
154
154
|
|
|
155
155
|
measure_name: Literal[
|
|
156
|
-
"ΔHHI",
|
|
156
|
+
"ΔHHI",
|
|
157
|
+
"Combined share",
|
|
158
|
+
"Pre-merger HHI Contribution",
|
|
159
|
+
"Post-merger HHI Contribution",
|
|
157
160
|
] = field(kw_only=False, default="ΔHHI")
|
|
158
161
|
|
|
159
162
|
@measure_name.validator
|
|
160
|
-
def
|
|
163
|
+
def _mnv(
|
|
161
164
|
_instance: ConcentrationBoundary, _attribute: Attribute[str], _value: str, /
|
|
162
165
|
) -> None:
|
|
163
166
|
if _value not in (
|
|
164
167
|
"ΔHHI",
|
|
165
168
|
"Combined share",
|
|
166
|
-
"Pre-merger HHI",
|
|
167
|
-
"Post-merger HHI",
|
|
169
|
+
"Pre-merger HHI Contribution",
|
|
170
|
+
"Post-merger HHI Contribution",
|
|
168
171
|
):
|
|
169
172
|
raise ValueError(f"Invalid name for a concentration measure, {_value!r}.")
|
|
170
173
|
|
|
171
174
|
threshold: float = field(kw_only=False, default=0.01)
|
|
172
175
|
|
|
173
176
|
@threshold.validator
|
|
174
|
-
def
|
|
177
|
+
def _tv(
|
|
175
178
|
_instance: ConcentrationBoundary, _attribute: Attribute[float], _value: float, /
|
|
176
179
|
) -> None:
|
|
177
180
|
if not 0 <= _value <= 1:
|
|
@@ -193,9 +196,9 @@ class ConcentrationBoundary:
|
|
|
193
196
|
_conc_fn = gbfn.hhi_delta_boundary
|
|
194
197
|
case "Combined share":
|
|
195
198
|
_conc_fn = gbfn.combined_share_boundary
|
|
196
|
-
case "Pre-merger HHI":
|
|
199
|
+
case "Pre-merger HHI Contribution":
|
|
197
200
|
_conc_fn = gbfn.hhi_pre_contrib_boundary
|
|
198
|
-
case "Post-merger HHI":
|
|
201
|
+
case "Post-merger HHI Contribution":
|
|
199
202
|
_conc_fn = gbfn.hhi_post_contrib_boundary
|
|
200
203
|
|
|
201
204
|
_boundary = _conc_fn(self.threshold, dps=self.precision)
|
|
@@ -221,13 +224,13 @@ class DiversionRatioBoundary:
|
|
|
221
224
|
diversion_ratio: float = field(kw_only=False, default=0.065)
|
|
222
225
|
|
|
223
226
|
@diversion_ratio.validator
|
|
224
|
-
def
|
|
227
|
+
def _dvv(
|
|
225
228
|
_instance: DiversionRatioBoundary,
|
|
226
229
|
_attribute: Attribute[float],
|
|
227
230
|
_value: float,
|
|
228
231
|
/,
|
|
229
232
|
) -> None:
|
|
230
|
-
if not (isinstance(_value, float) and 0 <= _value <= 1):
|
|
233
|
+
if not (isinstance(_value, decimal.Decimal | float) and 0 <= _value <= 1):
|
|
231
234
|
raise ValueError(
|
|
232
235
|
"Margin-adjusted benchmark share ratio must lie between 0 and 1."
|
|
233
236
|
)
|
|
@@ -260,7 +263,7 @@ class DiversionRatioBoundary:
|
|
|
260
263
|
"""
|
|
261
264
|
|
|
262
265
|
@recapture_form.validator
|
|
263
|
-
def
|
|
266
|
+
def _rsv(
|
|
264
267
|
_instance: DiversionRatioBoundary,
|
|
265
268
|
_attribute: Attribute[RECForm],
|
|
266
269
|
_value: RECForm,
|
|
@@ -10,20 +10,34 @@ from __future__ import annotations
|
|
|
10
10
|
|
|
11
11
|
import concurrent.futures
|
|
12
12
|
from collections.abc import Sequence
|
|
13
|
-
from multiprocessing import cpu_count
|
|
14
13
|
from typing import Literal
|
|
15
14
|
|
|
16
15
|
import numpy as np
|
|
17
|
-
from attrs import Attribute, define, field
|
|
16
|
+
from attrs import Attribute, Converter, define, field
|
|
18
17
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
19
18
|
|
|
20
|
-
from .. import VERSION, ArrayDouble # noqa: TID252
|
|
19
|
+
from .. import NTHREADS, VERSION, ArrayDouble, ArrayFloat, this_yaml # noqa: TID252
|
|
21
20
|
|
|
22
21
|
__version__ = VERSION
|
|
23
22
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
DEFAULT_DIST_PARMS: ArrayFloat = np.array([0.0, 1.0], float)
|
|
24
|
+
DEFAULT_BETA_DIST_PARMS: ArrayFloat = np.array([1.0, 1.0], float)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Add yaml representer, constructor for SeedSequence
|
|
28
|
+
this_yaml.representer.add_representer(
|
|
29
|
+
SeedSequence,
|
|
30
|
+
lambda _r, _d: _r.represent_mapping(
|
|
31
|
+
"!SeedSequence",
|
|
32
|
+
{
|
|
33
|
+
_a: getattr(_d, _a)
|
|
34
|
+
for _a in ("entropy", "spawn_key", "pool_size", "n_children_spawned")
|
|
35
|
+
},
|
|
36
|
+
),
|
|
37
|
+
)
|
|
38
|
+
this_yaml.constructor.add_constructor(
|
|
39
|
+
"!SeedSequence", lambda _c, _n, /: SeedSequence(**_c.construct_mapping(_n))
|
|
40
|
+
)
|
|
27
41
|
|
|
28
42
|
|
|
29
43
|
def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
|
|
@@ -110,6 +124,20 @@ def gen_seed_seq_list_default(
|
|
|
110
124
|
return [SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_sseq_list_len]]
|
|
111
125
|
|
|
112
126
|
|
|
127
|
+
def _dist_parms_conv(_v: ArrayFloat, _i: MultithreadedRNG) -> ArrayFloat:
|
|
128
|
+
if not len(_v):
|
|
129
|
+
return {
|
|
130
|
+
"Beta": DEFAULT_BETA_DIST_PARMS,
|
|
131
|
+
"Dirichlet": np.ones(_i.values.shape[-1], float),
|
|
132
|
+
}.get(_i.dist_type, DEFAULT_DIST_PARMS)
|
|
133
|
+
elif isinstance(_v, Sequence | np.ndarray):
|
|
134
|
+
return np.asarray(_v, float)
|
|
135
|
+
else:
|
|
136
|
+
raise ValueError(
|
|
137
|
+
"Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
|
|
113
141
|
@define
|
|
114
142
|
class MultithreadedRNG:
|
|
115
143
|
"""Fill given array on demand with pseudo-random numbers as specified.
|
|
@@ -121,22 +149,32 @@ class MultithreadedRNG:
|
|
|
121
149
|
before commencing multithreaded random number generation.
|
|
122
150
|
"""
|
|
123
151
|
|
|
124
|
-
values: ArrayDouble = field(kw_only=False
|
|
152
|
+
values: ArrayDouble = field(kw_only=False)
|
|
125
153
|
"""Output array to which generated data are over-written
|
|
126
154
|
|
|
127
155
|
Array-length defines the number of i.i.d. (vector) draws.
|
|
128
156
|
"""
|
|
129
157
|
|
|
158
|
+
@values.validator
|
|
159
|
+
def _vsv(
|
|
160
|
+
_instance: MultithreadedRNG,
|
|
161
|
+
_attribute: Attribute[ArrayDouble],
|
|
162
|
+
_value: ArrayDouble,
|
|
163
|
+
/,
|
|
164
|
+
) -> None:
|
|
165
|
+
if not len(_value):
|
|
166
|
+
raise ValueError("Output array must at least be one dimension")
|
|
167
|
+
|
|
130
168
|
dist_type: Literal[
|
|
131
169
|
"Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform"
|
|
132
|
-
] = field(
|
|
170
|
+
] = field(default="Uniform")
|
|
133
171
|
"""Distribution for the generated random numbers.
|
|
134
172
|
|
|
135
173
|
Default is "Uniform".
|
|
136
174
|
"""
|
|
137
175
|
|
|
138
176
|
@dist_type.validator
|
|
139
|
-
def
|
|
177
|
+
def _dtv(
|
|
140
178
|
_instance: MultithreadedRNG, _attribute: Attribute[str], _value: str, /
|
|
141
179
|
) -> None:
|
|
142
180
|
if _value not in (
|
|
@@ -144,60 +182,48 @@ class MultithreadedRNG:
|
|
|
144
182
|
):
|
|
145
183
|
raise ValueError(f"Specified distribution must be one of {_rdts}")
|
|
146
184
|
|
|
147
|
-
dist_parms:
|
|
185
|
+
dist_parms: ArrayFloat = field(
|
|
186
|
+
converter=Converter(_dist_parms_conv, takes_self=True) # type: ignore
|
|
187
|
+
)
|
|
148
188
|
"""Parameters, if any, for tailoring random number generation
|
|
149
189
|
"""
|
|
150
190
|
|
|
191
|
+
@dist_parms.default
|
|
192
|
+
def _dpd(_instance: MultithreadedRNG) -> ArrayFloat:
|
|
193
|
+
return {
|
|
194
|
+
"Beta": DEFAULT_BETA_DIST_PARMS,
|
|
195
|
+
"Dirichlet": np.ones(_instance.values.shape[-1], float),
|
|
196
|
+
}.get(_instance.dist_type, DEFAULT_DIST_PARMS)
|
|
197
|
+
|
|
151
198
|
@dist_parms.validator
|
|
152
|
-
def
|
|
153
|
-
_instance: MultithreadedRNG,
|
|
199
|
+
def _dpv(
|
|
200
|
+
_instance: MultithreadedRNG,
|
|
201
|
+
_attribute: Attribute[ArrayFloat],
|
|
202
|
+
_value: ArrayFloat,
|
|
203
|
+
/,
|
|
154
204
|
) -> None:
|
|
155
|
-
if
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
205
|
+
if (
|
|
206
|
+
_instance.dist_type != "Dirichlet"
|
|
207
|
+
and (_lrdp := len(_value)) != (_trdp := 2)
|
|
208
|
+
) or (
|
|
209
|
+
_instance.dist_type == "Dirichlet"
|
|
210
|
+
and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
|
|
211
|
+
):
|
|
212
|
+
raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
|
|
160
213
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
and (_lrdp := len(_value)) != (_trdp := 2)
|
|
164
|
-
) or (
|
|
165
|
-
_instance.dist_type == "Dirichlet"
|
|
166
|
-
and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
|
|
167
|
-
):
|
|
168
|
-
raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
|
|
169
|
-
|
|
170
|
-
elif (
|
|
171
|
-
_instance.dist_type in ("Beta", "Dirichlet")
|
|
172
|
-
and (np.array(_value) <= 0.0).any()
|
|
173
|
-
):
|
|
174
|
-
raise ValueError(
|
|
175
|
-
"Shape and location parameters must be strictly positive"
|
|
176
|
-
)
|
|
214
|
+
elif _instance.dist_type in ("Beta", "Dirichlet") and (_value <= 0.0).any():
|
|
215
|
+
raise ValueError("Shape and location parameters must be strictly positive")
|
|
177
216
|
|
|
178
|
-
seed_sequence: SeedSequence | None = field(
|
|
217
|
+
seed_sequence: SeedSequence | None = field(default=None)
|
|
179
218
|
"""Seed sequence for generating random numbers."""
|
|
180
219
|
|
|
181
|
-
nthreads: int = field(
|
|
220
|
+
nthreads: int = field(default=NTHREADS)
|
|
182
221
|
"""Number of threads to spawn for random number generation."""
|
|
183
222
|
|
|
184
223
|
def fill(self) -> None:
|
|
185
224
|
"""Fill the provided output array with random number draws as specified."""
|
|
186
225
|
|
|
187
|
-
if (
|
|
188
|
-
self.dist_parms is None
|
|
189
|
-
or not (
|
|
190
|
-
_dist_parms := np.array(self.dist_parms) # one-shot conversion
|
|
191
|
-
).any()
|
|
192
|
-
):
|
|
193
|
-
if self.dist_type == "Beta":
|
|
194
|
-
_dist_parms = DEFAULT_BETA_DIST_PARMS
|
|
195
|
-
elif self.dist_type == "Dirichlet":
|
|
196
|
-
_dist_parms = np.ones(self.values.shape[1], float)
|
|
197
|
-
else:
|
|
198
|
-
_dist_parms = DEFAULT_DIST_PARMS
|
|
199
|
-
|
|
200
|
-
if self.dist_parms is None or np.array_equal(
|
|
226
|
+
if not len(self.dist_parms) or np.array_equal(
|
|
201
227
|
self.dist_parms, DEFAULT_DIST_PARMS
|
|
202
228
|
):
|
|
203
229
|
if self.dist_type == "Uniform":
|
|
@@ -219,7 +245,7 @@ class MultithreadedRNG:
|
|
|
219
245
|
def _fill(
|
|
220
246
|
_rng: np.random.Generator,
|
|
221
247
|
_dist_type: str,
|
|
222
|
-
_dist_parms:
|
|
248
|
+
_dist_parms: ArrayFloat,
|
|
223
249
|
_out: ArrayDouble,
|
|
224
250
|
_first: int,
|
|
225
251
|
_last: int,
|
|
@@ -254,7 +280,7 @@ class MultithreadedRNG:
|
|
|
254
280
|
_fill,
|
|
255
281
|
_random_generators[i],
|
|
256
282
|
_dist_type,
|
|
257
|
-
|
|
283
|
+
self.dist_parms,
|
|
258
284
|
self.values,
|
|
259
285
|
_range_first,
|
|
260
286
|
_range_last,
|