mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

mergeron/__init__.py CHANGED
@@ -1,18 +1,25 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
+ from multiprocessing import cpu_count
4
5
  from pathlib import Path
5
6
  from typing import Literal
6
7
 
7
8
  import numpy as np
8
9
  from numpy.typing import NDArray
10
+ from ruamel import yaml
9
11
 
10
12
  _PKG_NAME: str = Path(__file__).parent.stem
11
13
 
12
- VERSION = "2025.739265.2"
14
+ VERSION = "2025.739290.0"
13
15
 
14
16
  __version__ = VERSION
15
17
 
18
+ this_yaml = yaml.YAML(typ="safe", pure=True)
19
+ this_yaml.constructor.deep_construct = True
20
+ this_yaml.indent(mapping=2, sequence=4, offset=2)
21
+
22
+
16
23
  DATA_DIR: Path = Path.home() / _PKG_NAME
17
24
  """
18
25
  Defines a subdirectory named for this package in the user's home path.
@@ -22,6 +29,13 @@ If the subdirectory doesn't exist, it is created on package invocation.
22
29
  if not DATA_DIR.is_dir():
23
30
  DATA_DIR.mkdir(parents=False)
24
31
 
32
+ DEFAULT_REC_RATIO = 0.85
33
+
34
+ EMPTY_ARRAYDOUBLE = np.array([], float)
35
+ EMPTY_ARRAYINT = np.array([], int)
36
+
37
+ NTHREADS = 2 * cpu_count()
38
+
25
39
  np.set_printoptions(precision=24, floatmode="fixed")
26
40
 
27
41
  type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
@@ -33,7 +47,24 @@ type ArrayINT = NDArray[np.intp]
33
47
  type ArrayDouble = NDArray[np.float64]
34
48
  type ArrayBIGINT = NDArray[np.int64]
35
49
 
36
- DEFAULT_REC_RATIO = 0.85
50
+ ## Add yaml representer, constructor for NoneType
51
+ (_, _) = (
52
+ this_yaml.representer.add_representer(
53
+ type(None), lambda _r, _d: _r.represent_scalar("!None", "none")
54
+ ),
55
+ this_yaml.constructor.add_constructor("!None", lambda _c, _n, /: None),
56
+ )
57
+
58
+ ## Add yaml representer, constructor for ndarray
59
+ (_, _) = (
60
+ this_yaml.representer.add_representer(
61
+ np.ndarray,
62
+ lambda _r, _d: _r.represent_sequence("!ndarray", (_d.tolist(), _d.dtype.str)),
63
+ ),
64
+ this_yaml.constructor.add_constructor(
65
+ "!ndarray", lambda _c, _n, /: np.array(*_c.construct_sequence(_n))
66
+ ),
67
+ )
37
68
 
38
69
 
39
70
  @enum.unique
@@ -63,3 +94,21 @@ class UPPAggrSelector(enum.StrEnum):
63
94
  OSA = "own-share weighted average"
64
95
  OSD = "own-share weighted distance"
65
96
  OSG = "own-share weighted geometric mean"
97
+
98
+
99
+ for _typ in (RECForm, UPPAggrSelector):
100
+ # NOTE: If additional enums are defined in this module,
101
+ # add them to the list above
102
+
103
+ _, _ = (
104
+ this_yaml.representer.add_representer(
105
+ _typ,
106
+ lambda _r, _d: _r.represent_scalar(f"!{_d.__class__.__name__}", _d.name),
107
+ ),
108
+ this_yaml.constructor.add_constructor(
109
+ f"!{_typ.__name__}",
110
+ lambda _c, _n, /: getattr(
111
+ globals().get(_n.tag.lstrip("!")), _c.construct_scalar(_n)
112
+ ),
113
+ ),
114
+ )
@@ -62,7 +62,7 @@ class GuidelinesThresholds:
62
62
  Year of publication of the Guidelines
63
63
  """
64
64
 
65
- safeharbor: HMGThresholds = field(kw_only=True, default=None)
65
+ safeharbor: HMGThresholds = field(kw_only=True, default=None, init=False)
66
66
  """
67
67
  Negative presumption quantified on various measures
68
68
 
@@ -70,7 +70,7 @@ class GuidelinesThresholds:
70
70
  diversion ratio limit, CMCR, and IPR
71
71
  """
72
72
 
73
- presumption: HMGThresholds = field(kw_only=True, default=None)
73
+ presumption: HMGThresholds = field(kw_only=True, default=None, init=False)
74
74
  """
75
75
  Presumption of harm defined in HMG
76
76
 
@@ -78,7 +78,7 @@ class GuidelinesThresholds:
78
78
  diversion ratio limit, CMCR, and IPR
79
79
  """
80
80
 
81
- imputed_presumption: HMGThresholds = field(kw_only=True, default=None)
81
+ imputed_presumption: HMGThresholds = field(kw_only=True, default=None, init=False)
82
82
  """
83
83
  Presumption of harm imputed from guidelines
84
84
 
@@ -153,25 +153,28 @@ class ConcentrationBoundary:
153
153
  """Concentration parameters, boundary coordinates, and area under concentration boundary."""
154
154
 
155
155
  measure_name: Literal[
156
- "ΔHHI", "Combined share", "Pre-merger HHI", "Post-merger HHI"
156
+ "ΔHHI",
157
+ "Combined share",
158
+ "Pre-merger HHI Contribution",
159
+ "Post-merger HHI Contribution",
157
160
  ] = field(kw_only=False, default="ΔHHI")
158
161
 
159
162
  @measure_name.validator
160
- def __mnv(
163
+ def _mnv(
161
164
  _instance: ConcentrationBoundary, _attribute: Attribute[str], _value: str, /
162
165
  ) -> None:
163
166
  if _value not in (
164
167
  "ΔHHI",
165
168
  "Combined share",
166
- "Pre-merger HHI",
167
- "Post-merger HHI",
169
+ "Pre-merger HHI Contribution",
170
+ "Post-merger HHI Contribution",
168
171
  ):
169
172
  raise ValueError(f"Invalid name for a concentration measure, {_value!r}.")
170
173
 
171
174
  threshold: float = field(kw_only=False, default=0.01)
172
175
 
173
176
  @threshold.validator
174
- def __tv(
177
+ def _tv(
175
178
  _instance: ConcentrationBoundary, _attribute: Attribute[float], _value: float, /
176
179
  ) -> None:
177
180
  if not 0 <= _value <= 1:
@@ -193,9 +196,9 @@ class ConcentrationBoundary:
193
196
  _conc_fn = gbfn.hhi_delta_boundary
194
197
  case "Combined share":
195
198
  _conc_fn = gbfn.combined_share_boundary
196
- case "Pre-merger HHI":
199
+ case "Pre-merger HHI Contribution":
197
200
  _conc_fn = gbfn.hhi_pre_contrib_boundary
198
- case "Post-merger HHI":
201
+ case "Post-merger HHI Contribution":
199
202
  _conc_fn = gbfn.hhi_post_contrib_boundary
200
203
 
201
204
  _boundary = _conc_fn(self.threshold, dps=self.precision)
@@ -221,13 +224,13 @@ class DiversionRatioBoundary:
221
224
  diversion_ratio: float = field(kw_only=False, default=0.065)
222
225
 
223
226
  @diversion_ratio.validator
224
- def __dvv(
227
+ def _dvv(
225
228
  _instance: DiversionRatioBoundary,
226
229
  _attribute: Attribute[float],
227
230
  _value: float,
228
231
  /,
229
232
  ) -> None:
230
- if not (isinstance(_value, float) and 0 <= _value <= 1):
233
+ if not (isinstance(_value, decimal.Decimal | float) and 0 <= _value <= 1):
231
234
  raise ValueError(
232
235
  "Margin-adjusted benchmark share ratio must lie between 0 and 1."
233
236
  )
@@ -260,7 +263,7 @@ class DiversionRatioBoundary:
260
263
  """
261
264
 
262
265
  @recapture_form.validator
263
- def __rsv(
266
+ def _rsv(
264
267
  _instance: DiversionRatioBoundary,
265
268
  _attribute: Attribute[RECForm],
266
269
  _value: RECForm,
@@ -10,20 +10,34 @@ from __future__ import annotations
10
10
 
11
11
  import concurrent.futures
12
12
  from collections.abc import Sequence
13
- from multiprocessing import cpu_count
14
13
  from typing import Literal
15
14
 
16
15
  import numpy as np
17
- from attrs import Attribute, define, field
16
+ from attrs import Attribute, Converter, define, field
18
17
  from numpy.random import PCG64DXSM, Generator, SeedSequence
19
18
 
20
- from .. import VERSION, ArrayDouble # noqa: TID252
19
+ from .. import NTHREADS, VERSION, ArrayDouble, ArrayFloat, this_yaml # noqa: TID252
21
20
 
22
21
  __version__ = VERSION
23
22
 
24
- NTHREADS = 2 * cpu_count()
25
- DEFAULT_DIST_PARMS: ArrayDouble = np.array([0.0, 1.0], float)
26
- DEFAULT_BETA_DIST_PARMS: ArrayDouble = np.array([1.0, 1.0], float)
23
+ DEFAULT_DIST_PARMS: ArrayFloat = np.array([0.0, 1.0], float)
24
+ DEFAULT_BETA_DIST_PARMS: ArrayFloat = np.array([1.0, 1.0], float)
25
+
26
+
27
+ # Add yaml representer, constructor for SeedSequence
28
+ this_yaml.representer.add_representer(
29
+ SeedSequence,
30
+ lambda _r, _d: _r.represent_mapping(
31
+ "!SeedSequence",
32
+ {
33
+ _a: getattr(_d, _a)
34
+ for _a in ("entropy", "spawn_key", "pool_size", "n_children_spawned")
35
+ },
36
+ ),
37
+ )
38
+ this_yaml.constructor.add_constructor(
39
+ "!SeedSequence", lambda _c, _n, /: SeedSequence(**_c.construct_mapping(_n))
40
+ )
27
41
 
28
42
 
29
43
  def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
@@ -110,6 +124,20 @@ def gen_seed_seq_list_default(
110
124
  return [SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_sseq_list_len]]
111
125
 
112
126
 
127
+ def _dist_parms_conv(_v: ArrayFloat, _i: MultithreadedRNG) -> ArrayFloat:
128
+ if not len(_v):
129
+ return {
130
+ "Beta": DEFAULT_BETA_DIST_PARMS,
131
+ "Dirichlet": np.ones(_i.values.shape[-1], float),
132
+ }.get(_i.dist_type, DEFAULT_DIST_PARMS)
133
+ elif isinstance(_v, Sequence | np.ndarray):
134
+ return np.asarray(_v, float)
135
+ else:
136
+ raise ValueError(
137
+ "Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
138
+ )
139
+
140
+
113
141
  @define
114
142
  class MultithreadedRNG:
115
143
  """Fill given array on demand with pseudo-random numbers as specified.
@@ -121,22 +149,32 @@ class MultithreadedRNG:
121
149
  before commencing multithreaded random number generation.
122
150
  """
123
151
 
124
- values: ArrayDouble = field(kw_only=False, default=None)
152
+ values: ArrayDouble = field(kw_only=False)
125
153
  """Output array to which generated data are over-written
126
154
 
127
155
  Array-length defines the number of i.i.d. (vector) draws.
128
156
  """
129
157
 
158
+ @values.validator
159
+ def _vsv(
160
+ _instance: MultithreadedRNG,
161
+ _attribute: Attribute[ArrayDouble],
162
+ _value: ArrayDouble,
163
+ /,
164
+ ) -> None:
165
+ if not len(_value):
166
+ raise ValueError("Output array must at least be one dimension")
167
+
130
168
  dist_type: Literal[
131
169
  "Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform"
132
- ] = field(kw_only=True, default="Uniform")
170
+ ] = field(default="Uniform")
133
171
  """Distribution for the generated random numbers.
134
172
 
135
173
  Default is "Uniform".
136
174
  """
137
175
 
138
176
  @dist_type.validator
139
- def __dtv(
177
+ def _dtv(
140
178
  _instance: MultithreadedRNG, _attribute: Attribute[str], _value: str, /
141
179
  ) -> None:
142
180
  if _value not in (
@@ -144,60 +182,48 @@ class MultithreadedRNG:
144
182
  ):
145
183
  raise ValueError(f"Specified distribution must be one of {_rdts}")
146
184
 
147
- dist_parms: ArrayDouble | None = field(kw_only=True, default=DEFAULT_DIST_PARMS)
185
+ dist_parms: ArrayFloat = field(
186
+ converter=Converter(_dist_parms_conv, takes_self=True) # type: ignore
187
+ )
148
188
  """Parameters, if any, for tailoring random number generation
149
189
  """
150
190
 
191
+ @dist_parms.default
192
+ def _dpd(_instance: MultithreadedRNG) -> ArrayFloat:
193
+ return {
194
+ "Beta": DEFAULT_BETA_DIST_PARMS,
195
+ "Dirichlet": np.ones(_instance.values.shape[-1], float),
196
+ }.get(_instance.dist_type, DEFAULT_DIST_PARMS)
197
+
151
198
  @dist_parms.validator
152
- def __dpv(
153
- _instance: MultithreadedRNG, _attribute: Attribute[str], _value: ArrayDouble, /
199
+ def _dpv(
200
+ _instance: MultithreadedRNG,
201
+ _attribute: Attribute[ArrayFloat],
202
+ _value: ArrayFloat,
203
+ /,
154
204
  ) -> None:
155
- if _value is not None:
156
- if not isinstance(_value, Sequence | np.ndarray):
157
- raise ValueError(
158
- "When specified, distribution parameters must be a list, tuple or Numpy array"
159
- )
205
+ if (
206
+ _instance.dist_type != "Dirichlet"
207
+ and (_lrdp := len(_value)) != (_trdp := 2)
208
+ ) or (
209
+ _instance.dist_type == "Dirichlet"
210
+ and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
211
+ ):
212
+ raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
160
213
 
161
- elif (
162
- _instance.dist_type != "Dirichlet"
163
- and (_lrdp := len(_value)) != (_trdp := 2)
164
- ) or (
165
- _instance.dist_type == "Dirichlet"
166
- and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
167
- ):
168
- raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
169
-
170
- elif (
171
- _instance.dist_type in ("Beta", "Dirichlet")
172
- and (np.array(_value) <= 0.0).any()
173
- ):
174
- raise ValueError(
175
- "Shape and location parameters must be strictly positive"
176
- )
214
+ elif _instance.dist_type in ("Beta", "Dirichlet") and (_value <= 0.0).any():
215
+ raise ValueError("Shape and location parameters must be strictly positive")
177
216
 
178
- seed_sequence: SeedSequence | None = field(kw_only=True, default=None)
217
+ seed_sequence: SeedSequence | None = field(default=None)
179
218
  """Seed sequence for generating random numbers."""
180
219
 
181
- nthreads: int = field(kw_only=True, default=NTHREADS)
220
+ nthreads: int = field(default=NTHREADS)
182
221
  """Number of threads to spawn for random number generation."""
183
222
 
184
223
  def fill(self) -> None:
185
224
  """Fill the provided output array with random number draws as specified."""
186
225
 
187
- if (
188
- self.dist_parms is None
189
- or not (
190
- _dist_parms := np.array(self.dist_parms) # one-shot conversion
191
- ).any()
192
- ):
193
- if self.dist_type == "Beta":
194
- _dist_parms = DEFAULT_BETA_DIST_PARMS
195
- elif self.dist_type == "Dirichlet":
196
- _dist_parms = np.ones(self.values.shape[1], float)
197
- else:
198
- _dist_parms = DEFAULT_DIST_PARMS
199
-
200
- if self.dist_parms is None or np.array_equal(
226
+ if not len(self.dist_parms) or np.array_equal(
201
227
  self.dist_parms, DEFAULT_DIST_PARMS
202
228
  ):
203
229
  if self.dist_type == "Uniform":
@@ -219,7 +245,7 @@ class MultithreadedRNG:
219
245
  def _fill(
220
246
  _rng: np.random.Generator,
221
247
  _dist_type: str,
222
- _dist_parms: ArrayDouble,
248
+ _dist_parms: ArrayFloat,
223
249
  _out: ArrayDouble,
224
250
  _first: int,
225
251
  _last: int,
@@ -254,7 +280,7 @@ class MultithreadedRNG:
254
280
  _fill,
255
281
  _random_generators[i],
256
282
  _dist_type,
257
- _dist_parms,
283
+ self.dist_parms,
258
284
  self.values,
259
285
  _range_first,
260
286
  _range_last,