mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

@@ -16,7 +16,14 @@ import numpy as np
16
16
  from attrs import Attribute, Converter, define, field
17
17
  from numpy.random import PCG64DXSM, Generator, SeedSequence
18
18
 
19
- from .. import NTHREADS, VERSION, ArrayDouble, ArrayFloat # noqa: TID252
19
+ from .. import ( # noqa: TID252
20
+ NTHREADS,
21
+ VERSION,
22
+ ArrayDouble,
23
+ ArrayFloat,
24
+ this_yaml,
25
+ yaml_rt_mapper,
26
+ )
20
27
 
21
28
  __version__ = VERSION
22
29
 
@@ -40,9 +47,22 @@ def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
40
47
  return Generator(PCG64DXSM(_s))
41
48
 
42
49
 
50
+ # Add yaml representer, constructor for SeedSequence
51
+
52
+
53
+ (_, _) = (
54
+ this_yaml.representer.add_representer(
55
+ SeedSequence, lambda _r, _d: _r.represent_mapping("!SeedSequence", _d.state)
56
+ ),
57
+ this_yaml.constructor.add_constructor(
58
+ "!SeedSequence", lambda _c, _n, /: SeedSequence(**yaml_rt_mapper(_c, _n))
59
+ ),
60
+ )
61
+
62
+
43
63
  def gen_seed_seq_list_default(
44
- _sseq_list_len: int = 3, /, *, generated_entropy: Sequence[int] | None = None
45
- ) -> list[SeedSequence]:
64
+ _len: int = 3, /, *, generated_entropy: Sequence[int] | None = None
65
+ ) -> tuple[SeedSequence, ...]:
46
66
  """
47
67
  Return specified number of SeedSequences, for generating random variates
48
68
 
@@ -54,7 +74,7 @@ def gen_seed_seq_list_default(
54
74
 
55
75
  Parameters
56
76
  ----------
57
- _sseq_list_len
77
+ _len
58
78
  Number of SeedSequences to initialize
59
79
 
60
80
  generated_entropy
@@ -92,8 +112,8 @@ def gen_seed_seq_list_default(
92
112
  63206306147411023146090085885772240748399174641427012462446714431253444120718,
93
113
  ]
94
114
 
95
- if _sseq_list_len > (_lge := len(generated_entropy)):
96
- _e_str_segs = (
115
+ if _len > (_lge := len(generated_entropy)):
116
+ e_str_segs = (
97
117
  "This function can presently create SeedSequences for generating up to ",
98
118
  f"{_lge:,d} independent random variates. If you really need to generate ",
99
119
  f"more than {_lge:,d} seeded independent random variates, please pass a ",
@@ -103,22 +123,24 @@ def gen_seed_seq_list_default(
103
123
  "bit_generators/generated/numpy.random.SeedSequence.html",
104
124
  ),
105
125
  )
106
- raise ValueError("".join(_e_str_segs))
126
+ raise ValueError("".join(e_str_segs))
107
127
 
108
- return [SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_sseq_list_len]]
128
+ return tuple(SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_len])
109
129
 
110
130
 
111
- def _dist_parms_conv(_v: ArrayFloat, _i: MultithreadedRNG) -> ArrayFloat:
112
- if not len(_v):
131
+ def _dist_parms_conv(_v: ArrayFloat | None, _i: MultithreadedRNG) -> ArrayFloat:
132
+ if _v is None or not len(_v):
113
133
  return {
114
134
  "Beta": DEFAULT_BETA_DIST_PARMS,
115
135
  "Dirichlet": np.ones(_i.values.shape[-1], float),
136
+ "Normal": DEFAULT_DIST_PARMS,
137
+ "Uniform": DEFAULT_DIST_PARMS,
116
138
  }.get(_i.dist_type, DEFAULT_DIST_PARMS)
117
139
  elif isinstance(_v, Sequence | np.ndarray):
118
140
  return np.asarray(_v, float)
119
141
  else:
120
142
  raise ValueError(
121
- "Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
143
+ f"Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
122
144
  )
123
145
 
124
146
 
@@ -141,12 +163,9 @@ class MultithreadedRNG:
141
163
 
142
164
  @values.validator
143
165
  def _vsv(
144
- _instance: MultithreadedRNG,
145
- _attribute: Attribute[ArrayDouble],
146
- _value: ArrayDouble,
147
- /,
166
+ _i: MultithreadedRNG, _a: Attribute[ArrayDouble], _v: ArrayDouble, /
148
167
  ) -> None:
149
- if not len(_value):
168
+ if not len(_v):
150
169
  raise ValueError("Output array must at least be one dimension")
151
170
 
152
171
  dist_type: Literal[
@@ -158,10 +177,8 @@ class MultithreadedRNG:
158
177
  """
159
178
 
160
179
  @dist_type.validator
161
- def _dtv(
162
- _instance: MultithreadedRNG, _attribute: Attribute[str], _value: str, /
163
- ) -> None:
164
- if _value not in (
180
+ def _dtv(_i: MultithreadedRNG, _a: Attribute[str], _v: str, /) -> None:
181
+ if _v not in (
165
182
  _rdts := ("Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform")
166
183
  ):
167
184
  raise ValueError(f"Specified distribution must be one of {_rdts}")
@@ -173,29 +190,20 @@ class MultithreadedRNG:
173
190
  """
174
191
 
175
192
  @dist_parms.default
176
- def _dpd(_instance: MultithreadedRNG) -> ArrayFloat:
177
- return {
178
- "Beta": DEFAULT_BETA_DIST_PARMS,
179
- "Dirichlet": np.ones(_instance.values.shape[-1], float),
180
- }.get(_instance.dist_type, DEFAULT_DIST_PARMS)
193
+ def _dpd(_i: MultithreadedRNG) -> ArrayFloat:
194
+ return _dist_parms_conv(None, _i)
181
195
 
182
196
  @dist_parms.validator
183
197
  def _dpv(
184
- _instance: MultithreadedRNG,
185
- _attribute: Attribute[ArrayFloat],
186
- _value: ArrayFloat,
187
- /,
198
+ _i: MultithreadedRNG, _a: Attribute[ArrayFloat], _v: ArrayFloat, /
188
199
  ) -> None:
189
- if (
190
- _instance.dist_type != "Dirichlet"
191
- and (_lrdp := len(_value)) != (_trdp := 2)
192
- ) or (
193
- _instance.dist_type == "Dirichlet"
194
- and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
200
+ if (_i.dist_type != "Dirichlet" and (_lrdp := len(_v)) != (_trdp := 2)) or (
201
+ _i.dist_type == "Dirichlet"
202
+ and (_lrdp := len(_v)) != (_trdp := _i.values.shape[-1])
195
203
  ):
196
204
  raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
197
205
 
198
- elif _instance.dist_type in ("Beta", "Dirichlet") and (_value <= 0.0).any():
206
+ elif _i.dist_type in {"Beta", "Dirichlet"} and (_v <= 0.0).any():
199
207
  raise ValueError("Shape and location parameters must be strictly positive")
200
208
 
201
209
  seed_sequence: SeedSequence | None = field(default=None)
@@ -211,61 +219,62 @@ class MultithreadedRNG:
211
219
  self.dist_parms, DEFAULT_DIST_PARMS
212
220
  ):
213
221
  if self.dist_type == "Uniform":
214
- _dist_type = "Random"
222
+ dist_type = "Random"
215
223
  elif self.dist_type == "Normal":
216
- _dist_type = "Gaussian"
224
+ dist_type = "Gaussian"
217
225
  else:
218
- _dist_type = self.dist_type
226
+ dist_type = self.dist_type
219
227
 
220
- _step_size = (len(self.values) / self.nthreads).__ceil__()
221
- # int; function gives float unsuitable for slicing
228
+ step_size = (len(self.values) / self.nthreads).__ceil__()
222
229
 
223
- _seed_sequence = self.seed_sequence or SeedSequence(pool_size=8)
224
-
225
- _random_generators = tuple(
226
- prng(_t) for _t in _seed_sequence.spawn(self.nthreads)
230
+ seed_ = (
231
+ SeedSequence(pool_size=8)
232
+ if self.seed_sequence is None
233
+ else self.seed_sequence
227
234
  )
228
235
 
236
+ random_generators = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))
237
+
229
238
  def _fill(
230
239
  _rng: np.random.Generator,
231
240
  _dist_type: str,
232
241
  _dist_parms: ArrayFloat,
233
- _out: ArrayDouble,
242
+ out_: ArrayDouble,
234
243
  _first: int,
235
244
  _last: int,
236
245
  /,
237
246
  ) -> None:
238
- _sz: tuple[int, ...] = _out[_first:_last].shape
247
+ _sz: tuple[int, ...] = out_[_first:_last].shape
239
248
  match _dist_type:
240
249
  case "Beta":
241
- _shape_a, _shape_b = _dist_parms
242
- _out[_first:_last] = _rng.beta(_shape_a, _shape_b, size=_sz)
250
+ shape_a, shape_b = _dist_parms
251
+ out_[_first:_last] = _rng.beta(shape_a, shape_b, size=_sz)
243
252
  case "Dirichlet":
244
- _out[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
253
+ out_[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
245
254
  case "Gaussian":
246
- _rng.standard_normal(out=_out[_first:_last])
255
+ _rng.standard_normal(out=out_[_first:_last])
247
256
  case "Normal":
248
257
  _mu, _sigma = _dist_parms
249
- _out[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
258
+ out_[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
250
259
  case "Random":
251
- _rng.random(out=_out[_first:_last])
260
+ _rng.random(out=out_[_first:_last])
252
261
  case "Uniform":
253
- _uni_l, _uni_h = _dist_parms
254
- _out[_first:_last] = _rng.uniform(_uni_l, _uni_h, size=_sz)
262
+ uni_l, uni_h = _dist_parms
263
+ out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=_sz)
255
264
  case _:
256
265
  "Unreachable. The validator would have rejected this as invalid."
257
266
 
258
- with concurrent.futures.ThreadPoolExecutor(self.nthreads) as _executor:
259
- for i in range(self.nthreads):
260
- _range_first = i * _step_size
261
- _range_last = min(len(self.values), (i + 1) * _step_size)
267
+ with concurrent.futures.ThreadPoolExecutor(self.nthreads) as executor_:
268
+ for _i in range(self.nthreads):
269
+ range_first = _i * step_size
270
+ range_last = min(len(self.values), (_i + 1) * step_size)
262
271
 
263
- _executor.submit(
272
+ executor_.submit(
264
273
  _fill,
265
- _random_generators[i],
266
- _dist_type,
274
+ random_generators[_i],
275
+ dist_type,
267
276
  self.dist_parms,
268
277
  self.values,
269
- _range_first,
270
- _range_last,
278
+ range_first,
279
+ range_last,
271
280
  )
Binary file (contents not shown)
@@ -12,23 +12,23 @@ from matplotlib.ticker import StrMethodFormatter
12
12
  from numpy.random import PCG64DXSM, Generator, SeedSequence
13
13
  from scipy import stats # type: ignore
14
14
 
15
- import mergeron.core.empirical_margin_distribution as dmgn
15
+ import mergeron.core.empirical_margin_distribution as emd
16
16
  from mergeron import DATA_DIR
17
17
  from mergeron.core.guidelines_boundary_functions import boundary_plot
18
18
 
19
19
  SAMPLE_SIZE = 10**6
20
20
  BIN_COUNT = 25
21
- mgn_data_obs, mgn_data_wts, mgn_data_stats = dmgn.mgn_data_builder()
22
- print(repr(mgn_data_obs))
23
- print(repr(mgn_data_stats))
21
+ margin_data_obs, margin_data_wts, margin_data_stats = emd.margin_data_builder()
22
+ print(repr(margin_data_obs))
23
+ print(repr(margin_data_stats))
24
24
 
25
25
  plt, mgn_fig, mgn_ax, set_axis_def = boundary_plot(mktshares_plot_flag=False)
26
26
  mgn_fig.set_figheight(6.5)
27
27
  mgn_fig.set_figwidth(9.0)
28
28
 
29
29
  _, mgn_bins, _ = mgn_ax.hist(
30
- x=mgn_data_obs,
31
- weights=mgn_data_wts,
30
+ x=margin_data_obs,
31
+ weights=margin_data_wts,
32
32
  bins=BIN_COUNT,
33
33
  alpha=0.4,
34
34
  density=True,
@@ -44,7 +44,9 @@ with warnings.catch_warnings():
44
44
  for _g in mgn_ax.get_yticklabels()
45
45
  ])
46
46
 
47
- mgn_kde = stats.gaussian_kde(mgn_data_obs, weights=mgn_data_wts, bw_method="silverman")
47
+ mgn_kde = stats.gaussian_kde(
48
+ margin_data_obs, weights=margin_data_wts, bw_method="silverman"
49
+ )
48
50
  mgn_kde.set_bandwidth(bw_method=mgn_kde.factor / 3.0)
49
51
 
50
52
  mgn_ax.plot(