mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +103 -48
- mergeron/core/__init__.py +105 -4
- mergeron/core/empirical_margin_distribution.py +100 -78
- mergeron/core/ftc_merger_investigations_data.py +309 -316
- mergeron/core/guidelines_boundaries.py +67 -138
- mergeron/core/guidelines_boundary_functions.py +202 -379
- mergeron/core/guidelines_boundary_functions_extra.py +264 -106
- mergeron/core/pseudorandom_numbers.py +73 -64
- mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- mergeron/data/ftc_invdata.zip +0 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +9 -7
- mergeron/gen/__init__.py +138 -161
- mergeron/gen/data_generation.py +181 -149
- mergeron/gen/data_generation_functions.py +220 -237
- mergeron/gen/enforcement_stats.py +78 -109
- mergeron/gen/upp_tests.py +119 -194
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/METADATA +2 -3
- mergeron-2025.739290.5.dist-info/RECORD +24 -0
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/WHEEL +1 -1
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron-2025.739290.3.dist-info/RECORD +0 -23
|
@@ -16,7 +16,14 @@ import numpy as np
|
|
|
16
16
|
from attrs import Attribute, Converter, define, field
|
|
17
17
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
18
18
|
|
|
19
|
-
from .. import
|
|
19
|
+
from .. import ( # noqa: TID252
|
|
20
|
+
NTHREADS,
|
|
21
|
+
VERSION,
|
|
22
|
+
ArrayDouble,
|
|
23
|
+
ArrayFloat,
|
|
24
|
+
this_yaml,
|
|
25
|
+
yaml_rt_mapper,
|
|
26
|
+
)
|
|
20
27
|
|
|
21
28
|
__version__ = VERSION
|
|
22
29
|
|
|
@@ -40,9 +47,22 @@ def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
|
|
|
40
47
|
return Generator(PCG64DXSM(_s))
|
|
41
48
|
|
|
42
49
|
|
|
50
|
+
# Add yaml representer, constructor for SeedSequence
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
(_, _) = (
|
|
54
|
+
this_yaml.representer.add_representer(
|
|
55
|
+
SeedSequence, lambda _r, _d: _r.represent_mapping("!SeedSequence", _d.state)
|
|
56
|
+
),
|
|
57
|
+
this_yaml.constructor.add_constructor(
|
|
58
|
+
"!SeedSequence", lambda _c, _n, /: SeedSequence(**yaml_rt_mapper(_c, _n))
|
|
59
|
+
),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
|
|
43
63
|
def gen_seed_seq_list_default(
|
|
44
|
-
|
|
45
|
-
) ->
|
|
64
|
+
_len: int = 3, /, *, generated_entropy: Sequence[int] | None = None
|
|
65
|
+
) -> tuple[SeedSequence, ...]:
|
|
46
66
|
"""
|
|
47
67
|
Return specified number of SeedSequences, for generating random variates
|
|
48
68
|
|
|
@@ -54,7 +74,7 @@ def gen_seed_seq_list_default(
|
|
|
54
74
|
|
|
55
75
|
Parameters
|
|
56
76
|
----------
|
|
57
|
-
|
|
77
|
+
_len
|
|
58
78
|
Number of SeedSequences to initialize
|
|
59
79
|
|
|
60
80
|
generated_entropy
|
|
@@ -92,8 +112,8 @@ def gen_seed_seq_list_default(
|
|
|
92
112
|
63206306147411023146090085885772240748399174641427012462446714431253444120718,
|
|
93
113
|
]
|
|
94
114
|
|
|
95
|
-
if
|
|
96
|
-
|
|
115
|
+
if _len > (_lge := len(generated_entropy)):
|
|
116
|
+
e_str_segs = (
|
|
97
117
|
"This function can presently create SeedSequences for generating up to ",
|
|
98
118
|
f"{_lge:,d} independent random variates. If you really need to generate ",
|
|
99
119
|
f"more than {_lge:,d} seeded independent random variates, please pass a ",
|
|
@@ -103,22 +123,24 @@ def gen_seed_seq_list_default(
|
|
|
103
123
|
"bit_generators/generated/numpy.random.SeedSequence.html",
|
|
104
124
|
),
|
|
105
125
|
)
|
|
106
|
-
raise ValueError("".join(
|
|
126
|
+
raise ValueError("".join(e_str_segs))
|
|
107
127
|
|
|
108
|
-
return
|
|
128
|
+
return tuple(SeedSequence(_s, pool_size=8) for _s in generated_entropy[:_len])
|
|
109
129
|
|
|
110
130
|
|
|
111
|
-
def _dist_parms_conv(_v: ArrayFloat, _i: MultithreadedRNG) -> ArrayFloat:
|
|
112
|
-
if not len(_v):
|
|
131
|
+
def _dist_parms_conv(_v: ArrayFloat | None, _i: MultithreadedRNG) -> ArrayFloat:
|
|
132
|
+
if _v is None or not len(_v):
|
|
113
133
|
return {
|
|
114
134
|
"Beta": DEFAULT_BETA_DIST_PARMS,
|
|
115
135
|
"Dirichlet": np.ones(_i.values.shape[-1], float),
|
|
136
|
+
"Normal": DEFAULT_DIST_PARMS,
|
|
137
|
+
"Uniform": DEFAULT_DIST_PARMS,
|
|
116
138
|
}.get(_i.dist_type, DEFAULT_DIST_PARMS)
|
|
117
139
|
elif isinstance(_v, Sequence | np.ndarray):
|
|
118
140
|
return np.asarray(_v, float)
|
|
119
141
|
else:
|
|
120
142
|
raise ValueError(
|
|
121
|
-
"Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
|
|
143
|
+
f"Input, {_v!r} has invalid type. Must be None, Sequence of floats, or Numpy ndarray."
|
|
122
144
|
)
|
|
123
145
|
|
|
124
146
|
|
|
@@ -141,12 +163,9 @@ class MultithreadedRNG:
|
|
|
141
163
|
|
|
142
164
|
@values.validator
|
|
143
165
|
def _vsv(
|
|
144
|
-
|
|
145
|
-
_attribute: Attribute[ArrayDouble],
|
|
146
|
-
_value: ArrayDouble,
|
|
147
|
-
/,
|
|
166
|
+
_i: MultithreadedRNG, _a: Attribute[ArrayDouble], _v: ArrayDouble, /
|
|
148
167
|
) -> None:
|
|
149
|
-
if not len(
|
|
168
|
+
if not len(_v):
|
|
150
169
|
raise ValueError("Output array must at least be one dimension")
|
|
151
170
|
|
|
152
171
|
dist_type: Literal[
|
|
@@ -158,10 +177,8 @@ class MultithreadedRNG:
|
|
|
158
177
|
"""
|
|
159
178
|
|
|
160
179
|
@dist_type.validator
|
|
161
|
-
def _dtv(
|
|
162
|
-
|
|
163
|
-
) -> None:
|
|
164
|
-
if _value not in (
|
|
180
|
+
def _dtv(_i: MultithreadedRNG, _a: Attribute[str], _v: str, /) -> None:
|
|
181
|
+
if _v not in (
|
|
165
182
|
_rdts := ("Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform")
|
|
166
183
|
):
|
|
167
184
|
raise ValueError(f"Specified distribution must be one of {_rdts}")
|
|
@@ -173,29 +190,20 @@ class MultithreadedRNG:
|
|
|
173
190
|
"""
|
|
174
191
|
|
|
175
192
|
@dist_parms.default
|
|
176
|
-
def _dpd(
|
|
177
|
-
return
|
|
178
|
-
"Beta": DEFAULT_BETA_DIST_PARMS,
|
|
179
|
-
"Dirichlet": np.ones(_instance.values.shape[-1], float),
|
|
180
|
-
}.get(_instance.dist_type, DEFAULT_DIST_PARMS)
|
|
193
|
+
def _dpd(_i: MultithreadedRNG) -> ArrayFloat:
|
|
194
|
+
return _dist_parms_conv(None, _i)
|
|
181
195
|
|
|
182
196
|
@dist_parms.validator
|
|
183
197
|
def _dpv(
|
|
184
|
-
|
|
185
|
-
_attribute: Attribute[ArrayFloat],
|
|
186
|
-
_value: ArrayFloat,
|
|
187
|
-
/,
|
|
198
|
+
_i: MultithreadedRNG, _a: Attribute[ArrayFloat], _v: ArrayFloat, /
|
|
188
199
|
) -> None:
|
|
189
|
-
if (
|
|
190
|
-
|
|
191
|
-
and (_lrdp := len(
|
|
192
|
-
) or (
|
|
193
|
-
_instance.dist_type == "Dirichlet"
|
|
194
|
-
and (_lrdp := len(_value)) != (_trdp := _instance.values.shape[1])
|
|
200
|
+
if (_i.dist_type != "Dirichlet" and (_lrdp := len(_v)) != (_trdp := 2)) or (
|
|
201
|
+
_i.dist_type == "Dirichlet"
|
|
202
|
+
and (_lrdp := len(_v)) != (_trdp := _i.values.shape[-1])
|
|
195
203
|
):
|
|
196
204
|
raise ValueError(f"Expected {_trdp} parameters, got, {_lrdp}")
|
|
197
205
|
|
|
198
|
-
elif
|
|
206
|
+
elif _i.dist_type in {"Beta", "Dirichlet"} and (_v <= 0.0).any():
|
|
199
207
|
raise ValueError("Shape and location parameters must be strictly positive")
|
|
200
208
|
|
|
201
209
|
seed_sequence: SeedSequence | None = field(default=None)
|
|
@@ -211,61 +219,62 @@ class MultithreadedRNG:
|
|
|
211
219
|
self.dist_parms, DEFAULT_DIST_PARMS
|
|
212
220
|
):
|
|
213
221
|
if self.dist_type == "Uniform":
|
|
214
|
-
|
|
222
|
+
dist_type = "Random"
|
|
215
223
|
elif self.dist_type == "Normal":
|
|
216
|
-
|
|
224
|
+
dist_type = "Gaussian"
|
|
217
225
|
else:
|
|
218
|
-
|
|
226
|
+
dist_type = self.dist_type
|
|
219
227
|
|
|
220
|
-
|
|
221
|
-
# int; function gives float unsuitable for slicing
|
|
228
|
+
step_size = (len(self.values) / self.nthreads).__ceil__()
|
|
222
229
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
230
|
+
seed_ = (
|
|
231
|
+
SeedSequence(pool_size=8)
|
|
232
|
+
if self.seed_sequence is None
|
|
233
|
+
else self.seed_sequence
|
|
227
234
|
)
|
|
228
235
|
|
|
236
|
+
random_generators = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))
|
|
237
|
+
|
|
229
238
|
def _fill(
|
|
230
239
|
_rng: np.random.Generator,
|
|
231
240
|
_dist_type: str,
|
|
232
241
|
_dist_parms: ArrayFloat,
|
|
233
|
-
|
|
242
|
+
out_: ArrayDouble,
|
|
234
243
|
_first: int,
|
|
235
244
|
_last: int,
|
|
236
245
|
/,
|
|
237
246
|
) -> None:
|
|
238
|
-
_sz: tuple[int, ...] =
|
|
247
|
+
_sz: tuple[int, ...] = out_[_first:_last].shape
|
|
239
248
|
match _dist_type:
|
|
240
249
|
case "Beta":
|
|
241
|
-
|
|
242
|
-
|
|
250
|
+
shape_a, shape_b = _dist_parms
|
|
251
|
+
out_[_first:_last] = _rng.beta(shape_a, shape_b, size=_sz)
|
|
243
252
|
case "Dirichlet":
|
|
244
|
-
|
|
253
|
+
out_[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
|
|
245
254
|
case "Gaussian":
|
|
246
|
-
_rng.standard_normal(out=
|
|
255
|
+
_rng.standard_normal(out=out_[_first:_last])
|
|
247
256
|
case "Normal":
|
|
248
257
|
_mu, _sigma = _dist_parms
|
|
249
|
-
|
|
258
|
+
out_[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
|
|
250
259
|
case "Random":
|
|
251
|
-
_rng.random(out=
|
|
260
|
+
_rng.random(out=out_[_first:_last])
|
|
252
261
|
case "Uniform":
|
|
253
|
-
|
|
254
|
-
|
|
262
|
+
uni_l, uni_h = _dist_parms
|
|
263
|
+
out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=_sz)
|
|
255
264
|
case _:
|
|
256
265
|
"Unreachable. The validator would have rejected this as invalid."
|
|
257
266
|
|
|
258
|
-
with concurrent.futures.ThreadPoolExecutor(self.nthreads) as
|
|
259
|
-
for
|
|
260
|
-
|
|
261
|
-
|
|
267
|
+
with concurrent.futures.ThreadPoolExecutor(self.nthreads) as executor_:
|
|
268
|
+
for _i in range(self.nthreads):
|
|
269
|
+
range_first = _i * step_size
|
|
270
|
+
range_last = min(len(self.values), (_i + 1) * step_size)
|
|
262
271
|
|
|
263
|
-
|
|
272
|
+
executor_.submit(
|
|
264
273
|
_fill,
|
|
265
|
-
|
|
266
|
-
|
|
274
|
+
random_generators[_i],
|
|
275
|
+
dist_type,
|
|
267
276
|
self.dist_parms,
|
|
268
277
|
self.values,
|
|
269
|
-
|
|
270
|
-
|
|
278
|
+
range_first,
|
|
279
|
+
range_last,
|
|
271
280
|
)
|
|
Binary file
|
|
Binary file
|
|
@@ -12,23 +12,23 @@ from matplotlib.ticker import StrMethodFormatter
|
|
|
12
12
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
13
13
|
from scipy import stats # type: ignore
|
|
14
14
|
|
|
15
|
-
import mergeron.core.empirical_margin_distribution as
|
|
15
|
+
import mergeron.core.empirical_margin_distribution as emd
|
|
16
16
|
from mergeron import DATA_DIR
|
|
17
17
|
from mergeron.core.guidelines_boundary_functions import boundary_plot
|
|
18
18
|
|
|
19
19
|
SAMPLE_SIZE = 10**6
|
|
20
20
|
BIN_COUNT = 25
|
|
21
|
-
|
|
22
|
-
print(repr(
|
|
23
|
-
print(repr(
|
|
21
|
+
margin_data_obs, margin_data_wts, margin_data_stats = emd.margin_data_builder()
|
|
22
|
+
print(repr(margin_data_obs))
|
|
23
|
+
print(repr(margin_data_stats))
|
|
24
24
|
|
|
25
25
|
plt, mgn_fig, mgn_ax, set_axis_def = boundary_plot(mktshares_plot_flag=False)
|
|
26
26
|
mgn_fig.set_figheight(6.5)
|
|
27
27
|
mgn_fig.set_figwidth(9.0)
|
|
28
28
|
|
|
29
29
|
_, mgn_bins, _ = mgn_ax.hist(
|
|
30
|
-
x=
|
|
31
|
-
weights=
|
|
30
|
+
x=margin_data_obs,
|
|
31
|
+
weights=margin_data_wts,
|
|
32
32
|
bins=BIN_COUNT,
|
|
33
33
|
alpha=0.4,
|
|
34
34
|
density=True,
|
|
@@ -44,7 +44,9 @@ with warnings.catch_warnings():
|
|
|
44
44
|
for _g in mgn_ax.get_yticklabels()
|
|
45
45
|
])
|
|
46
46
|
|
|
47
|
-
mgn_kde = stats.gaussian_kde(
|
|
47
|
+
mgn_kde = stats.gaussian_kde(
|
|
48
|
+
margin_data_obs, weights=margin_data_wts, bw_method="silverman"
|
|
49
|
+
)
|
|
48
50
|
mgn_kde.set_bandwidth(bw_method=mgn_kde.factor / 3.0)
|
|
49
51
|
|
|
50
52
|
mgn_ax.plot(
|