mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +142 -93
- mergeron/core/guidelines_boundaries.py +289 -1077
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +257 -245
- mergeron/gen/data_generation.py +473 -221
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +159 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -259
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738963.0.dist-info/METADATA +0 -108
- mergeron-2024.738963.0.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/gen/upp_tests.py
CHANGED
|
@@ -1,259 +1,157 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Methods to
|
|
2
|
+
Methods to compute intrinsic clearance rates and intrinsic enforcement rates
|
|
3
3
|
from generated market data.
|
|
4
4
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from collections.abc import Sequence
|
|
8
8
|
from contextlib import suppress
|
|
9
|
-
from importlib.metadata import version
|
|
10
9
|
from pathlib import Path
|
|
11
|
-
from typing import
|
|
10
|
+
from typing import Any, Literal, TypedDict
|
|
12
11
|
|
|
13
12
|
import numpy as np
|
|
14
13
|
import tables as ptb # type: ignore
|
|
15
|
-
from attrs import evolve
|
|
16
|
-
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
17
14
|
from numpy.random import SeedSequence
|
|
18
15
|
from numpy.typing import NDArray
|
|
19
16
|
|
|
20
|
-
from
|
|
21
|
-
|
|
22
|
-
|
|
17
|
+
from .. import ( # noqa
|
|
18
|
+
VERSION,
|
|
19
|
+
ArrayBIGINT,
|
|
20
|
+
ArrayBoolean,
|
|
21
|
+
ArrayDouble,
|
|
22
|
+
ArrayFloat,
|
|
23
|
+
ArrayINT,
|
|
24
|
+
HMGPubYear,
|
|
25
|
+
UPPAggrSelector,
|
|
26
|
+
)
|
|
23
27
|
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
24
28
|
from . import (
|
|
25
|
-
|
|
29
|
+
DEFAULT_EMPTY_ARRAY,
|
|
26
30
|
DataclassInstance,
|
|
27
31
|
INVResolution,
|
|
28
32
|
MarketDataSample,
|
|
29
|
-
MarketSampleSpec,
|
|
30
33
|
UPPTestRegime,
|
|
31
34
|
UPPTestsCounts,
|
|
32
35
|
UPPTestsRaw,
|
|
33
36
|
)
|
|
34
|
-
from . import
|
|
35
|
-
from . import investigations_stats as isl
|
|
36
|
-
|
|
37
|
-
__version__ = version(_PKG_NAME)
|
|
37
|
+
from . import enforcement_stats as esl
|
|
38
38
|
|
|
39
|
+
__version__ = VERSION
|
|
39
40
|
|
|
40
|
-
ptb.
|
|
41
|
-
ptb.parameters.MAX_BLOSC_THREADS = 4
|
|
41
|
+
type SaveData = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
|
|
42
42
|
|
|
43
|
-
SaveData: TypeAlias = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
|
|
44
43
|
|
|
44
|
+
class INVRESCntsArgs(TypedDict, total=False):
|
|
45
|
+
"Keyword arguments of function, :code:`sim_enf_cnts`"
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
sim_test_regime: UPPTestRegime
|
|
50
|
-
saved_array_name_suffix: str
|
|
51
|
-
save_data_to_file: SaveData
|
|
52
|
-
seed_seq_list: list[SeedSequence]
|
|
47
|
+
sample_size: int
|
|
48
|
+
seed_seq_list: Sequence[SeedSequence] | None
|
|
53
49
|
nthreads: int
|
|
50
|
+
save_data_to_file: SaveData
|
|
51
|
+
saved_array_name_suffix: str
|
|
54
52
|
|
|
55
53
|
|
|
56
|
-
def
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
54
|
+
def compute_upp_test_counts(
|
|
55
|
+
_market_data_sample: MarketDataSample,
|
|
56
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
57
|
+
_upp_test_regime: UPPTestRegime,
|
|
60
58
|
/,
|
|
61
59
|
) -> UPPTestsCounts:
|
|
62
|
-
"""
|
|
63
|
-
|
|
64
|
-
The parameters `_sim_invres_cnts_kwargs` are passed unaltered to
|
|
65
|
-
the parent function, `sim_invres_cnts()`, except that, if provided,
|
|
66
|
-
`seed_seq_list` is used to spawn a seed sequence for each thread,
|
|
67
|
-
to assure independent samples in each thread, and `nthreads` defines
|
|
68
|
-
the number of parallel processes used. The number of draws in
|
|
69
|
-
each thread may be tuned, by trial and error, to the amount of
|
|
70
|
-
memory (RAM) available.
|
|
60
|
+
"""Estimate enforcement and clearance counts from market data sample
|
|
71
61
|
|
|
72
62
|
Parameters
|
|
73
63
|
----------
|
|
64
|
+
_market_data_sample
|
|
65
|
+
Market data sample
|
|
74
66
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
_mkt_sample_spec
|
|
79
|
-
Configuration to use for generating sample data to test
|
|
67
|
+
_upp_test_parms
|
|
68
|
+
Threshold values for various Guidelines criteria
|
|
80
69
|
|
|
81
|
-
|
|
82
|
-
|
|
70
|
+
_upp_test_regime
|
|
71
|
+
Specifies whether to analyze enforcement, clearance, or both
|
|
72
|
+
and the GUPPI and diversion ratio aggregators employed, with
|
|
73
|
+
default being to analyze enforcement based on the maximum
|
|
74
|
+
merging-firm GUPPI and maximum diversion ratio between the
|
|
75
|
+
merging firms
|
|
83
76
|
|
|
84
77
|
Returns
|
|
85
78
|
-------
|
|
86
|
-
|
|
79
|
+
UPPTestsCounts
|
|
80
|
+
Enforced and cleared counts
|
|
87
81
|
|
|
88
82
|
"""
|
|
89
|
-
_sample_sz = _mkt_sample_spec.sample_size
|
|
90
|
-
_subsample_sz = 10**6
|
|
91
|
-
_iter_count = int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
|
|
92
|
-
_thread_count = cpu_count()
|
|
93
|
-
|
|
94
|
-
# Create a copy, to avoid side effects in the outer scope
|
|
95
|
-
_mkt_sample_spec_here = evolve(_mkt_sample_spec, sample_size=_subsample_sz)
|
|
96
|
-
|
|
97
|
-
if (
|
|
98
|
-
_mkt_sample_spec.share_spec.recapture_form != RECConstants.OUTIN
|
|
99
|
-
and _mkt_sample_spec.share_spec.recapture_rate != _invres_parm_vec.rec
|
|
100
|
-
):
|
|
101
|
-
raise ValueError(
|
|
102
|
-
"{} {} {}".format(
|
|
103
|
-
f"Recapture rate from market sample spec, {_mkt_sample_spec.share_spec.recapture_rate}",
|
|
104
|
-
f"must match the value, {_invres_parm_vec.rec}",
|
|
105
|
-
"the guidelines thresholds vector.",
|
|
106
|
-
)
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
_rng_seed_seq_list = [None] * _iter_count
|
|
110
|
-
if _sim_invres_cnts_kwargs:
|
|
111
|
-
if _sseql := _sim_invres_cnts_kwargs.get("seed_seq_list"):
|
|
112
|
-
_rng_seed_seq_list = list(
|
|
113
|
-
zip(*[g.spawn(_iter_count) for g in _sseql], strict=True) # type: ignore
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
_sim_invres_cnts_kwargs: IVNRESCntsArgs = { # type: ignore
|
|
117
|
-
_k: _v
|
|
118
|
-
for _k, _v in _sim_invres_cnts_kwargs.items()
|
|
119
|
-
if _k != "seed_seq_list"
|
|
120
|
-
}
|
|
121
|
-
else:
|
|
122
|
-
_sim_invres_cnts_kwargs = {}
|
|
123
|
-
|
|
124
|
-
_res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
|
|
125
|
-
delayed(sim_invres_cnts)(
|
|
126
|
-
_mkt_sample_spec_here,
|
|
127
|
-
_invres_parm_vec,
|
|
128
|
-
**_sim_invres_cnts_kwargs,
|
|
129
|
-
saved_array_name_suffix=f"{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
|
|
130
|
-
seed_seq_list=_rng_seed_seq_list_ch,
|
|
131
|
-
)
|
|
132
|
-
for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
_res_list_stacks = UPPTestsCounts(*[
|
|
136
|
-
np.stack([getattr(_j, _k) for _j in _res_list])
|
|
137
|
-
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
138
|
-
])
|
|
139
|
-
upp_test_results = UPPTestsCounts(*[
|
|
140
|
-
np.column_stack((
|
|
141
|
-
(_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
|
|
142
|
-
np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
|
|
143
|
-
))
|
|
144
|
-
for _g, _h in zip(
|
|
145
|
-
_res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
|
|
146
|
-
)
|
|
147
|
-
])
|
|
148
|
-
del _res_list, _res_list_stacks
|
|
149
|
-
|
|
150
|
-
return upp_test_results
|
|
151
|
-
|
|
152
83
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
/,
|
|
157
|
-
*,
|
|
158
|
-
sim_test_regime: UPPTestRegime,
|
|
159
|
-
saved_array_name_suffix: str = "",
|
|
160
|
-
save_data_to_file: SaveData = False,
|
|
161
|
-
seed_seq_list: list[SeedSequence] | None = None,
|
|
162
|
-
nthreads: int = 16,
|
|
163
|
-
) -> UPPTestsCounts:
|
|
164
|
-
# Generate market data
|
|
165
|
-
_market_data = dgl.gen_market_sample(
|
|
166
|
-
_mkt_sample_spec, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
_invalid_array_names = (
|
|
170
|
-
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
171
|
-
if _mkt_sample_spec.share_spec.dist_type == "Uniform"
|
|
172
|
-
else ()
|
|
173
|
-
)
|
|
174
|
-
|
|
175
|
-
save_data_to_hdf5(
|
|
176
|
-
_market_data,
|
|
177
|
-
saved_array_name_suffix,
|
|
178
|
-
_invalid_array_names,
|
|
179
|
-
save_data_to_file=save_data_to_file,
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
_upp_tests_data = gen_upp_arrays(
|
|
183
|
-
_market_data,
|
|
184
|
-
_upp_test_parms,
|
|
185
|
-
sim_test_regime,
|
|
186
|
-
saved_array_name_suffix=saved_array_name_suffix,
|
|
187
|
-
save_data_to_file=save_data_to_file,
|
|
84
|
+
_enf_cnts_sim_array = -1 * np.ones((6, 2), np.int64)
|
|
85
|
+
_upp_test_arrays = compute_upp_test_arrays(
|
|
86
|
+
_market_data_sample, _upp_test_parms, _upp_test_regime
|
|
188
87
|
)
|
|
189
88
|
|
|
190
89
|
_fcounts, _hhi_delta, _hhi_post = (
|
|
191
|
-
getattr(
|
|
90
|
+
getattr(_market_data_sample, _g) for _g in ("fcounts", "hhi_delta", "hhi_post")
|
|
192
91
|
)
|
|
193
|
-
del _market_data
|
|
194
92
|
|
|
195
93
|
_stats_rowlen = 6
|
|
196
94
|
# Clearance/enforcement counts --- by firm count
|
|
197
|
-
|
|
198
|
-
if
|
|
199
|
-
|
|
95
|
+
_firmcounts_list = np.unique(_fcounts)
|
|
96
|
+
if _firmcounts_list is not None and np.all(_firmcounts_list >= 0):
|
|
97
|
+
_max_firmcount = max(_firmcounts_list)
|
|
200
98
|
|
|
201
|
-
|
|
202
|
-
for
|
|
203
|
-
|
|
99
|
+
_enf_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
|
|
100
|
+
for _firmcount in np.arange(2, _max_firmcount + 1):
|
|
101
|
+
_firmcount_test = _fcounts == _firmcount
|
|
204
102
|
|
|
205
|
-
|
|
206
|
-
|
|
103
|
+
_enf_cnts_sim_byfirmcount_array = np.vstack((
|
|
104
|
+
_enf_cnts_sim_byfirmcount_array,
|
|
207
105
|
np.array([
|
|
208
|
-
|
|
209
|
-
np.einsum("ij->", 1 *
|
|
106
|
+
_firmcount,
|
|
107
|
+
np.einsum("ij->", 1 * _firmcount_test),
|
|
210
108
|
*[
|
|
211
109
|
np.einsum(
|
|
212
110
|
"ij->",
|
|
213
|
-
1 * (
|
|
111
|
+
1 * (_firmcount_test & getattr(_upp_test_arrays, _f)),
|
|
214
112
|
)
|
|
215
|
-
for _f in
|
|
113
|
+
for _f in _upp_test_arrays.__dataclass_fields__
|
|
216
114
|
],
|
|
217
115
|
]),
|
|
218
116
|
))
|
|
219
|
-
|
|
117
|
+
_enf_cnts_sim_byfirmcount_array = _enf_cnts_sim_byfirmcount_array[1:]
|
|
220
118
|
else:
|
|
221
|
-
|
|
119
|
+
_enf_cnts_sim_byfirmcount_array = np.array(
|
|
222
120
|
np.nan * np.empty((1, _stats_rowlen)), np.int64
|
|
223
121
|
)
|
|
224
|
-
|
|
122
|
+
_enf_cnts_sim_byfirmcount_array[0] = 2
|
|
225
123
|
|
|
226
|
-
# Clearance/
|
|
227
|
-
_hhi_delta_ranged =
|
|
228
|
-
|
|
229
|
-
for _hhi_delta_lim in
|
|
124
|
+
# Clearance/enforcement counts --- by delta
|
|
125
|
+
_hhi_delta_ranged = esl.hhi_delta_ranger(_hhi_delta)
|
|
126
|
+
_enf_cnts_sim_bydelta_array = -1 * np.ones(_stats_rowlen, np.int64)
|
|
127
|
+
for _hhi_delta_lim in esl.HHI_DELTA_KNOTS[:-1]:
|
|
230
128
|
_hhi_delta_test = _hhi_delta_ranged == _hhi_delta_lim
|
|
231
129
|
|
|
232
|
-
|
|
233
|
-
|
|
130
|
+
_enf_cnts_sim_bydelta_array = np.vstack((
|
|
131
|
+
_enf_cnts_sim_bydelta_array,
|
|
234
132
|
np.array([
|
|
235
133
|
_hhi_delta_lim,
|
|
236
134
|
np.einsum("ij->", 1 * _hhi_delta_test),
|
|
237
135
|
*[
|
|
238
136
|
np.einsum(
|
|
239
|
-
"ij->", 1 * (_hhi_delta_test & getattr(
|
|
137
|
+
"ij->", 1 * (_hhi_delta_test & getattr(_upp_test_arrays, _f))
|
|
240
138
|
)
|
|
241
|
-
for _f in
|
|
139
|
+
for _f in _upp_test_arrays.__dataclass_fields__
|
|
242
140
|
],
|
|
243
141
|
]),
|
|
244
142
|
))
|
|
245
143
|
|
|
246
|
-
|
|
144
|
+
_enf_cnts_sim_bydelta_array = _enf_cnts_sim_bydelta_array[1:]
|
|
247
145
|
|
|
248
|
-
# Clearance/
|
|
146
|
+
# Clearance/enforcement counts --- by zone
|
|
249
147
|
try:
|
|
250
|
-
_hhi_zone_post_ranged =
|
|
148
|
+
_hhi_zone_post_ranged = esl.hhi_zone_post_ranger(_hhi_post)
|
|
251
149
|
except ValueError as _err:
|
|
252
150
|
print(_hhi_post)
|
|
253
151
|
raise _err
|
|
254
152
|
|
|
255
153
|
_stats_byconczone_sim = -1 * np.ones(_stats_rowlen + 1, np.int64)
|
|
256
|
-
for _hhi_zone_post_knot in
|
|
154
|
+
for _hhi_zone_post_knot in esl.HHI_POST_ZONE_KNOTS[:-1]:
|
|
257
155
|
_level_test = _hhi_zone_post_ranged == _hhi_zone_post_knot
|
|
258
156
|
|
|
259
157
|
for _hhi_zone_delta_knot in [0, 100, 200]:
|
|
@@ -265,7 +163,7 @@ def sim_invres_cnts(
|
|
|
265
163
|
|
|
266
164
|
_conc_test = _level_test & _delta_test
|
|
267
165
|
|
|
268
|
-
_stats_byconczone_sim = np.
|
|
166
|
+
_stats_byconczone_sim = np.vstack((
|
|
269
167
|
_stats_byconczone_sim,
|
|
270
168
|
np.array([
|
|
271
169
|
_hhi_zone_post_knot,
|
|
@@ -273,50 +171,53 @@ def sim_invres_cnts(
|
|
|
273
171
|
np.einsum("ij->", 1 * _conc_test),
|
|
274
172
|
*[
|
|
275
173
|
np.einsum(
|
|
276
|
-
"ij->", 1 * (_conc_test & getattr(
|
|
174
|
+
"ij->", 1 * (_conc_test & getattr(_upp_test_arrays, _f))
|
|
277
175
|
)
|
|
278
|
-
for _f in
|
|
176
|
+
for _f in _upp_test_arrays.__dataclass_fields__
|
|
279
177
|
],
|
|
280
178
|
]),
|
|
281
179
|
))
|
|
282
180
|
|
|
283
|
-
|
|
284
|
-
_stats_byconczone_sim[1:]
|
|
285
|
-
)
|
|
181
|
+
_enf_cnts_sim_byconczone_array = esl.enf_cnts_byconczone(_stats_byconczone_sim[1:])
|
|
286
182
|
del _stats_byconczone_sim
|
|
287
183
|
del _hhi_delta, _hhi_post, _fcounts
|
|
288
184
|
|
|
289
185
|
return UPPTestsCounts(
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
186
|
+
_enf_cnts_sim_byfirmcount_array,
|
|
187
|
+
_enf_cnts_sim_bydelta_array,
|
|
188
|
+
_enf_cnts_sim_byconczone_array,
|
|
293
189
|
)
|
|
294
190
|
|
|
295
191
|
|
|
296
|
-
def
|
|
192
|
+
def compute_upp_test_arrays(
|
|
297
193
|
_market_data: MarketDataSample,
|
|
298
194
|
_upp_test_parms: gbl.HMGThresholds,
|
|
299
195
|
_sim_test_regime: UPPTestRegime,
|
|
300
196
|
/,
|
|
301
|
-
*,
|
|
302
|
-
saved_array_name_suffix: str = "",
|
|
303
|
-
save_data_to_file: SaveData = False,
|
|
304
197
|
) -> UPPTestsRaw:
|
|
305
198
|
"""
|
|
306
199
|
Generate UPP tests arrays for given configuration and market sample
|
|
307
200
|
|
|
308
201
|
Given a standards vector, market
|
|
202
|
+
|
|
203
|
+
Parameters
|
|
204
|
+
----------
|
|
205
|
+
_market_data
|
|
206
|
+
market data sample
|
|
207
|
+
_upp_test_parms
|
|
208
|
+
guidelines thresholds for testing UPP and related statistics
|
|
209
|
+
_sim_test_regime
|
|
210
|
+
configuration to use for generating UPP tests
|
|
211
|
+
|
|
309
212
|
"""
|
|
310
213
|
_g_bar, _divr_bar, _cmcr_bar, _ipr_bar = (
|
|
311
214
|
getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr")
|
|
312
215
|
)
|
|
313
216
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
for _f in ("resolution", "guppi_aggregator", "divr_aggregator")
|
|
217
|
+
_guppi_array, _ipr_array, _cmcr_array = (
|
|
218
|
+
np.empty_like(_market_data.price_array) for _ in range(3)
|
|
317
219
|
)
|
|
318
220
|
|
|
319
|
-
_guppi_array = np.empty_like(_market_data.divr_array)
|
|
320
221
|
np.einsum(
|
|
321
222
|
"ij,ij,ij->ij",
|
|
322
223
|
_market_data.divr_array,
|
|
@@ -325,153 +226,146 @@ def gen_upp_arrays(
|
|
|
325
226
|
out=_guppi_array,
|
|
326
227
|
)
|
|
327
228
|
|
|
328
|
-
_cmcr_array = np.empty_like(_market_data.divr_array)
|
|
329
|
-
np.divide(
|
|
330
|
-
np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
|
|
331
|
-
np.einsum("ij,ij->ij", 1 - _market_data.pcm_array, 1 - _market_data.divr_array),
|
|
332
|
-
out=_cmcr_array,
|
|
333
|
-
)
|
|
334
|
-
|
|
335
|
-
_ipr_array = np.empty_like(_market_data.divr_array)
|
|
336
229
|
np.divide(
|
|
337
230
|
np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
|
|
338
231
|
1 - _market_data.divr_array,
|
|
339
232
|
out=_ipr_array,
|
|
340
233
|
)
|
|
341
234
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
235
|
+
np.divide(_ipr_array, 1 - _market_data.pcm_array, out=_cmcr_array)
|
|
236
|
+
|
|
237
|
+
(_divr_test_vector,) = _compute_test_array_seq(
|
|
238
|
+
(_market_data.divr_array,),
|
|
239
|
+
_market_data.frmshr_array,
|
|
240
|
+
_sim_test_regime.divr_aggregator,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
(_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_array_seq(
|
|
244
|
+
(_guppi_array, _cmcr_array, _ipr_array),
|
|
245
|
+
_market_data.frmshr_array,
|
|
246
|
+
_sim_test_regime.guppi_aggregator,
|
|
247
|
+
)
|
|
248
|
+
del _cmcr_array, _ipr_array, _guppi_array
|
|
249
|
+
|
|
250
|
+
if _sim_test_regime.resolution == INVResolution.ENFT:
|
|
251
|
+
_upp_test_arrays = UPPTestsRaw(
|
|
252
|
+
_guppi_test_vector >= _g_bar,
|
|
253
|
+
(_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
|
|
254
|
+
_cmcr_test_vector >= _cmcr_bar,
|
|
255
|
+
_ipr_test_vector >= _ipr_bar,
|
|
256
|
+
)
|
|
257
|
+
else:
|
|
258
|
+
_upp_test_arrays = UPPTestsRaw(
|
|
259
|
+
_guppi_test_vector < _g_bar,
|
|
260
|
+
(_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
|
|
261
|
+
_cmcr_test_vector < _cmcr_bar,
|
|
262
|
+
_ipr_test_vector < _ipr_bar,
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
return _upp_test_arrays
|
|
345
266
|
|
|
346
|
-
_test_measure_seq = (_market_data.divr_array, _guppi_array, _cmcr_array, _ipr_array)
|
|
347
267
|
|
|
268
|
+
def _compute_test_array_seq(
|
|
269
|
+
_test_measure_seq: tuple[ArrayDouble, ...],
|
|
270
|
+
_wt_array: ArrayDouble,
|
|
271
|
+
_aggregator: UPPAggrSelector,
|
|
272
|
+
) -> tuple[ArrayDouble, ...]:
|
|
348
273
|
_wt_array = (
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
if _guppi_aggregator
|
|
274
|
+
_wt_array / np.einsum("ij->i", _wt_array)[:, None]
|
|
275
|
+
if _aggregator
|
|
352
276
|
in (
|
|
353
277
|
UPPAggrSelector.CPA,
|
|
354
278
|
UPPAggrSelector.CPD,
|
|
355
279
|
UPPAggrSelector.OSA,
|
|
356
280
|
UPPAggrSelector.OSD,
|
|
357
281
|
)
|
|
358
|
-
else
|
|
282
|
+
else DEFAULT_EMPTY_ARRAY
|
|
359
283
|
)
|
|
360
284
|
|
|
361
|
-
match
|
|
285
|
+
match _aggregator:
|
|
362
286
|
case UPPAggrSelector.AVG:
|
|
363
|
-
|
|
287
|
+
_test_array_seq = (
|
|
364
288
|
1 / 2 * np.einsum("ij->i", _g)[:, None] for _g in _test_measure_seq
|
|
365
289
|
)
|
|
366
290
|
case UPPAggrSelector.CPA:
|
|
367
|
-
|
|
291
|
+
_test_array_seq = (
|
|
368
292
|
np.einsum("ij,ij->i", _wt_array[:, ::-1], _g)[:, None]
|
|
369
293
|
for _g in _test_measure_seq
|
|
370
294
|
)
|
|
371
295
|
case UPPAggrSelector.CPD:
|
|
372
|
-
|
|
296
|
+
_test_array_seq = (
|
|
373
297
|
np.sqrt(np.einsum("ij,ij,ij->i", _wt_array[:, ::-1], _g, _g))[:, None]
|
|
374
298
|
for _g in _test_measure_seq
|
|
375
299
|
)
|
|
376
300
|
case UPPAggrSelector.DIS:
|
|
377
|
-
|
|
301
|
+
_test_array_seq = (
|
|
378
302
|
np.sqrt(1 / 2 * np.einsum("ij,ij->i", _g, _g))[:, None]
|
|
379
303
|
for _g in _test_measure_seq
|
|
380
304
|
)
|
|
381
305
|
case UPPAggrSelector.MAX:
|
|
382
|
-
|
|
306
|
+
_test_array_seq = (
|
|
383
307
|
_g.max(axis=1, keepdims=True) for _g in _test_measure_seq
|
|
384
308
|
)
|
|
385
309
|
case UPPAggrSelector.MIN:
|
|
386
|
-
|
|
310
|
+
_test_array_seq = (
|
|
387
311
|
_g.min(axis=1, keepdims=True) for _g in _test_measure_seq
|
|
388
312
|
)
|
|
389
313
|
case UPPAggrSelector.OSA:
|
|
390
|
-
|
|
314
|
+
_test_array_seq = (
|
|
391
315
|
np.einsum("ij,ij->i", _wt_array, _g)[:, None]
|
|
392
316
|
for _g in _test_measure_seq
|
|
393
317
|
)
|
|
394
318
|
case UPPAggrSelector.OSD:
|
|
395
|
-
|
|
319
|
+
_test_array_seq = (
|
|
396
320
|
np.sqrt(np.einsum("ij,ij,ij->i", _wt_array, _g, _g))[:, None]
|
|
397
321
|
for _g in _test_measure_seq
|
|
398
322
|
)
|
|
399
323
|
case _:
|
|
400
324
|
raise ValueError("GUPPI/diversion ratio aggregation method is invalid.")
|
|
401
|
-
|
|
402
|
-
(_divr_test_vector, _guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = (
|
|
403
|
-
_test_value_seq
|
|
404
|
-
)
|
|
405
|
-
|
|
406
|
-
if _divr_aggregator == UPPAggrSelector.MAX:
|
|
407
|
-
_divr_test_vector = _market_data.divr_array.max(axis=1, keepdims=True)
|
|
408
|
-
|
|
409
|
-
if _invres_resolution == INVResolution.ENFT:
|
|
410
|
-
_upp_tests_data = UPPTestsRaw(
|
|
411
|
-
_guppi_test_vector >= _g_bar,
|
|
412
|
-
(_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
|
|
413
|
-
_cmcr_test_vector >= _cmcr_bar,
|
|
414
|
-
_ipr_test_vector >= _ipr_bar,
|
|
415
|
-
)
|
|
416
|
-
else:
|
|
417
|
-
_upp_tests_data = UPPTestsRaw(
|
|
418
|
-
_guppi_test_vector < _g_bar,
|
|
419
|
-
(_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
|
|
420
|
-
_cmcr_test_vector < _cmcr_bar,
|
|
421
|
-
_ipr_test_vector < _ipr_bar,
|
|
422
|
-
)
|
|
423
|
-
del _guppi_test_vector, _divr_test_vector, _cmcr_test_vector, _ipr_test_vector
|
|
424
|
-
|
|
425
|
-
save_data_to_hdf5(
|
|
426
|
-
_upp_tests_data,
|
|
427
|
-
saved_array_name_suffix,
|
|
428
|
-
(),
|
|
429
|
-
save_data_to_file=save_data_to_file,
|
|
430
|
-
)
|
|
431
|
-
|
|
432
|
-
return _upp_tests_data
|
|
325
|
+
return tuple(_test_array_seq)
|
|
433
326
|
|
|
434
327
|
|
|
435
328
|
def initialize_hd5(
|
|
436
|
-
_h5_path: Path, _hmg_pub_year:
|
|
329
|
+
_h5_path: Path, _hmg_pub_year: HMGPubYear, _test_regime: UPPTestRegime, /
|
|
437
330
|
) -> tuple[SaveData, str]:
|
|
438
331
|
_h5_title = f"HMG version: {_hmg_pub_year}; Test regime: {_test_regime}"
|
|
439
332
|
if _h5_path.is_file():
|
|
440
333
|
_h5_path.unlink()
|
|
441
|
-
_h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title)
|
|
442
|
-
_save_data_to_file:
|
|
443
|
-
|
|
334
|
+
_h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title) # pyright: ignore
|
|
335
|
+
_save_data_to_file: SaveData = (True, _h5_file, _h5_file.root)
|
|
336
|
+
_next_subgroup_name_root = "enf_{}_{}_{}_{}".format(
|
|
444
337
|
_hmg_pub_year,
|
|
445
|
-
*(getattr(_test_regime, _f.name).name for _f in _test_regime.__attrs_attrs__),
|
|
338
|
+
*(getattr(_test_regime, _f.name).name for _f in _test_regime.__attrs_attrs__), # pyright: ignore
|
|
446
339
|
)
|
|
447
|
-
return _save_data_to_file,
|
|
340
|
+
return _save_data_to_file, _next_subgroup_name_root
|
|
448
341
|
|
|
449
342
|
|
|
450
343
|
def save_data_to_hdf5(
|
|
451
344
|
_dclass: DataclassInstance,
|
|
452
|
-
_saved_array_name_suffix: str = "",
|
|
453
|
-
_excl_attrs: Sequence[str] = (),
|
|
454
345
|
/,
|
|
455
346
|
*,
|
|
347
|
+
saved_array_name_suffix: str | None = "",
|
|
348
|
+
excluded_attrs: Sequence[str] | None = (),
|
|
456
349
|
save_data_to_file: SaveData = False,
|
|
457
350
|
) -> None:
|
|
458
351
|
if save_data_to_file:
|
|
459
352
|
_, _h5_file, _h5_group = save_data_to_file
|
|
460
353
|
# Save market data arrays
|
|
354
|
+
excluded_attrs = excluded_attrs or ()
|
|
461
355
|
for _array_name in _dclass.__dataclass_fields__:
|
|
462
|
-
if _array_name in
|
|
356
|
+
if _array_name in excluded_attrs:
|
|
463
357
|
continue
|
|
464
358
|
save_array_to_hdf5(
|
|
465
359
|
getattr(_dclass, _array_name),
|
|
466
360
|
_array_name,
|
|
467
361
|
_h5_group,
|
|
468
362
|
_h5_file,
|
|
469
|
-
saved_array_name_suffix=
|
|
363
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
470
364
|
)
|
|
471
365
|
|
|
472
366
|
|
|
473
367
|
def save_array_to_hdf5(
|
|
474
|
-
_array_obj: NDArray[
|
|
368
|
+
_array_obj: NDArray[Any],
|
|
475
369
|
_array_name: str,
|
|
476
370
|
_h5_group: ptb.Group,
|
|
477
371
|
_h5_file: ptb.File,
|
|
@@ -479,7 +373,7 @@ def save_array_to_hdf5(
|
|
|
479
373
|
*,
|
|
480
374
|
saved_array_name_suffix: str | None = None,
|
|
481
375
|
) -> None:
|
|
482
|
-
_h5_array_name = f"{_array_name}_{saved_array_name_suffix or
|
|
376
|
+
_h5_array_name = f"{_array_name}_{saved_array_name_suffix or ''}".rstrip("_")
|
|
483
377
|
|
|
484
378
|
with suppress(ptb.NoSuchNodeError):
|
|
485
379
|
_h5_file.remove_node(_h5_group, name=_array_name)
|
|
@@ -489,6 +383,12 @@ def save_array_to_hdf5(
|
|
|
489
383
|
_h5_array_name,
|
|
490
384
|
atom=ptb.Atom.from_dtype(_array_obj.dtype),
|
|
491
385
|
shape=_array_obj.shape,
|
|
492
|
-
filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True),
|
|
386
|
+
filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True), # pyright: ignore
|
|
493
387
|
)
|
|
494
388
|
_h5_array[:] = _array_obj
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
if __name__ == "__main__":
|
|
392
|
+
print(
|
|
393
|
+
"This module defines classes with methods for generating UPP test arrays and UPP test-counts arrays on given data."
|
|
394
|
+
)
|