mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +147 -101
- mergeron/core/guidelines_boundaries.py +290 -1078
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +258 -246
- mergeron/gen/data_generation.py +473 -224
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +171 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -257
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738953.1.dist-info/METADATA +0 -93
- mergeron-2024.738953.1.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/gen/upp_tests.py
CHANGED
|
@@ -1,248 +1,157 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Methods to compute intrinsic clearance rates and intrinsic enforcement rates
|
|
3
3
|
from generated market data.
|
|
4
4
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from collections.abc import Sequence
|
|
8
8
|
from contextlib import suppress
|
|
9
|
-
from dataclasses import fields
|
|
10
|
-
from importlib.metadata import version
|
|
11
9
|
from pathlib import Path
|
|
12
|
-
from typing import
|
|
10
|
+
from typing import Any, Literal, TypedDict
|
|
13
11
|
|
|
14
12
|
import numpy as np
|
|
15
13
|
import tables as ptb # type: ignore
|
|
16
|
-
from attrs import evolve
|
|
17
|
-
from attrs import fields as attrs_fields
|
|
18
|
-
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
19
14
|
from numpy.random import SeedSequence
|
|
20
15
|
from numpy.typing import NDArray
|
|
21
16
|
|
|
22
|
-
from
|
|
23
|
-
|
|
24
|
-
|
|
17
|
+
from .. import ( # noqa
|
|
18
|
+
VERSION,
|
|
19
|
+
ArrayBIGINT,
|
|
20
|
+
ArrayBoolean,
|
|
21
|
+
ArrayDouble,
|
|
22
|
+
ArrayFloat,
|
|
23
|
+
ArrayINT,
|
|
24
|
+
HMGPubYear,
|
|
25
|
+
UPPAggrSelector,
|
|
26
|
+
)
|
|
25
27
|
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
26
28
|
from . import (
|
|
27
|
-
|
|
29
|
+
DEFAULT_EMPTY_ARRAY,
|
|
28
30
|
DataclassInstance,
|
|
29
31
|
INVResolution,
|
|
30
32
|
MarketDataSample,
|
|
31
|
-
MarketSampleSpec,
|
|
32
33
|
UPPTestRegime,
|
|
33
34
|
UPPTestsCounts,
|
|
34
35
|
UPPTestsRaw,
|
|
35
36
|
)
|
|
36
|
-
from . import
|
|
37
|
-
from . import investigations_stats as isl
|
|
38
|
-
|
|
39
|
-
__version__ = version(_PKG_NAME)
|
|
37
|
+
from . import enforcement_stats as esl
|
|
40
38
|
|
|
39
|
+
__version__ = VERSION
|
|
41
40
|
|
|
42
|
-
ptb.
|
|
43
|
-
ptb.parameters.MAX_BLOSC_THREADS = 4
|
|
41
|
+
type SaveData = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
|
|
44
42
|
|
|
45
|
-
SaveData: TypeAlias = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
|
|
46
43
|
|
|
44
|
+
class INVRESCntsArgs(TypedDict, total=False):
|
|
45
|
+
"Keyword arguments of function, :code:`sim_enf_cnts`"
|
|
47
46
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
saved_array_name_suffix: str
|
|
51
|
-
save_data_to_file: SaveData
|
|
52
|
-
seed_seq_list: list[SeedSequence]
|
|
47
|
+
sample_size: int
|
|
48
|
+
seed_seq_list: Sequence[SeedSequence] | None
|
|
53
49
|
nthreads: int
|
|
50
|
+
save_data_to_file: SaveData
|
|
51
|
+
saved_array_name_suffix: str
|
|
54
52
|
|
|
55
53
|
|
|
56
|
-
def
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
54
|
+
def compute_upp_test_counts(
|
|
55
|
+
_market_data_sample: MarketDataSample,
|
|
56
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
57
|
+
_upp_test_regime: UPPTestRegime,
|
|
60
58
|
/,
|
|
61
59
|
) -> UPPTestsCounts:
|
|
62
|
-
"""
|
|
63
|
-
A function to parallelize simulations
|
|
64
|
-
|
|
65
|
-
The parameters _sim_invres_cnts_kwargs is passed unaltered to
|
|
66
|
-
the parent function, sim_invres_cnts(), except that, if provided,
|
|
67
|
-
"seed_seq_list" is used to spawn a seed sequence for each thread,
|
|
68
|
-
to assure independent samples in each thread. The number of draws
|
|
69
|
-
in each thread may be tuned, by trial and error, to the amount of
|
|
70
|
-
memory (RAM) available.
|
|
71
|
-
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
_sample_sz = _mkt_sample_spec.sample_size
|
|
75
|
-
_subsample_sz = 10**6
|
|
76
|
-
_iter_count = int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
|
|
77
|
-
_thread_count = cpu_count()
|
|
60
|
+
"""Estimate enforcement and clearance counts from market data sample
|
|
78
61
|
|
|
79
|
-
|
|
80
|
-
|
|
62
|
+
Parameters
|
|
63
|
+
----------
|
|
64
|
+
_market_data_sample
|
|
65
|
+
Market data sample
|
|
81
66
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
and _mkt_sample_spec.share_spec.recapture_spec != RECConstants.OUTIN
|
|
85
|
-
):
|
|
86
|
-
_mkt_sample_spec_here = evolve(
|
|
87
|
-
_mkt_sample_spec_here, recapture_rate=_invres_parm_vec.rec
|
|
88
|
-
)
|
|
89
|
-
elif _mkt_sample_spec.recapture_rate != _invres_parm_vec.rec:
|
|
90
|
-
raise ValueError(
|
|
91
|
-
"{} {} {} {}".format(
|
|
92
|
-
f"Value, {_mkt_sample_spec.recapture_rate}",
|
|
93
|
-
"of recapture rate in the second positional argument",
|
|
94
|
-
f"must equal its value, {_invres_parm_vec.rec}",
|
|
95
|
-
"in the first positional argument.",
|
|
96
|
-
)
|
|
97
|
-
)
|
|
67
|
+
_upp_test_parms
|
|
68
|
+
Threshold values for various Guidelines criteria
|
|
98
69
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
70
|
+
_upp_test_regime
|
|
71
|
+
Specifies whether to analyze enforcement, clearance, or both
|
|
72
|
+
and the GUPPI and diversion ratio aggregators employed, with
|
|
73
|
+
default being to analyze enforcement based on the maximum
|
|
74
|
+
merging-firm GUPPI and maximum diversion ratio between the
|
|
75
|
+
merging firms
|
|
105
76
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
}
|
|
111
|
-
else:
|
|
112
|
-
_sim_invres_cnts_ll_kwargs = {}
|
|
113
|
-
|
|
114
|
-
_res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
|
|
115
|
-
delayed(sim_invres_cnts)(
|
|
116
|
-
_invres_parm_vec,
|
|
117
|
-
_mkt_sample_spec_here,
|
|
118
|
-
**_sim_invres_cnts_ll_kwargs,
|
|
119
|
-
saved_array_name_suffix=f"{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
|
|
120
|
-
seed_seq_list=_rng_seed_seq_list_ch,
|
|
121
|
-
)
|
|
122
|
-
for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
|
|
123
|
-
)
|
|
124
|
-
|
|
125
|
-
_res_list_stacks = UPPTestsCounts(*[
|
|
126
|
-
np.stack([getattr(_j, _k) for _j in _res_list])
|
|
127
|
-
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
128
|
-
])
|
|
129
|
-
upp_test_results = UPPTestsCounts(*[
|
|
130
|
-
np.column_stack((
|
|
131
|
-
(_gv := getattr(_res_list_stacks, _g.name))[0, :, :_h],
|
|
132
|
-
np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
|
|
133
|
-
))
|
|
134
|
-
for _g, _h in zip(fields(_res_list_stacks), [1, 1, 3], strict=True)
|
|
135
|
-
])
|
|
136
|
-
del _res_list, _res_list_stacks
|
|
77
|
+
Returns
|
|
78
|
+
-------
|
|
79
|
+
UPPTestsCounts
|
|
80
|
+
Enforced and cleared counts
|
|
137
81
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def sim_invres_cnts(
|
|
142
|
-
_upp_test_parms: gbl.HMGThresholds,
|
|
143
|
-
_mkt_sample_spec: MarketSampleSpec,
|
|
144
|
-
/,
|
|
145
|
-
*,
|
|
146
|
-
sim_test_regime: UPPTestRegime,
|
|
147
|
-
saved_array_name_suffix: str = "",
|
|
148
|
-
save_data_to_file: SaveData = False,
|
|
149
|
-
seed_seq_list: list[SeedSequence] | None = None,
|
|
150
|
-
nthreads: int = 16,
|
|
151
|
-
) -> UPPTestsCounts:
|
|
152
|
-
# Generate market data
|
|
153
|
-
_market_data = dgl.gen_market_sample(
|
|
154
|
-
_mkt_sample_spec, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
_invalid_array_names = (
|
|
158
|
-
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
159
|
-
if _mkt_sample_spec.share_spec.dist_type == "Uniform"
|
|
160
|
-
else ()
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
save_data_to_hdf5(
|
|
164
|
-
_market_data,
|
|
165
|
-
saved_array_name_suffix,
|
|
166
|
-
_invalid_array_names,
|
|
167
|
-
save_data_to_file=save_data_to_file,
|
|
168
|
-
)
|
|
82
|
+
"""
|
|
169
83
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
sim_test_regime,
|
|
174
|
-
saved_array_name_suffix=saved_array_name_suffix,
|
|
175
|
-
save_data_to_file=save_data_to_file,
|
|
84
|
+
_enf_cnts_sim_array = -1 * np.ones((6, 2), np.int64)
|
|
85
|
+
_upp_test_arrays = compute_upp_test_arrays(
|
|
86
|
+
_market_data_sample, _upp_test_parms, _upp_test_regime
|
|
176
87
|
)
|
|
177
88
|
|
|
178
89
|
_fcounts, _hhi_delta, _hhi_post = (
|
|
179
|
-
getattr(
|
|
90
|
+
getattr(_market_data_sample, _g) for _g in ("fcounts", "hhi_delta", "hhi_post")
|
|
180
91
|
)
|
|
181
|
-
del _market_data
|
|
182
92
|
|
|
183
93
|
_stats_rowlen = 6
|
|
184
94
|
# Clearance/enforcement counts --- by firm count
|
|
185
|
-
|
|
186
|
-
if
|
|
187
|
-
|
|
95
|
+
_firmcounts_list = np.unique(_fcounts)
|
|
96
|
+
if _firmcounts_list is not None and np.all(_firmcounts_list >= 0):
|
|
97
|
+
_max_firmcount = max(_firmcounts_list)
|
|
188
98
|
|
|
189
|
-
|
|
190
|
-
for
|
|
191
|
-
|
|
99
|
+
_enf_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
|
|
100
|
+
for _firmcount in np.arange(2, _max_firmcount + 1):
|
|
101
|
+
_firmcount_test = _fcounts == _firmcount
|
|
192
102
|
|
|
193
|
-
|
|
194
|
-
|
|
103
|
+
_enf_cnts_sim_byfirmcount_array = np.vstack((
|
|
104
|
+
_enf_cnts_sim_byfirmcount_array,
|
|
195
105
|
np.array([
|
|
196
|
-
|
|
197
|
-
np.einsum("ij->", 1 *
|
|
106
|
+
_firmcount,
|
|
107
|
+
np.einsum("ij->", 1 * _firmcount_test),
|
|
198
108
|
*[
|
|
199
109
|
np.einsum(
|
|
200
110
|
"ij->",
|
|
201
|
-
1 * (
|
|
111
|
+
1 * (_firmcount_test & getattr(_upp_test_arrays, _f)),
|
|
202
112
|
)
|
|
203
|
-
for _f in
|
|
113
|
+
for _f in _upp_test_arrays.__dataclass_fields__
|
|
204
114
|
],
|
|
205
115
|
]),
|
|
206
116
|
))
|
|
207
|
-
|
|
117
|
+
_enf_cnts_sim_byfirmcount_array = _enf_cnts_sim_byfirmcount_array[1:]
|
|
208
118
|
else:
|
|
209
|
-
|
|
119
|
+
_enf_cnts_sim_byfirmcount_array = np.array(
|
|
210
120
|
np.nan * np.empty((1, _stats_rowlen)), np.int64
|
|
211
121
|
)
|
|
212
|
-
|
|
122
|
+
_enf_cnts_sim_byfirmcount_array[0] = 2
|
|
213
123
|
|
|
214
|
-
# Clearance/
|
|
215
|
-
_hhi_delta_ranged =
|
|
216
|
-
|
|
217
|
-
for _hhi_delta_lim in
|
|
124
|
+
# Clearance/enforcement counts --- by delta
|
|
125
|
+
_hhi_delta_ranged = esl.hhi_delta_ranger(_hhi_delta)
|
|
126
|
+
_enf_cnts_sim_bydelta_array = -1 * np.ones(_stats_rowlen, np.int64)
|
|
127
|
+
for _hhi_delta_lim in esl.HHI_DELTA_KNOTS[:-1]:
|
|
218
128
|
_hhi_delta_test = _hhi_delta_ranged == _hhi_delta_lim
|
|
219
129
|
|
|
220
|
-
|
|
221
|
-
|
|
130
|
+
_enf_cnts_sim_bydelta_array = np.vstack((
|
|
131
|
+
_enf_cnts_sim_bydelta_array,
|
|
222
132
|
np.array([
|
|
223
133
|
_hhi_delta_lim,
|
|
224
134
|
np.einsum("ij->", 1 * _hhi_delta_test),
|
|
225
135
|
*[
|
|
226
136
|
np.einsum(
|
|
227
|
-
"ij->",
|
|
228
|
-
1 * (_hhi_delta_test & getattr(_upp_tests_data, _f.name)),
|
|
137
|
+
"ij->", 1 * (_hhi_delta_test & getattr(_upp_test_arrays, _f))
|
|
229
138
|
)
|
|
230
|
-
for _f in
|
|
139
|
+
for _f in _upp_test_arrays.__dataclass_fields__
|
|
231
140
|
],
|
|
232
141
|
]),
|
|
233
142
|
))
|
|
234
143
|
|
|
235
|
-
|
|
144
|
+
_enf_cnts_sim_bydelta_array = _enf_cnts_sim_bydelta_array[1:]
|
|
236
145
|
|
|
237
|
-
# Clearance/
|
|
146
|
+
# Clearance/enforcement counts --- by zone
|
|
238
147
|
try:
|
|
239
|
-
_hhi_zone_post_ranged =
|
|
148
|
+
_hhi_zone_post_ranged = esl.hhi_zone_post_ranger(_hhi_post)
|
|
240
149
|
except ValueError as _err:
|
|
241
150
|
print(_hhi_post)
|
|
242
151
|
raise _err
|
|
243
152
|
|
|
244
153
|
_stats_byconczone_sim = -1 * np.ones(_stats_rowlen + 1, np.int64)
|
|
245
|
-
for _hhi_zone_post_knot in
|
|
154
|
+
for _hhi_zone_post_knot in esl.HHI_POST_ZONE_KNOTS[:-1]:
|
|
246
155
|
_level_test = _hhi_zone_post_ranged == _hhi_zone_post_knot
|
|
247
156
|
|
|
248
157
|
for _hhi_zone_delta_knot in [0, 100, 200]:
|
|
@@ -254,7 +163,7 @@ def sim_invres_cnts(
|
|
|
254
163
|
|
|
255
164
|
_conc_test = _level_test & _delta_test
|
|
256
165
|
|
|
257
|
-
_stats_byconczone_sim = np.
|
|
166
|
+
_stats_byconczone_sim = np.vstack((
|
|
258
167
|
_stats_byconczone_sim,
|
|
259
168
|
np.array([
|
|
260
169
|
_hhi_zone_post_knot,
|
|
@@ -262,45 +171,53 @@ def sim_invres_cnts(
|
|
|
262
171
|
np.einsum("ij->", 1 * _conc_test),
|
|
263
172
|
*[
|
|
264
173
|
np.einsum(
|
|
265
|
-
"ij->", 1 * (_conc_test & getattr(
|
|
174
|
+
"ij->", 1 * (_conc_test & getattr(_upp_test_arrays, _f))
|
|
266
175
|
)
|
|
267
|
-
for _f in
|
|
176
|
+
for _f in _upp_test_arrays.__dataclass_fields__
|
|
268
177
|
],
|
|
269
178
|
]),
|
|
270
179
|
))
|
|
271
180
|
|
|
272
|
-
|
|
273
|
-
_stats_byconczone_sim[1:]
|
|
274
|
-
)
|
|
181
|
+
_enf_cnts_sim_byconczone_array = esl.enf_cnts_byconczone(_stats_byconczone_sim[1:])
|
|
275
182
|
del _stats_byconczone_sim
|
|
276
183
|
del _hhi_delta, _hhi_post, _fcounts
|
|
277
184
|
|
|
278
185
|
return UPPTestsCounts(
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
186
|
+
_enf_cnts_sim_byfirmcount_array,
|
|
187
|
+
_enf_cnts_sim_bydelta_array,
|
|
188
|
+
_enf_cnts_sim_byconczone_array,
|
|
282
189
|
)
|
|
283
190
|
|
|
284
191
|
|
|
285
|
-
def
|
|
286
|
-
_upp_test_parms: gbl.HMGThresholds,
|
|
192
|
+
def compute_upp_test_arrays(
|
|
287
193
|
_market_data: MarketDataSample,
|
|
194
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
288
195
|
_sim_test_regime: UPPTestRegime,
|
|
289
196
|
/,
|
|
290
|
-
*,
|
|
291
|
-
saved_array_name_suffix: str = "",
|
|
292
|
-
save_data_to_file: SaveData = False,
|
|
293
197
|
) -> UPPTestsRaw:
|
|
198
|
+
"""
|
|
199
|
+
Generate UPP tests arrays for given configuration and market sample
|
|
200
|
+
|
|
201
|
+
Given a standards vector, market
|
|
202
|
+
|
|
203
|
+
Parameters
|
|
204
|
+
----------
|
|
205
|
+
_market_data
|
|
206
|
+
market data sample
|
|
207
|
+
_upp_test_parms
|
|
208
|
+
guidelines thresholds for testing UPP and related statistics
|
|
209
|
+
_sim_test_regime
|
|
210
|
+
configuration to use for generating UPP tests
|
|
211
|
+
|
|
212
|
+
"""
|
|
294
213
|
_g_bar, _divr_bar, _cmcr_bar, _ipr_bar = (
|
|
295
214
|
getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr")
|
|
296
215
|
)
|
|
297
216
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
for _f in ("resolution", "guppi_aggregator", "divr_aggregator")
|
|
217
|
+
_guppi_array, _ipr_array, _cmcr_array = (
|
|
218
|
+
np.empty_like(_market_data.price_array) for _ in range(3)
|
|
301
219
|
)
|
|
302
220
|
|
|
303
|
-
_guppi_array = np.empty_like(_market_data.divr_array)
|
|
304
221
|
np.einsum(
|
|
305
222
|
"ij,ij,ij->ij",
|
|
306
223
|
_market_data.divr_array,
|
|
@@ -309,157 +226,146 @@ def gen_upp_arrays(
|
|
|
309
226
|
out=_guppi_array,
|
|
310
227
|
)
|
|
311
228
|
|
|
312
|
-
_cmcr_array = np.empty_like(_market_data.divr_array)
|
|
313
|
-
np.divide(
|
|
314
|
-
np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
|
|
315
|
-
np.einsum("ij,ij->ij", 1 - _market_data.pcm_array, 1 - _market_data.divr_array),
|
|
316
|
-
out=_cmcr_array,
|
|
317
|
-
)
|
|
318
|
-
|
|
319
|
-
_ipr_array = np.empty_like(_market_data.divr_array)
|
|
320
229
|
np.divide(
|
|
321
230
|
np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
|
|
322
231
|
1 - _market_data.divr_array,
|
|
323
232
|
out=_ipr_array,
|
|
324
233
|
)
|
|
325
234
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
235
|
+
np.divide(_ipr_array, 1 - _market_data.pcm_array, out=_cmcr_array)
|
|
236
|
+
|
|
237
|
+
(_divr_test_vector,) = _compute_test_array_seq(
|
|
238
|
+
(_market_data.divr_array,),
|
|
239
|
+
_market_data.frmshr_array,
|
|
240
|
+
_sim_test_regime.divr_aggregator,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
(_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_array_seq(
|
|
244
|
+
(_guppi_array, _cmcr_array, _ipr_array),
|
|
245
|
+
_market_data.frmshr_array,
|
|
246
|
+
_sim_test_regime.guppi_aggregator,
|
|
247
|
+
)
|
|
248
|
+
del _cmcr_array, _ipr_array, _guppi_array
|
|
249
|
+
|
|
250
|
+
if _sim_test_regime.resolution == INVResolution.ENFT:
|
|
251
|
+
_upp_test_arrays = UPPTestsRaw(
|
|
252
|
+
_guppi_test_vector >= _g_bar,
|
|
253
|
+
(_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
|
|
254
|
+
_cmcr_test_vector >= _cmcr_bar,
|
|
255
|
+
_ipr_test_vector >= _ipr_bar,
|
|
256
|
+
)
|
|
257
|
+
else:
|
|
258
|
+
_upp_test_arrays = UPPTestsRaw(
|
|
259
|
+
_guppi_test_vector < _g_bar,
|
|
260
|
+
(_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
|
|
261
|
+
_cmcr_test_vector < _cmcr_bar,
|
|
262
|
+
_ipr_test_vector < _ipr_bar,
|
|
263
|
+
)
|
|
329
264
|
|
|
330
|
-
|
|
265
|
+
return _upp_test_arrays
|
|
331
266
|
|
|
267
|
+
|
|
268
|
+
def _compute_test_array_seq(
|
|
269
|
+
_test_measure_seq: tuple[ArrayDouble, ...],
|
|
270
|
+
_wt_array: ArrayDouble,
|
|
271
|
+
_aggregator: UPPAggrSelector,
|
|
272
|
+
) -> tuple[ArrayDouble, ...]:
|
|
332
273
|
_wt_array = (
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
if _guppi_aggregator
|
|
274
|
+
_wt_array / np.einsum("ij->i", _wt_array)[:, None]
|
|
275
|
+
if _aggregator
|
|
336
276
|
in (
|
|
337
277
|
UPPAggrSelector.CPA,
|
|
338
278
|
UPPAggrSelector.CPD,
|
|
339
279
|
UPPAggrSelector.OSA,
|
|
340
280
|
UPPAggrSelector.OSD,
|
|
341
281
|
)
|
|
342
|
-
else
|
|
282
|
+
else DEFAULT_EMPTY_ARRAY
|
|
343
283
|
)
|
|
344
284
|
|
|
345
|
-
match
|
|
285
|
+
match _aggregator:
|
|
346
286
|
case UPPAggrSelector.AVG:
|
|
347
|
-
|
|
287
|
+
_test_array_seq = (
|
|
348
288
|
1 / 2 * np.einsum("ij->i", _g)[:, None] for _g in _test_measure_seq
|
|
349
289
|
)
|
|
350
290
|
case UPPAggrSelector.CPA:
|
|
351
|
-
|
|
291
|
+
_test_array_seq = (
|
|
352
292
|
np.einsum("ij,ij->i", _wt_array[:, ::-1], _g)[:, None]
|
|
353
293
|
for _g in _test_measure_seq
|
|
354
294
|
)
|
|
355
295
|
case UPPAggrSelector.CPD:
|
|
356
|
-
|
|
296
|
+
_test_array_seq = (
|
|
357
297
|
np.sqrt(np.einsum("ij,ij,ij->i", _wt_array[:, ::-1], _g, _g))[:, None]
|
|
358
298
|
for _g in _test_measure_seq
|
|
359
299
|
)
|
|
360
300
|
case UPPAggrSelector.DIS:
|
|
361
|
-
|
|
301
|
+
_test_array_seq = (
|
|
362
302
|
np.sqrt(1 / 2 * np.einsum("ij,ij->i", _g, _g))[:, None]
|
|
363
303
|
for _g in _test_measure_seq
|
|
364
304
|
)
|
|
365
305
|
case UPPAggrSelector.MAX:
|
|
366
|
-
|
|
306
|
+
_test_array_seq = (
|
|
367
307
|
_g.max(axis=1, keepdims=True) for _g in _test_measure_seq
|
|
368
308
|
)
|
|
369
309
|
case UPPAggrSelector.MIN:
|
|
370
|
-
|
|
310
|
+
_test_array_seq = (
|
|
371
311
|
_g.min(axis=1, keepdims=True) for _g in _test_measure_seq
|
|
372
312
|
)
|
|
373
313
|
case UPPAggrSelector.OSA:
|
|
374
|
-
|
|
314
|
+
_test_array_seq = (
|
|
375
315
|
np.einsum("ij,ij->i", _wt_array, _g)[:, None]
|
|
376
316
|
for _g in _test_measure_seq
|
|
377
317
|
)
|
|
378
318
|
case UPPAggrSelector.OSD:
|
|
379
|
-
|
|
319
|
+
_test_array_seq = (
|
|
380
320
|
np.sqrt(np.einsum("ij,ij,ij->i", _wt_array, _g, _g))[:, None]
|
|
381
321
|
for _g in _test_measure_seq
|
|
382
322
|
)
|
|
383
323
|
case _:
|
|
384
324
|
raise ValueError("GUPPI/diversion ratio aggregation method is invalid.")
|
|
385
|
-
|
|
386
|
-
(_divr_test_vector, _guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = (
|
|
387
|
-
_test_value_seq
|
|
388
|
-
)
|
|
389
|
-
|
|
390
|
-
if _divr_aggregator == UPPAggrSelector.MAX:
|
|
391
|
-
_divr_test_vector = _market_data.divr_array.max(axis=1, keepdims=True)
|
|
392
|
-
|
|
393
|
-
if _invres_resolution == INVResolution.ENFT:
|
|
394
|
-
_upp_tests_data = UPPTestsRaw(
|
|
395
|
-
_guppi_test_vector >= _g_bar,
|
|
396
|
-
(_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
|
|
397
|
-
_cmcr_test_vector >= _cmcr_bar,
|
|
398
|
-
_ipr_test_vector >= _ipr_bar,
|
|
399
|
-
)
|
|
400
|
-
else:
|
|
401
|
-
_upp_tests_data = UPPTestsRaw(
|
|
402
|
-
_guppi_test_vector < _g_bar,
|
|
403
|
-
(_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
|
|
404
|
-
_cmcr_test_vector < _cmcr_bar,
|
|
405
|
-
_ipr_test_vector < _ipr_bar,
|
|
406
|
-
)
|
|
407
|
-
del _guppi_test_vector, _divr_test_vector, _cmcr_test_vector, _ipr_test_vector
|
|
408
|
-
|
|
409
|
-
save_data_to_hdf5(
|
|
410
|
-
_upp_tests_data,
|
|
411
|
-
saved_array_name_suffix,
|
|
412
|
-
(),
|
|
413
|
-
save_data_to_file=save_data_to_file,
|
|
414
|
-
)
|
|
415
|
-
|
|
416
|
-
return _upp_tests_data
|
|
325
|
+
return tuple(_test_array_seq)
|
|
417
326
|
|
|
418
327
|
|
|
419
328
|
def initialize_hd5(
|
|
420
|
-
_h5_path: Path, _hmg_pub_year:
|
|
329
|
+
_h5_path: Path, _hmg_pub_year: HMGPubYear, _test_regime: UPPTestRegime, /
|
|
421
330
|
) -> tuple[SaveData, str]:
|
|
422
331
|
_h5_title = f"HMG version: {_hmg_pub_year}; Test regime: {_test_regime}"
|
|
423
332
|
if _h5_path.is_file():
|
|
424
333
|
_h5_path.unlink()
|
|
425
|
-
_h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title)
|
|
426
|
-
_save_data_to_file:
|
|
427
|
-
|
|
334
|
+
_h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title) # pyright: ignore
|
|
335
|
+
_save_data_to_file: SaveData = (True, _h5_file, _h5_file.root)
|
|
336
|
+
_next_subgroup_name_root = "enf_{}_{}_{}_{}".format(
|
|
428
337
|
_hmg_pub_year,
|
|
429
|
-
*(
|
|
430
|
-
getattr(_test_regime, _f.name).name
|
|
431
|
-
for _f in attrs_fields(type(_test_regime))
|
|
432
|
-
),
|
|
338
|
+
*(getattr(_test_regime, _f.name).name for _f in _test_regime.__attrs_attrs__), # pyright: ignore
|
|
433
339
|
)
|
|
434
|
-
return _save_data_to_file,
|
|
340
|
+
return _save_data_to_file, _next_subgroup_name_root
|
|
435
341
|
|
|
436
342
|
|
|
437
343
|
def save_data_to_hdf5(
|
|
438
344
|
_dclass: DataclassInstance,
|
|
439
|
-
_saved_array_name_suffix: str = "",
|
|
440
|
-
_excl_attrs: Sequence[str] = (),
|
|
441
345
|
/,
|
|
442
346
|
*,
|
|
347
|
+
saved_array_name_suffix: str | None = "",
|
|
348
|
+
excluded_attrs: Sequence[str] | None = (),
|
|
443
349
|
save_data_to_file: SaveData = False,
|
|
444
350
|
) -> None:
|
|
445
351
|
if save_data_to_file:
|
|
446
352
|
_, _h5_file, _h5_group = save_data_to_file
|
|
447
353
|
# Save market data arrays
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
if _array_name in
|
|
354
|
+
excluded_attrs = excluded_attrs or ()
|
|
355
|
+
for _array_name in _dclass.__dataclass_fields__:
|
|
356
|
+
if _array_name in excluded_attrs:
|
|
451
357
|
continue
|
|
452
358
|
save_array_to_hdf5(
|
|
453
359
|
getattr(_dclass, _array_name),
|
|
454
360
|
_array_name,
|
|
455
361
|
_h5_group,
|
|
456
362
|
_h5_file,
|
|
457
|
-
saved_array_name_suffix=
|
|
363
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
458
364
|
)
|
|
459
365
|
|
|
460
366
|
|
|
461
367
|
def save_array_to_hdf5(
|
|
462
|
-
_array_obj: NDArray[
|
|
368
|
+
_array_obj: NDArray[Any],
|
|
463
369
|
_array_name: str,
|
|
464
370
|
_h5_group: ptb.Group,
|
|
465
371
|
_h5_file: ptb.File,
|
|
@@ -467,7 +373,7 @@ def save_array_to_hdf5(
|
|
|
467
373
|
*,
|
|
468
374
|
saved_array_name_suffix: str | None = None,
|
|
469
375
|
) -> None:
|
|
470
|
-
_h5_array_name = f"{_array_name}_{saved_array_name_suffix or
|
|
376
|
+
_h5_array_name = f"{_array_name}_{saved_array_name_suffix or ''}".rstrip("_")
|
|
471
377
|
|
|
472
378
|
with suppress(ptb.NoSuchNodeError):
|
|
473
379
|
_h5_file.remove_node(_h5_group, name=_array_name)
|
|
@@ -477,6 +383,12 @@ def save_array_to_hdf5(
|
|
|
477
383
|
_h5_array_name,
|
|
478
384
|
atom=ptb.Atom.from_dtype(_array_obj.dtype),
|
|
479
385
|
shape=_array_obj.shape,
|
|
480
|
-
filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True),
|
|
386
|
+
filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True), # pyright: ignore
|
|
481
387
|
)
|
|
482
388
|
_h5_array[:] = _array_obj
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
if __name__ == "__main__":
|
|
392
|
+
print(
|
|
393
|
+
"This module defines classes with methods for generating UPP test arrays and UPP test-counts arrays on given data."
|
|
394
|
+
)
|