mergeron 2024.738953.1-py3-none-any.whl → 2025.739265.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mergeron might be problematic.

Files changed (39)
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +147 -101
  5. mergeron/core/guidelines_boundaries.py +290 -1078
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +258 -246
  15. mergeron/gen/data_generation.py +473 -224
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +171 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -257
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738953.1.dist-info/METADATA +0 -93
  38. mergeron-2024.738953.1.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/gen/upp_tests.py CHANGED
@@ -1,248 +1,157 @@
 """
-Routines to estimate intrinsic clearnace rates and intrinsic enforcement rates
+Methods to compute intrinsic clearance rates and intrinsic enforcement rates
 from generated market data.
 
 """
 
 from collections.abc import Sequence
 from contextlib import suppress
-from dataclasses import fields
-from importlib.metadata import version
 from pathlib import Path
-from typing import Literal, TypeAlias, TypedDict
+from typing import Any, Literal, TypedDict
 
 import numpy as np
 import tables as ptb # type: ignore
-from attrs import evolve
-from attrs import fields as attrs_fields
-from joblib import Parallel, cpu_count, delayed # type: ignore
 from numpy.random import SeedSequence
 from numpy.typing import NDArray
 
-from mergeron.core.pseudorandom_numbers import TF, TI
-
-from .. import _PKG_NAME, RECConstants, UPPAggrSelector # noqa: TID252
+from .. import ( # noqa
+    VERSION,
+    ArrayBIGINT,
+    ArrayBoolean,
+    ArrayDouble,
+    ArrayFloat,
+    ArrayINT,
+    HMGPubYear,
+    UPPAggrSelector,
+)
 from ..core import guidelines_boundaries as gbl # noqa: TID252
 from . import (
-    EMPTY_ARRAY_DEFAULT,
+    DEFAULT_EMPTY_ARRAY,
     DataclassInstance,
     INVResolution,
     MarketDataSample,
-    MarketSampleSpec,
     UPPTestRegime,
     UPPTestsCounts,
     UPPTestsRaw,
 )
-from . import data_generation as dgl
-from . import investigations_stats as isl
-
-__version__ = version(_PKG_NAME)
+from . import enforcement_stats as esl
 
+__version__ = VERSION
 
-ptb.parameters.MAX_NUMEXPR_THREADS = 8
-ptb.parameters.MAX_BLOSC_THREADS = 4
+type SaveData = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
 
-SaveData: TypeAlias = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
 
+class INVRESCntsArgs(TypedDict, total=False):
+    "Keyword arguments of function, :code:`sim_enf_cnts`"
 
-class IVNRESCntsArgs(TypedDict, total=False):
-    sim_test_regime: UPPTestRegime
-    saved_array_name_suffix: str
-    save_data_to_file: SaveData
-    seed_seq_list: list[SeedSequence]
+    sample_size: int
+    seed_seq_list: Sequence[SeedSequence] | None
     nthreads: int
+    save_data_to_file: SaveData
+    saved_array_name_suffix: str
 
 
-def sim_invres_cnts_ll(
-    _invres_parm_vec: gbl.HMGThresholds,
-    _mkt_sample_spec: MarketSampleSpec,
-    _sim_invres_cnts_kwargs: IVNRESCntsArgs,
+def compute_upp_test_counts(
+    _market_data_sample: MarketDataSample,
+    _upp_test_parms: gbl.HMGThresholds,
+    _upp_test_regime: UPPTestRegime,
     /,
 ) -> UPPTestsCounts:
-    """
-    A function to parallelize simulations
-
-    The parameters _sim_invres_cnts_kwargs is passed unaltered to
-    the parent function, sim_invres_cnts(), except that, if provided,
-    "seed_seq_list" is used to spawn a seed sequence for each thread,
-    to assure independent samples in each thread. The number of draws
-    in each thread may be tuned, by trial and error, to the amount of
-    memory (RAM) available.
-
-    """
-
-    _sample_sz = _mkt_sample_spec.sample_size
-    _subsample_sz = 10**6
-    _iter_count = int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
-    _thread_count = cpu_count()
+    """Estimate enforcement and clearance counts from market data sample
 
-    # Crate a copy, to avoid side effects in the outer scope
-    _mkt_sample_spec_here = evolve(_mkt_sample_spec, sample_size=_subsample_sz)
+    Parameters
+    ----------
+    _market_data_sample
+        Market data sample
 
-    if (
-        _mkt_sample_spec.recapture_rate is None
-        and _mkt_sample_spec.share_spec.recapture_spec != RECConstants.OUTIN
-    ):
-        _mkt_sample_spec_here = evolve(
-            _mkt_sample_spec_here, recapture_rate=_invres_parm_vec.rec
-        )
-    elif _mkt_sample_spec.recapture_rate != _invres_parm_vec.rec:
-        raise ValueError(
-            "{} {} {} {}".format(
-                f"Value, {_mkt_sample_spec.recapture_rate}",
-                "of recapture rate in the second positional argument",
-                f"must equal its value, {_invres_parm_vec.rec}",
-                "in the first positional argument.",
-            )
-        )
+    _upp_test_parms
+        Threshold values for various Guidelines criteria
 
-    _rng_seed_seq_list = [None] * _iter_count
-    if _sim_invres_cnts_kwargs:
-        if _sseql := _sim_invres_cnts_kwargs.get("seed_seq_list", None):
-            _rng_seed_seq_list = list(
-                zip(*[g.spawn(_iter_count) for g in _sseql], strict=True) # type: ignore
-            )
+    _upp_test_regime
+        Specifies whether to analyze enforcement, clearance, or both
+        and the GUPPI and diversion ratio aggregators employed, with
+        default being to analyze enforcement based on the maximum
+        merging-firm GUPPI and maximum diversion ratio between the
+        merging firms
 
-        _sim_invres_cnts_ll_kwargs: IVNRESCntsArgs = { # type: ignore
-            _k: _v
-            for _k, _v in _sim_invres_cnts_kwargs.items()
-            if _k != "seed_seq_list"
-        }
-    else:
-        _sim_invres_cnts_ll_kwargs = {}
-
-    _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
-        delayed(sim_invres_cnts)(
-            _invres_parm_vec,
-            _mkt_sample_spec_here,
-            **_sim_invres_cnts_ll_kwargs,
-            saved_array_name_suffix=f"{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
-            seed_seq_list=_rng_seed_seq_list_ch,
-        )
-        for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
-    )
-
-    _res_list_stacks = UPPTestsCounts(*[
-        np.stack([getattr(_j, _k) for _j in _res_list])
-        for _k in ("by_firm_count", "by_delta", "by_conczone")
-    ])
-    upp_test_results = UPPTestsCounts(*[
-        np.column_stack((
-            (_gv := getattr(_res_list_stacks, _g.name))[0, :, :_h],
-            np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
-        ))
-        for _g, _h in zip(fields(_res_list_stacks), [1, 1, 3], strict=True)
-    ])
-    del _res_list, _res_list_stacks
+    Returns
+    -------
+    UPPTestsCounts
+        Enforced and cleared counts
 
-    return upp_test_results
-
-
-def sim_invres_cnts(
-    _upp_test_parms: gbl.HMGThresholds,
-    _mkt_sample_spec: MarketSampleSpec,
-    /,
-    *,
-    sim_test_regime: UPPTestRegime,
-    saved_array_name_suffix: str = "",
-    save_data_to_file: SaveData = False,
-    seed_seq_list: list[SeedSequence] | None = None,
-    nthreads: int = 16,
-) -> UPPTestsCounts:
-    # Generate market data
-    _market_data = dgl.gen_market_sample(
-        _mkt_sample_spec, seed_seq_list=seed_seq_list, nthreads=nthreads
-    )
-
-    _invalid_array_names = (
-        ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
-        if _mkt_sample_spec.share_spec.dist_type == "Uniform"
-        else ()
-    )
-
-    save_data_to_hdf5(
-        _market_data,
-        saved_array_name_suffix,
-        _invalid_array_names,
-        save_data_to_file=save_data_to_file,
-    )
+    """
 
-    _upp_tests_data = gen_upp_arrays(
-        _upp_test_parms,
-        _market_data,
-        sim_test_regime,
-        saved_array_name_suffix=saved_array_name_suffix,
-        save_data_to_file=save_data_to_file,
+    _enf_cnts_sim_array = -1 * np.ones((6, 2), np.int64)
+    _upp_test_arrays = compute_upp_test_arrays(
+        _market_data_sample, _upp_test_parms, _upp_test_regime
     )
 
     _fcounts, _hhi_delta, _hhi_post = (
-        getattr(_market_data, _g) for _g in ["fcounts", "hhi_delta", "hhi_post"]
+        getattr(_market_data_sample, _g) for _g in ("fcounts", "hhi_delta", "hhi_post")
     )
-    del _market_data
 
     _stats_rowlen = 6
     # Clearance/enforcement counts --- by firm count
-    _firm_counts_weights = _mkt_sample_spec.share_spec.firm_counts_weights
-    if _firm_counts_weights is not None and np.all(_firm_counts_weights >= 0):
-        _max_firm_count = len(_firm_counts_weights)
+    _firmcounts_list = np.unique(_fcounts)
+    if _firmcounts_list is not None and np.all(_firmcounts_list >= 0):
+        _max_firmcount = max(_firmcounts_list)
 
-        _invres_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
-        for _firm_cnt in 2 + np.arange(_max_firm_count):
-            _firm_count_test = _fcounts == _firm_cnt
+        _enf_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
+        for _firmcount in np.arange(2, _max_firmcount + 1):
+            _firmcount_test = _fcounts == _firmcount
 
-            _invres_cnts_sim_byfirmcount_array = np.row_stack((
-                _invres_cnts_sim_byfirmcount_array,
+            _enf_cnts_sim_byfirmcount_array = np.vstack((
+                _enf_cnts_sim_byfirmcount_array,
                 np.array([
-                    _firm_cnt,
-                    np.einsum("ij->", 1 * _firm_count_test),
+                    _firmcount,
+                    np.einsum("ij->", 1 * _firmcount_test),
                     *[
                         np.einsum(
                             "ij->",
-                            1 * (_firm_count_test & getattr(_upp_tests_data, _f.name)),
+                            1 * (_firmcount_test & getattr(_upp_test_arrays, _f)),
                         )
-                        for _f in fields(_upp_tests_data)
+                        for _f in _upp_test_arrays.__dataclass_fields__
                     ],
                 ]),
             ))
-        _invres_cnts_sim_byfirmcount_array = _invres_cnts_sim_byfirmcount_array[1:]
+        _enf_cnts_sim_byfirmcount_array = _enf_cnts_sim_byfirmcount_array[1:]
     else:
-        _invres_cnts_sim_byfirmcount_array = np.array(
+        _enf_cnts_sim_byfirmcount_array = np.array(
            np.nan * np.empty((1, _stats_rowlen)), np.int64
        )
-        _invres_cnts_sim_byfirmcount_array[0] = 2
+        _enf_cnts_sim_byfirmcount_array[0] = 2
 
-    # Clearance/enfrocement counts --- by delta
-    _hhi_delta_ranged = isl.hhi_delta_ranger(_hhi_delta)
-    _invres_cnts_sim_bydelta_array = -1 * np.ones(_stats_rowlen, np.int64)
-    for _hhi_delta_lim in isl.HHI_DELTA_KNOTS[:-1]:
+    # Clearance/enforcement counts --- by delta
+    _hhi_delta_ranged = esl.hhi_delta_ranger(_hhi_delta)
+    _enf_cnts_sim_bydelta_array = -1 * np.ones(_stats_rowlen, np.int64)
+    for _hhi_delta_lim in esl.HHI_DELTA_KNOTS[:-1]:
        _hhi_delta_test = _hhi_delta_ranged == _hhi_delta_lim
 
-        _invres_cnts_sim_bydelta_array = np.row_stack((
-            _invres_cnts_sim_bydelta_array,
+        _enf_cnts_sim_bydelta_array = np.vstack((
+            _enf_cnts_sim_bydelta_array,
            np.array([
                _hhi_delta_lim,
                np.einsum("ij->", 1 * _hhi_delta_test),
                *[
                    np.einsum(
-                        "ij->",
-                        1 * (_hhi_delta_test & getattr(_upp_tests_data, _f.name)),
+                        "ij->", 1 * (_hhi_delta_test & getattr(_upp_test_arrays, _f))
                    )
-                    for _f in fields(_upp_tests_data)
+                    for _f in _upp_test_arrays.__dataclass_fields__
                ],
            ]),
        ))
 
-    _invres_cnts_sim_bydelta_array = _invres_cnts_sim_bydelta_array[1:]
+    _enf_cnts_sim_bydelta_array = _enf_cnts_sim_bydelta_array[1:]
 
-    # Clearance/enfrocement counts --- by zone
+    # Clearance/enforcement counts --- by zone
    try:
-        _hhi_zone_post_ranged = isl.hhi_zone_post_ranger(_hhi_post)
+        _hhi_zone_post_ranged = esl.hhi_zone_post_ranger(_hhi_post)
    except ValueError as _err:
        print(_hhi_post)
        raise _err
 
    _stats_byconczone_sim = -1 * np.ones(_stats_rowlen + 1, np.int64)
-    for _hhi_zone_post_knot in isl.HHI_POST_ZONE_KNOTS[:-1]:
+    for _hhi_zone_post_knot in esl.HHI_POST_ZONE_KNOTS[:-1]:
        _level_test = _hhi_zone_post_ranged == _hhi_zone_post_knot
 
        for _hhi_zone_delta_knot in [0, 100, 200]:
@@ -254,7 +163,7 @@ def sim_invres_cnts(
 
             _conc_test = _level_test & _delta_test
 
-            _stats_byconczone_sim = np.row_stack((
+            _stats_byconczone_sim = np.vstack((
                 _stats_byconczone_sim,
                 np.array([
                     _hhi_zone_post_knot,
@@ -262,45 +171,53 @@
                     np.einsum("ij->", 1 * _conc_test),
                     *[
                         np.einsum(
-                            "ij->", 1 * (_conc_test & getattr(_upp_tests_data, _f.name))
+                            "ij->", 1 * (_conc_test & getattr(_upp_test_arrays, _f))
                         )
-                        for _f in fields(_upp_tests_data)
+                        for _f in _upp_test_arrays.__dataclass_fields__
                     ],
                 ]),
             ))
 
-    _invres_cnts_sim_byconczone_array = isl.invres_cnts_byconczone(
-        _stats_byconczone_sim[1:]
-    )
+    _enf_cnts_sim_byconczone_array = esl.enf_cnts_byconczone(_stats_byconczone_sim[1:])
    del _stats_byconczone_sim
    del _hhi_delta, _hhi_post, _fcounts
 
    return UPPTestsCounts(
-        _invres_cnts_sim_byfirmcount_array,
-        _invres_cnts_sim_bydelta_array,
-        _invres_cnts_sim_byconczone_array,
+        _enf_cnts_sim_byfirmcount_array,
+        _enf_cnts_sim_bydelta_array,
+        _enf_cnts_sim_byconczone_array,
    )
 
 
-def gen_upp_arrays(
-    _upp_test_parms: gbl.HMGThresholds,
+def compute_upp_test_arrays(
    _market_data: MarketDataSample,
+    _upp_test_parms: gbl.HMGThresholds,
    _sim_test_regime: UPPTestRegime,
    /,
-    *,
-    saved_array_name_suffix: str = "",
-    save_data_to_file: SaveData = False,
 ) -> UPPTestsRaw:
+    """
+    Generate UPP tests arrays for given configuration and market sample
+
+    Given a standards vector, market
+
+    Parameters
+    ----------
+    _market_data
+        market data sample
+    _upp_test_parms
+        guidelines thresholds for testing UPP and related statistics
+    _sim_test_regime
+        configuration to use for generating UPP tests
+
+    """
    _g_bar, _divr_bar, _cmcr_bar, _ipr_bar = (
        getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr")
    )
 
-    _invres_resolution, _guppi_aggregator, _divr_aggregator = (
-        getattr(_sim_test_regime, _f)
-        for _f in ("resolution", "guppi_aggregator", "divr_aggregator")
+    _guppi_array, _ipr_array, _cmcr_array = (
+        np.empty_like(_market_data.price_array) for _ in range(3)
    )
 
-    _guppi_array = np.empty_like(_market_data.divr_array)
    np.einsum(
        "ij,ij,ij->ij",
        _market_data.divr_array,
@@ -309,157 +226,146 @@ def gen_upp_arrays(
         out=_guppi_array,
     )
 
-    _cmcr_array = np.empty_like(_market_data.divr_array)
-    np.divide(
-        np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
-        np.einsum("ij,ij->ij", 1 - _market_data.pcm_array, 1 - _market_data.divr_array),
-        out=_cmcr_array,
-    )
-
-    _ipr_array = np.empty_like(_market_data.divr_array)
     np.divide(
         np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
         1 - _market_data.divr_array,
         out=_ipr_array,
     )
 
-    # This one needs further testing:
-    # _ipr_array_alt = np.empty_like(_market_data.divr_array)
-    # np.divide(_guppi_array, (1 - _market_data.divr_array[:, ::-1]), out=_ipr_array_alt)
+    np.divide(_ipr_array, 1 - _market_data.pcm_array, out=_cmcr_array)
+
+    (_divr_test_vector,) = _compute_test_array_seq(
+        (_market_data.divr_array,),
+        _market_data.frmshr_array,
+        _sim_test_regime.divr_aggregator,
+    )
+
+    (_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_array_seq(
+        (_guppi_array, _cmcr_array, _ipr_array),
+        _market_data.frmshr_array,
+        _sim_test_regime.guppi_aggregator,
+    )
+    del _cmcr_array, _ipr_array, _guppi_array
+
+    if _sim_test_regime.resolution == INVResolution.ENFT:
+        _upp_test_arrays = UPPTestsRaw(
+            _guppi_test_vector >= _g_bar,
+            (_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
+            _cmcr_test_vector >= _cmcr_bar,
+            _ipr_test_vector >= _ipr_bar,
+        )
+    else:
+        _upp_test_arrays = UPPTestsRaw(
+            _guppi_test_vector < _g_bar,
+            (_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
+            _cmcr_test_vector < _cmcr_bar,
+            _ipr_test_vector < _ipr_bar,
+        )
 
-    _test_measure_seq = (_market_data.divr_array, _guppi_array, _cmcr_array, _ipr_array)
+    return _upp_test_arrays
 
+
+def _compute_test_array_seq(
+    _test_measure_seq: tuple[ArrayDouble, ...],
+    _wt_array: ArrayDouble,
+    _aggregator: UPPAggrSelector,
+) -> tuple[ArrayDouble, ...]:
    _wt_array = (
-        _market_data.frmshr_array
-        / np.einsum("ij->i", _market_data.frmshr_array)[:, None]
-        if _guppi_aggregator
+        _wt_array / np.einsum("ij->i", _wt_array)[:, None]
+        if _aggregator
        in (
            UPPAggrSelector.CPA,
            UPPAggrSelector.CPD,
            UPPAggrSelector.OSA,
            UPPAggrSelector.OSD,
        )
-        else EMPTY_ARRAY_DEFAULT
+        else DEFAULT_EMPTY_ARRAY
    )
 
-    match _guppi_aggregator:
+    match _aggregator:
        case UPPAggrSelector.AVG:
-            _test_value_seq = (
+            _test_array_seq = (
                1 / 2 * np.einsum("ij->i", _g)[:, None] for _g in _test_measure_seq
            )
        case UPPAggrSelector.CPA:
-            _test_value_seq = (
+            _test_array_seq = (
                np.einsum("ij,ij->i", _wt_array[:, ::-1], _g)[:, None]
                for _g in _test_measure_seq
            )
        case UPPAggrSelector.CPD:
-            _test_value_seq = (
+            _test_array_seq = (
                np.sqrt(np.einsum("ij,ij,ij->i", _wt_array[:, ::-1], _g, _g))[:, None]
                for _g in _test_measure_seq
            )
        case UPPAggrSelector.DIS:
-            _test_value_seq = (
+            _test_array_seq = (
                np.sqrt(1 / 2 * np.einsum("ij,ij->i", _g, _g))[:, None]
                for _g in _test_measure_seq
            )
        case UPPAggrSelector.MAX:
-            _test_value_seq = (
+            _test_array_seq = (
                _g.max(axis=1, keepdims=True) for _g in _test_measure_seq
            )
        case UPPAggrSelector.MIN:
-            _test_value_seq = (
+            _test_array_seq = (
                _g.min(axis=1, keepdims=True) for _g in _test_measure_seq
            )
        case UPPAggrSelector.OSA:
-            _test_value_seq = (
+            _test_array_seq = (
                np.einsum("ij,ij->i", _wt_array, _g)[:, None]
                for _g in _test_measure_seq
            )
        case UPPAggrSelector.OSD:
-            _test_value_seq = (
+            _test_array_seq = (
                np.sqrt(np.einsum("ij,ij,ij->i", _wt_array, _g, _g))[:, None]
                for _g in _test_measure_seq
            )
        case _:
            raise ValueError("GUPPI/diversion ratio aggregation method is invalid.")
-    del _cmcr_array, _guppi_array
-    (_divr_test_vector, _guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = (
-        _test_value_seq
-    )
-
-    if _divr_aggregator == UPPAggrSelector.MAX:
-        _divr_test_vector = _market_data.divr_array.max(axis=1, keepdims=True)
-
-    if _invres_resolution == INVResolution.ENFT:
-        _upp_tests_data = UPPTestsRaw(
-            _guppi_test_vector >= _g_bar,
-            (_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
-            _cmcr_test_vector >= _cmcr_bar,
-            _ipr_test_vector >= _ipr_bar,
-        )
-    else:
-        _upp_tests_data = UPPTestsRaw(
-            _guppi_test_vector < _g_bar,
-            (_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
-            _cmcr_test_vector < _cmcr_bar,
-            _ipr_test_vector < _ipr_bar,
-        )
-    del _guppi_test_vector, _divr_test_vector, _cmcr_test_vector, _ipr_test_vector
-
-    save_data_to_hdf5(
-        _upp_tests_data,
-        saved_array_name_suffix,
-        (),
-        save_data_to_file=save_data_to_file,
-    )
-
-    return _upp_tests_data
+    return tuple(_test_array_seq)
 
 
 def initialize_hd5(
-    _h5_path: Path, _hmg_pub_year: gbl.HMGPubYear, _test_regime: UPPTestRegime, /
+    _h5_path: Path, _hmg_pub_year: HMGPubYear, _test_regime: UPPTestRegime, /
 ) -> tuple[SaveData, str]:
    _h5_title = f"HMG version: {_hmg_pub_year}; Test regime: {_test_regime}"
    if _h5_path.is_file():
        _h5_path.unlink()
-    _h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title)
-    _save_data_to_file: tuple[Literal[True], ptb.File, str] = (True, _h5_file, "/")
-    _next_subgroup_name = "invres_{}_{}_{}_{}".format(
+    _h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title) # pyright: ignore
+    _save_data_to_file: SaveData = (True, _h5_file, _h5_file.root)
+    _next_subgroup_name_root = "enf_{}_{}_{}_{}".format(
        _hmg_pub_year,
-        *(
-            getattr(_test_regime, _f.name).name
-            for _f in attrs_fields(type(_test_regime))
-        ),
+        *(getattr(_test_regime, _f.name).name for _f in _test_regime.__attrs_attrs__), # pyright: ignore
    )
-    return _save_data_to_file, _next_subgroup_name
+    return _save_data_to_file, _next_subgroup_name_root
 
 
 def save_data_to_hdf5(
    _dclass: DataclassInstance,
-    _saved_array_name_suffix: str = "",
-    _excl_attrs: Sequence[str] = (),
    /,
    *,
+    saved_array_name_suffix: str | None = "",
+    excluded_attrs: Sequence[str] | None = (),
    save_data_to_file: SaveData = False,
 ) -> None:
    if save_data_to_file:
        _, _h5_file, _h5_group = save_data_to_file
        # Save market data arrays
-        for _array_field in fields(_dclass):
-            _array_name = _array_field.name
-            if _array_name in _excl_attrs:
+        excluded_attrs = excluded_attrs or ()
+        for _array_name in _dclass.__dataclass_fields__:
+            if _array_name in excluded_attrs:
                continue
            save_array_to_hdf5(
                getattr(_dclass, _array_name),
                _array_name,
                _h5_group,
                _h5_file,
-                saved_array_name_suffix=_saved_array_name_suffix,
+                saved_array_name_suffix=saved_array_name_suffix,
            )
 
 
 def save_array_to_hdf5(
-    _array_obj: NDArray[np.floating[TF] | np.integer[TI] | np.bool_],
+    _array_obj: NDArray[Any],
    _array_name: str,
    _h5_group: ptb.Group,
    _h5_file: ptb.File,
@@ -467,7 +373,7 @@ def save_array_to_hdf5(
     *,
     saved_array_name_suffix: str | None = None,
 ) -> None:
-    _h5_array_name = f"{_array_name}_{saved_array_name_suffix or ""}".rstrip("_")
+    _h5_array_name = f"{_array_name}_{saved_array_name_suffix or ''}".rstrip("_")
 
     with suppress(ptb.NoSuchNodeError):
         _h5_file.remove_node(_h5_group, name=_array_name)
@@ -477,6 +383,12 @@
         _h5_array_name,
         atom=ptb.Atom.from_dtype(_array_obj.dtype),
         shape=_array_obj.shape,
-        filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True),
+        filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True), # pyright: ignore
     )
     _h5_array[:] = _array_obj
+
+
+if __name__ == "__main__":
+    print(
+        "This module defines classes with methods for generating UPP test arrays and UPP test-counts arrays on given data."
+    )
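
For orientation, the reworked compute_upp_test_arrays() collapses the firm-level GUPPI, diversion-ratio, CMCR, and IPR measures to one statistic per simulated market using the aggregator named in the UPPTestRegime, then compares that statistic to the Guidelines thresholds. The minimal, self-contained NumPy sketch below illustrates that flow for two of the aggregators in the match statement shown above, UPPAggrSelector.MAX and UPPAggrSelector.OSA. The share-proportional diversion ratios, symmetric prices, and the 0.075 threshold are illustrative assumptions rather than values taken from the package, and the GUPPI expression is the standard diversion-ratio times partner-margin times relative-price definition, not a quotation of the module's einsum call.

import numpy as np

rng = np.random.default_rng(12345)
n_mergers = 5  # five simulated mergers, two merging firms each

frmshr = rng.uniform(0.05, 0.35, size=(n_mergers, 2))  # merging-firm shares
divr = frmshr[:, ::-1] / (1.0 - frmshr)                # share-proportional diversion ratios (assumed)
pcm = rng.uniform(0.2, 0.6, size=(n_mergers, 2))       # price-cost margins
price = np.ones((n_mergers, 2))                        # symmetric prices (assumed)

# Firm-level GUPPI: diversion ratio times the merger partner's margin and relative price
guppi = divr * pcm[:, ::-1] * price[:, ::-1] / price

# Two of the aggregators mirrored from _compute_test_array_seq
guppi_max = guppi.max(axis=1, keepdims=True)           # UPPAggrSelector.MAX
wt = frmshr / np.einsum("ij->i", frmshr)[:, None]      # own-share weights
guppi_osa = np.einsum("ij,ij->i", wt, guppi)[:, None]  # UPPAggrSelector.OSA

g_bar = 0.075  # illustrative threshold only
print(np.column_stack((guppi_max, guppi_max >= g_bar, guppi_osa, guppi_osa >= g_bar)))

Replacing the two aggregation lines with minima, root-mean-square ("distance"), or cross-product-share weighted variants reproduces the remaining branches of the match statement.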