mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

Files changed (39)
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +142 -93
  5. mergeron/core/guidelines_boundaries.py +289 -1077
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +257 -245
  15. mergeron/gen/data_generation.py +473 -221
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +159 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -259
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738963.0.dist-info/METADATA +0 -108
  38. mergeron-2024.738963.0.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/gen/upp_tests.py CHANGED
@@ -1,259 +1,157 @@
1
1
  """
2
- Methods to estimate intrinsic clearnace rates and intrinsic enforcement rates
2
+ Methods to compute intrinsic clearance rates and intrinsic enforcement rates
3
3
  from generated market data.
4
4
 
5
5
  """
6
6
 
7
7
  from collections.abc import Sequence
8
8
  from contextlib import suppress
9
- from importlib.metadata import version
10
9
  from pathlib import Path
11
- from typing import Literal, TypeAlias, TypedDict
10
+ from typing import Any, Literal, TypedDict
12
11
 
13
12
  import numpy as np
14
13
  import tables as ptb # type: ignore
15
- from attrs import evolve
16
- from joblib import Parallel, cpu_count, delayed # type: ignore
17
14
  from numpy.random import SeedSequence
18
15
  from numpy.typing import NDArray
19
16
 
20
- from mergeron.core.pseudorandom_numbers import TF, TI
21
-
22
- from .. import _PKG_NAME, RECConstants, UPPAggrSelector # noqa: TID252
17
+ from .. import ( # noqa
18
+ VERSION,
19
+ ArrayBIGINT,
20
+ ArrayBoolean,
21
+ ArrayDouble,
22
+ ArrayFloat,
23
+ ArrayINT,
24
+ HMGPubYear,
25
+ UPPAggrSelector,
26
+ )
23
27
  from ..core import guidelines_boundaries as gbl # noqa: TID252
24
28
  from . import (
25
- EMPTY_ARRAY_DEFAULT,
29
+ DEFAULT_EMPTY_ARRAY,
26
30
  DataclassInstance,
27
31
  INVResolution,
28
32
  MarketDataSample,
29
- MarketSampleSpec,
30
33
  UPPTestRegime,
31
34
  UPPTestsCounts,
32
35
  UPPTestsRaw,
33
36
  )
34
- from . import data_generation as dgl
35
- from . import investigations_stats as isl
36
-
37
- __version__ = version(_PKG_NAME)
37
+ from . import enforcement_stats as esl
38
38
 
39
+ __version__ = VERSION
39
40
 
40
- ptb.parameters.MAX_NUMEXPR_THREADS = 8
41
- ptb.parameters.MAX_BLOSC_THREADS = 4
41
+ type SaveData = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
42
42
 
43
- SaveData: TypeAlias = Literal[False] | tuple[Literal[True], ptb.File, ptb.Group]
44
43
 
44
+ class INVRESCntsArgs(TypedDict, total=False):
45
+ "Keyword arguments of function, :code:`sim_enf_cnts`"
45
46
 
46
- class IVNRESCntsArgs(TypedDict, total=False):
47
- "Keyword arguments of function, :code:`sim_invres_cnts`"
48
-
49
- sim_test_regime: UPPTestRegime
50
- saved_array_name_suffix: str
51
- save_data_to_file: SaveData
52
- seed_seq_list: list[SeedSequence]
47
+ sample_size: int
48
+ seed_seq_list: Sequence[SeedSequence] | None
53
49
  nthreads: int
50
+ save_data_to_file: SaveData
51
+ saved_array_name_suffix: str
54
52
 
55
53
 
56
- def sim_invres_cnts_ll(
57
- _mkt_sample_spec: MarketSampleSpec,
58
- _invres_parm_vec: gbl.HMGThresholds,
59
- _sim_invres_cnts_kwargs: IVNRESCntsArgs,
54
+ def compute_upp_test_counts(
55
+ _market_data_sample: MarketDataSample,
56
+ _upp_test_parms: gbl.HMGThresholds,
57
+ _upp_test_regime: UPPTestRegime,
60
58
  /,
61
59
  ) -> UPPTestsCounts:
62
- """A function to parallelize data-generation and testing
63
-
64
- The parameters `_sim_invres_cnts_kwargs` are passed unaltered to
65
- the parent function, `sim_invres_cnts()`, except that, if provided,
66
- `seed_seq_list` is used to spawn a seed sequence for each thread,
67
- to assure independent samples in each thread, and `nthreads` defines
68
- the number of parallel processes used. The number of draws in
69
- each thread may be tuned, by trial and error, to the amount of
70
- memory (RAM) available.
60
+ """Estimate enforcement and clearance counts from market data sample
71
61
 
72
62
  Parameters
73
63
  ----------
64
+ _market_data_sample
65
+ Market data sample
74
66
 
75
- _invres_parm_vec
76
- Guidelines thresholds to test against
77
-
78
- _mkt_sample_spec
79
- Configuration to use for generating sample data to test
67
+ _upp_test_parms
68
+ Threshold values for various Guidelines criteria
80
69
 
81
- _sim_invres_cnts_kwargs
82
- Arguments to downstream test function `sim_invres_cnts`
70
+ _upp_test_regime
71
+ Specifies whether to analyze enforcement, clearance, or both
72
+ and the GUPPI and diversion ratio aggregators employed, with
73
+ default being to analyze enforcement based on the maximum
74
+ merging-firm GUPPI and maximum diversion ratio between the
75
+ merging firms
83
76
 
84
77
  Returns
85
78
  -------
86
- Arrays of UPPTestCounts
79
+ UPPTestsCounts
80
+ Enforced and cleared counts
87
81
 
88
82
  """
89
- _sample_sz = _mkt_sample_spec.sample_size
90
- _subsample_sz = 10**6
91
- _iter_count = int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
92
- _thread_count = cpu_count()
93
-
94
- # Crate a copy, to avoid side effects in the outer scope
95
- _mkt_sample_spec_here = evolve(_mkt_sample_spec, sample_size=_subsample_sz)
96
-
97
- if (
98
- _mkt_sample_spec.share_spec.recapture_form != RECConstants.OUTIN
99
- and _mkt_sample_spec.share_spec.recapture_rate != _invres_parm_vec.rec
100
- ):
101
- raise ValueError(
102
- "{} {} {}".format(
103
- f"Recapture rate from market sample spec, {_mkt_sample_spec.share_spec.recapture_rate}",
104
- f"must match the value, {_invres_parm_vec.rec}",
105
- "the guidelines thresholds vector.",
106
- )
107
- )
108
-
109
- _rng_seed_seq_list = [None] * _iter_count
110
- if _sim_invres_cnts_kwargs:
111
- if _sseql := _sim_invres_cnts_kwargs.get("seed_seq_list"):
112
- _rng_seed_seq_list = list(
113
- zip(*[g.spawn(_iter_count) for g in _sseql], strict=True) # type: ignore
114
- )
115
-
116
- _sim_invres_cnts_kwargs: IVNRESCntsArgs = { # type: ignore
117
- _k: _v
118
- for _k, _v in _sim_invres_cnts_kwargs.items()
119
- if _k != "seed_seq_list"
120
- }
121
- else:
122
- _sim_invres_cnts_kwargs = {}
123
-
124
- _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
125
- delayed(sim_invres_cnts)(
126
- _mkt_sample_spec_here,
127
- _invres_parm_vec,
128
- **_sim_invres_cnts_kwargs,
129
- saved_array_name_suffix=f"{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
130
- seed_seq_list=_rng_seed_seq_list_ch,
131
- )
132
- for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
133
- )
134
-
135
- _res_list_stacks = UPPTestsCounts(*[
136
- np.stack([getattr(_j, _k) for _j in _res_list])
137
- for _k in ("by_firm_count", "by_delta", "by_conczone")
138
- ])
139
- upp_test_results = UPPTestsCounts(*[
140
- np.column_stack((
141
- (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
142
- np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
143
- ))
144
- for _g, _h in zip(
145
- _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
146
- )
147
- ])
148
- del _res_list, _res_list_stacks
149
-
150
- return upp_test_results
151
-
152
83
 
153
- def sim_invres_cnts(
154
- _mkt_sample_spec: MarketSampleSpec,
155
- _upp_test_parms: gbl.HMGThresholds,
156
- /,
157
- *,
158
- sim_test_regime: UPPTestRegime,
159
- saved_array_name_suffix: str = "",
160
- save_data_to_file: SaveData = False,
161
- seed_seq_list: list[SeedSequence] | None = None,
162
- nthreads: int = 16,
163
- ) -> UPPTestsCounts:
164
- # Generate market data
165
- _market_data = dgl.gen_market_sample(
166
- _mkt_sample_spec, seed_seq_list=seed_seq_list, nthreads=nthreads
167
- )
168
-
169
- _invalid_array_names = (
170
- ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
171
- if _mkt_sample_spec.share_spec.dist_type == "Uniform"
172
- else ()
173
- )
174
-
175
- save_data_to_hdf5(
176
- _market_data,
177
- saved_array_name_suffix,
178
- _invalid_array_names,
179
- save_data_to_file=save_data_to_file,
180
- )
181
-
182
- _upp_tests_data = gen_upp_arrays(
183
- _market_data,
184
- _upp_test_parms,
185
- sim_test_regime,
186
- saved_array_name_suffix=saved_array_name_suffix,
187
- save_data_to_file=save_data_to_file,
84
+ _enf_cnts_sim_array = -1 * np.ones((6, 2), np.int64)
85
+ _upp_test_arrays = compute_upp_test_arrays(
86
+ _market_data_sample, _upp_test_parms, _upp_test_regime
188
87
  )
189
88
 
190
89
  _fcounts, _hhi_delta, _hhi_post = (
191
- getattr(_market_data, _g) for _g in ["fcounts", "hhi_delta", "hhi_post"]
90
+ getattr(_market_data_sample, _g) for _g in ("fcounts", "hhi_delta", "hhi_post")
192
91
  )
193
- del _market_data
194
92
 
195
93
  _stats_rowlen = 6
196
94
  # Clearance/enforcement counts --- by firm count
197
- _firm_counts_weights = _mkt_sample_spec.share_spec.firm_counts_weights
198
- if _firm_counts_weights is not None and np.all(_firm_counts_weights >= 0):
199
- _max_firm_count = len(_firm_counts_weights)
95
+ _firmcounts_list = np.unique(_fcounts)
96
+ if _firmcounts_list is not None and np.all(_firmcounts_list >= 0):
97
+ _max_firmcount = max(_firmcounts_list)
200
98
 
201
- _invres_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
202
- for _firm_cnt in 2 + np.arange(_max_firm_count):
203
- _firm_count_test = _fcounts == _firm_cnt
99
+ _enf_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
100
+ for _firmcount in np.arange(2, _max_firmcount + 1):
101
+ _firmcount_test = _fcounts == _firmcount
204
102
 
205
- _invres_cnts_sim_byfirmcount_array = np.row_stack((
206
- _invres_cnts_sim_byfirmcount_array,
103
+ _enf_cnts_sim_byfirmcount_array = np.vstack((
104
+ _enf_cnts_sim_byfirmcount_array,
207
105
  np.array([
208
- _firm_cnt,
209
- np.einsum("ij->", 1 * _firm_count_test),
106
+ _firmcount,
107
+ np.einsum("ij->", 1 * _firmcount_test),
210
108
  *[
211
109
  np.einsum(
212
110
  "ij->",
213
- 1 * (_firm_count_test & getattr(_upp_tests_data, _f)),
111
+ 1 * (_firmcount_test & getattr(_upp_test_arrays, _f)),
214
112
  )
215
- for _f in _upp_tests_data.__dataclass_fields__
113
+ for _f in _upp_test_arrays.__dataclass_fields__
216
114
  ],
217
115
  ]),
218
116
  ))
219
- _invres_cnts_sim_byfirmcount_array = _invres_cnts_sim_byfirmcount_array[1:]
117
+ _enf_cnts_sim_byfirmcount_array = _enf_cnts_sim_byfirmcount_array[1:]
220
118
  else:
221
- _invres_cnts_sim_byfirmcount_array = np.array(
119
+ _enf_cnts_sim_byfirmcount_array = np.array(
222
120
  np.nan * np.empty((1, _stats_rowlen)), np.int64
223
121
  )
224
- _invres_cnts_sim_byfirmcount_array[0] = 2
122
+ _enf_cnts_sim_byfirmcount_array[0] = 2
225
123
 
226
- # Clearance/enfrocement counts --- by delta
227
- _hhi_delta_ranged = isl.hhi_delta_ranger(_hhi_delta)
228
- _invres_cnts_sim_bydelta_array = -1 * np.ones(_stats_rowlen, np.int64)
229
- for _hhi_delta_lim in isl.HHI_DELTA_KNOTS[:-1]:
124
+ # Clearance/enforcement counts --- by delta
125
+ _hhi_delta_ranged = esl.hhi_delta_ranger(_hhi_delta)
126
+ _enf_cnts_sim_bydelta_array = -1 * np.ones(_stats_rowlen, np.int64)
127
+ for _hhi_delta_lim in esl.HHI_DELTA_KNOTS[:-1]:
230
128
  _hhi_delta_test = _hhi_delta_ranged == _hhi_delta_lim
231
129
 
232
- _invres_cnts_sim_bydelta_array = np.row_stack((
233
- _invres_cnts_sim_bydelta_array,
130
+ _enf_cnts_sim_bydelta_array = np.vstack((
131
+ _enf_cnts_sim_bydelta_array,
234
132
  np.array([
235
133
  _hhi_delta_lim,
236
134
  np.einsum("ij->", 1 * _hhi_delta_test),
237
135
  *[
238
136
  np.einsum(
239
- "ij->", 1 * (_hhi_delta_test & getattr(_upp_tests_data, _f))
137
+ "ij->", 1 * (_hhi_delta_test & getattr(_upp_test_arrays, _f))
240
138
  )
241
- for _f in _upp_tests_data.__dataclass_fields__
139
+ for _f in _upp_test_arrays.__dataclass_fields__
242
140
  ],
243
141
  ]),
244
142
  ))
245
143
 
246
- _invres_cnts_sim_bydelta_array = _invres_cnts_sim_bydelta_array[1:]
144
+ _enf_cnts_sim_bydelta_array = _enf_cnts_sim_bydelta_array[1:]
247
145
 
248
- # Clearance/enfrocement counts --- by zone
146
+ # Clearance/enforcement counts --- by zone
249
147
  try:
250
- _hhi_zone_post_ranged = isl.hhi_zone_post_ranger(_hhi_post)
148
+ _hhi_zone_post_ranged = esl.hhi_zone_post_ranger(_hhi_post)
251
149
  except ValueError as _err:
252
150
  print(_hhi_post)
253
151
  raise _err
254
152
 
255
153
  _stats_byconczone_sim = -1 * np.ones(_stats_rowlen + 1, np.int64)
256
- for _hhi_zone_post_knot in isl.HHI_POST_ZONE_KNOTS[:-1]:
154
+ for _hhi_zone_post_knot in esl.HHI_POST_ZONE_KNOTS[:-1]:
257
155
  _level_test = _hhi_zone_post_ranged == _hhi_zone_post_knot
258
156
 
259
157
  for _hhi_zone_delta_knot in [0, 100, 200]:
@@ -265,7 +163,7 @@ def sim_invres_cnts(
265
163
 
266
164
  _conc_test = _level_test & _delta_test
267
165
 
268
- _stats_byconczone_sim = np.row_stack((
166
+ _stats_byconczone_sim = np.vstack((
269
167
  _stats_byconczone_sim,
270
168
  np.array([
271
169
  _hhi_zone_post_knot,
@@ -273,50 +171,53 @@ def sim_invres_cnts(
273
171
  np.einsum("ij->", 1 * _conc_test),
274
172
  *[
275
173
  np.einsum(
276
- "ij->", 1 * (_conc_test & getattr(_upp_tests_data, _f))
174
+ "ij->", 1 * (_conc_test & getattr(_upp_test_arrays, _f))
277
175
  )
278
- for _f in _upp_tests_data.__dataclass_fields__
176
+ for _f in _upp_test_arrays.__dataclass_fields__
279
177
  ],
280
178
  ]),
281
179
  ))
282
180
 
283
- _invres_cnts_sim_byconczone_array = isl.invres_cnts_byconczone(
284
- _stats_byconczone_sim[1:]
285
- )
181
+ _enf_cnts_sim_byconczone_array = esl.enf_cnts_byconczone(_stats_byconczone_sim[1:])
286
182
  del _stats_byconczone_sim
287
183
  del _hhi_delta, _hhi_post, _fcounts
288
184
 
289
185
  return UPPTestsCounts(
290
- _invres_cnts_sim_byfirmcount_array,
291
- _invres_cnts_sim_bydelta_array,
292
- _invres_cnts_sim_byconczone_array,
186
+ _enf_cnts_sim_byfirmcount_array,
187
+ _enf_cnts_sim_bydelta_array,
188
+ _enf_cnts_sim_byconczone_array,
293
189
  )
294
190
 
295
191
 
296
- def gen_upp_arrays(
192
+ def compute_upp_test_arrays(
297
193
  _market_data: MarketDataSample,
298
194
  _upp_test_parms: gbl.HMGThresholds,
299
195
  _sim_test_regime: UPPTestRegime,
300
196
  /,
301
- *,
302
- saved_array_name_suffix: str = "",
303
- save_data_to_file: SaveData = False,
304
197
  ) -> UPPTestsRaw:
305
198
  """
306
199
  Generate UPP tests arrays for given configuration and market sample
307
200
 
308
201
  Given a standards vector, market
202
+
203
+ Parameters
204
+ ----------
205
+ _market_data
206
+ market data sample
207
+ _upp_test_parms
208
+ guidelines thresholds for testing UPP and related statistics
209
+ _sim_test_regime
210
+ configuration to use for generating UPP tests
211
+
309
212
  """
310
213
  _g_bar, _divr_bar, _cmcr_bar, _ipr_bar = (
311
214
  getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr")
312
215
  )
313
216
 
314
- _invres_resolution, _guppi_aggregator, _divr_aggregator = (
315
- getattr(_sim_test_regime, _f)
316
- for _f in ("resolution", "guppi_aggregator", "divr_aggregator")
217
+ _guppi_array, _ipr_array, _cmcr_array = (
218
+ np.empty_like(_market_data.price_array) for _ in range(3)
317
219
  )
318
220
 
319
- _guppi_array = np.empty_like(_market_data.divr_array)
320
221
  np.einsum(
321
222
  "ij,ij,ij->ij",
322
223
  _market_data.divr_array,
@@ -325,153 +226,146 @@ def gen_upp_arrays(
325
226
  out=_guppi_array,
326
227
  )
327
228
 
328
- _cmcr_array = np.empty_like(_market_data.divr_array)
329
- np.divide(
330
- np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
331
- np.einsum("ij,ij->ij", 1 - _market_data.pcm_array, 1 - _market_data.divr_array),
332
- out=_cmcr_array,
333
- )
334
-
335
- _ipr_array = np.empty_like(_market_data.divr_array)
336
229
  np.divide(
337
230
  np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
338
231
  1 - _market_data.divr_array,
339
232
  out=_ipr_array,
340
233
  )
341
234
 
342
- # This one needs further testing:
343
- # _ipr_array_alt = np.empty_like(_market_data.divr_array)
344
- # np.divide(_guppi_array, (1 - _market_data.divr_array[:, ::-1]), out=_ipr_array_alt)
235
+ np.divide(_ipr_array, 1 - _market_data.pcm_array, out=_cmcr_array)
236
+
237
+ (_divr_test_vector,) = _compute_test_array_seq(
238
+ (_market_data.divr_array,),
239
+ _market_data.frmshr_array,
240
+ _sim_test_regime.divr_aggregator,
241
+ )
242
+
243
+ (_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_array_seq(
244
+ (_guppi_array, _cmcr_array, _ipr_array),
245
+ _market_data.frmshr_array,
246
+ _sim_test_regime.guppi_aggregator,
247
+ )
248
+ del _cmcr_array, _ipr_array, _guppi_array
249
+
250
+ if _sim_test_regime.resolution == INVResolution.ENFT:
251
+ _upp_test_arrays = UPPTestsRaw(
252
+ _guppi_test_vector >= _g_bar,
253
+ (_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
254
+ _cmcr_test_vector >= _cmcr_bar,
255
+ _ipr_test_vector >= _ipr_bar,
256
+ )
257
+ else:
258
+ _upp_test_arrays = UPPTestsRaw(
259
+ _guppi_test_vector < _g_bar,
260
+ (_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
261
+ _cmcr_test_vector < _cmcr_bar,
262
+ _ipr_test_vector < _ipr_bar,
263
+ )
264
+
265
+ return _upp_test_arrays
345
266
 
346
- _test_measure_seq = (_market_data.divr_array, _guppi_array, _cmcr_array, _ipr_array)
347
267
 
268
+ def _compute_test_array_seq(
269
+ _test_measure_seq: tuple[ArrayDouble, ...],
270
+ _wt_array: ArrayDouble,
271
+ _aggregator: UPPAggrSelector,
272
+ ) -> tuple[ArrayDouble, ...]:
348
273
  _wt_array = (
349
- _market_data.frmshr_array
350
- / np.einsum("ij->i", _market_data.frmshr_array)[:, None]
351
- if _guppi_aggregator
274
+ _wt_array / np.einsum("ij->i", _wt_array)[:, None]
275
+ if _aggregator
352
276
  in (
353
277
  UPPAggrSelector.CPA,
354
278
  UPPAggrSelector.CPD,
355
279
  UPPAggrSelector.OSA,
356
280
  UPPAggrSelector.OSD,
357
281
  )
358
- else EMPTY_ARRAY_DEFAULT
282
+ else DEFAULT_EMPTY_ARRAY
359
283
  )
360
284
 
361
- match _guppi_aggregator:
285
+ match _aggregator:
362
286
  case UPPAggrSelector.AVG:
363
- _test_value_seq = (
287
+ _test_array_seq = (
364
288
  1 / 2 * np.einsum("ij->i", _g)[:, None] for _g in _test_measure_seq
365
289
  )
366
290
  case UPPAggrSelector.CPA:
367
- _test_value_seq = (
291
+ _test_array_seq = (
368
292
  np.einsum("ij,ij->i", _wt_array[:, ::-1], _g)[:, None]
369
293
  for _g in _test_measure_seq
370
294
  )
371
295
  case UPPAggrSelector.CPD:
372
- _test_value_seq = (
296
+ _test_array_seq = (
373
297
  np.sqrt(np.einsum("ij,ij,ij->i", _wt_array[:, ::-1], _g, _g))[:, None]
374
298
  for _g in _test_measure_seq
375
299
  )
376
300
  case UPPAggrSelector.DIS:
377
- _test_value_seq = (
301
+ _test_array_seq = (
378
302
  np.sqrt(1 / 2 * np.einsum("ij,ij->i", _g, _g))[:, None]
379
303
  for _g in _test_measure_seq
380
304
  )
381
305
  case UPPAggrSelector.MAX:
382
- _test_value_seq = (
306
+ _test_array_seq = (
383
307
  _g.max(axis=1, keepdims=True) for _g in _test_measure_seq
384
308
  )
385
309
  case UPPAggrSelector.MIN:
386
- _test_value_seq = (
310
+ _test_array_seq = (
387
311
  _g.min(axis=1, keepdims=True) for _g in _test_measure_seq
388
312
  )
389
313
  case UPPAggrSelector.OSA:
390
- _test_value_seq = (
314
+ _test_array_seq = (
391
315
  np.einsum("ij,ij->i", _wt_array, _g)[:, None]
392
316
  for _g in _test_measure_seq
393
317
  )
394
318
  case UPPAggrSelector.OSD:
395
- _test_value_seq = (
319
+ _test_array_seq = (
396
320
  np.sqrt(np.einsum("ij,ij,ij->i", _wt_array, _g, _g))[:, None]
397
321
  for _g in _test_measure_seq
398
322
  )
399
323
  case _:
400
324
  raise ValueError("GUPPI/diversion ratio aggregation method is invalid.")
401
- del _cmcr_array, _guppi_array
402
- (_divr_test_vector, _guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = (
403
- _test_value_seq
404
- )
405
-
406
- if _divr_aggregator == UPPAggrSelector.MAX:
407
- _divr_test_vector = _market_data.divr_array.max(axis=1, keepdims=True)
408
-
409
- if _invres_resolution == INVResolution.ENFT:
410
- _upp_tests_data = UPPTestsRaw(
411
- _guppi_test_vector >= _g_bar,
412
- (_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
413
- _cmcr_test_vector >= _cmcr_bar,
414
- _ipr_test_vector >= _ipr_bar,
415
- )
416
- else:
417
- _upp_tests_data = UPPTestsRaw(
418
- _guppi_test_vector < _g_bar,
419
- (_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
420
- _cmcr_test_vector < _cmcr_bar,
421
- _ipr_test_vector < _ipr_bar,
422
- )
423
- del _guppi_test_vector, _divr_test_vector, _cmcr_test_vector, _ipr_test_vector
424
-
425
- save_data_to_hdf5(
426
- _upp_tests_data,
427
- saved_array_name_suffix,
428
- (),
429
- save_data_to_file=save_data_to_file,
430
- )
431
-
432
- return _upp_tests_data
325
+ return tuple(_test_array_seq)
433
326
 
434
327
 
435
328
  def initialize_hd5(
436
- _h5_path: Path, _hmg_pub_year: gbl.HMGPubYear, _test_regime: UPPTestRegime, /
329
+ _h5_path: Path, _hmg_pub_year: HMGPubYear, _test_regime: UPPTestRegime, /
437
330
  ) -> tuple[SaveData, str]:
438
331
  _h5_title = f"HMG version: {_hmg_pub_year}; Test regime: {_test_regime}"
439
332
  if _h5_path.is_file():
440
333
  _h5_path.unlink()
441
- _h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title)
442
- _save_data_to_file: tuple[Literal[True], ptb.File, str] = (True, _h5_file, "/")
443
- _next_subgroup_name = "invres_{}_{}_{}_{}".format(
334
+ _h5_file = ptb.open_file(_h5_path, mode="w", title=_h5_title) # pyright: ignore
335
+ _save_data_to_file: SaveData = (True, _h5_file, _h5_file.root)
336
+ _next_subgroup_name_root = "enf_{}_{}_{}_{}".format(
444
337
  _hmg_pub_year,
445
- *(getattr(_test_regime, _f.name).name for _f in _test_regime.__attrs_attrs__),
338
+ *(getattr(_test_regime, _f.name).name for _f in _test_regime.__attrs_attrs__), # pyright: ignore
446
339
  )
447
- return _save_data_to_file, _next_subgroup_name
340
+ return _save_data_to_file, _next_subgroup_name_root
448
341
 
449
342
 
450
343
  def save_data_to_hdf5(
451
344
  _dclass: DataclassInstance,
452
- _saved_array_name_suffix: str = "",
453
- _excl_attrs: Sequence[str] = (),
454
345
  /,
455
346
  *,
347
+ saved_array_name_suffix: str | None = "",
348
+ excluded_attrs: Sequence[str] | None = (),
456
349
  save_data_to_file: SaveData = False,
457
350
  ) -> None:
458
351
  if save_data_to_file:
459
352
  _, _h5_file, _h5_group = save_data_to_file
460
353
  # Save market data arrays
354
+ excluded_attrs = excluded_attrs or ()
461
355
  for _array_name in _dclass.__dataclass_fields__:
462
- if _array_name in _excl_attrs:
356
+ if _array_name in excluded_attrs:
463
357
  continue
464
358
  save_array_to_hdf5(
465
359
  getattr(_dclass, _array_name),
466
360
  _array_name,
467
361
  _h5_group,
468
362
  _h5_file,
469
- saved_array_name_suffix=_saved_array_name_suffix,
363
+ saved_array_name_suffix=saved_array_name_suffix,
470
364
  )
471
365
 
472
366
 
473
367
  def save_array_to_hdf5(
474
- _array_obj: NDArray[np.floating[TF] | np.integer[TI] | np.bool_],
368
+ _array_obj: NDArray[Any],
475
369
  _array_name: str,
476
370
  _h5_group: ptb.Group,
477
371
  _h5_file: ptb.File,
@@ -479,7 +373,7 @@ def save_array_to_hdf5(
479
373
  *,
480
374
  saved_array_name_suffix: str | None = None,
481
375
  ) -> None:
482
- _h5_array_name = f"{_array_name}_{saved_array_name_suffix or ""}".rstrip("_")
376
+ _h5_array_name = f"{_array_name}_{saved_array_name_suffix or ''}".rstrip("_")
483
377
 
484
378
  with suppress(ptb.NoSuchNodeError):
485
379
  _h5_file.remove_node(_h5_group, name=_array_name)
@@ -489,6 +383,12 @@ def save_array_to_hdf5(
489
383
  _h5_array_name,
490
384
  atom=ptb.Atom.from_dtype(_array_obj.dtype),
491
385
  shape=_array_obj.shape,
492
- filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True),
386
+ filters=ptb.Filters(complevel=3, complib="blosc:lz4hc", fletcher32=True), # pyright: ignore
493
387
  )
494
388
  _h5_array[:] = _array_obj
389
+
390
+
391
+ if __name__ == "__main__":
392
+ print(
393
+ "This module defines classes with methods for generating UPP test arrays and UPP test-counts arrays on given data."
394
+ )