mergeron 2024.739099.2__py3-none-any.whl → 2024.739104.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

@@ -1,283 +1,518 @@
1
1
  """
2
- Methods to generate market data, including shares price, marginsm, and diversion ratios
3
- for analyzing merger enforcement policy.
2
+ Methods to generate data for analyzing merger enforcement policy.
4
3
 
5
4
  """
6
5
 
7
6
  from __future__ import annotations
8
7
 
9
- from typing import NamedTuple
8
+ from collections.abc import Sequence
9
+ from typing import TypedDict
10
10
 
11
11
  import numpy as np
12
+ from attrs import Attribute, define, field, validators
13
+ from joblib import Parallel, cpu_count, delayed # type: ignore
12
14
  from numpy.random import SeedSequence
13
15
 
14
- from .. import VERSION, ArrayDouble, RECConstants # noqa: TID252
16
+ from .. import VERSION, RECTypes # noqa: TID252 # noqa
17
+ from ..core import guidelines_boundaries as gbl # noqa: TID252
18
+ from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
15
19
  from . import (
16
- EMPTY_ARRAY_DEFAULT,
17
20
  FM2Constants,
18
21
  MarketDataSample,
19
- MarketSpec,
20
- PriceConstants,
21
- SHRConstants,
22
+ PCMDistributions,
23
+ PCMSpec,
24
+ PriceSpec,
25
+ ShareSpec,
26
+ SHRDistributions,
22
27
  SSZConstants,
28
+ UPPTestRegime,
29
+ UPPTestsCounts,
23
30
  )
24
- from ._data_generation_functions import _gen_margin_price_data, _gen_share_data
31
+ from .data_generation_functions import (
32
+ gen_divr_array,
33
+ gen_margin_price_data,
34
+ gen_share_data,
35
+ parse_seed_seq_list,
36
+ )
37
+ from .upp_tests import SaveData, enf_cnts, save_data_to_hdf5
25
38
 
26
39
  __version__ = VERSION
27
40
 
28
41
 
29
- class SeedSequenceData(NamedTuple):
30
- mktshr_rng_seed_seq: SeedSequence
31
- pcm_rng_seed_seq: SeedSequence
32
- fcount_rng_seed_seq: SeedSequence | None
33
- pr_rng_seed_seq: SeedSequence | None
42
+ class SamplingFunctionKWArgs(TypedDict, total=False):
43
+ "Keyword arguments of function, :func:`MarketSample.sim_enf_cnts`"
34
44
 
45
+ sample_size: int
46
+ """number of draws to generate"""
35
47
 
36
- def gen_market_sample(
37
- _mkt_sample_spec: MarketSpec,
38
- /,
39
- *,
40
- sample_size: int = 10**6,
41
- seed_seq_list: list[SeedSequence] | None = None,
42
- nthreads: int = 16,
43
- ) -> MarketDataSample:
44
- """
45
- Generate share, diversion ratio, price, and margin data for MarketSpec.
46
-
47
-
48
- Parameters
49
- ----------
50
- _mkt_sample_spec
51
- class specifying parameters for data generation, see :class:`mergeron.gen.MarketSpec`
52
- sample_size
53
- number of draws to generate
54
- seed_seq_list
55
- tuple of SeedSequences to ensure replicable data generation with
56
- appropriately independent random streams
57
- nthreads
58
- optionally specify the number of CPU threads for the PRNG
59
-
60
- Returns
61
- -------
62
- Merging firms' shares, margins, etc. for each hypothetical merger
63
- in the sample
48
+ seed_seq_list: Sequence[SeedSequence] | None
49
+ """sequence of SeedSequences to ensure replicable data generation with
50
+ appropriately independent random streams
51
+
52
+ NOTES
53
+ -----
54
+
55
+ See :func:`.data_generation_functions.parse_seed_seq_list` for more on
56
+ specification of this parameter.
64
57
 
65
58
  """
66
59
 
67
- _recapture_form = _mkt_sample_spec.share_spec.recapture_form
68
- _recapture_rate = _mkt_sample_spec.share_spec.recapture_rate
69
- _dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
70
- _dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
71
- _hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
72
-
73
- (
74
- _mktshr_rng_seed_seq,
75
- _pcm_rng_seed_seq,
76
- _fcount_rng_seed_seq,
77
- _pr_rng_seed_seq,
78
- ) = parse_seed_seq_list(
79
- seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.price_spec
80
- )
60
+ nthreads: int
61
+ """number of parallel threads to use"""
81
62
 
82
- _shr_sample_size = 1.0 * sample_size
83
- # Scale up sample size to offset discards based on specified criteria
84
- _shr_sample_size *= _hsr_filing_test_type
85
- if _dist_firm2_pcm == FM2Constants.MNL:
86
- _shr_sample_size *= SSZConstants.MNL_DEP
87
- _shr_sample_size = int(_shr_sample_size)
88
-
89
- # Generate share data
90
- _mktshr_data = _gen_share_data(
91
- _shr_sample_size,
92
- _mkt_sample_spec.share_spec,
93
- _fcount_rng_seed_seq,
94
- _mktshr_rng_seed_seq,
95
- nthreads,
96
- )
63
+ save_data_to_file: SaveData
64
+ """optionally save data to HDF5 file"""
97
65
 
98
- _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
99
- getattr(_mktshr_data, _f)
100
- for _f in (
101
- "mktshr_array",
102
- "fcounts",
103
- "aggregate_purchase_prob",
104
- "nth_firm_share",
105
- )
106
- )
66
+ saved_array_name_suffix: str
67
+ """optionally specify a suffix for the HDF5 array names"""
107
68
 
108
- # Generate merging-firm price and PCM data
109
- _margin_data, _price_data = _gen_margin_price_data(
110
- _mktshr_array[:, :2],
111
- _nth_firm_share,
112
- _aggregate_purchase_prob,
113
- _mkt_sample_spec.pcm_spec,
114
- _mkt_sample_spec.price_spec,
115
- _mkt_sample_spec.hsr_filing_test_type,
116
- _pcm_rng_seed_seq,
117
- _pr_rng_seed_seq,
118
- nthreads,
119
- )
120
69
 
121
- _price_array, _hsr_filing_test = (
122
- getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
123
- )
70
+ @define
71
+ class MarketSample:
72
+ """Parameter specification for market data generation."""
124
73
 
125
- _pcm_array, _mnl_test_rows = (
126
- getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
74
+ share_spec: ShareSpec = field(
75
+ kw_only=True,
76
+ default=ShareSpec(SHRDistributions.UNI, None, None, RECTypes.INOUT, 0.8),
77
+ validator=validators.instance_of(ShareSpec),
127
78
  )
79
+ """Market-share specification, see :class:`ShareSpec`"""
128
80
 
129
- _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
130
- _s_size = sample_size # originally-specified sample size
131
- if _dist_firm2_pcm == FM2Constants.MNL:
132
- _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
133
- _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
134
- _price_array = _price_array[_mnl_test_rows][:_s_size]
135
- _fcounts = _fcounts[_mnl_test_rows][:_s_size]
136
- _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
137
- _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
138
-
139
- # Calculate diversion ratios
140
- _divr_array = gen_divr_array(
141
- _recapture_form, _recapture_rate, _mktshr_array[:, :2], _aggregate_purchase_prob
81
+ pcm_spec: PCMSpec = field(
82
+ kw_only=True, default=PCMSpec(PCMDistributions.UNI, None, FM2Constants.IID)
142
83
  )
84
+ """Margin specification, see :class:`PCMSpec`"""
85
+
86
+ @pcm_spec.validator
87
+ def _check_pcm(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
88
+ if (
89
+ self.share_spec.recapture_form == RECTypes.FIXED
90
+ and _v.firm2_pcm_constraint == FM2Constants.MNL
91
+ ):
92
+ raise ValueError(
93
+ f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
94
+ "requires Firm 2 margin must have property, "
95
+ f'"{FM2Constants.IID}" or "{FM2Constants.SYM}".'
96
+ )
143
97
 
144
- del _mnl_test_rows, _s_size
145
-
146
- _frmshr_array = _mktshr_array[:, :2]
147
- _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[:, None]
148
-
149
- _hhi_post = (
150
- _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
98
+ price_spec: PriceSpec = field(
99
+ kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
151
100
  )
101
+ """Price specification, see :class:`PriceSpec`"""
152
102
 
153
- return MarketDataSample(
154
- _frmshr_array,
155
- _pcm_array,
156
- _price_array,
157
- _fcounts,
158
- _aggregate_purchase_prob,
159
- _nth_firm_share,
160
- _divr_array,
161
- _hhi_post,
162
- _hhi_delta,
103
+ hsr_filing_test_type: SSZConstants = field(
104
+ kw_only=True,
105
+ default=SSZConstants.ONE,
106
+ validator=validators.instance_of(SSZConstants),
163
107
  )
108
+ """Method for modeling HSR filing thresholds, see :class:`SSZConstants`"""
109
+
110
+ data: MarketDataSample = field(default=None)
111
+
112
+ enf_counts: UPPTestsCounts = field(default=None)
113
+
114
+ def gen_market_sample(
115
+ self,
116
+ /,
117
+ *,
118
+ sample_size: int = 10**6,
119
+ seed_seq_list: Sequence[SeedSequence] | None = None,
120
+ nthreads: int = 16,
121
+ save_data_to_file: SaveData = False,
122
+ saved_array_name_suffix: str = "",
123
+ ) -> MarketDataSample:
124
+ """
125
+ Generate share, diversion ratio, price, and margin data for this MarketSample.
126
+
127
+ See :class:`SamplingFunctionKWArgs` for a description of the parameters.
128
+
129
+ Returns
130
+ -------
131
+ Merging firms' shares, margins, etc. for each hypothetical merger
132
+ in the sample
133
+
134
+ """
135
+
136
+ _recapture_form = self.share_spec.recapture_form
137
+ _recapture_rate = self.share_spec.recapture_rate
138
+ _dist_type_mktshr = self.share_spec.dist_type
139
+ _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
140
+ _hsr_filing_test_type = self.hsr_filing_test_type
141
+
142
+ (
143
+ _mktshr_rng_seed_seq,
144
+ _pcm_rng_seed_seq,
145
+ _fcount_rng_seed_seq,
146
+ _pr_rng_seed_seq,
147
+ ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
148
+
149
+ _shr_sample_size = 1.0 * sample_size
150
+ # Scale up sample size to offset discards based on specified criteria
151
+ _shr_sample_size *= _hsr_filing_test_type
152
+ if _dist_firm2_pcm == FM2Constants.MNL:
153
+ _shr_sample_size *= SSZConstants.MNL_DEP
154
+ _shr_sample_size = int(_shr_sample_size)
155
+
156
+ # Generate share data
157
+ _mktshr_data = gen_share_data(
158
+ _shr_sample_size,
159
+ self.share_spec,
160
+ _fcount_rng_seed_seq,
161
+ _mktshr_rng_seed_seq,
162
+ nthreads,
163
+ )
164
164
 
165
+ _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
166
+ getattr(_mktshr_data, _f)
167
+ for _f in (
168
+ "mktshr_array",
169
+ "fcounts",
170
+ "aggregate_purchase_prob",
171
+ "nth_firm_share",
172
+ )
173
+ )
165
174
 
166
- def parse_seed_seq_list(
167
- _sseq_list: list[SeedSequence] | None,
168
- _mktshr_dist_type: SHRConstants,
169
- _price_spec: PriceConstants,
170
- /,
171
- ) -> SeedSequenceData:
172
- """Initialize RNG seed sequences to ensure independence of distinct random streams.
175
+ # Generate merging-firm price and PCM data
176
+ _margin_data, _price_data = gen_margin_price_data(
177
+ _mktshr_array[:, :2],
178
+ _nth_firm_share,
179
+ _aggregate_purchase_prob,
180
+ self.pcm_spec,
181
+ self.price_spec,
182
+ self.hsr_filing_test_type,
183
+ _pcm_rng_seed_seq,
184
+ _pr_rng_seed_seq,
185
+ nthreads,
186
+ )
173
187
 
174
- The tuple of SeedSequences, is parsed in the following order
175
- for generating the relevant random variates:
176
- 1.) quantity shares
177
- 2.) price-cost margins
178
- 3.) firm-counts, if :code:`MarketSpec.share_spec.dist_type` is a Dirichlet distribution
179
- 4.) prices, if :code:`MarketSpec.price_spec ==`:attr:`mergeron.gen.PriceConstants.ZERO`.
188
+ _price_array, _hsr_filing_test = (
189
+ getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
190
+ )
180
191
 
192
+ _pcm_array, _mnl_test_rows = (
193
+ getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
194
+ )
181
195
 
196
+ _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
197
+ _s_size = sample_size # originally-specified sample size
198
+ if _dist_firm2_pcm == FM2Constants.MNL:
199
+ _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
200
+ _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
201
+ _price_array = _price_array[_mnl_test_rows][:_s_size]
202
+ _fcounts = _fcounts[_mnl_test_rows][:_s_size]
203
+ _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
204
+ :_s_size
205
+ ]
206
+ _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
207
+
208
+ # Calculate diversion ratios
209
+ _divr_array = gen_divr_array(
210
+ _recapture_form,
211
+ _recapture_rate,
212
+ _mktshr_array[:, :2],
213
+ _aggregate_purchase_prob,
214
+ )
182
215
 
183
- Parameters
184
- ----------
185
- _sseq_list
186
- List of RNG seed sequences
216
+ del _mnl_test_rows, _s_size
187
217
 
188
- _mktshr_dist_type
189
- Market share distribution type
218
+ _frmshr_array = _mktshr_array[:, :2]
219
+ _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
220
+ :, None
221
+ ]
190
222
 
191
- _price_spec
192
- Price specification
223
+ _hhi_post = (
224
+ _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
225
+ )
193
226
 
194
- Returns
195
- -------
196
- Seed sequence data
227
+ return MarketDataSample(
228
+ _frmshr_array,
229
+ _pcm_array,
230
+ _price_array,
231
+ _fcounts,
232
+ _aggregate_purchase_prob,
233
+ _nth_firm_share,
234
+ _divr_array,
235
+ _hhi_post,
236
+ _hhi_delta,
237
+ )
197
238
 
198
- """
199
- _fcount_rng_seed_seq: SeedSequence | None = None
200
- _pr_rng_seed_seq: SeedSequence | None = None
201
-
202
- if _price_spec == PriceConstants.ZERO:
203
- _pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
204
-
205
- if _mktshr_dist_type == SHRConstants.UNI:
206
- _fcount_rng_seed_seq = None
207
- _seed_count = 2
208
- _mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
209
- _sseq_list[:_seed_count]
210
- if _sseq_list
211
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
239
+ def generate_sample(
240
+ self,
241
+ /,
242
+ *,
243
+ sample_size: int = 10**6,
244
+ seed_seq_list: list[SeedSequence] | None,
245
+ nthreads: int,
246
+ save_data_to_file: SaveData = False,
247
+ saved_array_name_suffix: str = "",
248
+ ) -> None:
249
+ """Generate market data"""
250
+
251
+ self.data = self.gen_market_sample(
252
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
212
253
  )
213
- else:
214
- _seed_count = 3
215
- (_mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq) = (
216
- _sseq_list[:_seed_count]
217
- if _sseq_list
218
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
254
+
255
+ _invalid_array_names = (
256
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
257
+ if self.share_spec.dist_type == "Uniform"
258
+ else ()
219
259
  )
220
260
 
221
- return SeedSequenceData(
222
- _mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq, _pr_rng_seed_seq
223
- )
261
+ save_data_to_hdf5(
262
+ self.data,
263
+ saved_array_name_suffix=saved_array_name_suffix,
264
+ excluded_attrs=_invalid_array_names,
265
+ save_data_to_file=save_data_to_file,
266
+ )
224
267
 
268
+ def sim_enf_cnts(
269
+ self,
270
+ _upp_test_parms: gbl.HMGThresholds,
271
+ _sim_test_regime: UPPTestRegime,
272
+ /,
273
+ *,
274
+ sample_size: int = 10**6,
275
+ seed_seq_list: list[SeedSequence] | None = None,
276
+ nthreads: int = 16,
277
+ save_data_to_file: SaveData = False,
278
+ saved_array_name_suffix: str = "",
279
+ ) -> UPPTestsCounts:
280
+ """Generate market data and estimate UPP test counts on same.
225
281
 
226
- def gen_divr_array(
227
- _recapture_form: RECConstants,
228
- _recapture_rate: float | None,
229
- _frmshr_array: ArrayDouble,
230
- _aggregate_purchase_prob: ArrayDouble = EMPTY_ARRAY_DEFAULT,
231
- /,
232
- ) -> ArrayDouble:
233
- """
234
- Given merging-firm shares and related parameters, return diverion ratios.
282
+ Parameters
283
+ ----------
235
284
 
236
- If recapture is specified as :attr:`mergeron.RECConstants.OUTIN`, then the
237
- choice-probability for the outside good must be supplied.
285
+ _upp_test_parms
286
+ Guidelines thresholds for testing UPP and related statistics
238
287
 
239
- Parameters
240
- ----------
241
- _recapture_form
242
- Enum specifying Fixed (proportional), Inside-out, or Outside-in
288
+ _sim_test_regime
289
+ Configuration to use for testing; UPPTestRegime object
290
+ specifying whether investigation results in enforcement, clearance,
291
+ or both; and aggregation methods used for GUPPI and diversion ratio
292
+ measures
243
293
 
244
- _recapture_rate
245
- If recapture is proportional or inside-out, the recapture rate
246
- for the firm with the smaller share.
294
+ sample_size
295
+ Number of draws to generate
247
296
 
248
- _frmshr_array
249
- Merging-firm shares.
297
+ seed_seq_list
298
+ List of seed sequences, to assure independent samples in each thread
250
299
 
251
- _aggregate_purchase_prob
252
- 1 minus probability that the outside good is chosen; converts
253
- market shares to choice probabilities by multiplication.
300
+ nthreads
301
+ Number of parallel processes to use
254
302
 
255
- Returns
256
- -------
257
- Merging-firm diversion ratios for mergers in the sample.
303
+ save_data_to_file
304
+ Whether to save data to an HDF5 file, and where to save it
258
305
 
259
- """
306
+ saved_array_name_suffix
307
+ Suffix to add to the array names in the HDF5 file
308
+
309
+ Returns
310
+ -------
311
+ UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
312
+
313
+ """
314
+
315
+ _market_data_sample = self.gen_market_sample(
316
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
317
+ )
318
+
319
+ _invalid_array_names = (
320
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
321
+ if self.share_spec.dist_type == "Uniform"
322
+ else ()
323
+ )
324
+
325
+ save_data_to_hdf5(
326
+ _market_data_sample,
327
+ saved_array_name_suffix=saved_array_name_suffix,
328
+ excluded_attrs=_invalid_array_names,
329
+ save_data_to_file=save_data_to_file,
330
+ )
260
331
 
261
- _divr_array: ArrayDouble
262
- if _recapture_form == RECConstants.FIXED:
263
- _divr_array = _recapture_rate * _frmshr_array[:, ::-1] / (1 - _frmshr_array) # type: ignore
264
-
265
- else:
266
- _purchprob_array = _aggregate_purchase_prob * _frmshr_array
267
- _divr_array = _purchprob_array[:, ::-1] / (1 - _purchprob_array)
268
-
269
- _divr_assert_test = (
270
- (np.round(np.einsum("ij->i", _frmshr_array), 15) == 1)
271
- | (np.argmin(_frmshr_array, axis=1) == np.argmax(_divr_array, axis=1))
272
- )[:, None]
273
- if not all(_divr_assert_test):
274
- raise ValueError(
275
- "{} {} {} {}".format(
276
- "Data construction fails tests:",
277
- "the index of min(s_1, s_2) must equal",
278
- "the index of max(d_12, d_21), for all draws.",
279
- "unless frmshr_array sums to 1.00.",
332
+ _upp_test_arrays = enf_cnts(
333
+ _market_data_sample, _upp_test_parms, _sim_test_regime
334
+ )
335
+
336
+ save_data_to_hdf5(
337
+ _upp_test_arrays,
338
+ saved_array_name_suffix=saved_array_name_suffix,
339
+ save_data_to_file=save_data_to_file,
340
+ )
341
+
342
+ return _upp_test_arrays
343
+
344
+ def sim_enf_cnts_ll(
345
+ self,
346
+ _enf_parm_vec: gbl.HMGThresholds,
347
+ _sim_test_regime: UPPTestRegime,
348
+ /,
349
+ *,
350
+ sample_size: int = 10**6,
351
+ seed_seq_list: list[SeedSequence] | None = None,
352
+ nthreads: int = 16,
353
+ save_data_to_file: SaveData = False,
354
+ saved_array_name_suffix: str = "",
355
+ ) -> UPPTestsCounts:
356
+ """A function to parallelize data-generation and testing
357
+
358
+ The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
359
+ the parent function, `sim_enf_cnts()`, except that, if provided,
360
+ `seed_seq_list` is used to spawn a seed sequence for each thread,
361
+ to assure independent samples in each thread, and `nthreads` defines
362
+ the number of parallel processes used. The number of draws in
363
+ each thread may be tuned, by trial and error, to the amount of
364
+ memory (RAM) available.
365
+
366
+ Parameters
367
+ ----------
368
+
369
+ _enf_parm_vec
370
+ Guidelines thresholds to test against
371
+
372
+ _sim_test_regime
373
+ Configuration to use for testing
374
+
375
+ sample_size
376
+ Number of draws to simulate
377
+
378
+ seed_seq_list
379
+ List of seed sequences, to assure independent samples in each thread
380
+
381
+ nthreads
382
+ Number of parallel processes to use
383
+
384
+ save_data_to_file
385
+ Whether to save data to an HDF5 file, and where to save it
386
+
387
+ saved_array_name_suffix
388
+ Suffix to add to the array names in the HDF5 file
389
+
390
+ Returns
391
+ -------
392
+ Arrays of UPPTestsCounts
393
+
394
+ """
395
+ _sample_sz = sample_size
396
+ _subsample_sz = 10**6
397
+ _iter_count = (
398
+ int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
399
+ )
400
+ _thread_count = cpu_count()
401
+
402
+ if (
403
+ self.share_spec.recapture_form != RECTypes.OUTIN
404
+ and self.share_spec.recapture_rate != _enf_parm_vec.rec
405
+ ):
406
+ raise ValueError(
407
+ "{} {} {}".format(
408
+ f"Recapture rate from market sample spec, {self.share_spec.recapture_rate}",
409
+ f"must match the value, {_enf_parm_vec.rec}",
410
+ "the guidelines thresholds vector.",
411
+ )
280
412
  )
413
+
414
+ _rng_seed_seq_list = [None] * _iter_count
415
+ if seed_seq_list:
416
+ _rng_seed_seq_list = list(
417
+ zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
418
+ )
419
+
420
+ _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
421
+ "sample_size": _subsample_sz,
422
+ "save_data_to_file": save_data_to_file,
423
+ "nthreads": nthreads,
424
+ })
425
+
426
+ _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
427
+ delayed(self.sim_enf_cnts)(
428
+ _enf_parm_vec,
429
+ _sim_test_regime,
430
+ **_sim_enf_cnts_kwargs,
431
+ saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
432
+ seed_seq_list=_rng_seed_seq_list_ch,
433
+ )
434
+ for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
281
435
  )
282
436
 
283
- return _divr_array
437
+ _res_list_stacks = UPPTestsCounts(*[
438
+ np.stack([getattr(_j, _k) for _j in _res_list])
439
+ for _k in ("by_firm_count", "by_delta", "by_conczone")
440
+ ])
441
+ upp_test_results = UPPTestsCounts(*[
442
+ np.column_stack((
443
+ (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
444
+ np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
445
+ ))
446
+ for _g, _h in zip(
447
+ _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
448
+ )
449
+ ])
450
+ del _res_list, _res_list_stacks
451
+
452
+ return upp_test_results
453
+
454
+ def estimate_enf_counts(
455
+ self,
456
+ _enf_parm_vec: HMGThresholds,
457
+ _upp_test_regime: UPPTestRegime,
458
+ /,
459
+ *,
460
+ sample_size: int = 10**6,
461
+ seed_seq_list: list[SeedSequence] | None,
462
+ nthreads: int,
463
+ save_data_to_file: SaveData = False,
464
+ saved_array_name_suffix: str = "",
465
+ ) -> None:
466
+ """Estimate enforcement counts
467
+
468
+ Parameters
469
+ ----------
470
+ _enf_parm_vec
471
+ Threshold values for various Guidelines criteria
472
+
473
+ _upp_test_regime
474
+ Specifies whether to analyze enforcement, clearance, or both
475
+ and the GUPPI and diversion ratio aggregators employed, with
476
+ default being to analyze enforcement based on the maximum
477
+ merging-firm GUPPI and maximum diversion ratio between the
478
+ merging firms
479
+
480
+ sample_size
481
+ Size of the market sample drawn
482
+
483
+ seed_seq_list
484
+ List of :code:`numpy.random.SeedSequence` objects
485
+
486
+ nthreads
487
+ Number of threads to use
488
+
489
+ save_data_to_file
490
+ Save data to given HDF5 file, at specified group node
491
+
492
+ saved_array_name_suffix
493
+ Suffix to add to the array names in the HDF5 file
494
+
495
+ Returns
496
+ -------
497
+ None
498
+
499
+ """
500
+
501
+ if self.data is None:
502
+ self.enf_counts = self.sim_enf_cnts_ll(
503
+ _enf_parm_vec,
504
+ _upp_test_regime,
505
+ sample_size=sample_size,
506
+ seed_seq_list=seed_seq_list,
507
+ nthreads=nthreads,
508
+ save_data_to_file=save_data_to_file,
509
+ saved_array_name_suffix=saved_array_name_suffix,
510
+ )
511
+ else:
512
+ self.enf_counts = enf_cnts(self.data, _enf_parm_vec, _upp_test_regime)
513
+ if save_data_to_file:
514
+ save_data_to_hdf5(
515
+ self.enf_counts,
516
+ save_data_to_file=save_data_to_file,
517
+ saved_array_name_suffix=saved_array_name_suffix,
518
+ )