mergeron 2024.739099.2__py3-none-any.whl → 2024.739105.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

@@ -1,283 +1,527 @@
1
1
  """
2
- Methods to generate market data, including shares price, marginsm, and diversion ratios
3
- for analyzing merger enforcement policy.
2
+ Methods to generate data for analyzing merger enforcement policy.
4
3
 
5
4
  """
6
5
 
7
6
  from __future__ import annotations
8
7
 
9
- from typing import NamedTuple
8
+ from collections.abc import Sequence
9
+ from typing import TypedDict
10
10
 
11
11
  import numpy as np
12
+ from attrs import Attribute, define, field, validators
13
+ from joblib import Parallel, cpu_count, delayed # type: ignore
12
14
  from numpy.random import SeedSequence
13
15
 
14
- from .. import VERSION, ArrayDouble, RECConstants # noqa: TID252
16
+ from .. import VERSION, RECForm # noqa: TID252 # noqa
17
+ from ..core import guidelines_boundaries as gbl # noqa: TID252
18
+ from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
15
19
  from . import (
16
- EMPTY_ARRAY_DEFAULT,
17
- FM2Constants,
20
+ FM2Constraint,
18
21
  MarketDataSample,
19
- MarketSpec,
20
- PriceConstants,
21
- SHRConstants,
22
- SSZConstants,
22
+ PCMDistribution,
23
+ PCMSpec,
24
+ PriceSpec,
25
+ ShareSpec,
26
+ SHRDistribution,
27
+ SSZConstant,
28
+ UPPTestRegime,
29
+ UPPTestsCounts,
23
30
  )
24
- from ._data_generation_functions import _gen_margin_price_data, _gen_share_data
31
+ from .data_generation_functions import (
32
+ gen_divr_array,
33
+ gen_margin_price_data,
34
+ gen_share_data,
35
+ parse_seed_seq_list,
36
+ )
37
+ from .upp_tests import SaveData, enf_cnts, save_data_to_hdf5
25
38
 
26
39
  __version__ = VERSION
27
40
 
28
41
 
29
- class SeedSequenceData(NamedTuple):
30
- mktshr_rng_seed_seq: SeedSequence
31
- pcm_rng_seed_seq: SeedSequence
32
- fcount_rng_seed_seq: SeedSequence | None
33
- pr_rng_seed_seq: SeedSequence | None
42
+ class SamplingFunctionKWArgs(TypedDict, total=False):
43
+ "Keyword arguments of sampling methods defined below"
34
44
 
45
+ sample_size: int
46
+ """number of draws to generate"""
35
47
 
36
- def gen_market_sample(
37
- _mkt_sample_spec: MarketSpec,
38
- /,
39
- *,
40
- sample_size: int = 10**6,
41
- seed_seq_list: list[SeedSequence] | None = None,
42
- nthreads: int = 16,
43
- ) -> MarketDataSample:
44
- """
45
- Generate share, diversion ratio, price, and margin data for MarketSpec.
46
-
47
-
48
- Parameters
49
- ----------
50
- _mkt_sample_spec
51
- class specifying parameters for data generation, see :class:`mergeron.gen.MarketSpec`
52
- sample_size
53
- number of draws to generate
54
- seed_seq_list
55
- tuple of SeedSequences to ensure replicable data generation with
56
- appropriately independent random streams
57
- nthreads
58
- optionally specify the number of CPU threads for the PRNG
59
-
60
- Returns
61
- -------
62
- Merging firms' shares, margins, etc. for each hypothetical merger
63
- in the sample
48
+ seed_seq_list: Sequence[SeedSequence] | None
49
+ """sequence of SeedSequences to ensure replicable data generation with
50
+ appropriately independent random streams
51
+
52
+ NOTES
53
+ -----
54
+
55
+ See :func:`.data_generation_functions.parse_seed_seq_list` for more on
56
+ specification of this parameter.
64
57
 
65
58
  """
66
59
 
67
- _recapture_form = _mkt_sample_spec.share_spec.recapture_form
68
- _recapture_rate = _mkt_sample_spec.share_spec.recapture_rate
69
- _dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
70
- _dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
71
- _hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
72
-
73
- (
74
- _mktshr_rng_seed_seq,
75
- _pcm_rng_seed_seq,
76
- _fcount_rng_seed_seq,
77
- _pr_rng_seed_seq,
78
- ) = parse_seed_seq_list(
79
- seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.price_spec
80
- )
60
+ nthreads: int
61
+ """number of parallel threads to use"""
81
62
 
82
- _shr_sample_size = 1.0 * sample_size
83
- # Scale up sample size to offset discards based on specified criteria
84
- _shr_sample_size *= _hsr_filing_test_type
85
- if _dist_firm2_pcm == FM2Constants.MNL:
86
- _shr_sample_size *= SSZConstants.MNL_DEP
87
- _shr_sample_size = int(_shr_sample_size)
88
-
89
- # Generate share data
90
- _mktshr_data = _gen_share_data(
91
- _shr_sample_size,
92
- _mkt_sample_spec.share_spec,
93
- _fcount_rng_seed_seq,
94
- _mktshr_rng_seed_seq,
95
- nthreads,
96
- )
63
+ save_data_to_file: SaveData
64
+ """optionally save data to HDF5 file"""
65
+
66
+ saved_array_name_suffix: str
67
+ """optionally specify a suffix for the HDF5 array names"""
97
68
 
98
- _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
99
- getattr(_mktshr_data, _f)
100
- for _f in (
101
- "mktshr_array",
102
- "fcounts",
103
- "aggregate_purchase_prob",
104
- "nth_firm_share",
105
- )
106
- )
107
69
 
108
- # Generate merging-firm price and PCM data
109
- _margin_data, _price_data = _gen_margin_price_data(
110
- _mktshr_array[:, :2],
111
- _nth_firm_share,
112
- _aggregate_purchase_prob,
113
- _mkt_sample_spec.pcm_spec,
114
- _mkt_sample_spec.price_spec,
115
- _mkt_sample_spec.hsr_filing_test_type,
116
- _pcm_rng_seed_seq,
117
- _pr_rng_seed_seq,
118
- nthreads,
70
+ @define
71
+ class MarketSample:
72
+ """Parameter specification for market data generation."""
73
+
74
+ share_spec: ShareSpec = field(
75
+ kw_only=True,
76
+ default=ShareSpec(SHRDistribution.UNI, None, None, RECForm.INOUT, 0.8),
77
+ validator=validators.instance_of(ShareSpec),
119
78
  )
79
+ """Market-share specification, see :class:`ShareSpec`"""
120
80
 
121
- _price_array, _hsr_filing_test = (
122
- getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
81
+ pcm_spec: PCMSpec = field(
82
+ kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
123
83
  )
84
+ """Margin specification, see :class:`PCMSpec`"""
85
+
86
+ @pcm_spec.validator
87
+ def _check_pcm(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
88
+ if (
89
+ self.share_spec.recapture_form == RECForm.FIXED
90
+ and _v.firm2_pcm_constraint == FM2Constraint.MNL
91
+ ):
92
+ raise ValueError(
93
+ f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
94
+ "requires Firm 2 margin must have property, "
95
+ f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
96
+ )
124
97
 
125
- _pcm_array, _mnl_test_rows = (
126
- getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
98
+ price_spec: PriceSpec = field(
99
+ kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
127
100
  )
101
+ """Price specification, see :class:`PriceSpec`"""
128
102
 
129
- _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
130
- _s_size = sample_size # originally-specified sample size
131
- if _dist_firm2_pcm == FM2Constants.MNL:
132
- _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
133
- _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
134
- _price_array = _price_array[_mnl_test_rows][:_s_size]
135
- _fcounts = _fcounts[_mnl_test_rows][:_s_size]
136
- _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
137
- _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
138
-
139
- # Calculate diversion ratios
140
- _divr_array = gen_divr_array(
141
- _recapture_form, _recapture_rate, _mktshr_array[:, :2], _aggregate_purchase_prob
103
+ hsr_filing_test_type: SSZConstant = field(
104
+ kw_only=True,
105
+ default=SSZConstant.ONE,
106
+ validator=validators.instance_of(SSZConstant),
142
107
  )
108
+ """Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
109
+
110
+ data: MarketDataSample = field(default=None)
111
+
112
+ enf_counts: UPPTestsCounts = field(default=None)
113
+
114
+ def gen_market_sample(
115
+ self,
116
+ /,
117
+ *,
118
+ sample_size: int = 10**6,
119
+ seed_seq_list: Sequence[SeedSequence] | None = None,
120
+ nthreads: int = 16,
121
+ save_data_to_file: SaveData = False,
122
+ saved_array_name_suffix: str = "",
123
+ ) -> MarketDataSample:
124
+ """
125
+ Generate share, diversion ratio, price, and margin data for this MarketSample.
126
+
127
+ see :class:`SamplingFunctionKWArgs` for description of keyword parameters
128
+
129
+ Returns
130
+ -------
131
+ Merging firms' shares, margins, etc. for each hypothetical merger
132
+ in the sample
133
+
134
+ """
135
+
136
+ _recapture_form = self.share_spec.recapture_form
137
+ _recapture_rate = self.share_spec.recapture_rate
138
+ _dist_type_mktshr = self.share_spec.dist_type
139
+ _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
140
+ _hsr_filing_test_type = self.hsr_filing_test_type
141
+
142
+ (
143
+ _mktshr_rng_seed_seq,
144
+ _pcm_rng_seed_seq,
145
+ _fcount_rng_seed_seq,
146
+ _pr_rng_seed_seq,
147
+ ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
148
+
149
+ _shr_sample_size = 1.0 * sample_size
150
+ # Scale up sample size to offset discards based on specified criteria
151
+ _shr_sample_size *= _hsr_filing_test_type
152
+ if _dist_firm2_pcm == FM2Constraint.MNL:
153
+ _shr_sample_size *= SSZConstant.MNL_DEP
154
+ _shr_sample_size = int(_shr_sample_size)
155
+
156
+ # Generate share data
157
+ _mktshr_data = gen_share_data(
158
+ _shr_sample_size,
159
+ self.share_spec,
160
+ _fcount_rng_seed_seq,
161
+ _mktshr_rng_seed_seq,
162
+ nthreads,
163
+ )
143
164
 
144
- del _mnl_test_rows, _s_size
165
+ _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
166
+ getattr(_mktshr_data, _f)
167
+ for _f in (
168
+ "mktshr_array",
169
+ "fcounts",
170
+ "aggregate_purchase_prob",
171
+ "nth_firm_share",
172
+ )
173
+ )
145
174
 
146
- _frmshr_array = _mktshr_array[:, :2]
147
- _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[:, None]
175
+ # Generate merging-firm price and PCM data
176
+ _margin_data, _price_data = gen_margin_price_data(
177
+ _mktshr_array[:, :2],
178
+ _nth_firm_share,
179
+ _aggregate_purchase_prob,
180
+ self.pcm_spec,
181
+ self.price_spec,
182
+ self.hsr_filing_test_type,
183
+ _pcm_rng_seed_seq,
184
+ _pr_rng_seed_seq,
185
+ nthreads,
186
+ )
148
187
 
149
- _hhi_post = (
150
- _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
151
- )
188
+ _price_array, _hsr_filing_test = (
189
+ getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
190
+ )
152
191
 
153
- return MarketDataSample(
154
- _frmshr_array,
155
- _pcm_array,
156
- _price_array,
157
- _fcounts,
158
- _aggregate_purchase_prob,
159
- _nth_firm_share,
160
- _divr_array,
161
- _hhi_post,
162
- _hhi_delta,
163
- )
192
+ _pcm_array, _mnl_test_rows = (
193
+ getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
194
+ )
164
195
 
196
+ _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
197
+ _s_size = sample_size # originally-specified sample size
198
+ if _dist_firm2_pcm == FM2Constraint.MNL:
199
+ _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
200
+ _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
201
+ _price_array = _price_array[_mnl_test_rows][:_s_size]
202
+ _fcounts = _fcounts[_mnl_test_rows][:_s_size]
203
+ _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
204
+ :_s_size
205
+ ]
206
+ _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
207
+
208
+ # Calculate diversion ratios
209
+ _divr_array = gen_divr_array(
210
+ _recapture_form,
211
+ _recapture_rate,
212
+ _mktshr_array[:, :2],
213
+ _aggregate_purchase_prob,
214
+ )
165
215
 
166
- def parse_seed_seq_list(
167
- _sseq_list: list[SeedSequence] | None,
168
- _mktshr_dist_type: SHRConstants,
169
- _price_spec: PriceConstants,
170
- /,
171
- ) -> SeedSequenceData:
172
- """Initialize RNG seed sequences to ensure independence of distinct random streams.
216
+ del _mnl_test_rows, _s_size
173
217
 
174
- The tuple of SeedSequences, is parsed in the following order
175
- for generating the relevant random variates:
176
- 1.) quantity shares
177
- 2.) price-cost margins
178
- 3.) firm-counts, if :code:`MarketSpec.share_spec.dist_type` is a Dirichlet distribution
179
- 4.) prices, if :code:`MarketSpec.price_spec ==`:attr:`mergeron.gen.PriceConstants.ZERO`.
218
+ _frmshr_array = _mktshr_array[:, :2]
219
+ _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
220
+ :, None
221
+ ]
180
222
 
223
+ _hhi_post = (
224
+ _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
225
+ )
181
226
 
227
+ return MarketDataSample(
228
+ _frmshr_array,
229
+ _pcm_array,
230
+ _price_array,
231
+ _fcounts,
232
+ _aggregate_purchase_prob,
233
+ _nth_firm_share,
234
+ _divr_array,
235
+ _hhi_post,
236
+ _hhi_delta,
237
+ )
182
238
 
183
- Parameters
184
- ----------
185
- _sseq_list
186
- List of RNG seed sequences
239
+ def generate_sample(
240
+ self,
241
+ /,
242
+ *,
243
+ sample_size: int,
244
+ seed_seq_list: Sequence[SeedSequence],
245
+ nthreads: int,
246
+ save_data_to_file: SaveData,
247
+ saved_array_name_suffix: str,
248
+ ) -> None:
249
+ """Populate :attr:`data` with generated data
187
250
 
188
- _mktshr_dist_type
189
- Market share distribution type
251
+ see :class:`SamplingFunctionKWArgs` for description of keyword parameters
190
252
 
191
- _price_spec
192
- Price specification
253
+ Returns
254
+ -------
255
+ None
193
256
 
194
- Returns
195
- -------
196
- Seed sequence data
257
+ """
197
258
 
198
- """
199
- _fcount_rng_seed_seq: SeedSequence | None = None
200
- _pr_rng_seed_seq: SeedSequence | None = None
201
-
202
- if _price_spec == PriceConstants.ZERO:
203
- _pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
204
-
205
- if _mktshr_dist_type == SHRConstants.UNI:
206
- _fcount_rng_seed_seq = None
207
- _seed_count = 2
208
- _mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
209
- _sseq_list[:_seed_count]
210
- if _sseq_list
211
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
259
+ self.data = self.gen_market_sample(
260
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
212
261
  )
213
- else:
214
- _seed_count = 3
215
- (_mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq) = (
216
- _sseq_list[:_seed_count]
217
- if _sseq_list
218
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
262
+
263
+ _invalid_array_names = (
264
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
265
+ if self.share_spec.dist_type == "Uniform"
266
+ else ()
219
267
  )
220
268
 
221
- return SeedSequenceData(
222
- _mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq, _pr_rng_seed_seq
223
- )
269
+ save_data_to_hdf5(
270
+ self.data,
271
+ saved_array_name_suffix=saved_array_name_suffix,
272
+ excluded_attrs=_invalid_array_names,
273
+ save_data_to_file=save_data_to_file,
274
+ )
224
275
 
276
+ def sim_enf_cnts(
277
+ self,
278
+ _upp_test_parms: gbl.HMGThresholds,
279
+ _sim_test_regime: UPPTestRegime,
280
+ /,
281
+ *,
282
+ sample_size: int = 10**6,
283
+ seed_seq_list: list[SeedSequence] | None = None,
284
+ nthreads: int = 16,
285
+ save_data_to_file: SaveData = False,
286
+ saved_array_name_suffix: str = "",
287
+ ) -> UPPTestsCounts:
288
+ """Generate market data and estimate UPP test counts on same.
225
289
 
226
- def gen_divr_array(
227
- _recapture_form: RECConstants,
228
- _recapture_rate: float | None,
229
- _frmshr_array: ArrayDouble,
230
- _aggregate_purchase_prob: ArrayDouble = EMPTY_ARRAY_DEFAULT,
231
- /,
232
- ) -> ArrayDouble:
233
- """
234
- Given merging-firm shares and related parameters, return diverion ratios.
290
+ Parameters
291
+ ----------
235
292
 
236
- If recapture is specified as :attr:`mergeron.RECConstants.OUTIN`, then the
237
- choice-probability for the outside good must be supplied.
293
+ _upp_test_parms
294
+ Guidelines thresholds for testing UPP and related statistics
238
295
 
239
- Parameters
240
- ----------
241
- _recapture_form
242
- Enum specifying Fixed (proportional), Inside-out, or Outside-in
296
+ _sim_test_regime
297
+ Configuration to use for testing; UPPTestRegime object
298
+ specifying whether investigation results in enforcement, clearance,
299
+ or both; and aggregation methods used for GUPPI and diversion ratio
300
+ measures
243
301
 
244
- _recapture_rate
245
- If recapture is proportional or inside-out, the recapture rate
246
- for the firm with the smaller share.
302
+ sample_size
303
+ Number of draws to generate
247
304
 
248
- _frmshr_array
249
- Merging-firm shares.
305
+ seed_seq_list
306
+ List of seed sequences, to assure independent samples in each thread
250
307
 
251
- _aggregate_purchase_prob
252
- 1 minus probability that the outside good is chosen; converts
253
- market shares to choice probabilities by multiplication.
308
+ nthreads
309
+ Number of parallel processes to use
254
310
 
255
- Returns
256
- -------
257
- Merging-firm diversion ratios for mergers in the sample.
311
+ save_data_to_file
312
+ Whether to save data to an HDF5 file, and where to save it
258
313
 
259
- """
314
+ saved_array_name_suffix
315
+ Suffix to add to the array names in the HDF5 file
316
+
317
+ Returns
318
+ -------
319
+ UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
320
+
321
+ """
322
+
323
+ _market_data_sample = self.gen_market_sample(
324
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
325
+ )
326
+
327
+ _invalid_array_names = (
328
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
329
+ if self.share_spec.dist_type == "Uniform"
330
+ else ()
331
+ )
332
+
333
+ save_data_to_hdf5(
334
+ _market_data_sample,
335
+ saved_array_name_suffix=saved_array_name_suffix,
336
+ excluded_attrs=_invalid_array_names,
337
+ save_data_to_file=save_data_to_file,
338
+ )
260
339
 
261
- _divr_array: ArrayDouble
262
- if _recapture_form == RECConstants.FIXED:
263
- _divr_array = _recapture_rate * _frmshr_array[:, ::-1] / (1 - _frmshr_array) # type: ignore
264
-
265
- else:
266
- _purchprob_array = _aggregate_purchase_prob * _frmshr_array
267
- _divr_array = _purchprob_array[:, ::-1] / (1 - _purchprob_array)
268
-
269
- _divr_assert_test = (
270
- (np.round(np.einsum("ij->i", _frmshr_array), 15) == 1)
271
- | (np.argmin(_frmshr_array, axis=1) == np.argmax(_divr_array, axis=1))
272
- )[:, None]
273
- if not all(_divr_assert_test):
274
- raise ValueError(
275
- "{} {} {} {}".format(
276
- "Data construction fails tests:",
277
- "the index of min(s_1, s_2) must equal",
278
- "the index of max(d_12, d_21), for all draws.",
279
- "unless frmshr_array sums to 1.00.",
340
+ _upp_test_arrays = enf_cnts(
341
+ _market_data_sample, _upp_test_parms, _sim_test_regime
342
+ )
343
+
344
+ save_data_to_hdf5(
345
+ _upp_test_arrays,
346
+ saved_array_name_suffix=saved_array_name_suffix,
347
+ save_data_to_file=save_data_to_file,
348
+ )
349
+
350
+ return _upp_test_arrays
351
+
352
+ def sim_enf_cnts_ll(
353
+ self,
354
+ _enf_parm_vec: gbl.HMGThresholds,
355
+ _sim_test_regime: UPPTestRegime,
356
+ /,
357
+ *,
358
+ sample_size: int = 10**6,
359
+ seed_seq_list: list[SeedSequence] | None = None,
360
+ nthreads: int = 16,
361
+ save_data_to_file: SaveData = False,
362
+ saved_array_name_suffix: str = "",
363
+ ) -> UPPTestsCounts:
364
+ """A function to parallelize data-generation and testing
365
+
366
+ The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
367
+ the parent function, `sim_enf_cnts()`, except that, if provided,
368
+ `seed_seq_list` is used to spawn a seed sequence for each thread,
369
+ to assure independent samples in each thread, and `nthreads` defines
370
+ the number of parallel processes used. The number of draws in
371
+ each thread may be tuned, by trial and error, to the amount of
372
+ memory (RAM) available.
373
+
374
+ Parameters
375
+ ----------
376
+
377
+ _enf_parm_vec
378
+ Guidelines thresholds to test against
379
+
380
+ _sim_test_regime
381
+ Configuration to use for testing
382
+
383
+ sample_size
384
+ Number of draws to simulate
385
+
386
+ seed_seq_list
387
+ List of seed sequences, to assure independent samples in each thread
388
+
389
+ nthreads
390
+ Number of parallel processes to use
391
+
392
+ save_data_to_file
393
+ Whether to save data to an HDF5 file, and where to save it
394
+
395
+ saved_array_name_suffix
396
+ Suffix to add to the array names in the HDF5 file
397
+
398
+ Returns
399
+ -------
400
+ Arrays of enforcement counts or clearance counts by firm count,
401
+ ΔHHI and concentration zone
402
+
403
+ """
404
+ _sample_sz = sample_size
405
+ _subsample_sz = 10**6
406
+ _iter_count = (
407
+ int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
408
+ )
409
+ _thread_count = cpu_count()
410
+
411
+ if (
412
+ self.share_spec.recapture_form != RECForm.OUTIN
413
+ and self.share_spec.recapture_rate != _enf_parm_vec.rec
414
+ ):
415
+ raise ValueError(
416
+ "{} {} {}".format(
417
+ f"Recapture rate from market sample spec, {self.share_spec.recapture_rate}",
418
+ f"must match the value, {_enf_parm_vec.rec}",
419
+ "the guidelines thresholds vector.",
420
+ )
280
421
  )
422
+
423
+ _rng_seed_seq_list = [None] * _iter_count
424
+ if seed_seq_list:
425
+ _rng_seed_seq_list = list(
426
+ zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
427
+ )
428
+
429
+ _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
430
+ "sample_size": _subsample_sz,
431
+ "save_data_to_file": save_data_to_file,
432
+ "nthreads": nthreads,
433
+ })
434
+
435
+ _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
436
+ delayed(self.sim_enf_cnts)(
437
+ _enf_parm_vec,
438
+ _sim_test_regime,
439
+ **_sim_enf_cnts_kwargs,
440
+ saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
441
+ seed_seq_list=_rng_seed_seq_list_ch,
442
+ )
443
+ for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
281
444
  )
282
445
 
283
- return _divr_array
446
+ _res_list_stacks = UPPTestsCounts(*[
447
+ np.stack([getattr(_j, _k) for _j in _res_list])
448
+ for _k in ("by_firm_count", "by_delta", "by_conczone")
449
+ ])
450
+ upp_test_results = UPPTestsCounts(*[
451
+ np.column_stack((
452
+ (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
453
+ np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
454
+ ))
455
+ for _g, _h in zip(
456
+ _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
457
+ )
458
+ ])
459
+ del _res_list, _res_list_stacks
460
+
461
+ return upp_test_results
462
+
463
+ def estimate_enf_counts(
464
+ self,
465
+ _enf_parm_vec: HMGThresholds,
466
+ _upp_test_regime: UPPTestRegime,
467
+ /,
468
+ *,
469
+ sample_size: int = 10**6,
470
+ seed_seq_list: Sequence[SeedSequence] | None = None,
471
+ nthreads: int = 16,
472
+ save_data_to_file: SaveData = False,
473
+ saved_array_name_suffix: str = "",
474
+ ) -> None:
475
+ """Populate :attr:`enf_counts` with estimated test counts.
476
+
477
+ Parameters
478
+ ----------
479
+ _enf_parm_vec
480
+ Threshold values for various Guidelines criteria
481
+
482
+ _upp_test_regime
483
+ Specifies whether to analyze enforcement, clearance, or both
484
+ and the GUPPI and diversion ratio aggregators employed, with
485
+ default being to analyze enforcement based on the maximum
486
+ merging-firm GUPPI and maximum diversion ratio between the
487
+ merging firms
488
+
489
+ sample_size
490
+ Number of draws to simulate
491
+
492
+ seed_seq_list
493
+ List of seed sequences, to assure independent samples in each thread
494
+
495
+ nthreads
496
+ Number of parallel processes to use
497
+
498
+ save_data_to_file
499
+ Whether to save data to an HDF5 file, and where to save it
500
+
501
+ saved_array_name_suffix
502
+ Suffix to add to the array names in the HDF5 file
503
+
504
+ Returns
505
+ -------
506
+ None
507
+
508
+ """
509
+
510
+ if self.data is None:
511
+ self.enf_counts = self.sim_enf_cnts_ll(
512
+ _enf_parm_vec,
513
+ _upp_test_regime,
514
+ sample_size=sample_size,
515
+ seed_seq_list=seed_seq_list,
516
+ nthreads=nthreads,
517
+ save_data_to_file=save_data_to_file,
518
+ saved_array_name_suffix=saved_array_name_suffix,
519
+ )
520
+ else:
521
+ self.enf_counts = enf_cnts(self.data, _enf_parm_vec, _upp_test_regime)
522
+ if save_data_to_file:
523
+ save_data_to_hdf5(
524
+ self.enf_counts,
525
+ save_data_to_file=save_data_to_file,
526
+ saved_array_name_suffix=saved_array_name_suffix,
527
+ )