mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

Files changed (39) hide show
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +142 -93
  5. mergeron/core/guidelines_boundaries.py +289 -1077
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +257 -245
  15. mergeron/gen/data_generation.py +473 -221
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +159 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -259
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738963.0.dist-info/METADATA +0 -108
  38. mergeron-2024.738963.0.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
@@ -5,273 +5,525 @@ Methods to generate data for analyzing merger enforcement policy.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- from importlib.metadata import version
8
+ from collections.abc import Sequence
9
+ from typing import TypedDict
9
10
 
10
- import attrs
11
11
  import numpy as np
12
+ from attrs import Attribute, define, field, validators
13
+ from joblib import Parallel, cpu_count, delayed # type: ignore
12
14
  from numpy.random import SeedSequence
13
- from numpy.typing import NDArray
14
15
 
15
- from .. import _PKG_NAME, RECConstants # noqa: TID252
16
+ from .. import DEFAULT_REC_RATIO, VERSION, RECForm # noqa: TID252 # noqa
17
+ from ..core import guidelines_boundaries as gbl # noqa: TID252
18
+ from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
16
19
  from . import (
17
- EMPTY_ARRAY_DEFAULT,
18
- FM2Constants,
20
+ FM2Constraint,
19
21
  MarketDataSample,
20
- MarketSampleSpec,
21
- PRIConstants,
22
- SHRConstants,
23
- SSZConstants,
22
+ PCMDistribution,
23
+ PCMSpec,
24
+ PriceSpec,
25
+ ShareSpec,
26
+ SHRDistribution,
27
+ SSZConstant,
28
+ UPPTestRegime,
29
+ UPPTestsCounts,
24
30
  )
25
- from ._data_generation_functions_nonpublic import (
26
- _gen_market_shares_dirichlet, # noqa: F401 easter-egg for external modules
27
- _gen_market_shares_uniform, # noqa: F401 easter-egg for external modules
28
- _gen_pcm_data,
29
- _gen_price_data,
30
- _gen_share_data,
31
+ from .data_generation_functions import (
32
+ gen_divr_array,
33
+ gen_margin_price_data,
34
+ gen_share_data,
35
+ parse_seed_seq_list,
31
36
  )
37
+ from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
32
38
 
33
- __version__ = version(_PKG_NAME)
39
+ __version__ = VERSION
34
40
 
35
41
 
36
- def gen_market_sample(
37
- _mkt_sample_spec: MarketSampleSpec,
38
- /,
39
- *,
40
- seed_seq_list: list[SeedSequence] | None = None,
41
- nthreads: int = 16,
42
- ) -> MarketDataSample:
43
- """
44
- Generate share, diversion ratio, price, and margin data based on supplied parameters
45
-
46
- Diversion ratios generated assuming share-proportionality, unless
47
- `recapture_form` = "proportional", in which case both firms' recapture rate
48
- is set to `r_bar`.
49
-
50
- The tuple of SeedSequences, if specified, is parsed in the following order
51
- for generating the relevant random variates:
52
- 1.) quantity shares
53
- 2.) price-cost margins
54
- 3.) firm-counts, from :code:`[2, 2 + len(firm_counts_weights)]`,
55
- weighted by :code:`firm_counts_weights`, where relevant
56
- 4.) prices, if :code:`price_spec == PRIConstants.ZERO`.
57
-
58
- Parameters
59
- ----------
60
- _mkt_sample_spec
61
- class specifying parameters for data generation
62
- seed_seq_list
63
- tuple of SeedSequences to ensure replicable data generation with
64
- appropriately independent random streams
65
- nthreads
66
- optionally specify the number of CPU threads for the PRNG
67
-
68
- Returns
69
- -------
70
- Merging firms' shares, margins, etc. for each hypothetical merger
71
- in the sample
42
+ class SamplingFunctionKWArgs(TypedDict, total=False):
43
+ "Keyword arguments of sampling methods defined below"
44
+
45
+ sample_size: int
46
+ """number of draws to generate"""
47
+
48
+ seed_seq_list: Sequence[SeedSequence] | None
49
+ """sequence of SeedSequences to ensure replicable data generation with
50
+ appropriately independent random streams
51
+
52
+ NOTES
53
+ -----
54
+
55
+ See :func:`.data_generation_functions.parse_seed_seq_list` for more on
56
+ specification of this parameter.
72
57
 
73
58
  """
74
59
 
75
- _mkt_sample_spec = _mkt_sample_spec or MarketSampleSpec()
76
-
77
- _recapture_form = _mkt_sample_spec.share_spec.recapture_form
78
- _recapture_rate = _mkt_sample_spec.share_spec.recapture_rate
79
- _dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
80
- _dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
81
- _hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
82
-
83
- (
84
- _mktshr_rng_seed_seq,
85
- _pcm_rng_seed_seq,
86
- _fcount_rng_seed_seq,
87
- _pr_rng_seed_seq,
88
- ) = parse_seed_seq_list(
89
- seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.price_spec
90
- )
60
+ nthreads: int
61
+ """number of parallel threads to use"""
91
62
 
92
- _shr_sample_size = 1.0 * _mkt_sample_spec.sample_size
93
- # Scale up sample size to offset discards based on specified criteria
94
- _shr_sample_size *= _hsr_filing_test_type
95
- if _dist_firm2_pcm == FM2Constants.MNL:
96
- _shr_sample_size *= SSZConstants.MNL_DEP
97
- _mkt_sample_spec_here = attrs.evolve(
98
- _mkt_sample_spec, sample_size=int(_shr_sample_size)
99
- )
100
- del _shr_sample_size
63
+ save_data_to_file: SaveData
64
+ """optionally save data to HDF5 file"""
101
65
 
102
- # Generate share data
103
- _mktshr_data = _gen_share_data(
104
- _mkt_sample_spec_here, _fcount_rng_seed_seq, _mktshr_rng_seed_seq, nthreads
66
+ saved_array_name_suffix: str
67
+ """optionally specify a suffix for the HDF5 array names"""
68
+
69
+
70
+ @define
71
+ class MarketSample:
72
+ """Parameter specification for market data generation."""
73
+
74
+ share_spec: ShareSpec = field(
75
+ kw_only=True,
76
+ default=ShareSpec(
77
+ SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
78
+ ),
79
+ validator=validators.instance_of(ShareSpec),
105
80
  )
81
+ """Market-share specification, see :class:`ShareSpec`"""
106
82
 
107
- _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
108
- getattr(_mktshr_data, _f)
109
- for _f in (
110
- "mktshr_array",
111
- "fcounts",
112
- "aggregate_purchase_prob",
113
- "nth_firm_share",
114
- )
83
+ pcm_spec: PCMSpec = field(
84
+ kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
115
85
  )
86
+ """Margin specification, see :class:`PCMSpec`"""
87
+
88
+ @pcm_spec.validator # pyright: ignore
89
+ def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
90
+ if (
91
+ self.share_spec.recapture_form == RECForm.FIXED
92
+ and _v.firm2_pcm_constraint == FM2Constraint.MNL
93
+ ):
94
+ raise ValueError(
95
+ f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
96
+ "requires Firm 2 margin must have property, "
97
+ f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
98
+ )
116
99
 
117
- # Generate merging-firm price data
118
- _price_data = _gen_price_data(
119
- _mktshr_array[:, :2], _nth_firm_share, _mkt_sample_spec_here, _pr_rng_seed_seq
100
+ price_spec: PriceSpec = field(
101
+ kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
120
102
  )
103
+ """Price specification, see :class:`PriceSpec`"""
121
104
 
122
- _price_array, _hsr_filing_test = (
123
- getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
105
+ hsr_filing_test_type: SSZConstant = field(
106
+ kw_only=True,
107
+ default=SSZConstant.ONE,
108
+ validator=validators.instance_of(SSZConstant),
124
109
  )
110
+ """Method for modeling HSR filing threholds, see :class:`SSZConstant`"""
111
+
112
+ data: MarketDataSample = field(default=None)
113
+
114
+ enf_counts: UPPTestsCounts = field(default=None)
115
+
116
+ def __gen_market_sample(
117
+ self,
118
+ /,
119
+ *,
120
+ sample_size: int,
121
+ seed_seq_list: Sequence[SeedSequence] | None,
122
+ nthreads: int,
123
+ ) -> MarketDataSample:
124
+ """
125
+ Generate share, diversion ratio, price, and margin data for MarketSpec.
126
+
127
+ see :attr:`SamplingFunctionKWArgs` for description of keyord parameters
128
+
129
+ Returns
130
+ -------
131
+ Merging firms' shares, margins, etc. for each hypothetical merger
132
+ in the sample
133
+
134
+ """
135
+
136
+ _recapture_form = self.share_spec.recapture_form
137
+ _recapture_ratio = self.share_spec.recapture_ratio
138
+ _dist_type_mktshr = self.share_spec.dist_type
139
+ _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
140
+ _hsr_filing_test_type = self.hsr_filing_test_type
141
+
142
+ (
143
+ _mktshr_rng_seed_seq,
144
+ _pcm_rng_seed_seq,
145
+ _fcount_rng_seed_seq,
146
+ _pr_rng_seed_seq,
147
+ ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
148
+
149
+ _shr_sample_size = 1.0 * sample_size
150
+ # Scale up sample size to offset discards based on specified criteria
151
+ _shr_sample_size *= _hsr_filing_test_type
152
+ if _dist_firm2_pcm == FM2Constraint.MNL:
153
+ _shr_sample_size *= SSZConstant.MNL_DEP
154
+ _shr_sample_size = int(_shr_sample_size)
155
+
156
+ # Generate share data
157
+ _mktshr_data = gen_share_data(
158
+ _shr_sample_size,
159
+ self.share_spec,
160
+ _fcount_rng_seed_seq,
161
+ _mktshr_rng_seed_seq,
162
+ nthreads,
163
+ )
164
+
165
+ _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
166
+ getattr(_mktshr_data, _f)
167
+ for _f in (
168
+ "mktshr_array",
169
+ "fcounts",
170
+ "aggregate_purchase_prob",
171
+ "nth_firm_share",
172
+ )
173
+ )
125
174
 
126
- if _hsr_filing_test_type != SSZConstants.ONE:
127
- _mktshr_array = _mktshr_array[_hsr_filing_test]
128
- _fcounts = _fcounts[_hsr_filing_test]
129
- _aggregate_purchase_prob = _aggregate_purchase_prob[_hsr_filing_test]
130
- _nth_firm_share = _nth_firm_share[_hsr_filing_test]
131
- _price_array = _price_array[_hsr_filing_test]
175
+ # Generate merging-firm price and PCM data
176
+ _margin_data, _price_data = gen_margin_price_data(
177
+ _mktshr_array[:, :2],
178
+ _nth_firm_share,
179
+ _aggregate_purchase_prob,
180
+ self.pcm_spec,
181
+ self.price_spec,
182
+ self.hsr_filing_test_type,
183
+ _pcm_rng_seed_seq,
184
+ _pr_rng_seed_seq,
185
+ nthreads,
186
+ )
132
187
 
133
- # Calculate diversion ratios
134
- _divr_array = gen_divr_array(
135
- _recapture_form, _recapture_rate, _mktshr_array[:, :2], _aggregate_purchase_prob
136
- )
188
+ _price_array, _hsr_filing_test = (
189
+ getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
190
+ )
137
191
 
138
- # Generate margin data
139
- _pcm_data = _gen_pcm_data(
140
- _mktshr_array[:, :2],
141
- _mkt_sample_spec_here,
142
- _price_array,
143
- _aggregate_purchase_prob,
144
- _pcm_rng_seed_seq,
145
- nthreads,
146
- )
147
- _pcm_array, _mnl_test_rows = (
148
- getattr(_pcm_data, _f) for _f in ("pcm_array", "mnl_test_array")
149
- )
192
+ _pcm_array, _mnl_test_rows = (
193
+ getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
194
+ )
150
195
 
151
- _s_size = _mkt_sample_spec.sample_size # originally-specified sample size
152
- if _dist_firm2_pcm == FM2Constants.MNL:
153
- _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
154
- _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
155
- _price_array = _price_array[_mnl_test_rows][:_s_size]
156
- _fcounts = _fcounts[_mnl_test_rows][:_s_size]
157
- _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
158
- _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
159
- _divr_array = _divr_array[_mnl_test_rows][:_s_size]
196
+ _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
197
+ _s_size = sample_size # originally-specified sample size
198
+ if _dist_firm2_pcm == FM2Constraint.MNL:
199
+ _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
200
+ _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
201
+ _price_array = _price_array[_mnl_test_rows][:_s_size]
202
+ _fcounts = _fcounts[_mnl_test_rows][:_s_size]
203
+ _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
204
+ :_s_size
205
+ ]
206
+ _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
207
+
208
+ # Calculate diversion ratios
209
+ _divr_array = gen_divr_array(
210
+ _recapture_form,
211
+ _recapture_ratio,
212
+ _mktshr_array[:, :2],
213
+ _aggregate_purchase_prob,
214
+ )
160
215
 
161
- del _mnl_test_rows, _s_size
216
+ del _mnl_test_rows, _s_size
162
217
 
163
- _frmshr_array = _mktshr_array[:, :2]
164
- _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[:, None]
218
+ _frmshr_array = _mktshr_array[:, :2]
219
+ _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
220
+ :, None
221
+ ]
165
222
 
166
- _hhi_post = (
167
- _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
168
- )
223
+ _hhi_post = (
224
+ _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
225
+ )
169
226
 
170
- return MarketDataSample(
171
- _frmshr_array,
172
- _pcm_array,
173
- _price_array,
174
- _fcounts,
175
- _aggregate_purchase_prob,
176
- _nth_firm_share,
177
- _divr_array,
178
- _hhi_post,
179
- _hhi_delta,
180
- )
227
+ return MarketDataSample(
228
+ _frmshr_array,
229
+ _pcm_array,
230
+ _price_array,
231
+ _fcounts,
232
+ _aggregate_purchase_prob,
233
+ _nth_firm_share,
234
+ _divr_array,
235
+ _hhi_post,
236
+ _hhi_delta,
237
+ )
238
+
239
+ def generate_sample(
240
+ self,
241
+ /,
242
+ *,
243
+ sample_size: int = 10**6,
244
+ seed_seq_list: Sequence[SeedSequence] | None = None,
245
+ nthreads: int = 16,
246
+ save_data_to_file: SaveData = False,
247
+ saved_array_name_suffix: str = "",
248
+ ) -> None:
249
+ """Populate :attr:`data` with generated data
181
250
 
251
+ see :attr:`SamplingFunctionKWArgs` for description of keyword parameters
182
252
 
183
- def parse_seed_seq_list(
184
- _sseq_list: list[SeedSequence] | None,
185
- _mktshr_dist_type: SHRConstants,
186
- _price_spec: PRIConstants,
187
- /,
188
- ) -> tuple[SeedSequence, SeedSequence, SeedSequence | None, SeedSequence | None]:
189
- """Initialize RNG seed sequences to ensure independence of distinct random streams."""
190
- _fcount_rng_seed_seq: SeedSequence | None = None
191
- _pr_rng_seed_seq: SeedSequence | None = None
192
-
193
- if _price_spec == PRIConstants.ZERO:
194
- _pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
195
-
196
- if _mktshr_dist_type == SHRConstants.UNI:
197
- _fcount_rng_seed_seq = None
198
- _seed_count = 2
199
- _mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
200
- _sseq_list[:_seed_count]
201
- if _sseq_list
202
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
253
+ Returns
254
+ -------
255
+ None
256
+
257
+ """
258
+
259
+ self.data = self.__gen_market_sample(
260
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
203
261
  )
204
- else:
205
- _seed_count = 3
206
- (_mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq) = (
207
- _sseq_list[:_seed_count]
208
- if _sseq_list
209
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
262
+
263
+ _invalid_array_names = (
264
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
265
+ if self.share_spec.dist_type == "Uniform"
266
+ else ()
210
267
  )
211
268
 
212
- return (
213
- _mktshr_rng_seed_seq,
214
- _pcm_rng_seed_seq,
215
- _fcount_rng_seed_seq,
216
- _pr_rng_seed_seq,
217
- )
269
+ save_data_to_hdf5(
270
+ self.data,
271
+ saved_array_name_suffix=saved_array_name_suffix,
272
+ excluded_attrs=_invalid_array_names,
273
+ save_data_to_file=save_data_to_file,
274
+ )
218
275
 
276
+ def __sim_enf_cnts(
277
+ self,
278
+ _upp_test_parms: gbl.HMGThresholds,
279
+ _sim_test_regime: UPPTestRegime,
280
+ /,
281
+ *,
282
+ sample_size: int = 10**6,
283
+ seed_seq_list: Sequence[SeedSequence] | None = None,
284
+ nthreads: int = 16,
285
+ save_data_to_file: SaveData = False,
286
+ saved_array_name_suffix: str = "",
287
+ ) -> UPPTestsCounts:
288
+ """Generate market data and estimate UPP test counts on same.
219
289
 
220
- def gen_divr_array(
221
- _recapture_form: RECConstants,
222
- _recapture_rate: float | None,
223
- _frmshr_array: NDArray[np.float64],
224
- _aggregate_purchase_prob: NDArray[np.float64] = EMPTY_ARRAY_DEFAULT,
225
- /,
226
- ) -> NDArray[np.float64]:
227
- """
228
- Given merging-firm shares and related parameters, return diversion ratios.
290
+ Parameters
291
+ ----------
229
292
 
230
- If recapture is specified as "Outside-in" (RECConstants.OUTIN), then the
231
- choice-probability for the outside good must be supplied.
293
+ _upp_test_parms
294
+ Guidelines thresholds for testing UPP and related statistics
232
295
 
233
- Parameters
234
- ----------
235
- _recapture_form
236
- Enum specifying Fixed (proportional), Inside-out, or Outside-in
296
+ _sim_test_regime
297
+ Configuration to use for testing; UPPTestRegime object
298
+ specifying whether investigation results in enforcement, clearance,
299
+ or both; and aggregation methods used for GUPPI and diversion ratio
300
+ measures
237
301
 
238
- _recapture_rate
239
- If recapture is proportional or inside-out, the recapture rate
240
- for the firm with the smaller share.
302
+ sample_size
303
+ Number of draws to generate
241
304
 
242
- _frmshr_array
243
- Merging-firm shares.
305
+ seed_seq_list
306
+ List of seed sequences, to assure independent samples in each thread
244
307
 
245
- _aggregate_purchase_prob
246
- 1 minus probability that the outside good is chosen; converts
247
- market shares to choice probabilities by multiplication.
308
+ nthreads
309
+ Number of parallel processes to use
248
310
 
249
- Returns
250
- -------
251
- Merging-firm diversion ratios for mergers in the sample.
311
+ save_data_to_file
312
+ Whether to save data to an HDF5 file, and where to save it
252
313
 
253
- """
314
+ saved_array_name_suffix
315
+ Suffix to add to the array names in the HDF5 file
254
316
 
255
- _divr_array: NDArray[np.float64]
256
- if _recapture_form == RECConstants.FIXED:
257
- _divr_array = _recapture_rate * _frmshr_array[:, ::-1] / (1 - _frmshr_array) # type: ignore
258
-
259
- else:
260
- _purchprob_array = _aggregate_purchase_prob * _frmshr_array
261
- _divr_array = _purchprob_array[:, ::-1] / (1 - _purchprob_array)
262
-
263
- _divr_assert_test = (
264
- (np.round(np.einsum("ij->i", _frmshr_array), 15) == 1)
265
- | (np.argmin(_frmshr_array, axis=1) == np.argmax(_divr_array, axis=1))
266
- )[:, None]
267
- if not all(_divr_assert_test):
268
- raise ValueError(
269
- "{} {} {} {}".format(
270
- "Data construction fails tests:",
271
- "the index of min(s_1, s_2) must equal",
272
- "the index of max(d_12, d_21), for all draws.",
273
- "unless frmshr_array sums to 1.00.",
317
+ Returns
318
+ -------
319
+ UPPTestsCounts object of test counts by firm count, ΔHHI and concentration zone
320
+
321
+ """
322
+
323
+ _market_data_sample = self.__gen_market_sample(
324
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
325
+ )
326
+
327
+ _invalid_array_names = (
328
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
329
+ if self.share_spec.dist_type == "Uniform"
330
+ else ()
331
+ )
332
+
333
+ save_data_to_hdf5(
334
+ _market_data_sample,
335
+ saved_array_name_suffix=saved_array_name_suffix,
336
+ excluded_attrs=_invalid_array_names,
337
+ save_data_to_file=save_data_to_file,
338
+ )
339
+
340
+ _upp_test_arrays = compute_upp_test_counts(
341
+ _market_data_sample, _upp_test_parms, _sim_test_regime
342
+ )
343
+
344
+ save_data_to_hdf5(
345
+ _upp_test_arrays,
346
+ saved_array_name_suffix=saved_array_name_suffix,
347
+ save_data_to_file=save_data_to_file,
348
+ )
349
+
350
+ return _upp_test_arrays
351
+
352
+ def __sim_enf_cnts_ll(
353
+ self,
354
+ _enf_parm_vec: gbl.HMGThresholds,
355
+ _sim_test_regime: UPPTestRegime,
356
+ /,
357
+ *,
358
+ sample_size: int = 10**6,
359
+ seed_seq_list: Sequence[SeedSequence] | None = None,
360
+ nthreads: int = 16,
361
+ save_data_to_file: SaveData = False,
362
+ saved_array_name_suffix: str = "",
363
+ ) -> UPPTestsCounts:
364
+ """A function to parallelize data-generation and testing
365
+
366
+ The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
367
+ the parent function, `sim_enf_cnts()`, except that, if provided,
368
+ `seed_seq_list` is used to spawn a seed sequence for each thread,
369
+ to assure independent samples in each thread, and `nthreads` defines
370
+ the number of parallel processes used. The number of draws in
371
+ each thread may be tuned, by trial and error, to the amount of
372
+ memory (RAM) available.
373
+
374
+ Parameters
375
+ ----------
376
+
377
+ _enf_parm_vec
378
+ Guidelines thresholds to test against
379
+
380
+ _sim_test_regime
381
+ Configuration to use for testing
382
+
383
+ sample_size
384
+ Number of draws to simulate
385
+
386
+ seed_seq_list
387
+ List of seed sequences, to assure independent samples in each thread
388
+
389
+ nthreads
390
+ Number of parallel processes to use
391
+
392
+ save_data_to_file
393
+ Whether to save data to an HDF5 file, and where to save it
394
+
395
+ saved_array_name_suffix
396
+ Suffix to add to the array names in the HDF5 file
397
+
398
+ Returns
399
+ -------
400
+ Arrays of enforcement counts or clearance counts by firm count,
401
+ ΔHHI and concentration zone
402
+
403
+ """
404
+ _sample_sz = sample_size
405
+ _subsample_sz = 10**6
406
+ _iter_count = (
407
+ int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
408
+ )
409
+ _thread_count = cpu_count()
410
+
411
+ if (
412
+ self.share_spec.recapture_form != RECForm.OUTIN
413
+ and self.share_spec.recapture_ratio != _enf_parm_vec.rec
414
+ ):
415
+ raise ValueError(
416
+ "{} {} {}".format(
417
+ f"Recapture ratio from market sample spec, {self.share_spec.recapture_ratio}",
418
+ f"must match the value, {_enf_parm_vec.rec}",
419
+ "the guidelines thresholds vector.",
420
+ )
274
421
  )
422
+
423
+ _rng_seed_seq_list = [None] * _iter_count
424
+ if seed_seq_list:
425
+ _rng_seed_seq_list = list(
426
+ zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
427
+ )
428
+
429
+ _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
430
+ "sample_size": _subsample_sz,
431
+ "save_data_to_file": save_data_to_file,
432
+ "nthreads": nthreads,
433
+ })
434
+
435
+ _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
436
+ delayed(self.__sim_enf_cnts)(
437
+ _enf_parm_vec,
438
+ _sim_test_regime,
439
+ **_sim_enf_cnts_kwargs,
440
+ saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}", # pyright: ignore
441
+ seed_seq_list=_rng_seed_seq_list_ch, # pyright: ignore
442
+ )
443
+ for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
275
444
  )
276
445
 
277
- return _divr_array
446
+ _res_list_stacks = UPPTestsCounts(*[
447
+ np.stack([getattr(_j, _k) for _j in _res_list])
448
+ for _k in ("by_firm_count", "by_delta", "by_conczone")
449
+ ])
450
+ upp_test_results = UPPTestsCounts(*[
451
+ np.column_stack((
452
+ (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
453
+ np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
454
+ ))
455
+ for _g, _h in zip(
456
+ _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
457
+ )
458
+ ])
459
+ del _res_list, _res_list_stacks
460
+
461
+ return upp_test_results
462
+
463
+ def estimate_enf_counts(
464
+ self,
465
+ _enf_parm_vec: HMGThresholds,
466
+ _upp_test_regime: UPPTestRegime,
467
+ /,
468
+ *,
469
+ sample_size: int = 10**6,
470
+ """Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
471
+ nthreads: int = 16,
472
+ save_data_to_file: SaveData = False,
473
+ saved_array_name_suffix: str = "",
474
+ ) -> None:
475
+ """Populate :attr:`enf_counts` with estimated UPP test counts.
476
+
477
+ Parameters
478
+ ----------
479
+ _enf_parm_vec
480
+ Threshold values for various Guidelines criteria
481
+
482
+ _upp_test_regime
483
+ Specifies whether to analyze enforcement, clearance, or both
484
+ and the GUPPI and diversion ratio aggregators employed, with
485
+ default being to analyze enforcement based on the maximum
486
+ merging-firm GUPPI and maximum diversion ratio between the
487
+ merging firms
488
+
489
+ sample_size
490
+ Number of draws to simulate
491
+
492
+ seed_seq_list
493
+ List of seed sequences, to assure independent samples in each thread
494
+
495
+ nthreads
496
+ Number of parallel processes to use
497
+
498
+ save_data_to_file
499
+ Whether to save data to an HDF5 file, and where to save it
500
+
501
+ saved_array_name_suffix
502
+ Suffix to add to the array names in the HDF5 file
503
+
504
+ Returns
505
+ -------
506
+ None
507
+
508
+ """
509
+
510
+ if self.data is None:
511
+ self.enf_counts = self.__sim_enf_cnts_ll(
512
+ _enf_parm_vec,
513
+ _upp_test_regime,
514
+ sample_size=sample_size,
515
+ seed_seq_list=seed_seq_list,
516
+ nthreads=nthreads,
517
+ save_data_to_file=save_data_to_file,
518
+ saved_array_name_suffix=saved_array_name_suffix,
519
+ )
520
+ else:
521
+ self.enf_counts = compute_upp_test_counts(
522
+ self.data, _enf_parm_vec, _upp_test_regime
523
+ )
524
+ if save_data_to_file:
525
+ save_data_to_hdf5(
526
+ self.enf_counts,
527
+ save_data_to_file=save_data_to_file,
528
+ saved_array_name_suffix=saved_array_name_suffix,
529
+ )