mergeron 2024.739099.1__py3-none-any.whl → 2024.739104.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +7 -10
- mergeron/core/ftc_merger_investigations_data.py +4 -2
- mergeron/core/guidelines_boundaries.py +11 -11
- mergeron/core/guidelines_boundary_functions.py +3 -3
- mergeron/gen/__init__.py +140 -160
- mergeron/gen/data_generation.py +456 -221
- mergeron/gen/{_data_generation_functions.py → data_generation_functions.py} +225 -139
- mergeron/gen/enforcement_stats.py +31 -33
- mergeron/gen/upp_tests.py +70 -212
- {mergeron-2024.739099.1.dist-info → mergeron-2024.739104.1.dist-info}/METADATA +1 -1
- {mergeron-2024.739099.1.dist-info → mergeron-2024.739104.1.dist-info}/RECORD +12 -13
- mergeron/gen/market_sample.py +0 -143
- {mergeron-2024.739099.1.dist-info → mergeron-2024.739104.1.dist-info}/WHEEL +0 -0
mergeron/gen/data_generation.py
CHANGED
|
@@ -1,283 +1,518 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Methods to generate
|
|
3
|
-
for analyzing merger enforcement policy.
|
|
2
|
+
Methods to generate data for analyzing merger enforcement policy.
|
|
4
3
|
|
|
5
4
|
"""
|
|
6
5
|
|
|
7
6
|
from __future__ import annotations
|
|
8
7
|
|
|
9
|
-
from
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from typing import TypedDict
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
|
+
from attrs import Attribute, define, field, validators
|
|
13
|
+
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
12
14
|
from numpy.random import SeedSequence
|
|
13
15
|
|
|
14
|
-
from .. import VERSION,
|
|
16
|
+
from .. import VERSION, RECTypes # noqa: TID252 # noqa
|
|
17
|
+
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
18
|
+
from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
|
|
15
19
|
from . import (
|
|
16
|
-
EMPTY_ARRAY_DEFAULT,
|
|
17
20
|
FM2Constants,
|
|
18
21
|
MarketDataSample,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
+
PCMDistributions,
|
|
23
|
+
PCMSpec,
|
|
24
|
+
PriceSpec,
|
|
25
|
+
ShareSpec,
|
|
26
|
+
SHRDistributions,
|
|
22
27
|
SSZConstants,
|
|
28
|
+
UPPTestRegime,
|
|
29
|
+
UPPTestsCounts,
|
|
23
30
|
)
|
|
24
|
-
from .
|
|
31
|
+
from .data_generation_functions import (
|
|
32
|
+
gen_divr_array,
|
|
33
|
+
gen_margin_price_data,
|
|
34
|
+
gen_share_data,
|
|
35
|
+
parse_seed_seq_list,
|
|
36
|
+
)
|
|
37
|
+
from .upp_tests import SaveData, enf_cnts, save_data_to_hdf5
|
|
25
38
|
|
|
26
39
|
__version__ = VERSION
|
|
27
40
|
|
|
28
41
|
|
|
29
|
-
class
|
|
30
|
-
|
|
31
|
-
pcm_rng_seed_seq: SeedSequence
|
|
32
|
-
fcount_rng_seed_seq: SeedSequence | None
|
|
33
|
-
pr_rng_seed_seq: SeedSequence | None
|
|
42
|
+
class SamplingFunctionKWArgs(TypedDict, total=False):
|
|
43
|
+
"Keyword arguments of function, :func:`MarketSample.sim_enf_cnts`"
|
|
34
44
|
|
|
45
|
+
sample_size: int
|
|
46
|
+
"""number of draws to generate"""
|
|
35
47
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
Parameters
|
|
49
|
-
----------
|
|
50
|
-
_mkt_sample_spec
|
|
51
|
-
class specifying parameters for data generation, see :class:`mergeron.gen.MarketSpec`
|
|
52
|
-
sample_size
|
|
53
|
-
number of draws to generate
|
|
54
|
-
seed_seq_list
|
|
55
|
-
tuple of SeedSequences to ensure replicable data generation with
|
|
56
|
-
appropriately independent random streams
|
|
57
|
-
nthreads
|
|
58
|
-
optionally specify the number of CPU threads for the PRNG
|
|
59
|
-
|
|
60
|
-
Returns
|
|
61
|
-
-------
|
|
62
|
-
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
63
|
-
in the sample
|
|
48
|
+
seed_seq_list: Sequence[SeedSequence] | None
|
|
49
|
+
"""sequence of SeedSequences to ensure replicable data generation with
|
|
50
|
+
appropriately independent random streams
|
|
51
|
+
|
|
52
|
+
NOTES
|
|
53
|
+
-----
|
|
54
|
+
|
|
55
|
+
See :func:`.data_generation_functions.parse_seed_seq_list` for more on
|
|
56
|
+
specification of this parameter.
|
|
64
57
|
|
|
65
58
|
"""
|
|
66
59
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
_dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
|
|
70
|
-
_dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
|
|
71
|
-
_hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
|
|
72
|
-
|
|
73
|
-
(
|
|
74
|
-
_mktshr_rng_seed_seq,
|
|
75
|
-
_pcm_rng_seed_seq,
|
|
76
|
-
_fcount_rng_seed_seq,
|
|
77
|
-
_pr_rng_seed_seq,
|
|
78
|
-
) = parse_seed_seq_list(
|
|
79
|
-
seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.price_spec
|
|
80
|
-
)
|
|
60
|
+
nthreads: int
|
|
61
|
+
"""number of parallel threads to use"""
|
|
81
62
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
_shr_sample_size *= _hsr_filing_test_type
|
|
85
|
-
if _dist_firm2_pcm == FM2Constants.MNL:
|
|
86
|
-
_shr_sample_size *= SSZConstants.MNL_DEP
|
|
87
|
-
_shr_sample_size = int(_shr_sample_size)
|
|
88
|
-
|
|
89
|
-
# Generate share data
|
|
90
|
-
_mktshr_data = _gen_share_data(
|
|
91
|
-
_shr_sample_size,
|
|
92
|
-
_mkt_sample_spec.share_spec,
|
|
93
|
-
_fcount_rng_seed_seq,
|
|
94
|
-
_mktshr_rng_seed_seq,
|
|
95
|
-
nthreads,
|
|
96
|
-
)
|
|
63
|
+
save_data_to_file: SaveData
|
|
64
|
+
"""optionally save data to HDF5 file"""
|
|
97
65
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
for _f in (
|
|
101
|
-
"mktshr_array",
|
|
102
|
-
"fcounts",
|
|
103
|
-
"aggregate_purchase_prob",
|
|
104
|
-
"nth_firm_share",
|
|
105
|
-
)
|
|
106
|
-
)
|
|
66
|
+
saved_array_name_suffix: str
|
|
67
|
+
"""optionally specify a suffix for the HDF5 array names"""
|
|
107
68
|
|
|
108
|
-
# Generate merging-firm price and PCM data
|
|
109
|
-
_margin_data, _price_data = _gen_margin_price_data(
|
|
110
|
-
_mktshr_array[:, :2],
|
|
111
|
-
_nth_firm_share,
|
|
112
|
-
_aggregate_purchase_prob,
|
|
113
|
-
_mkt_sample_spec.pcm_spec,
|
|
114
|
-
_mkt_sample_spec.price_spec,
|
|
115
|
-
_mkt_sample_spec.hsr_filing_test_type,
|
|
116
|
-
_pcm_rng_seed_seq,
|
|
117
|
-
_pr_rng_seed_seq,
|
|
118
|
-
nthreads,
|
|
119
|
-
)
|
|
120
69
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
70
|
+
@define
|
|
71
|
+
class MarketSample:
|
|
72
|
+
"""Parameter specification for market data generation."""
|
|
124
73
|
|
|
125
|
-
|
|
126
|
-
|
|
74
|
+
share_spec: ShareSpec = field(
|
|
75
|
+
kw_only=True,
|
|
76
|
+
default=ShareSpec(SHRDistributions.UNI, None, None, RECTypes.INOUT, 0.8),
|
|
77
|
+
validator=validators.instance_of(ShareSpec),
|
|
127
78
|
)
|
|
79
|
+
"""Market-share specification, see :class:`ShareSpec`"""
|
|
128
80
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
if _dist_firm2_pcm == FM2Constants.MNL:
|
|
132
|
-
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
133
|
-
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
134
|
-
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
135
|
-
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
136
|
-
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
|
|
137
|
-
_nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
|
|
138
|
-
|
|
139
|
-
# Calculate diversion ratios
|
|
140
|
-
_divr_array = gen_divr_array(
|
|
141
|
-
_recapture_form, _recapture_rate, _mktshr_array[:, :2], _aggregate_purchase_prob
|
|
81
|
+
pcm_spec: PCMSpec = field(
|
|
82
|
+
kw_only=True, default=PCMSpec(PCMDistributions.UNI, None, FM2Constants.IID)
|
|
142
83
|
)
|
|
84
|
+
"""Margin specification, see :class:`PCMSpec`"""
|
|
85
|
+
|
|
86
|
+
@pcm_spec.validator
|
|
87
|
+
def _check_pcm(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
|
|
88
|
+
if (
|
|
89
|
+
self.share_spec.recapture_form == RECTypes.FIXED
|
|
90
|
+
and _v.firm2_pcm_constraint == FM2Constants.MNL
|
|
91
|
+
):
|
|
92
|
+
raise ValueError(
|
|
93
|
+
f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
|
|
94
|
+
"requires Firm 2 margin must have property, "
|
|
95
|
+
f'"{FM2Constants.IID}" or "{FM2Constants.SYM}".'
|
|
96
|
+
)
|
|
143
97
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
_frmshr_array = _mktshr_array[:, :2]
|
|
147
|
-
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[:, None]
|
|
148
|
-
|
|
149
|
-
_hhi_post = (
|
|
150
|
-
_hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
|
|
98
|
+
price_spec: PriceSpec = field(
|
|
99
|
+
kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
|
|
151
100
|
)
|
|
101
|
+
"""Price specification, see :class:`PriceSpec`"""
|
|
152
102
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
_fcounts,
|
|
158
|
-
_aggregate_purchase_prob,
|
|
159
|
-
_nth_firm_share,
|
|
160
|
-
_divr_array,
|
|
161
|
-
_hhi_post,
|
|
162
|
-
_hhi_delta,
|
|
103
|
+
hsr_filing_test_type: SSZConstants = field(
|
|
104
|
+
kw_only=True,
|
|
105
|
+
default=SSZConstants.ONE,
|
|
106
|
+
validator=validators.instance_of(SSZConstants),
|
|
163
107
|
)
|
|
108
|
+
"""Method for modeling HSR filing thresholds, see :class:`SSZConstants`"""
|
|
109
|
+
|
|
110
|
+
data: MarketDataSample = field(default=None)
|
|
111
|
+
|
|
112
|
+
enf_counts: UPPTestsCounts = field(default=None)
|
|
113
|
+
|
|
114
|
+
def gen_market_sample(
|
|
115
|
+
self,
|
|
116
|
+
/,
|
|
117
|
+
*,
|
|
118
|
+
sample_size: int = 10**6,
|
|
119
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
120
|
+
nthreads: int = 16,
|
|
121
|
+
save_data_to_file: SaveData = False,
|
|
122
|
+
saved_array_name_suffix: str = "",
|
|
123
|
+
) -> MarketDataSample:
|
|
124
|
+
"""
|
|
125
|
+
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
126
|
+
|
|
127
|
+
See :class:`SamplingFunctionKWArgs` for a description of the parameters.
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
132
|
+
in the sample
|
|
133
|
+
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
_recapture_form = self.share_spec.recapture_form
|
|
137
|
+
_recapture_rate = self.share_spec.recapture_rate
|
|
138
|
+
_dist_type_mktshr = self.share_spec.dist_type
|
|
139
|
+
_dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
|
|
140
|
+
_hsr_filing_test_type = self.hsr_filing_test_type
|
|
141
|
+
|
|
142
|
+
(
|
|
143
|
+
_mktshr_rng_seed_seq,
|
|
144
|
+
_pcm_rng_seed_seq,
|
|
145
|
+
_fcount_rng_seed_seq,
|
|
146
|
+
_pr_rng_seed_seq,
|
|
147
|
+
) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
|
|
148
|
+
|
|
149
|
+
_shr_sample_size = 1.0 * sample_size
|
|
150
|
+
# Scale up sample size to offset discards based on specified criteria
|
|
151
|
+
_shr_sample_size *= _hsr_filing_test_type
|
|
152
|
+
if _dist_firm2_pcm == FM2Constants.MNL:
|
|
153
|
+
_shr_sample_size *= SSZConstants.MNL_DEP
|
|
154
|
+
_shr_sample_size = int(_shr_sample_size)
|
|
155
|
+
|
|
156
|
+
# Generate share data
|
|
157
|
+
_mktshr_data = gen_share_data(
|
|
158
|
+
_shr_sample_size,
|
|
159
|
+
self.share_spec,
|
|
160
|
+
_fcount_rng_seed_seq,
|
|
161
|
+
_mktshr_rng_seed_seq,
|
|
162
|
+
nthreads,
|
|
163
|
+
)
|
|
164
164
|
|
|
165
|
+
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
166
|
+
getattr(_mktshr_data, _f)
|
|
167
|
+
for _f in (
|
|
168
|
+
"mktshr_array",
|
|
169
|
+
"fcounts",
|
|
170
|
+
"aggregate_purchase_prob",
|
|
171
|
+
"nth_firm_share",
|
|
172
|
+
)
|
|
173
|
+
)
|
|
165
174
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
175
|
+
# Generate merging-firm price and PCM data
|
|
176
|
+
_margin_data, _price_data = gen_margin_price_data(
|
|
177
|
+
_mktshr_array[:, :2],
|
|
178
|
+
_nth_firm_share,
|
|
179
|
+
_aggregate_purchase_prob,
|
|
180
|
+
self.pcm_spec,
|
|
181
|
+
self.price_spec,
|
|
182
|
+
self.hsr_filing_test_type,
|
|
183
|
+
_pcm_rng_seed_seq,
|
|
184
|
+
_pr_rng_seed_seq,
|
|
185
|
+
nthreads,
|
|
186
|
+
)
|
|
173
187
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
2.) price-cost margins
|
|
178
|
-
3.) firm-counts, if :code:`MarketSpec.share_spec.dist_type` is a Dirichlet distribution
|
|
179
|
-
4.) prices, if :code:`MarketSpec.price_spec ==`:attr:`mergeron.gen.PriceConstants.ZERO`.
|
|
188
|
+
_price_array, _hsr_filing_test = (
|
|
189
|
+
getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
|
|
190
|
+
)
|
|
180
191
|
|
|
192
|
+
_pcm_array, _mnl_test_rows = (
|
|
193
|
+
getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
|
|
194
|
+
)
|
|
181
195
|
|
|
196
|
+
_mnl_test_rows = _mnl_test_rows * _hsr_filing_test
|
|
197
|
+
_s_size = sample_size # originally-specified sample size
|
|
198
|
+
if _dist_firm2_pcm == FM2Constants.MNL:
|
|
199
|
+
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
200
|
+
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
201
|
+
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
202
|
+
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
203
|
+
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
|
|
204
|
+
:_s_size
|
|
205
|
+
]
|
|
206
|
+
_nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
|
|
207
|
+
|
|
208
|
+
# Calculate diversion ratios
|
|
209
|
+
_divr_array = gen_divr_array(
|
|
210
|
+
_recapture_form,
|
|
211
|
+
_recapture_rate,
|
|
212
|
+
_mktshr_array[:, :2],
|
|
213
|
+
_aggregate_purchase_prob,
|
|
214
|
+
)
|
|
182
215
|
|
|
183
|
-
|
|
184
|
-
----------
|
|
185
|
-
_sseq_list
|
|
186
|
-
List of RNG seed sequences
|
|
216
|
+
del _mnl_test_rows, _s_size
|
|
187
217
|
|
|
188
|
-
|
|
189
|
-
|
|
218
|
+
_frmshr_array = _mktshr_array[:, :2]
|
|
219
|
+
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
|
|
220
|
+
:, None
|
|
221
|
+
]
|
|
190
222
|
|
|
191
|
-
|
|
192
|
-
|
|
223
|
+
_hhi_post = (
|
|
224
|
+
_hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
|
|
225
|
+
)
|
|
193
226
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
227
|
+
return MarketDataSample(
|
|
228
|
+
_frmshr_array,
|
|
229
|
+
_pcm_array,
|
|
230
|
+
_price_array,
|
|
231
|
+
_fcounts,
|
|
232
|
+
_aggregate_purchase_prob,
|
|
233
|
+
_nth_firm_share,
|
|
234
|
+
_divr_array,
|
|
235
|
+
_hhi_post,
|
|
236
|
+
_hhi_delta,
|
|
237
|
+
)
|
|
197
238
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
239
|
+
def generate_sample(
|
|
240
|
+
self,
|
|
241
|
+
/,
|
|
242
|
+
*,
|
|
243
|
+
sample_size: int = 10**6,
|
|
244
|
+
seed_seq_list: list[SeedSequence] | None,
|
|
245
|
+
nthreads: int,
|
|
246
|
+
save_data_to_file: SaveData = False,
|
|
247
|
+
saved_array_name_suffix: str = "",
|
|
248
|
+
) -> None:
|
|
249
|
+
"""Generate market data"""
|
|
250
|
+
|
|
251
|
+
self.data = self.gen_market_sample(
|
|
252
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
212
253
|
)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
else (SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
254
|
+
|
|
255
|
+
_invalid_array_names = (
|
|
256
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
257
|
+
if self.share_spec.dist_type == "Uniform"
|
|
258
|
+
else ()
|
|
219
259
|
)
|
|
220
260
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
261
|
+
save_data_to_hdf5(
|
|
262
|
+
self.data,
|
|
263
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
264
|
+
excluded_attrs=_invalid_array_names,
|
|
265
|
+
save_data_to_file=save_data_to_file,
|
|
266
|
+
)
|
|
224
267
|
|
|
268
|
+
def sim_enf_cnts(
|
|
269
|
+
self,
|
|
270
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
271
|
+
_sim_test_regime: UPPTestRegime,
|
|
272
|
+
/,
|
|
273
|
+
*,
|
|
274
|
+
sample_size: int = 10**6,
|
|
275
|
+
seed_seq_list: list[SeedSequence] | None = None,
|
|
276
|
+
nthreads: int = 16,
|
|
277
|
+
save_data_to_file: SaveData = False,
|
|
278
|
+
saved_array_name_suffix: str = "",
|
|
279
|
+
) -> UPPTestsCounts:
|
|
280
|
+
"""Generate market data and estimate UPP test counts on same.
|
|
225
281
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
_recapture_rate: float | None,
|
|
229
|
-
_frmshr_array: ArrayDouble,
|
|
230
|
-
_aggregate_purchase_prob: ArrayDouble = EMPTY_ARRAY_DEFAULT,
|
|
231
|
-
/,
|
|
232
|
-
) -> ArrayDouble:
|
|
233
|
-
"""
|
|
234
|
-
Given merging-firm shares and related parameters, return diverion ratios.
|
|
282
|
+
Parameters
|
|
283
|
+
----------
|
|
235
284
|
|
|
236
|
-
|
|
237
|
-
|
|
285
|
+
_upp_test_parms
|
|
286
|
+
Guidelines thresholds for testing UPP and related statistics
|
|
238
287
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
288
|
+
_sim_test_regime
|
|
289
|
+
Configuration to use for testing; UPPTestRegime object
|
|
290
|
+
specifying whether investigation results in enforcement, clearance,
|
|
291
|
+
or both; and aggregation methods used for GUPPI and diversion ratio
|
|
292
|
+
measures
|
|
243
293
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
for the firm with the smaller share.
|
|
294
|
+
sample_size
|
|
295
|
+
Number of draws to generate
|
|
247
296
|
|
|
248
|
-
|
|
249
|
-
|
|
297
|
+
seed_seq_list
|
|
298
|
+
List of seed sequences, to assure independent samples in each thread
|
|
250
299
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
market shares to choice probabilities by multiplication.
|
|
300
|
+
nthreads
|
|
301
|
+
Number of parallel processes to use
|
|
254
302
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
Merging-firm diversion ratios for mergers in the sample.
|
|
303
|
+
save_data_to_file
|
|
304
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
258
305
|
|
|
259
|
-
|
|
306
|
+
saved_array_name_suffix
|
|
307
|
+
Suffix to add to the array names in the HDF5 file
|
|
308
|
+
|
|
309
|
+
Returns
|
|
310
|
+
-------
|
|
311
|
+
UPPTestsCounts object with test counts by firm count, ΔHHI, and concentration zone
|
|
312
|
+
|
|
313
|
+
"""
|
|
314
|
+
|
|
315
|
+
_market_data_sample = self.gen_market_sample(
|
|
316
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
_invalid_array_names = (
|
|
320
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
321
|
+
if self.share_spec.dist_type == "Uniform"
|
|
322
|
+
else ()
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
save_data_to_hdf5(
|
|
326
|
+
_market_data_sample,
|
|
327
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
328
|
+
excluded_attrs=_invalid_array_names,
|
|
329
|
+
save_data_to_file=save_data_to_file,
|
|
330
|
+
)
|
|
260
331
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
332
|
+
_upp_test_arrays = enf_cnts(
|
|
333
|
+
_market_data_sample, _upp_test_parms, _sim_test_regime
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
save_data_to_hdf5(
|
|
337
|
+
_upp_test_arrays,
|
|
338
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
339
|
+
save_data_to_file=save_data_to_file,
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
return _upp_test_arrays
|
|
343
|
+
|
|
344
|
+
def sim_enf_cnts_ll(
|
|
345
|
+
self,
|
|
346
|
+
_enf_parm_vec: gbl.HMGThresholds,
|
|
347
|
+
_sim_test_regime: UPPTestRegime,
|
|
348
|
+
/,
|
|
349
|
+
*,
|
|
350
|
+
sample_size: int = 10**6,
|
|
351
|
+
seed_seq_list: list[SeedSequence] | None = None,
|
|
352
|
+
nthreads: int = 16,
|
|
353
|
+
save_data_to_file: SaveData = False,
|
|
354
|
+
saved_array_name_suffix: str = "",
|
|
355
|
+
) -> UPPTestsCounts:
|
|
356
|
+
"""A function to parallelize data-generation and testing
|
|
357
|
+
|
|
358
|
+
The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
|
|
359
|
+
the parent function, `sim_enf_cnts()`, except that, if provided,
|
|
360
|
+
`seed_seq_list` is used to spawn a seed sequence for each thread,
|
|
361
|
+
to assure independent samples in each thread, and `nthreads` defines
|
|
362
|
+
the number of parallel processes used. The number of draws in
|
|
363
|
+
each thread may be tuned, by trial and error, to the amount of
|
|
364
|
+
memory (RAM) available.
|
|
365
|
+
|
|
366
|
+
Parameters
|
|
367
|
+
----------
|
|
368
|
+
|
|
369
|
+
_enf_parm_vec
|
|
370
|
+
Guidelines thresholds to test against
|
|
371
|
+
|
|
372
|
+
_sim_test_regime
|
|
373
|
+
Configuration to use for testing
|
|
374
|
+
|
|
375
|
+
sample_size
|
|
376
|
+
Number of draws to simulate
|
|
377
|
+
|
|
378
|
+
seed_seq_list
|
|
379
|
+
List of seed sequences, to assure independent samples in each thread
|
|
380
|
+
|
|
381
|
+
nthreads
|
|
382
|
+
Number of parallel processes to use
|
|
383
|
+
|
|
384
|
+
save_data_to_file
|
|
385
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
386
|
+
|
|
387
|
+
saved_array_name_suffix
|
|
388
|
+
Suffix to add to the array names in the HDF5 file
|
|
389
|
+
|
|
390
|
+
Returns
|
|
391
|
+
-------
|
|
392
|
+
Arrays of UPPTestsCounts
|
|
393
|
+
|
|
394
|
+
"""
|
|
395
|
+
_sample_sz = sample_size
|
|
396
|
+
_subsample_sz = 10**6
|
|
397
|
+
_iter_count = (
|
|
398
|
+
int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
|
|
399
|
+
)
|
|
400
|
+
_thread_count = cpu_count()
|
|
401
|
+
|
|
402
|
+
if (
|
|
403
|
+
self.share_spec.recapture_form != RECTypes.OUTIN
|
|
404
|
+
and self.share_spec.recapture_rate != _enf_parm_vec.rec
|
|
405
|
+
):
|
|
406
|
+
raise ValueError(
|
|
407
|
+
"{} {} {}".format(
|
|
408
|
+
f"Recapture rate from market sample spec, {self.share_spec.recapture_rate}",
|
|
409
|
+
f"must match the value, {_enf_parm_vec.rec}",
|
|
410
|
+
"the guidelines thresholds vector.",
|
|
411
|
+
)
|
|
280
412
|
)
|
|
413
|
+
|
|
414
|
+
_rng_seed_seq_list = [None] * _iter_count
|
|
415
|
+
if seed_seq_list:
|
|
416
|
+
_rng_seed_seq_list = list(
|
|
417
|
+
zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
_sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
|
|
421
|
+
"sample_size": _subsample_sz,
|
|
422
|
+
"save_data_to_file": save_data_to_file,
|
|
423
|
+
"nthreads": nthreads,
|
|
424
|
+
})
|
|
425
|
+
|
|
426
|
+
_res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
|
|
427
|
+
delayed(self.sim_enf_cnts)(
|
|
428
|
+
_enf_parm_vec,
|
|
429
|
+
_sim_test_regime,
|
|
430
|
+
**_sim_enf_cnts_kwargs,
|
|
431
|
+
saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
|
|
432
|
+
seed_seq_list=_rng_seed_seq_list_ch,
|
|
433
|
+
)
|
|
434
|
+
for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
|
|
281
435
|
)
|
|
282
436
|
|
|
283
|
-
|
|
437
|
+
_res_list_stacks = UPPTestsCounts(*[
|
|
438
|
+
np.stack([getattr(_j, _k) for _j in _res_list])
|
|
439
|
+
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
440
|
+
])
|
|
441
|
+
upp_test_results = UPPTestsCounts(*[
|
|
442
|
+
np.column_stack((
|
|
443
|
+
(_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
|
|
444
|
+
np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
|
|
445
|
+
))
|
|
446
|
+
for _g, _h in zip(
|
|
447
|
+
_res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
|
|
448
|
+
)
|
|
449
|
+
])
|
|
450
|
+
del _res_list, _res_list_stacks
|
|
451
|
+
|
|
452
|
+
return upp_test_results
|
|
453
|
+
|
|
454
|
+
def estimate_enf_counts(
|
|
455
|
+
self,
|
|
456
|
+
_enf_parm_vec: HMGThresholds,
|
|
457
|
+
_upp_test_regime: UPPTestRegime,
|
|
458
|
+
/,
|
|
459
|
+
*,
|
|
460
|
+
sample_size: int = 10**6,
|
|
461
|
+
seed_seq_list: list[SeedSequence] | None,
|
|
462
|
+
nthreads: int,
|
|
463
|
+
save_data_to_file: SaveData = False,
|
|
464
|
+
saved_array_name_suffix: str = "",
|
|
465
|
+
) -> None:
|
|
466
|
+
"""Estimate enforcement counts
|
|
467
|
+
|
|
468
|
+
Parameters
|
|
469
|
+
----------
|
|
470
|
+
_enf_parm_vec
|
|
471
|
+
Threshold values for various Guidelines criteria
|
|
472
|
+
|
|
473
|
+
_upp_test_regime
|
|
474
|
+
Specifies whether to analyze enforcement, clearance, or both
|
|
475
|
+
and the GUPPI and diversion ratio aggregators employed, with
|
|
476
|
+
default being to analyze enforcement based on the maximum
|
|
477
|
+
merging-firm GUPPI and maximum diversion ratio between the
|
|
478
|
+
merging firms
|
|
479
|
+
|
|
480
|
+
sample_size
|
|
481
|
+
Size of the market sample drawn
|
|
482
|
+
|
|
483
|
+
seed_seq_list
|
|
484
|
+
List of :code:`numpy.random.SeedSequence` objects
|
|
485
|
+
|
|
486
|
+
nthreads
|
|
487
|
+
Number of threads to use
|
|
488
|
+
|
|
489
|
+
save_data_to_file
|
|
490
|
+
Save data to given HDF5 file, at specified group node
|
|
491
|
+
|
|
492
|
+
saved_array_name_suffix
|
|
493
|
+
Suffix to add to the array names in the HDF5 file
|
|
494
|
+
|
|
495
|
+
Returns
|
|
496
|
+
-------
|
|
497
|
+
None
|
|
498
|
+
|
|
499
|
+
"""
|
|
500
|
+
|
|
501
|
+
if self.data is None:
|
|
502
|
+
self.enf_counts = self.sim_enf_cnts_ll(
|
|
503
|
+
_enf_parm_vec,
|
|
504
|
+
_upp_test_regime,
|
|
505
|
+
sample_size=sample_size,
|
|
506
|
+
seed_seq_list=seed_seq_list,
|
|
507
|
+
nthreads=nthreads,
|
|
508
|
+
save_data_to_file=save_data_to_file,
|
|
509
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
510
|
+
)
|
|
511
|
+
else:
|
|
512
|
+
self.enf_counts = enf_cnts(self.data, _enf_parm_vec, _upp_test_regime)
|
|
513
|
+
if save_data_to_file:
|
|
514
|
+
save_data_to_hdf5(
|
|
515
|
+
self.enf_counts,
|
|
516
|
+
save_data_to_file=save_data_to_file,
|
|
517
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
518
|
+
)
|