mergeron 2024.739099.2__py3-none-any.whl → 2024.739105.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +7 -10
- mergeron/core/ftc_merger_investigations_data.py +4 -2
- mergeron/core/guidelines_boundaries.py +11 -11
- mergeron/gen/__init__.py +142 -162
- mergeron/gen/data_generation.py +465 -221
- mergeron/gen/{_data_generation_functions.py → data_generation_functions.py} +239 -145
- mergeron/gen/enforcement_stats.py +40 -42
- mergeron/gen/upp_tests.py +75 -206
- mergeron-2024.739105.2.dist-info/METADATA +115 -0
- {mergeron-2024.739099.2.dist-info → mergeron-2024.739105.2.dist-info}/RECORD +11 -12
- mergeron/gen/market_sample.py +0 -143
- mergeron-2024.739099.2.dist-info/METADATA +0 -102
- {mergeron-2024.739099.2.dist-info → mergeron-2024.739105.2.dist-info}/WHEEL +0 -0
mergeron/gen/data_generation.py
CHANGED
|
@@ -1,283 +1,527 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Methods to generate
|
|
3
|
-
for analyzing merger enforcement policy.
|
|
2
|
+
Methods to generate data for analyzing merger enforcement policy.
|
|
4
3
|
|
|
5
4
|
"""
|
|
6
5
|
|
|
7
6
|
from __future__ import annotations
|
|
8
7
|
|
|
9
|
-
from
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from typing import TypedDict
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
|
+
from attrs import Attribute, define, field, validators
|
|
13
|
+
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
12
14
|
from numpy.random import SeedSequence
|
|
13
15
|
|
|
14
|
-
from .. import VERSION,
|
|
16
|
+
from .. import VERSION, RECForm # noqa: TID252 # noqa
|
|
17
|
+
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
18
|
+
from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
|
|
15
19
|
from . import (
|
|
16
|
-
|
|
17
|
-
FM2Constants,
|
|
20
|
+
FM2Constraint,
|
|
18
21
|
MarketDataSample,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
PCMDistribution,
|
|
23
|
+
PCMSpec,
|
|
24
|
+
PriceSpec,
|
|
25
|
+
ShareSpec,
|
|
26
|
+
SHRDistribution,
|
|
27
|
+
SSZConstant,
|
|
28
|
+
UPPTestRegime,
|
|
29
|
+
UPPTestsCounts,
|
|
23
30
|
)
|
|
24
|
-
from .
|
|
31
|
+
from .data_generation_functions import (
|
|
32
|
+
gen_divr_array,
|
|
33
|
+
gen_margin_price_data,
|
|
34
|
+
gen_share_data,
|
|
35
|
+
parse_seed_seq_list,
|
|
36
|
+
)
|
|
37
|
+
from .upp_tests import SaveData, enf_cnts, save_data_to_hdf5
|
|
25
38
|
|
|
26
39
|
__version__ = VERSION
|
|
27
40
|
|
|
28
41
|
|
|
29
|
-
class
|
|
30
|
-
|
|
31
|
-
pcm_rng_seed_seq: SeedSequence
|
|
32
|
-
fcount_rng_seed_seq: SeedSequence | None
|
|
33
|
-
pr_rng_seed_seq: SeedSequence | None
|
|
42
|
+
class SamplingFunctionKWArgs(TypedDict, total=False):
|
|
43
|
+
"Keyword arguments of sampling methods defined below"
|
|
34
44
|
|
|
45
|
+
sample_size: int
|
|
46
|
+
"""number of draws to generate"""
|
|
35
47
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
Parameters
|
|
49
|
-
----------
|
|
50
|
-
_mkt_sample_spec
|
|
51
|
-
class specifying parameters for data generation, see :class:`mergeron.gen.MarketSpec`
|
|
52
|
-
sample_size
|
|
53
|
-
number of draws to generate
|
|
54
|
-
seed_seq_list
|
|
55
|
-
tuple of SeedSequences to ensure replicable data generation with
|
|
56
|
-
appropriately independent random streams
|
|
57
|
-
nthreads
|
|
58
|
-
optionally specify the number of CPU threads for the PRNG
|
|
59
|
-
|
|
60
|
-
Returns
|
|
61
|
-
-------
|
|
62
|
-
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
63
|
-
in the sample
|
|
48
|
+
seed_seq_list: Sequence[SeedSequence] | None
|
|
49
|
+
"""sequence of SeedSequences to ensure replicable data generation with
|
|
50
|
+
appropriately independent random streams
|
|
51
|
+
|
|
52
|
+
NOTES
|
|
53
|
+
-----
|
|
54
|
+
|
|
55
|
+
See :func:`.data_generation_functions.parse_seed_seq_list` for more on
|
|
56
|
+
specification of this parameter.
|
|
64
57
|
|
|
65
58
|
"""
|
|
66
59
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
_dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
|
|
70
|
-
_dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
|
|
71
|
-
_hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
|
|
72
|
-
|
|
73
|
-
(
|
|
74
|
-
_mktshr_rng_seed_seq,
|
|
75
|
-
_pcm_rng_seed_seq,
|
|
76
|
-
_fcount_rng_seed_seq,
|
|
77
|
-
_pr_rng_seed_seq,
|
|
78
|
-
) = parse_seed_seq_list(
|
|
79
|
-
seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.price_spec
|
|
80
|
-
)
|
|
60
|
+
nthreads: int
|
|
61
|
+
"""number of parallel threads to use"""
|
|
81
62
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
_shr_sample_size = int(_shr_sample_size)
|
|
88
|
-
|
|
89
|
-
# Generate share data
|
|
90
|
-
_mktshr_data = _gen_share_data(
|
|
91
|
-
_shr_sample_size,
|
|
92
|
-
_mkt_sample_spec.share_spec,
|
|
93
|
-
_fcount_rng_seed_seq,
|
|
94
|
-
_mktshr_rng_seed_seq,
|
|
95
|
-
nthreads,
|
|
96
|
-
)
|
|
63
|
+
save_data_to_file: SaveData
|
|
64
|
+
"""optionally save data to HDF5 file"""
|
|
65
|
+
|
|
66
|
+
saved_array_name_suffix: str
|
|
67
|
+
"""optionally specify a suffix for the HDF5 array names"""
|
|
97
68
|
|
|
98
|
-
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
99
|
-
getattr(_mktshr_data, _f)
|
|
100
|
-
for _f in (
|
|
101
|
-
"mktshr_array",
|
|
102
|
-
"fcounts",
|
|
103
|
-
"aggregate_purchase_prob",
|
|
104
|
-
"nth_firm_share",
|
|
105
|
-
)
|
|
106
|
-
)
|
|
107
69
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
_pcm_rng_seed_seq,
|
|
117
|
-
_pr_rng_seed_seq,
|
|
118
|
-
nthreads,
|
|
70
|
+
@define
|
|
71
|
+
class MarketSample:
|
|
72
|
+
"""Parameter specification for market data generation."""
|
|
73
|
+
|
|
74
|
+
share_spec: ShareSpec = field(
|
|
75
|
+
kw_only=True,
|
|
76
|
+
default=ShareSpec(SHRDistribution.UNI, None, None, RECForm.INOUT, 0.8),
|
|
77
|
+
validator=validators.instance_of(ShareSpec),
|
|
119
78
|
)
|
|
79
|
+
"""Market-share specification, see :class:`ShareSpec`"""
|
|
120
80
|
|
|
121
|
-
|
|
122
|
-
|
|
81
|
+
pcm_spec: PCMSpec = field(
|
|
82
|
+
kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
|
|
123
83
|
)
|
|
84
|
+
"""Margin specification, see :class:`PCMSpec`"""
|
|
85
|
+
|
|
86
|
+
@pcm_spec.validator
|
|
87
|
+
def _check_pcm(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
|
|
88
|
+
if (
|
|
89
|
+
self.share_spec.recapture_form == RECForm.FIXED
|
|
90
|
+
and _v.firm2_pcm_constraint == FM2Constraint.MNL
|
|
91
|
+
):
|
|
92
|
+
raise ValueError(
|
|
93
|
+
f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
|
|
94
|
+
"requires Firm 2 margin must have property, "
|
|
95
|
+
f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
|
|
96
|
+
)
|
|
124
97
|
|
|
125
|
-
|
|
126
|
-
|
|
98
|
+
price_spec: PriceSpec = field(
|
|
99
|
+
kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
|
|
127
100
|
)
|
|
101
|
+
"""Price specification, see :class:`PriceSpec`"""
|
|
128
102
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
134
|
-
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
135
|
-
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
136
|
-
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
|
|
137
|
-
_nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
|
|
138
|
-
|
|
139
|
-
# Calculate diversion ratios
|
|
140
|
-
_divr_array = gen_divr_array(
|
|
141
|
-
_recapture_form, _recapture_rate, _mktshr_array[:, :2], _aggregate_purchase_prob
|
|
103
|
+
hsr_filing_test_type: SSZConstant = field(
|
|
104
|
+
kw_only=True,
|
|
105
|
+
default=SSZConstant.ONE,
|
|
106
|
+
validator=validators.instance_of(SSZConstant),
|
|
142
107
|
)
|
|
108
|
+
"""Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
|
|
109
|
+
|
|
110
|
+
data: MarketDataSample = field(default=None)
|
|
111
|
+
|
|
112
|
+
enf_counts: UPPTestsCounts = field(default=None)
|
|
113
|
+
|
|
114
|
+
def gen_market_sample(
|
|
115
|
+
self,
|
|
116
|
+
/,
|
|
117
|
+
*,
|
|
118
|
+
sample_size: int = 10**6,
|
|
119
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
120
|
+
nthreads: int = 16,
|
|
121
|
+
save_data_to_file: SaveData = False,
|
|
122
|
+
saved_array_name_suffix: str = "",
|
|
123
|
+
) -> MarketDataSample:
|
|
124
|
+
"""
|
|
125
|
+
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
126
|
+
|
|
127
|
+
see :class:`SamplingFunctionKWArgs` for description of keyword parameters
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
132
|
+
in the sample
|
|
133
|
+
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
_recapture_form = self.share_spec.recapture_form
|
|
137
|
+
_recapture_rate = self.share_spec.recapture_rate
|
|
138
|
+
_dist_type_mktshr = self.share_spec.dist_type
|
|
139
|
+
_dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
|
|
140
|
+
_hsr_filing_test_type = self.hsr_filing_test_type
|
|
141
|
+
|
|
142
|
+
(
|
|
143
|
+
_mktshr_rng_seed_seq,
|
|
144
|
+
_pcm_rng_seed_seq,
|
|
145
|
+
_fcount_rng_seed_seq,
|
|
146
|
+
_pr_rng_seed_seq,
|
|
147
|
+
) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
|
|
148
|
+
|
|
149
|
+
_shr_sample_size = 1.0 * sample_size
|
|
150
|
+
# Scale up sample size to offset discards based on specified criteria
|
|
151
|
+
_shr_sample_size *= _hsr_filing_test_type
|
|
152
|
+
if _dist_firm2_pcm == FM2Constraint.MNL:
|
|
153
|
+
_shr_sample_size *= SSZConstant.MNL_DEP
|
|
154
|
+
_shr_sample_size = int(_shr_sample_size)
|
|
155
|
+
|
|
156
|
+
# Generate share data
|
|
157
|
+
_mktshr_data = gen_share_data(
|
|
158
|
+
_shr_sample_size,
|
|
159
|
+
self.share_spec,
|
|
160
|
+
_fcount_rng_seed_seq,
|
|
161
|
+
_mktshr_rng_seed_seq,
|
|
162
|
+
nthreads,
|
|
163
|
+
)
|
|
143
164
|
|
|
144
|
-
|
|
165
|
+
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
166
|
+
getattr(_mktshr_data, _f)
|
|
167
|
+
for _f in (
|
|
168
|
+
"mktshr_array",
|
|
169
|
+
"fcounts",
|
|
170
|
+
"aggregate_purchase_prob",
|
|
171
|
+
"nth_firm_share",
|
|
172
|
+
)
|
|
173
|
+
)
|
|
145
174
|
|
|
146
|
-
|
|
147
|
-
|
|
175
|
+
# Generate merging-firm price and PCM data
|
|
176
|
+
_margin_data, _price_data = gen_margin_price_data(
|
|
177
|
+
_mktshr_array[:, :2],
|
|
178
|
+
_nth_firm_share,
|
|
179
|
+
_aggregate_purchase_prob,
|
|
180
|
+
self.pcm_spec,
|
|
181
|
+
self.price_spec,
|
|
182
|
+
self.hsr_filing_test_type,
|
|
183
|
+
_pcm_rng_seed_seq,
|
|
184
|
+
_pr_rng_seed_seq,
|
|
185
|
+
nthreads,
|
|
186
|
+
)
|
|
148
187
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
188
|
+
_price_array, _hsr_filing_test = (
|
|
189
|
+
getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
|
|
190
|
+
)
|
|
152
191
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
_price_array,
|
|
157
|
-
_fcounts,
|
|
158
|
-
_aggregate_purchase_prob,
|
|
159
|
-
_nth_firm_share,
|
|
160
|
-
_divr_array,
|
|
161
|
-
_hhi_post,
|
|
162
|
-
_hhi_delta,
|
|
163
|
-
)
|
|
192
|
+
_pcm_array, _mnl_test_rows = (
|
|
193
|
+
getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
|
|
194
|
+
)
|
|
164
195
|
|
|
196
|
+
_mnl_test_rows = _mnl_test_rows * _hsr_filing_test
|
|
197
|
+
_s_size = sample_size # originally-specified sample size
|
|
198
|
+
if _dist_firm2_pcm == FM2Constraint.MNL:
|
|
199
|
+
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
200
|
+
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
201
|
+
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
202
|
+
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
203
|
+
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
|
|
204
|
+
:_s_size
|
|
205
|
+
]
|
|
206
|
+
_nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
|
|
207
|
+
|
|
208
|
+
# Calculate diversion ratios
|
|
209
|
+
_divr_array = gen_divr_array(
|
|
210
|
+
_recapture_form,
|
|
211
|
+
_recapture_rate,
|
|
212
|
+
_mktshr_array[:, :2],
|
|
213
|
+
_aggregate_purchase_prob,
|
|
214
|
+
)
|
|
165
215
|
|
|
166
|
-
|
|
167
|
-
_sseq_list: list[SeedSequence] | None,
|
|
168
|
-
_mktshr_dist_type: SHRConstants,
|
|
169
|
-
_price_spec: PriceConstants,
|
|
170
|
-
/,
|
|
171
|
-
) -> SeedSequenceData:
|
|
172
|
-
"""Initialize RNG seed sequences to ensure independence of distinct random streams.
|
|
216
|
+
del _mnl_test_rows, _s_size
|
|
173
217
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
3.) firm-counts, if :code:`MarketSpec.share_spec.dist_type` is a Dirichlet distribution
|
|
179
|
-
4.) prices, if :code:`MarketSpec.price_spec ==`:attr:`mergeron.gen.PriceConstants.ZERO`.
|
|
218
|
+
_frmshr_array = _mktshr_array[:, :2]
|
|
219
|
+
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
|
|
220
|
+
:, None
|
|
221
|
+
]
|
|
180
222
|
|
|
223
|
+
_hhi_post = (
|
|
224
|
+
_hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
|
|
225
|
+
)
|
|
181
226
|
|
|
227
|
+
return MarketDataSample(
|
|
228
|
+
_frmshr_array,
|
|
229
|
+
_pcm_array,
|
|
230
|
+
_price_array,
|
|
231
|
+
_fcounts,
|
|
232
|
+
_aggregate_purchase_prob,
|
|
233
|
+
_nth_firm_share,
|
|
234
|
+
_divr_array,
|
|
235
|
+
_hhi_post,
|
|
236
|
+
_hhi_delta,
|
|
237
|
+
)
|
|
182
238
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
239
|
+
def generate_sample(
|
|
240
|
+
self,
|
|
241
|
+
/,
|
|
242
|
+
*,
|
|
243
|
+
sample_size: int,
|
|
244
|
+
seed_seq_list: Sequence[SeedSequence],
|
|
245
|
+
nthreads: int,
|
|
246
|
+
save_data_to_file: SaveData,
|
|
247
|
+
saved_array_name_suffix: str,
|
|
248
|
+
) -> None:
|
|
249
|
+
"""Populate :attr:`data` with generated data
|
|
187
250
|
|
|
188
|
-
|
|
189
|
-
Market share distribution type
|
|
251
|
+
see :class:`SamplingFunctionKWArgs` for description of keyword parameters
|
|
190
252
|
|
|
191
|
-
|
|
192
|
-
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
None
|
|
193
256
|
|
|
194
|
-
|
|
195
|
-
-------
|
|
196
|
-
Seed sequence data
|
|
257
|
+
"""
|
|
197
258
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
_pr_rng_seed_seq: SeedSequence | None = None
|
|
201
|
-
|
|
202
|
-
if _price_spec == PriceConstants.ZERO:
|
|
203
|
-
_pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
|
|
204
|
-
|
|
205
|
-
if _mktshr_dist_type == SHRConstants.UNI:
|
|
206
|
-
_fcount_rng_seed_seq = None
|
|
207
|
-
_seed_count = 2
|
|
208
|
-
_mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
|
|
209
|
-
_sseq_list[:_seed_count]
|
|
210
|
-
if _sseq_list
|
|
211
|
-
else (SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
259
|
+
self.data = self.gen_market_sample(
|
|
260
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
212
261
|
)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
else (SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
262
|
+
|
|
263
|
+
_invalid_array_names = (
|
|
264
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
265
|
+
if self.share_spec.dist_type == "Uniform"
|
|
266
|
+
else ()
|
|
219
267
|
)
|
|
220
268
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
269
|
+
save_data_to_hdf5(
|
|
270
|
+
self.data,
|
|
271
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
272
|
+
excluded_attrs=_invalid_array_names,
|
|
273
|
+
save_data_to_file=save_data_to_file,
|
|
274
|
+
)
|
|
224
275
|
|
|
276
|
+
def sim_enf_cnts(
|
|
277
|
+
self,
|
|
278
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
279
|
+
_sim_test_regime: UPPTestRegime,
|
|
280
|
+
/,
|
|
281
|
+
*,
|
|
282
|
+
sample_size: int = 10**6,
|
|
283
|
+
seed_seq_list: list[SeedSequence] | None = None,
|
|
284
|
+
nthreads: int = 16,
|
|
285
|
+
save_data_to_file: SaveData = False,
|
|
286
|
+
saved_array_name_suffix: str = "",
|
|
287
|
+
) -> UPPTestsCounts:
|
|
288
|
+
"""Generate market data and estimate UPP test counts on same.
|
|
225
289
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
_recapture_rate: float | None,
|
|
229
|
-
_frmshr_array: ArrayDouble,
|
|
230
|
-
_aggregate_purchase_prob: ArrayDouble = EMPTY_ARRAY_DEFAULT,
|
|
231
|
-
/,
|
|
232
|
-
) -> ArrayDouble:
|
|
233
|
-
"""
|
|
234
|
-
Given merging-firm shares and related parameters, return diversion ratios.
|
|
290
|
+
Parameters
|
|
291
|
+
----------
|
|
235
292
|
|
|
236
|
-
|
|
237
|
-
|
|
293
|
+
_upp_test_parms
|
|
294
|
+
Guidelines thresholds for testing UPP and related statistics
|
|
238
295
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
296
|
+
_sim_test_regime
|
|
297
|
+
Configuration to use for testing; UPPTestRegime object
|
|
298
|
+
specifying whether investigation results in enforcement, clearance,
|
|
299
|
+
or both; and aggregation methods used for GUPPI and diversion ratio
|
|
300
|
+
measures
|
|
243
301
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
for the firm with the smaller share.
|
|
302
|
+
sample_size
|
|
303
|
+
Number of draws to generate
|
|
247
304
|
|
|
248
|
-
|
|
249
|
-
|
|
305
|
+
seed_seq_list
|
|
306
|
+
List of seed sequences, to assure independent samples in each thread
|
|
250
307
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
market shares to choice probabilities by multiplication.
|
|
308
|
+
nthreads
|
|
309
|
+
Number of parallel processes to use
|
|
254
310
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
Merging-firm diversion ratios for mergers in the sample.
|
|
311
|
+
save_data_to_file
|
|
312
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
258
313
|
|
|
259
|
-
|
|
314
|
+
saved_array_name_suffix
|
|
315
|
+
Suffix to add to the array names in the HDF5 file
|
|
316
|
+
|
|
317
|
+
Returns
|
|
318
|
+
-------
|
|
319
|
+
UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
|
|
320
|
+
|
|
321
|
+
"""
|
|
322
|
+
|
|
323
|
+
_market_data_sample = self.gen_market_sample(
|
|
324
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
_invalid_array_names = (
|
|
328
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
329
|
+
if self.share_spec.dist_type == "Uniform"
|
|
330
|
+
else ()
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
save_data_to_hdf5(
|
|
334
|
+
_market_data_sample,
|
|
335
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
336
|
+
excluded_attrs=_invalid_array_names,
|
|
337
|
+
save_data_to_file=save_data_to_file,
|
|
338
|
+
)
|
|
260
339
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
340
|
+
_upp_test_arrays = enf_cnts(
|
|
341
|
+
_market_data_sample, _upp_test_parms, _sim_test_regime
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
save_data_to_hdf5(
|
|
345
|
+
_upp_test_arrays,
|
|
346
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
347
|
+
save_data_to_file=save_data_to_file,
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
return _upp_test_arrays
|
|
351
|
+
|
|
352
|
+
def sim_enf_cnts_ll(
|
|
353
|
+
self,
|
|
354
|
+
_enf_parm_vec: gbl.HMGThresholds,
|
|
355
|
+
_sim_test_regime: UPPTestRegime,
|
|
356
|
+
/,
|
|
357
|
+
*,
|
|
358
|
+
sample_size: int = 10**6,
|
|
359
|
+
seed_seq_list: list[SeedSequence] | None = None,
|
|
360
|
+
nthreads: int = 16,
|
|
361
|
+
save_data_to_file: SaveData = False,
|
|
362
|
+
saved_array_name_suffix: str = "",
|
|
363
|
+
) -> UPPTestsCounts:
|
|
364
|
+
"""A function to parallelize data-generation and testing
|
|
365
|
+
|
|
366
|
+
The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
|
|
367
|
+
the parent function, `sim_enf_cnts()`, except that, if provided,
|
|
368
|
+
`seed_seq_list` is used to spawn a seed sequence for each thread,
|
|
369
|
+
to assure independent samples in each thread, and `nthreads` defines
|
|
370
|
+
the number of parallel processes used. The number of draws in
|
|
371
|
+
each thread may be tuned, by trial and error, to the amount of
|
|
372
|
+
memory (RAM) available.
|
|
373
|
+
|
|
374
|
+
Parameters
|
|
375
|
+
----------
|
|
376
|
+
|
|
377
|
+
_enf_parm_vec
|
|
378
|
+
Guidelines thresholds to test against
|
|
379
|
+
|
|
380
|
+
_sim_test_regime
|
|
381
|
+
Configuration to use for testing
|
|
382
|
+
|
|
383
|
+
sample_size
|
|
384
|
+
Number of draws to simulate
|
|
385
|
+
|
|
386
|
+
seed_seq_list
|
|
387
|
+
List of seed sequences, to assure independent samples in each thread
|
|
388
|
+
|
|
389
|
+
nthreads
|
|
390
|
+
Number of parallel processes to use
|
|
391
|
+
|
|
392
|
+
save_data_to_file
|
|
393
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
394
|
+
|
|
395
|
+
saved_array_name_suffix
|
|
396
|
+
Suffix to add to the array names in the HDF5 file
|
|
397
|
+
|
|
398
|
+
Returns
|
|
399
|
+
-------
|
|
400
|
+
Arrays of enforcement counts or clearance counts by firm count,
|
|
401
|
+
ΔHHI and concentration zone
|
|
402
|
+
|
|
403
|
+
"""
|
|
404
|
+
_sample_sz = sample_size
|
|
405
|
+
_subsample_sz = 10**6
|
|
406
|
+
_iter_count = (
|
|
407
|
+
int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
|
|
408
|
+
)
|
|
409
|
+
_thread_count = cpu_count()
|
|
410
|
+
|
|
411
|
+
if (
|
|
412
|
+
self.share_spec.recapture_form != RECForm.OUTIN
|
|
413
|
+
and self.share_spec.recapture_rate != _enf_parm_vec.rec
|
|
414
|
+
):
|
|
415
|
+
raise ValueError(
|
|
416
|
+
"{} {} {}".format(
|
|
417
|
+
f"Recapture rate from market sample spec, {self.share_spec.recapture_rate}",
|
|
418
|
+
f"must match the value, {_enf_parm_vec.rec}",
|
|
419
|
+
"the guidelines thresholds vector.",
|
|
420
|
+
)
|
|
280
421
|
)
|
|
422
|
+
|
|
423
|
+
_rng_seed_seq_list = [None] * _iter_count
|
|
424
|
+
if seed_seq_list:
|
|
425
|
+
_rng_seed_seq_list = list(
|
|
426
|
+
zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
_sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
|
|
430
|
+
"sample_size": _subsample_sz,
|
|
431
|
+
"save_data_to_file": save_data_to_file,
|
|
432
|
+
"nthreads": nthreads,
|
|
433
|
+
})
|
|
434
|
+
|
|
435
|
+
_res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
|
|
436
|
+
delayed(self.sim_enf_cnts)(
|
|
437
|
+
_enf_parm_vec,
|
|
438
|
+
_sim_test_regime,
|
|
439
|
+
**_sim_enf_cnts_kwargs,
|
|
440
|
+
saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
|
|
441
|
+
seed_seq_list=_rng_seed_seq_list_ch,
|
|
442
|
+
)
|
|
443
|
+
for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
|
|
281
444
|
)
|
|
282
445
|
|
|
283
|
-
|
|
446
|
+
_res_list_stacks = UPPTestsCounts(*[
|
|
447
|
+
np.stack([getattr(_j, _k) for _j in _res_list])
|
|
448
|
+
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
449
|
+
])
|
|
450
|
+
upp_test_results = UPPTestsCounts(*[
|
|
451
|
+
np.column_stack((
|
|
452
|
+
(_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
|
|
453
|
+
np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
|
|
454
|
+
))
|
|
455
|
+
for _g, _h in zip(
|
|
456
|
+
_res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
|
|
457
|
+
)
|
|
458
|
+
])
|
|
459
|
+
del _res_list, _res_list_stacks
|
|
460
|
+
|
|
461
|
+
return upp_test_results
|
|
462
|
+
|
|
463
|
+
def estimate_enf_counts(
|
|
464
|
+
self,
|
|
465
|
+
_enf_parm_vec: HMGThresholds,
|
|
466
|
+
_upp_test_regime: UPPTestRegime,
|
|
467
|
+
/,
|
|
468
|
+
*,
|
|
469
|
+
sample_size: int = 10**6,
|
|
470
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
471
|
+
nthreads: int = 16,
|
|
472
|
+
save_data_to_file: SaveData = False,
|
|
473
|
+
saved_array_name_suffix: str = "",
|
|
474
|
+
) -> None:
|
|
475
|
+
"""Populate :attr:`enf_counts` with estimated test counts.
|
|
476
|
+
|
|
477
|
+
Parameters
|
|
478
|
+
----------
|
|
479
|
+
_enf_parm_vec
|
|
480
|
+
Threshold values for various Guidelines criteria
|
|
481
|
+
|
|
482
|
+
_upp_test_regime
|
|
483
|
+
Specifies whether to analyze enforcement, clearance, or both
|
|
484
|
+
and the GUPPI and diversion ratio aggregators employed, with
|
|
485
|
+
default being to analyze enforcement based on the maximum
|
|
486
|
+
merging-firm GUPPI and maximum diversion ratio between the
|
|
487
|
+
merging firms
|
|
488
|
+
|
|
489
|
+
sample_size
|
|
490
|
+
Number of draws to simulate
|
|
491
|
+
|
|
492
|
+
seed_seq_list
|
|
493
|
+
List of seed sequences, to assure independent samples in each thread
|
|
494
|
+
|
|
495
|
+
nthreads
|
|
496
|
+
Number of parallel processes to use
|
|
497
|
+
|
|
498
|
+
save_data_to_file
|
|
499
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
500
|
+
|
|
501
|
+
saved_array_name_suffix
|
|
502
|
+
Suffix to add to the array names in the HDF5 file
|
|
503
|
+
|
|
504
|
+
Returns
|
|
505
|
+
-------
|
|
506
|
+
None
|
|
507
|
+
|
|
508
|
+
"""
|
|
509
|
+
|
|
510
|
+
if self.data is None:
|
|
511
|
+
self.enf_counts = self.sim_enf_cnts_ll(
|
|
512
|
+
_enf_parm_vec,
|
|
513
|
+
_upp_test_regime,
|
|
514
|
+
sample_size=sample_size,
|
|
515
|
+
seed_seq_list=seed_seq_list,
|
|
516
|
+
nthreads=nthreads,
|
|
517
|
+
save_data_to_file=save_data_to_file,
|
|
518
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
519
|
+
)
|
|
520
|
+
else:
|
|
521
|
+
self.enf_counts = enf_cnts(self.data, _enf_parm_vec, _upp_test_regime)
|
|
522
|
+
if save_data_to_file:
|
|
523
|
+
save_data_to_hdf5(
|
|
524
|
+
self.enf_counts,
|
|
525
|
+
save_data_to_file=save_data_to_file,
|
|
526
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
527
|
+
)
|