mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +142 -93
- mergeron/core/guidelines_boundaries.py +289 -1077
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +257 -245
- mergeron/gen/data_generation.py +473 -221
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +159 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -259
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738963.0.dist-info/METADATA +0 -108
- mergeron-2024.738963.0.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/gen/data_generation.py
CHANGED
|
@@ -5,273 +5,525 @@ Methods to generate data for analyzing merger enforcement policy.
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from typing import TypedDict
|
|
9
10
|
|
|
10
|
-
import attrs
|
|
11
11
|
import numpy as np
|
|
12
|
+
from attrs import Attribute, define, field, validators
|
|
13
|
+
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
12
14
|
from numpy.random import SeedSequence
|
|
13
|
-
from numpy.typing import NDArray
|
|
14
15
|
|
|
15
|
-
from .. import
|
|
16
|
+
from .. import DEFAULT_REC_RATIO, VERSION, RECForm # noqa: TID252 # noqa
|
|
17
|
+
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
18
|
+
from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
|
|
16
19
|
from . import (
|
|
17
|
-
|
|
18
|
-
FM2Constants,
|
|
20
|
+
FM2Constraint,
|
|
19
21
|
MarketDataSample,
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
PCMDistribution,
|
|
23
|
+
PCMSpec,
|
|
24
|
+
PriceSpec,
|
|
25
|
+
ShareSpec,
|
|
26
|
+
SHRDistribution,
|
|
27
|
+
SSZConstant,
|
|
28
|
+
UPPTestRegime,
|
|
29
|
+
UPPTestsCounts,
|
|
24
30
|
)
|
|
25
|
-
from .
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
_gen_share_data,
|
|
31
|
+
from .data_generation_functions import (
|
|
32
|
+
gen_divr_array,
|
|
33
|
+
gen_margin_price_data,
|
|
34
|
+
gen_share_data,
|
|
35
|
+
parse_seed_seq_list,
|
|
31
36
|
)
|
|
37
|
+
from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
|
|
32
38
|
|
|
33
|
-
__version__ =
|
|
39
|
+
__version__ = VERSION
|
|
34
40
|
|
|
35
41
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
for generating the relevant random variates:
|
|
52
|
-
1.) quantity shares
|
|
53
|
-
2.) price-cost margins
|
|
54
|
-
3.) firm-counts, from :code:`[2, 2 + len(firm_counts_weights)]`,
|
|
55
|
-
weighted by :code:`firm_counts_weights`, where relevant
|
|
56
|
-
4.) prices, if :code:`price_spec == PRIConstants.ZERO`.
|
|
57
|
-
|
|
58
|
-
Parameters
|
|
59
|
-
----------
|
|
60
|
-
_mkt_sample_spec
|
|
61
|
-
class specifying parameters for data generation
|
|
62
|
-
seed_seq_list
|
|
63
|
-
tuple of SeedSequences to ensure replicable data generation with
|
|
64
|
-
appropriately independent random streams
|
|
65
|
-
nthreads
|
|
66
|
-
optionally specify the number of CPU threads for the PRNG
|
|
67
|
-
|
|
68
|
-
Returns
|
|
69
|
-
-------
|
|
70
|
-
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
71
|
-
in the sample
|
|
42
|
+
class SamplingFunctionKWArgs(TypedDict, total=False):
|
|
43
|
+
"Keyword arguments of sampling methods defined below"
|
|
44
|
+
|
|
45
|
+
sample_size: int
|
|
46
|
+
"""number of draws to generate"""
|
|
47
|
+
|
|
48
|
+
seed_seq_list: Sequence[SeedSequence] | None
|
|
49
|
+
"""sequence of SeedSequences to ensure replicable data generation with
|
|
50
|
+
appropriately independent random streams
|
|
51
|
+
|
|
52
|
+
NOTES
|
|
53
|
+
-----
|
|
54
|
+
|
|
55
|
+
See :func:`.data_generation_functions.parse_seed_seq_list` for more on
|
|
56
|
+
specification of this parameter.
|
|
72
57
|
|
|
73
58
|
"""
|
|
74
59
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
_recapture_form = _mkt_sample_spec.share_spec.recapture_form
|
|
78
|
-
_recapture_rate = _mkt_sample_spec.share_spec.recapture_rate
|
|
79
|
-
_dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
|
|
80
|
-
_dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
|
|
81
|
-
_hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
|
|
82
|
-
|
|
83
|
-
(
|
|
84
|
-
_mktshr_rng_seed_seq,
|
|
85
|
-
_pcm_rng_seed_seq,
|
|
86
|
-
_fcount_rng_seed_seq,
|
|
87
|
-
_pr_rng_seed_seq,
|
|
88
|
-
) = parse_seed_seq_list(
|
|
89
|
-
seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.price_spec
|
|
90
|
-
)
|
|
60
|
+
nthreads: int
|
|
61
|
+
"""number of parallel threads to use"""
|
|
91
62
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
_shr_sample_size *= _hsr_filing_test_type
|
|
95
|
-
if _dist_firm2_pcm == FM2Constants.MNL:
|
|
96
|
-
_shr_sample_size *= SSZConstants.MNL_DEP
|
|
97
|
-
_mkt_sample_spec_here = attrs.evolve(
|
|
98
|
-
_mkt_sample_spec, sample_size=int(_shr_sample_size)
|
|
99
|
-
)
|
|
100
|
-
del _shr_sample_size
|
|
63
|
+
save_data_to_file: SaveData
|
|
64
|
+
"""optionally save data to HDF5 file"""
|
|
101
65
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
66
|
+
saved_array_name_suffix: str
|
|
67
|
+
"""optionally specify a suffix for the HDF5 array names"""
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@define
|
|
71
|
+
class MarketSample:
|
|
72
|
+
"""Parameter specification for market data generation."""
|
|
73
|
+
|
|
74
|
+
share_spec: ShareSpec = field(
|
|
75
|
+
kw_only=True,
|
|
76
|
+
default=ShareSpec(
|
|
77
|
+
SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
|
|
78
|
+
),
|
|
79
|
+
validator=validators.instance_of(ShareSpec),
|
|
105
80
|
)
|
|
81
|
+
"""Market-share specification, see :class:`ShareSpec`"""
|
|
106
82
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
for _f in (
|
|
110
|
-
"mktshr_array",
|
|
111
|
-
"fcounts",
|
|
112
|
-
"aggregate_purchase_prob",
|
|
113
|
-
"nth_firm_share",
|
|
114
|
-
)
|
|
83
|
+
pcm_spec: PCMSpec = field(
|
|
84
|
+
kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
|
|
115
85
|
)
|
|
86
|
+
"""Margin specification, see :class:`PCMSpec`"""
|
|
87
|
+
|
|
88
|
+
@pcm_spec.validator # pyright: ignore
|
|
89
|
+
def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
|
|
90
|
+
if (
|
|
91
|
+
self.share_spec.recapture_form == RECForm.FIXED
|
|
92
|
+
and _v.firm2_pcm_constraint == FM2Constraint.MNL
|
|
93
|
+
):
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
|
|
96
|
+
"requires Firm 2 margin must have property, "
|
|
97
|
+
f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
|
|
98
|
+
)
|
|
116
99
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
_mktshr_array[:, :2], _nth_firm_share, _mkt_sample_spec_here, _pr_rng_seed_seq
|
|
100
|
+
price_spec: PriceSpec = field(
|
|
101
|
+
kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
|
|
120
102
|
)
|
|
103
|
+
"""Price specification, see :class:`PriceSpec`"""
|
|
121
104
|
|
|
122
|
-
|
|
123
|
-
|
|
105
|
+
hsr_filing_test_type: SSZConstant = field(
|
|
106
|
+
kw_only=True,
|
|
107
|
+
default=SSZConstant.ONE,
|
|
108
|
+
validator=validators.instance_of(SSZConstant),
|
|
124
109
|
)
|
|
110
|
+
"""Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
|
|
111
|
+
|
|
112
|
+
data: MarketDataSample = field(default=None)
|
|
113
|
+
|
|
114
|
+
enf_counts: UPPTestsCounts = field(default=None)
|
|
115
|
+
|
|
116
|
+
def __gen_market_sample(
|
|
117
|
+
self,
|
|
118
|
+
/,
|
|
119
|
+
*,
|
|
120
|
+
sample_size: int,
|
|
121
|
+
seed_seq_list: Sequence[SeedSequence] | None,
|
|
122
|
+
nthreads: int,
|
|
123
|
+
) -> MarketDataSample:
|
|
124
|
+
"""
|
|
125
|
+
Generate share, diversion ratio, price, and margin data for this :class:`MarketSample`.
|
|
126
|
+
|
|
127
|
+
See :class:`SamplingFunctionKWArgs` for a description of the keyword parameters.
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
132
|
+
in the sample
|
|
133
|
+
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
_recapture_form = self.share_spec.recapture_form
|
|
137
|
+
_recapture_ratio = self.share_spec.recapture_ratio
|
|
138
|
+
_dist_type_mktshr = self.share_spec.dist_type
|
|
139
|
+
_dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
|
|
140
|
+
_hsr_filing_test_type = self.hsr_filing_test_type
|
|
141
|
+
|
|
142
|
+
(
|
|
143
|
+
_mktshr_rng_seed_seq,
|
|
144
|
+
_pcm_rng_seed_seq,
|
|
145
|
+
_fcount_rng_seed_seq,
|
|
146
|
+
_pr_rng_seed_seq,
|
|
147
|
+
) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
|
|
148
|
+
|
|
149
|
+
_shr_sample_size = 1.0 * sample_size
|
|
150
|
+
# Scale up sample size to offset discards based on specified criteria
|
|
151
|
+
_shr_sample_size *= _hsr_filing_test_type
|
|
152
|
+
if _dist_firm2_pcm == FM2Constraint.MNL:
|
|
153
|
+
_shr_sample_size *= SSZConstant.MNL_DEP
|
|
154
|
+
_shr_sample_size = int(_shr_sample_size)
|
|
155
|
+
|
|
156
|
+
# Generate share data
|
|
157
|
+
_mktshr_data = gen_share_data(
|
|
158
|
+
_shr_sample_size,
|
|
159
|
+
self.share_spec,
|
|
160
|
+
_fcount_rng_seed_seq,
|
|
161
|
+
_mktshr_rng_seed_seq,
|
|
162
|
+
nthreads,
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
166
|
+
getattr(_mktshr_data, _f)
|
|
167
|
+
for _f in (
|
|
168
|
+
"mktshr_array",
|
|
169
|
+
"fcounts",
|
|
170
|
+
"aggregate_purchase_prob",
|
|
171
|
+
"nth_firm_share",
|
|
172
|
+
)
|
|
173
|
+
)
|
|
125
174
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
175
|
+
# Generate merging-firm price and PCM data
|
|
176
|
+
_margin_data, _price_data = gen_margin_price_data(
|
|
177
|
+
_mktshr_array[:, :2],
|
|
178
|
+
_nth_firm_share,
|
|
179
|
+
_aggregate_purchase_prob,
|
|
180
|
+
self.pcm_spec,
|
|
181
|
+
self.price_spec,
|
|
182
|
+
self.hsr_filing_test_type,
|
|
183
|
+
_pcm_rng_seed_seq,
|
|
184
|
+
_pr_rng_seed_seq,
|
|
185
|
+
nthreads,
|
|
186
|
+
)
|
|
132
187
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
)
|
|
188
|
+
_price_array, _hsr_filing_test = (
|
|
189
|
+
getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
|
|
190
|
+
)
|
|
137
191
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
_mkt_sample_spec_here,
|
|
142
|
-
_price_array,
|
|
143
|
-
_aggregate_purchase_prob,
|
|
144
|
-
_pcm_rng_seed_seq,
|
|
145
|
-
nthreads,
|
|
146
|
-
)
|
|
147
|
-
_pcm_array, _mnl_test_rows = (
|
|
148
|
-
getattr(_pcm_data, _f) for _f in ("pcm_array", "mnl_test_array")
|
|
149
|
-
)
|
|
192
|
+
_pcm_array, _mnl_test_rows = (
|
|
193
|
+
getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
|
|
194
|
+
)
|
|
150
195
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
196
|
+
_mnl_test_rows = _mnl_test_rows * _hsr_filing_test
|
|
197
|
+
_s_size = sample_size # originally-specified sample size
|
|
198
|
+
if _dist_firm2_pcm == FM2Constraint.MNL:
|
|
199
|
+
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
200
|
+
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
201
|
+
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
202
|
+
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
203
|
+
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
|
|
204
|
+
:_s_size
|
|
205
|
+
]
|
|
206
|
+
_nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
|
|
207
|
+
|
|
208
|
+
# Calculate diversion ratios
|
|
209
|
+
_divr_array = gen_divr_array(
|
|
210
|
+
_recapture_form,
|
|
211
|
+
_recapture_ratio,
|
|
212
|
+
_mktshr_array[:, :2],
|
|
213
|
+
_aggregate_purchase_prob,
|
|
214
|
+
)
|
|
160
215
|
|
|
161
|
-
|
|
216
|
+
del _mnl_test_rows, _s_size
|
|
162
217
|
|
|
163
|
-
|
|
164
|
-
|
|
218
|
+
_frmshr_array = _mktshr_array[:, :2]
|
|
219
|
+
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
|
|
220
|
+
:, None
|
|
221
|
+
]
|
|
165
222
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
223
|
+
_hhi_post = (
|
|
224
|
+
_hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
|
|
225
|
+
)
|
|
169
226
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
227
|
+
return MarketDataSample(
|
|
228
|
+
_frmshr_array,
|
|
229
|
+
_pcm_array,
|
|
230
|
+
_price_array,
|
|
231
|
+
_fcounts,
|
|
232
|
+
_aggregate_purchase_prob,
|
|
233
|
+
_nth_firm_share,
|
|
234
|
+
_divr_array,
|
|
235
|
+
_hhi_post,
|
|
236
|
+
_hhi_delta,
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
def generate_sample(
|
|
240
|
+
self,
|
|
241
|
+
/,
|
|
242
|
+
*,
|
|
243
|
+
sample_size: int = 10**6,
|
|
244
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
245
|
+
nthreads: int = 16,
|
|
246
|
+
save_data_to_file: SaveData = False,
|
|
247
|
+
saved_array_name_suffix: str = "",
|
|
248
|
+
) -> None:
|
|
249
|
+
"""Populate :attr:`data` with generated data
|
|
181
250
|
|
|
251
|
+
See :class:`SamplingFunctionKWArgs` for a description of the keyword parameters.
|
|
182
252
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
_pr_rng_seed_seq: SeedSequence | None = None
|
|
192
|
-
|
|
193
|
-
if _price_spec == PRIConstants.ZERO:
|
|
194
|
-
_pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
|
|
195
|
-
|
|
196
|
-
if _mktshr_dist_type == SHRConstants.UNI:
|
|
197
|
-
_fcount_rng_seed_seq = None
|
|
198
|
-
_seed_count = 2
|
|
199
|
-
_mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
|
|
200
|
-
_sseq_list[:_seed_count]
|
|
201
|
-
if _sseq_list
|
|
202
|
-
else (SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
None
|
|
256
|
+
|
|
257
|
+
"""
|
|
258
|
+
|
|
259
|
+
self.data = self.__gen_market_sample(
|
|
260
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
203
261
|
)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
else (SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
262
|
+
|
|
263
|
+
_invalid_array_names = (
|
|
264
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
265
|
+
if self.share_spec.dist_type == "Uniform"
|
|
266
|
+
else ()
|
|
210
267
|
)
|
|
211
268
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
269
|
+
save_data_to_hdf5(
|
|
270
|
+
self.data,
|
|
271
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
272
|
+
excluded_attrs=_invalid_array_names,
|
|
273
|
+
save_data_to_file=save_data_to_file,
|
|
274
|
+
)
|
|
218
275
|
|
|
276
|
+
def __sim_enf_cnts(
|
|
277
|
+
self,
|
|
278
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
279
|
+
_sim_test_regime: UPPTestRegime,
|
|
280
|
+
/,
|
|
281
|
+
*,
|
|
282
|
+
sample_size: int = 10**6,
|
|
283
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
284
|
+
nthreads: int = 16,
|
|
285
|
+
save_data_to_file: SaveData = False,
|
|
286
|
+
saved_array_name_suffix: str = "",
|
|
287
|
+
) -> UPPTestsCounts:
|
|
288
|
+
"""Generate market data and estimate UPP test counts on the same data.
|
|
219
289
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
_recapture_rate: float | None,
|
|
223
|
-
_frmshr_array: NDArray[np.float64],
|
|
224
|
-
_aggregate_purchase_prob: NDArray[np.float64] = EMPTY_ARRAY_DEFAULT,
|
|
225
|
-
/,
|
|
226
|
-
) -> NDArray[np.float64]:
|
|
227
|
-
"""
|
|
228
|
-
Given merging-firm shares and related parameters, return diversion ratios.
|
|
290
|
+
Parameters
|
|
291
|
+
----------
|
|
229
292
|
|
|
230
|
-
|
|
231
|
-
|
|
293
|
+
_upp_test_parms
|
|
294
|
+
Guidelines thresholds for testing UPP and related statistics
|
|
232
295
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
296
|
+
_sim_test_regime
|
|
297
|
+
Configuration to use for testing; UPPTestRegime object
|
|
298
|
+
specifying whether investigation results in enforcement, clearance,
|
|
299
|
+
or both; and aggregation methods used for GUPPI and diversion ratio
|
|
300
|
+
measures
|
|
237
301
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
for the firm with the smaller share.
|
|
302
|
+
sample_size
|
|
303
|
+
Number of draws to generate
|
|
241
304
|
|
|
242
|
-
|
|
243
|
-
|
|
305
|
+
seed_seq_list
|
|
306
|
+
List of seed sequences, to assure independent samples in each thread
|
|
244
307
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
market shares to choice probabilities by multiplication.
|
|
308
|
+
nthreads
|
|
309
|
+
Number of parallel processes to use
|
|
248
310
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
Merging-firm diversion ratios for mergers in the sample.
|
|
311
|
+
save_data_to_file
|
|
312
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
252
313
|
|
|
253
|
-
|
|
314
|
+
saved_array_name_suffix
|
|
315
|
+
Suffix to add to the array names in the HDF5 file
|
|
254
316
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
317
|
+
Returns
|
|
318
|
+
-------
|
|
319
|
+
UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
|
|
320
|
+
|
|
321
|
+
"""
|
|
322
|
+
|
|
323
|
+
_market_data_sample = self.__gen_market_sample(
|
|
324
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
_invalid_array_names = (
|
|
328
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
329
|
+
if self.share_spec.dist_type == "Uniform"
|
|
330
|
+
else ()
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
save_data_to_hdf5(
|
|
334
|
+
_market_data_sample,
|
|
335
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
336
|
+
excluded_attrs=_invalid_array_names,
|
|
337
|
+
save_data_to_file=save_data_to_file,
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
_upp_test_arrays = compute_upp_test_counts(
|
|
341
|
+
_market_data_sample, _upp_test_parms, _sim_test_regime
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
save_data_to_hdf5(
|
|
345
|
+
_upp_test_arrays,
|
|
346
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
347
|
+
save_data_to_file=save_data_to_file,
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
return _upp_test_arrays
|
|
351
|
+
|
|
352
|
+
def __sim_enf_cnts_ll(
|
|
353
|
+
self,
|
|
354
|
+
_enf_parm_vec: gbl.HMGThresholds,
|
|
355
|
+
_sim_test_regime: UPPTestRegime,
|
|
356
|
+
/,
|
|
357
|
+
*,
|
|
358
|
+
sample_size: int = 10**6,
|
|
359
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
360
|
+
nthreads: int = 16,
|
|
361
|
+
save_data_to_file: SaveData = False,
|
|
362
|
+
saved_array_name_suffix: str = "",
|
|
363
|
+
) -> UPPTestsCounts:
|
|
364
|
+
"""A function to parallelize data-generation and testing
|
|
365
|
+
|
|
366
|
+
The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
|
|
367
|
+
the parent function, `__sim_enf_cnts()`, except that, if provided,
|
|
368
|
+
`seed_seq_list` is used to spawn a seed sequence for each thread,
|
|
369
|
+
to assure independent samples in each thread, and `nthreads` defines
|
|
370
|
+
the number of parallel processes used. The number of draws in
|
|
371
|
+
each thread may be tuned, by trial and error, to the amount of
|
|
372
|
+
memory (RAM) available.
|
|
373
|
+
|
|
374
|
+
Parameters
|
|
375
|
+
----------
|
|
376
|
+
|
|
377
|
+
_enf_parm_vec
|
|
378
|
+
Guidelines thresholds to test against
|
|
379
|
+
|
|
380
|
+
_sim_test_regime
|
|
381
|
+
Configuration to use for testing
|
|
382
|
+
|
|
383
|
+
sample_size
|
|
384
|
+
Number of draws to simulate
|
|
385
|
+
|
|
386
|
+
seed_seq_list
|
|
387
|
+
List of seed sequences, to assure independent samples in each thread
|
|
388
|
+
|
|
389
|
+
nthreads
|
|
390
|
+
Number of parallel processes to use
|
|
391
|
+
|
|
392
|
+
save_data_to_file
|
|
393
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
394
|
+
|
|
395
|
+
saved_array_name_suffix
|
|
396
|
+
Suffix to add to the array names in the HDF5 file
|
|
397
|
+
|
|
398
|
+
Returns
|
|
399
|
+
-------
|
|
400
|
+
Arrays of enforcement counts or clearance counts by firm count,
|
|
401
|
+
ΔHHI and concentration zone
|
|
402
|
+
|
|
403
|
+
"""
|
|
404
|
+
_sample_sz = sample_size
|
|
405
|
+
_subsample_sz = 10**6
|
|
406
|
+
_iter_count = (
|
|
407
|
+
int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
|
|
408
|
+
)
|
|
409
|
+
_thread_count = cpu_count()
|
|
410
|
+
|
|
411
|
+
if (
|
|
412
|
+
self.share_spec.recapture_form != RECForm.OUTIN
|
|
413
|
+
and self.share_spec.recapture_ratio != _enf_parm_vec.rec
|
|
414
|
+
):
|
|
415
|
+
raise ValueError(
|
|
416
|
+
"{} {} {}".format(
|
|
417
|
+
f"Recapture ratio from market sample spec, {self.share_spec.recapture_ratio}",
|
|
418
|
+
f"must match the value, {_enf_parm_vec.rec}",
|
|
419
|
+
"the guidelines thresholds vector.",
|
|
420
|
+
)
|
|
274
421
|
)
|
|
422
|
+
|
|
423
|
+
_rng_seed_seq_list = [None] * _iter_count
|
|
424
|
+
if seed_seq_list:
|
|
425
|
+
_rng_seed_seq_list = list(
|
|
426
|
+
zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
_sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
|
|
430
|
+
"sample_size": _subsample_sz,
|
|
431
|
+
"save_data_to_file": save_data_to_file,
|
|
432
|
+
"nthreads": nthreads,
|
|
433
|
+
})
|
|
434
|
+
|
|
435
|
+
_res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
|
|
436
|
+
delayed(self.__sim_enf_cnts)(
|
|
437
|
+
_enf_parm_vec,
|
|
438
|
+
_sim_test_regime,
|
|
439
|
+
**_sim_enf_cnts_kwargs,
|
|
440
|
+
saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}", # pyright: ignore
|
|
441
|
+
seed_seq_list=_rng_seed_seq_list_ch, # pyright: ignore
|
|
442
|
+
)
|
|
443
|
+
for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
|
|
275
444
|
)
|
|
276
445
|
|
|
277
|
-
|
|
446
|
+
_res_list_stacks = UPPTestsCounts(*[
|
|
447
|
+
np.stack([getattr(_j, _k) for _j in _res_list])
|
|
448
|
+
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
449
|
+
])
|
|
450
|
+
upp_test_results = UPPTestsCounts(*[
|
|
451
|
+
np.column_stack((
|
|
452
|
+
(_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
|
|
453
|
+
np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
|
|
454
|
+
))
|
|
455
|
+
for _g, _h in zip(
|
|
456
|
+
_res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
|
|
457
|
+
)
|
|
458
|
+
])
|
|
459
|
+
del _res_list, _res_list_stacks
|
|
460
|
+
|
|
461
|
+
return upp_test_results
|
|
462
|
+
|
|
463
|
+
def estimate_enf_counts(
|
|
464
|
+
self,
|
|
465
|
+
_enf_parm_vec: HMGThresholds,
|
|
466
|
+
_upp_test_regime: UPPTestRegime,
|
|
467
|
+
/,
|
|
468
|
+
*,
|
|
469
|
+
sample_size: int = 10**6,
|
|
470
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
471
|
+
nthreads: int = 16,
|
|
472
|
+
save_data_to_file: SaveData = False,
|
|
473
|
+
saved_array_name_suffix: str = "",
|
|
474
|
+
) -> None:
|
|
475
|
+
"""Populate :attr:`enf_counts` with estimated UPP test counts.
|
|
476
|
+
|
|
477
|
+
Parameters
|
|
478
|
+
----------
|
|
479
|
+
_enf_parm_vec
|
|
480
|
+
Threshold values for various Guidelines criteria
|
|
481
|
+
|
|
482
|
+
_upp_test_regime
|
|
483
|
+
Specifies whether to analyze enforcement, clearance, or both
|
|
484
|
+
and the GUPPI and diversion ratio aggregators employed, with
|
|
485
|
+
default being to analyze enforcement based on the maximum
|
|
486
|
+
merging-firm GUPPI and maximum diversion ratio between the
|
|
487
|
+
merging firms
|
|
488
|
+
|
|
489
|
+
sample_size
|
|
490
|
+
Number of draws to simulate
|
|
491
|
+
|
|
492
|
+
seed_seq_list
|
|
493
|
+
List of seed sequences, to assure independent samples in each thread
|
|
494
|
+
|
|
495
|
+
nthreads
|
|
496
|
+
Number of parallel processes to use
|
|
497
|
+
|
|
498
|
+
save_data_to_file
|
|
499
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
500
|
+
|
|
501
|
+
saved_array_name_suffix
|
|
502
|
+
Suffix to add to the array names in the HDF5 file
|
|
503
|
+
|
|
504
|
+
Returns
|
|
505
|
+
-------
|
|
506
|
+
None
|
|
507
|
+
|
|
508
|
+
"""
|
|
509
|
+
|
|
510
|
+
if self.data is None:
|
|
511
|
+
self.enf_counts = self.__sim_enf_cnts_ll(
|
|
512
|
+
_enf_parm_vec,
|
|
513
|
+
_upp_test_regime,
|
|
514
|
+
sample_size=sample_size,
|
|
515
|
+
seed_seq_list=seed_seq_list,
|
|
516
|
+
nthreads=nthreads,
|
|
517
|
+
save_data_to_file=save_data_to_file,
|
|
518
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
519
|
+
)
|
|
520
|
+
else:
|
|
521
|
+
self.enf_counts = compute_upp_test_counts(
|
|
522
|
+
self.data, _enf_parm_vec, _upp_test_regime
|
|
523
|
+
)
|
|
524
|
+
if save_data_to_file:
|
|
525
|
+
save_data_to_hdf5(
|
|
526
|
+
self.enf_counts,
|
|
527
|
+
save_data_to_file=save_data_to_file,
|
|
528
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
529
|
+
)
|