mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +147 -101
- mergeron/core/guidelines_boundaries.py +290 -1078
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +258 -246
- mergeron/gen/data_generation.py +473 -224
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +171 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -257
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738953.1.dist-info/METADATA +0 -93
- mergeron-2024.738953.1.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/gen/data_generation.py
CHANGED
|
@@ -1,280 +1,529 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Methods to generate data for analyzing merger enforcement policy.
|
|
3
3
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from typing import TypedDict
|
|
9
10
|
|
|
10
|
-
import attrs
|
|
11
11
|
import numpy as np
|
|
12
|
+
from attrs import Attribute, define, field, validators
|
|
13
|
+
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
12
14
|
from numpy.random import SeedSequence
|
|
13
|
-
from numpy.typing import NDArray
|
|
14
15
|
|
|
15
|
-
from .. import
|
|
16
|
+
from .. import DEFAULT_REC_RATIO, VERSION, RECForm # noqa: TID252 # noqa
|
|
17
|
+
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
18
|
+
from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
|
|
16
19
|
from . import (
|
|
17
|
-
|
|
18
|
-
TF,
|
|
19
|
-
FM2Constants,
|
|
20
|
+
FM2Constraint,
|
|
20
21
|
MarketDataSample,
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
PCMDistribution,
|
|
23
|
+
PCMSpec,
|
|
24
|
+
PriceSpec,
|
|
25
|
+
ShareSpec,
|
|
26
|
+
SHRDistribution,
|
|
27
|
+
SSZConstant,
|
|
28
|
+
UPPTestRegime,
|
|
29
|
+
UPPTestsCounts,
|
|
25
30
|
)
|
|
26
|
-
from .
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
_gen_share_data,
|
|
31
|
+
from .data_generation_functions import (
|
|
32
|
+
gen_divr_array,
|
|
33
|
+
gen_margin_price_data,
|
|
34
|
+
gen_share_data,
|
|
35
|
+
parse_seed_seq_list,
|
|
32
36
|
)
|
|
37
|
+
from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
|
|
33
38
|
|
|
34
|
-
__version__ =
|
|
39
|
+
__version__ = VERSION
|
|
35
40
|
|
|
36
41
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
"""
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
for generating the relevant random variates:
|
|
53
|
-
1.) quantity shares
|
|
54
|
-
2.) price-cost margins
|
|
55
|
-
3.) firm-counts, from :code:`[2, 2 + len(firm_counts_weights)]`,
|
|
56
|
-
weighted by :code:`firm_counts_weights`, where relevant
|
|
57
|
-
4.) prices, if :code:`pr_sym_spec == PRIConstants.ZERO`.
|
|
58
|
-
|
|
59
|
-
Parameters
|
|
60
|
-
----------
|
|
61
|
-
_mkt_sample_spec
|
|
62
|
-
class specifying parameters for data generation
|
|
63
|
-
seed_seq_list
|
|
64
|
-
tuple of SeedSequences to ensure replicable data generation with
|
|
65
|
-
appropriately independent random streams
|
|
66
|
-
nthreads
|
|
67
|
-
optionally specify the number of CPU threads for the PRNG
|
|
68
|
-
|
|
69
|
-
Returns
|
|
70
|
-
-------
|
|
71
|
-
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
72
|
-
in the sample
|
|
42
|
+
class SamplingFunctionKWArgs(TypedDict, total=False):
|
|
43
|
+
"Keyword arguments of sampling methods defined below"
|
|
44
|
+
|
|
45
|
+
sample_size: int
|
|
46
|
+
"""number of draws to generate"""
|
|
47
|
+
|
|
48
|
+
seed_seq_list: Sequence[SeedSequence] | None
|
|
49
|
+
"""sequence of SeedSequences to ensure replicable data generation with
|
|
50
|
+
appropriately independent random streams
|
|
51
|
+
|
|
52
|
+
NOTES
|
|
53
|
+
-----
|
|
54
|
+
|
|
55
|
+
See :func:`.data_generation_functions.parse_seed_seq_list` for more on
|
|
56
|
+
specification of this parameter.
|
|
73
57
|
|
|
74
58
|
"""
|
|
75
59
|
|
|
76
|
-
|
|
60
|
+
nthreads: int
|
|
61
|
+
"""number of parallel threads to use"""
|
|
77
62
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
_dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
|
|
81
|
-
_hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
|
|
63
|
+
save_data_to_file: SaveData
|
|
64
|
+
"""optionally save data to HDF5 file"""
|
|
82
65
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
_pcm_rng_seed_seq,
|
|
86
|
-
_fcount_rng_seed_seq,
|
|
87
|
-
_pr_rng_seed_seq,
|
|
88
|
-
) = parse_seed_seq_list(
|
|
89
|
-
seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.pr_sym_spec
|
|
90
|
-
)
|
|
66
|
+
saved_array_name_suffix: str
|
|
67
|
+
"""optionally specify a suffix for the HDF5 array names"""
|
|
91
68
|
|
|
92
|
-
_shr_sample_size = 1.0 * _mkt_sample_spec.sample_size
|
|
93
|
-
# Scale up sample size to offset discards based on specified criteria
|
|
94
|
-
_shr_sample_size *= _hsr_filing_test_type
|
|
95
|
-
if _dist_firm2_pcm == FM2Constants.MNL:
|
|
96
|
-
_shr_sample_size *= SSZConstants.MNL_DEP
|
|
97
|
-
_mkt_sample_spec_here = attrs.evolve(
|
|
98
|
-
_mkt_sample_spec, sample_size=int(_shr_sample_size)
|
|
99
|
-
)
|
|
100
|
-
del _shr_sample_size
|
|
101
69
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
)
|
|
70
|
+
@define
|
|
71
|
+
class MarketSample:
|
|
72
|
+
"""Parameter specification for market data generation."""
|
|
106
73
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
"nth_firm_share",
|
|
114
|
-
)
|
|
74
|
+
share_spec: ShareSpec = field(
|
|
75
|
+
kw_only=True,
|
|
76
|
+
default=ShareSpec(
|
|
77
|
+
SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
|
|
78
|
+
),
|
|
79
|
+
validator=validators.instance_of(ShareSpec),
|
|
115
80
|
)
|
|
81
|
+
"""Market-share specification, see :class:`ShareSpec`"""
|
|
116
82
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
_mktshr_array[:, :2], _nth_firm_share, _mkt_sample_spec_here, _pr_rng_seed_seq
|
|
83
|
+
pcm_spec: PCMSpec = field(
|
|
84
|
+
kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
|
|
120
85
|
)
|
|
86
|
+
"""Margin specification, see :class:`PCMSpec`"""
|
|
87
|
+
|
|
88
|
+
@pcm_spec.validator # pyright: ignore
|
|
89
|
+
def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
|
|
90
|
+
if (
|
|
91
|
+
self.share_spec.recapture_form == RECForm.FIXED
|
|
92
|
+
and _v.firm2_pcm_constraint == FM2Constraint.MNL
|
|
93
|
+
):
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
|
|
96
|
+
"requires Firm 2 margin must have property, "
|
|
97
|
+
f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
|
|
98
|
+
)
|
|
121
99
|
|
|
122
|
-
|
|
123
|
-
|
|
100
|
+
price_spec: PriceSpec = field(
|
|
101
|
+
kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
|
|
124
102
|
)
|
|
103
|
+
"""Price specification, see :class:`PriceSpec`"""
|
|
125
104
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
_nth_firm_share = _nth_firm_share[_hsr_filing_test]
|
|
131
|
-
_price_array = _price_array[_hsr_filing_test]
|
|
132
|
-
|
|
133
|
-
# Calculate diversion ratios
|
|
134
|
-
_divr_array = gen_divr_array(
|
|
135
|
-
_mktshr_array[:, :2],
|
|
136
|
-
_mkt_sample_spec_here.recapture_rate or 0.8,
|
|
137
|
-
_recapture_spec,
|
|
138
|
-
_aggregate_purchase_prob,
|
|
105
|
+
hsr_filing_test_type: SSZConstant = field(
|
|
106
|
+
kw_only=True,
|
|
107
|
+
default=SSZConstant.ONE,
|
|
108
|
+
validator=validators.instance_of(SSZConstant),
|
|
139
109
|
)
|
|
110
|
+
"""Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
|
|
111
|
+
|
|
112
|
+
data: MarketDataSample = field(default=None)
|
|
113
|
+
|
|
114
|
+
enf_counts: UPPTestsCounts = field(default=None)
|
|
115
|
+
|
|
116
|
+
def __gen_market_sample(
|
|
117
|
+
self,
|
|
118
|
+
/,
|
|
119
|
+
*,
|
|
120
|
+
sample_size: int,
|
|
121
|
+
seed_seq_list: Sequence[SeedSequence] | None,
|
|
122
|
+
nthreads: int,
|
|
123
|
+
) -> MarketDataSample:
|
|
124
|
+
"""
|
|
125
|
+
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
126
|
+
|
|
127
|
+
see :class:`SamplingFunctionKWArgs` for description of keyword parameters
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
Merging firms' shares, margins, etc. for each hypothetical merger
|
|
132
|
+
in the sample
|
|
133
|
+
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
_recapture_form = self.share_spec.recapture_form
|
|
137
|
+
_recapture_ratio = self.share_spec.recapture_ratio
|
|
138
|
+
_dist_type_mktshr = self.share_spec.dist_type
|
|
139
|
+
_dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
|
|
140
|
+
_hsr_filing_test_type = self.hsr_filing_test_type
|
|
141
|
+
|
|
142
|
+
(
|
|
143
|
+
_mktshr_rng_seed_seq,
|
|
144
|
+
_pcm_rng_seed_seq,
|
|
145
|
+
_fcount_rng_seed_seq,
|
|
146
|
+
_pr_rng_seed_seq,
|
|
147
|
+
) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
|
|
148
|
+
|
|
149
|
+
_shr_sample_size = 1.0 * sample_size
|
|
150
|
+
# Scale up sample size to offset discards based on specified criteria
|
|
151
|
+
_shr_sample_size *= _hsr_filing_test_type
|
|
152
|
+
if _dist_firm2_pcm == FM2Constraint.MNL:
|
|
153
|
+
_shr_sample_size *= SSZConstant.MNL_DEP
|
|
154
|
+
_shr_sample_size = int(_shr_sample_size)
|
|
155
|
+
|
|
156
|
+
# Generate share data
|
|
157
|
+
_mktshr_data = gen_share_data(
|
|
158
|
+
_shr_sample_size,
|
|
159
|
+
self.share_spec,
|
|
160
|
+
_fcount_rng_seed_seq,
|
|
161
|
+
_mktshr_rng_seed_seq,
|
|
162
|
+
nthreads,
|
|
163
|
+
)
|
|
140
164
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
_pcm_array, _mnl_test_rows = (
|
|
151
|
-
getattr(_pcm_data, _f) for _f in ("pcm_array", "mnl_test_array")
|
|
152
|
-
)
|
|
165
|
+
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
166
|
+
getattr(_mktshr_data, _f)
|
|
167
|
+
for _f in (
|
|
168
|
+
"mktshr_array",
|
|
169
|
+
"fcounts",
|
|
170
|
+
"aggregate_purchase_prob",
|
|
171
|
+
"nth_firm_share",
|
|
172
|
+
)
|
|
173
|
+
)
|
|
153
174
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
175
|
+
# Generate merging-firm price and PCM data
|
|
176
|
+
_margin_data, _price_data = gen_margin_price_data(
|
|
177
|
+
_mktshr_array[:, :2],
|
|
178
|
+
_nth_firm_share,
|
|
179
|
+
_aggregate_purchase_prob,
|
|
180
|
+
self.pcm_spec,
|
|
181
|
+
self.price_spec,
|
|
182
|
+
self.hsr_filing_test_type,
|
|
183
|
+
_pcm_rng_seed_seq,
|
|
184
|
+
_pr_rng_seed_seq,
|
|
185
|
+
nthreads,
|
|
186
|
+
)
|
|
163
187
|
|
|
164
|
-
|
|
188
|
+
_price_array, _hsr_filing_test = (
|
|
189
|
+
getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
|
|
190
|
+
)
|
|
165
191
|
|
|
166
|
-
|
|
167
|
-
|
|
192
|
+
_pcm_array, _mnl_test_rows = (
|
|
193
|
+
getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
|
|
194
|
+
)
|
|
168
195
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
196
|
+
_mnl_test_rows = _mnl_test_rows * _hsr_filing_test
|
|
197
|
+
_s_size = sample_size # originally-specified sample size
|
|
198
|
+
if _dist_firm2_pcm == FM2Constraint.MNL:
|
|
199
|
+
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
200
|
+
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
201
|
+
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
202
|
+
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
203
|
+
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
|
|
204
|
+
:_s_size
|
|
205
|
+
]
|
|
206
|
+
_nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
|
|
207
|
+
|
|
208
|
+
# Calculate diversion ratios
|
|
209
|
+
_divr_array = gen_divr_array(
|
|
210
|
+
_recapture_form,
|
|
211
|
+
_recapture_ratio,
|
|
212
|
+
_mktshr_array[:, :2],
|
|
213
|
+
_aggregate_purchase_prob,
|
|
214
|
+
)
|
|
172
215
|
|
|
173
|
-
|
|
174
|
-
_frmshr_array,
|
|
175
|
-
_pcm_array,
|
|
176
|
-
_price_array,
|
|
177
|
-
_fcounts,
|
|
178
|
-
_aggregate_purchase_prob,
|
|
179
|
-
_nth_firm_share,
|
|
180
|
-
_divr_array,
|
|
181
|
-
_hhi_post,
|
|
182
|
-
_hhi_delta,
|
|
183
|
-
)
|
|
216
|
+
del _mnl_test_rows, _s_size
|
|
184
217
|
|
|
218
|
+
_frmshr_array = _mktshr_array[:, :2]
|
|
219
|
+
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
|
|
220
|
+
:, None
|
|
221
|
+
]
|
|
185
222
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
_dist_type_mktshr: SHRConstants,
|
|
189
|
-
_pr_sym_spec: PRIConstants,
|
|
190
|
-
/,
|
|
191
|
-
) -> tuple[SeedSequence, SeedSequence, SeedSequence | None, SeedSequence | None]:
|
|
192
|
-
"""Initialize RNG seed sequences to ensure independence of distinct random streams."""
|
|
193
|
-
_fcount_rng_seed_seq: SeedSequence | None = None
|
|
194
|
-
_pr_rng_seed_seq: SeedSequence | None = None
|
|
195
|
-
|
|
196
|
-
if _pr_sym_spec == PRIConstants.ZERO:
|
|
197
|
-
_pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
|
|
198
|
-
|
|
199
|
-
if _dist_type_mktshr == SHRConstants.UNI:
|
|
200
|
-
_fcount_rng_seed_seq = None
|
|
201
|
-
_seed_count = 2
|
|
202
|
-
_mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
|
|
203
|
-
_sseq_list[:_seed_count]
|
|
204
|
-
if _sseq_list
|
|
205
|
-
else (SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
223
|
+
_hhi_post = (
|
|
224
|
+
_hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
|
|
206
225
|
)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
226
|
+
|
|
227
|
+
return MarketDataSample(
|
|
228
|
+
_frmshr_array,
|
|
229
|
+
_pcm_array,
|
|
230
|
+
_price_array,
|
|
231
|
+
_fcounts,
|
|
232
|
+
_aggregate_purchase_prob,
|
|
233
|
+
_nth_firm_share,
|
|
234
|
+
_divr_array,
|
|
235
|
+
_hhi_post,
|
|
236
|
+
_hhi_delta,
|
|
213
237
|
)
|
|
214
238
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
239
|
+
def generate_sample(
|
|
240
|
+
self,
|
|
241
|
+
/,
|
|
242
|
+
*,
|
|
243
|
+
sample_size: int = 10**6,
|
|
244
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
245
|
+
nthreads: int = 16,
|
|
246
|
+
save_data_to_file: SaveData = False,
|
|
247
|
+
saved_array_name_suffix: str = "",
|
|
248
|
+
) -> None:
|
|
249
|
+
"""Populate :attr:`data` with generated data
|
|
221
250
|
|
|
251
|
+
see :class:`SamplingFunctionKWArgs` for description of keyword parameters
|
|
222
252
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
None
|
|
256
|
+
|
|
257
|
+
"""
|
|
258
|
+
|
|
259
|
+
self.data = self.__gen_market_sample(
|
|
260
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
261
|
+
)
|
|
232
262
|
|
|
233
|
-
|
|
234
|
-
|
|
263
|
+
_invalid_array_names = (
|
|
264
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
265
|
+
if self.share_spec.dist_type == "Uniform"
|
|
266
|
+
else ()
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
save_data_to_hdf5(
|
|
270
|
+
self.data,
|
|
271
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
272
|
+
excluded_attrs=_invalid_array_names,
|
|
273
|
+
save_data_to_file=save_data_to_file,
|
|
274
|
+
)
|
|
235
275
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
276
|
+
def __sim_enf_cnts(
|
|
277
|
+
self,
|
|
278
|
+
_upp_test_parms: gbl.HMGThresholds,
|
|
279
|
+
_sim_test_regime: UPPTestRegime,
|
|
280
|
+
/,
|
|
281
|
+
*,
|
|
282
|
+
sample_size: int = 10**6,
|
|
283
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
284
|
+
nthreads: int = 16,
|
|
285
|
+
save_data_to_file: SaveData = False,
|
|
286
|
+
saved_array_name_suffix: str = "",
|
|
287
|
+
) -> UPPTestsCounts:
|
|
288
|
+
"""Generate market data and estimate UPP test counts on same.
|
|
240
289
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
for the firm with the smaller share.
|
|
290
|
+
Parameters
|
|
291
|
+
----------
|
|
244
292
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
market shares to choice probabilities by multiplication.
|
|
293
|
+
_upp_test_parms
|
|
294
|
+
Guidelines thresholds for testing UPP and related statistics
|
|
248
295
|
|
|
249
|
-
|
|
250
|
-
|
|
296
|
+
_sim_test_regime
|
|
297
|
+
Configuration to use for testing; UPPTestRegime object
|
|
298
|
+
specifying whether investigation results in enforcement, clearance,
|
|
299
|
+
or both; and aggregation methods used for GUPPI and diversion ratio
|
|
300
|
+
measures
|
|
251
301
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
Merging-firm diversion ratios for mergers in the sample.
|
|
302
|
+
sample_size
|
|
303
|
+
Number of draws to generate
|
|
255
304
|
|
|
256
|
-
|
|
305
|
+
seed_seq_list
|
|
306
|
+
List of seed sequences, to assure independent samples in each thread
|
|
307
|
+
|
|
308
|
+
nthreads
|
|
309
|
+
Number of parallel processes to use
|
|
310
|
+
|
|
311
|
+
save_data_to_file
|
|
312
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
313
|
+
|
|
314
|
+
saved_array_name_suffix
|
|
315
|
+
Suffix to add to the array names in the HDF5 file
|
|
257
316
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
317
|
+
Returns
|
|
318
|
+
-------
|
|
319
|
+
UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
|
|
320
|
+
|
|
321
|
+
"""
|
|
322
|
+
|
|
323
|
+
_market_data_sample = self.__gen_market_sample(
|
|
324
|
+
sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
_invalid_array_names = (
|
|
328
|
+
("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
|
|
329
|
+
if self.share_spec.dist_type == "Uniform"
|
|
330
|
+
else ()
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
save_data_to_hdf5(
|
|
334
|
+
_market_data_sample,
|
|
335
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
336
|
+
excluded_attrs=_invalid_array_names,
|
|
337
|
+
save_data_to_file=save_data_to_file,
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
_upp_test_arrays = compute_upp_test_counts(
|
|
341
|
+
_market_data_sample, _upp_test_parms, _sim_test_regime
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
save_data_to_hdf5(
|
|
345
|
+
_upp_test_arrays,
|
|
346
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
347
|
+
save_data_to_file=save_data_to_file,
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
return _upp_test_arrays
|
|
351
|
+
|
|
352
|
+
def __sim_enf_cnts_ll(
|
|
353
|
+
self,
|
|
354
|
+
_enf_parm_vec: gbl.HMGThresholds,
|
|
355
|
+
_sim_test_regime: UPPTestRegime,
|
|
356
|
+
/,
|
|
357
|
+
*,
|
|
358
|
+
sample_size: int = 10**6,
|
|
359
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
360
|
+
nthreads: int = 16,
|
|
361
|
+
save_data_to_file: SaveData = False,
|
|
362
|
+
saved_array_name_suffix: str = "",
|
|
363
|
+
) -> UPPTestsCounts:
|
|
364
|
+
"""A function to parallelize data-generation and testing
|
|
365
|
+
|
|
366
|
+
The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
|
|
367
|
+
the parent function, `sim_enf_cnts()`, except that, if provided,
|
|
368
|
+
`seed_seq_list` is used to spawn a seed sequence for each thread,
|
|
369
|
+
to assure independent samples in each thread, and `nthreads` defines
|
|
370
|
+
the number of parallel processes used. The number of draws in
|
|
371
|
+
each thread may be tuned, by trial and error, to the amount of
|
|
372
|
+
memory (RAM) available.
|
|
373
|
+
|
|
374
|
+
Parameters
|
|
375
|
+
----------
|
|
376
|
+
|
|
377
|
+
_enf_parm_vec
|
|
378
|
+
Guidelines thresholds to test against
|
|
379
|
+
|
|
380
|
+
_sim_test_regime
|
|
381
|
+
Configuration to use for testing
|
|
382
|
+
|
|
383
|
+
sample_size
|
|
384
|
+
Number of draws to simulate
|
|
385
|
+
|
|
386
|
+
seed_seq_list
|
|
387
|
+
List of seed sequences, to assure independent samples in each thread
|
|
388
|
+
|
|
389
|
+
nthreads
|
|
390
|
+
Number of parallel processes to use
|
|
391
|
+
|
|
392
|
+
save_data_to_file
|
|
393
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
394
|
+
|
|
395
|
+
saved_array_name_suffix
|
|
396
|
+
Suffix to add to the array names in the HDF5 file
|
|
397
|
+
|
|
398
|
+
Returns
|
|
399
|
+
-------
|
|
400
|
+
Arrays of enforcement counts or clearance counts by firm count,
|
|
401
|
+
ΔHHI and concentration zone
|
|
402
|
+
|
|
403
|
+
"""
|
|
404
|
+
_sample_sz = sample_size
|
|
405
|
+
_subsample_sz = 10**6
|
|
406
|
+
_iter_count = (
|
|
407
|
+
int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
|
|
408
|
+
)
|
|
409
|
+
_thread_count = cpu_count()
|
|
410
|
+
|
|
411
|
+
if (
|
|
412
|
+
self.share_spec.recapture_form != RECForm.OUTIN
|
|
413
|
+
and self.share_spec.recapture_ratio != _enf_parm_vec.rec
|
|
414
|
+
):
|
|
415
|
+
raise ValueError(
|
|
416
|
+
"{} {} {}".format(
|
|
417
|
+
f"Recapture ratio from market sample spec, {self.share_spec.recapture_ratio}",
|
|
418
|
+
f"must match the value, {_enf_parm_vec.rec}",
|
|
419
|
+
"the guidelines thresholds vector.",
|
|
420
|
+
)
|
|
277
421
|
)
|
|
422
|
+
|
|
423
|
+
_rng_seed_seq_list = [None] * _iter_count
|
|
424
|
+
if seed_seq_list:
|
|
425
|
+
_rng_seed_seq_list = list(
|
|
426
|
+
zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
_sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
|
|
430
|
+
"sample_size": _subsample_sz,
|
|
431
|
+
"save_data_to_file": save_data_to_file,
|
|
432
|
+
"nthreads": nthreads,
|
|
433
|
+
})
|
|
434
|
+
|
|
435
|
+
_res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
|
|
436
|
+
delayed(self.__sim_enf_cnts)(
|
|
437
|
+
_enf_parm_vec,
|
|
438
|
+
_sim_test_regime,
|
|
439
|
+
**_sim_enf_cnts_kwargs,
|
|
440
|
+
saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}", # pyright: ignore
|
|
441
|
+
seed_seq_list=_rng_seed_seq_list_ch, # pyright: ignore
|
|
442
|
+
)
|
|
443
|
+
for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
|
|
278
444
|
)
|
|
279
445
|
|
|
280
|
-
|
|
446
|
+
_res_list_stacks = UPPTestsCounts(*[
|
|
447
|
+
np.stack([getattr(_j, _k) for _j in _res_list])
|
|
448
|
+
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
449
|
+
])
|
|
450
|
+
upp_test_results = UPPTestsCounts(*[
|
|
451
|
+
np.column_stack((
|
|
452
|
+
(_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
|
|
453
|
+
np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
|
|
454
|
+
))
|
|
455
|
+
for _g, _h in zip(
|
|
456
|
+
_res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
|
|
457
|
+
)
|
|
458
|
+
])
|
|
459
|
+
del _res_list, _res_list_stacks
|
|
460
|
+
|
|
461
|
+
return upp_test_results
|
|
462
|
+
|
|
463
|
+
def estimate_enf_counts(
|
|
464
|
+
self,
|
|
465
|
+
_enf_parm_vec: HMGThresholds,
|
|
466
|
+
_upp_test_regime: UPPTestRegime,
|
|
467
|
+
/,
|
|
468
|
+
*,
|
|
469
|
+
sample_size: int = 10**6,
|
|
470
|
+
seed_seq_list: Sequence[SeedSequence] | None = None,
|
|
471
|
+
nthreads: int = 16,
|
|
472
|
+
save_data_to_file: SaveData = False,
|
|
473
|
+
saved_array_name_suffix: str = "",
|
|
474
|
+
) -> None:
|
|
475
|
+
"""Populate :attr:`enf_counts` with estimated UPP test counts.
|
|
476
|
+
|
|
477
|
+
Parameters
|
|
478
|
+
----------
|
|
479
|
+
_enf_parm_vec
|
|
480
|
+
Threshold values for various Guidelines criteria
|
|
481
|
+
|
|
482
|
+
_upp_test_regime
|
|
483
|
+
Specifies whether to analyze enforcement, clearance, or both
|
|
484
|
+
and the GUPPI and diversion ratio aggregators employed, with
|
|
485
|
+
default being to analyze enforcement based on the maximum
|
|
486
|
+
merging-firm GUPPI and maximum diversion ratio between the
|
|
487
|
+
merging firms
|
|
488
|
+
|
|
489
|
+
sample_size
|
|
490
|
+
Number of draws to simulate
|
|
491
|
+
|
|
492
|
+
seed_seq_list
|
|
493
|
+
List of seed sequences, to assure independent samples in each thread
|
|
494
|
+
|
|
495
|
+
nthreads
|
|
496
|
+
Number of parallel processes to use
|
|
497
|
+
|
|
498
|
+
save_data_to_file
|
|
499
|
+
Whether to save data to an HDF5 file, and where to save it
|
|
500
|
+
|
|
501
|
+
saved_array_name_suffix
|
|
502
|
+
Suffix to add to the array names in the HDF5 file
|
|
503
|
+
|
|
504
|
+
Returns
|
|
505
|
+
-------
|
|
506
|
+
None
|
|
507
|
+
|
|
508
|
+
"""
|
|
509
|
+
|
|
510
|
+
if self.data is None:
|
|
511
|
+
self.enf_counts = self.__sim_enf_cnts_ll(
|
|
512
|
+
_enf_parm_vec,
|
|
513
|
+
_upp_test_regime,
|
|
514
|
+
sample_size=sample_size,
|
|
515
|
+
seed_seq_list=seed_seq_list,
|
|
516
|
+
nthreads=nthreads,
|
|
517
|
+
save_data_to_file=save_data_to_file,
|
|
518
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
519
|
+
)
|
|
520
|
+
else:
|
|
521
|
+
self.enf_counts = compute_upp_test_counts(
|
|
522
|
+
self.data, _enf_parm_vec, _upp_test_regime
|
|
523
|
+
)
|
|
524
|
+
if save_data_to_file:
|
|
525
|
+
save_data_to_hdf5(
|
|
526
|
+
self.enf_counts,
|
|
527
|
+
save_data_to_file=save_data_to_file,
|
|
528
|
+
saved_array_name_suffix=saved_array_name_suffix,
|
|
529
|
+
)
|