mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

Files changed (39) hide show
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +147 -101
  5. mergeron/core/guidelines_boundaries.py +290 -1078
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +258 -246
  15. mergeron/gen/data_generation.py +473 -224
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +171 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -257
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738953.1.dist-info/METADATA +0 -93
  38. mergeron-2024.738953.1.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
@@ -1,280 +1,529 @@
1
1
  """
2
- Routines to generate data for analyzing merger enforcement policy.
2
+ Methods to generate data for analyzing merger enforcement policy.
3
3
 
4
4
  """
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- from importlib.metadata import version
8
+ from collections.abc import Sequence
9
+ from typing import TypedDict
9
10
 
10
- import attrs
11
11
  import numpy as np
12
+ from attrs import Attribute, define, field, validators
13
+ from joblib import Parallel, cpu_count, delayed # type: ignore
12
14
  from numpy.random import SeedSequence
13
- from numpy.typing import NDArray
14
15
 
15
- from .. import _PKG_NAME, RECConstants # noqa: TID252
16
+ from .. import DEFAULT_REC_RATIO, VERSION, RECForm # noqa: TID252 # noqa
17
+ from ..core import guidelines_boundaries as gbl # noqa: TID252
18
+ from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
16
19
  from . import (
17
- EMPTY_ARRAY_DEFAULT,
18
- TF,
19
- FM2Constants,
20
+ FM2Constraint,
20
21
  MarketDataSample,
21
- MarketSampleSpec,
22
- PRIConstants,
23
- SHRConstants,
24
- SSZConstants,
22
+ PCMDistribution,
23
+ PCMSpec,
24
+ PriceSpec,
25
+ ShareSpec,
26
+ SHRDistribution,
27
+ SSZConstant,
28
+ UPPTestRegime,
29
+ UPPTestsCounts,
25
30
  )
26
- from ._data_generation_functions_nonpublic import (
27
- _gen_market_shares_dirichlet, # noqa: F401 easter-egg for external modules
28
- _gen_market_shares_uniform, # noqa: F401 easter-egg for external modules
29
- _gen_pcm_data,
30
- _gen_pr_data,
31
- _gen_share_data,
31
+ from .data_generation_functions import (
32
+ gen_divr_array,
33
+ gen_margin_price_data,
34
+ gen_share_data,
35
+ parse_seed_seq_list,
32
36
  )
37
+ from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
33
38
 
34
- __version__ = version(_PKG_NAME)
39
+ __version__ = VERSION
35
40
 
36
41
 
37
- def gen_market_sample(
38
- _mkt_sample_spec: MarketSampleSpec,
39
- /,
40
- *,
41
- seed_seq_list: list[SeedSequence] | None = None,
42
- nthreads: int = 16,
43
- ) -> MarketDataSample:
44
- """
45
- Generate share, diversion ratio, price, and margin data based on supplied parameters
46
-
47
- Diversion ratios generated assuming share-proportionality, unless
48
- `recapture_spec` = "proportional", in which case both firms' recapture rate
49
- is set to `r_bar`.
50
-
51
- The tuple of SeedSequences, if specified, is parsed in the following order
52
- for generating the relevant random variates:
53
- 1.) quantity shares
54
- 2.) price-cost margins
55
- 3.) firm-counts, from :code:`[2, 2 + len(firm_counts_weights)]`,
56
- weighted by :code:`firm_counts_weights`, where relevant
57
- 4.) prices, if :code:`pr_sym_spec == PRIConstants.ZERO`.
58
-
59
- Parameters
60
- ----------
61
- _mkt_sample_spec
62
- class specifying parameters for data generation
63
- seed_seq_list
64
- tuple of SeedSequences to ensure replicable data generation with
65
- appropriately independent random streams
66
- nthreads
67
- optionally specify the number of CPU threads for the PRNG
68
-
69
- Returns
70
- -------
71
- Merging firms' shares, margins, etc. for each hypothetical merger
72
- in the sample
42
+ class SamplingFunctionKWArgs(TypedDict, total=False):
43
+ "Keyword arguments of sampling methods defined below"
44
+
45
+ sample_size: int
46
+ """number of draws to generate"""
47
+
48
+ seed_seq_list: Sequence[SeedSequence] | None
49
+ """sequence of SeedSequences to ensure replicable data generation with
50
+ appropriately independent random streams
51
+
52
+ NOTES
53
+ -----
54
+
55
+ See :func:`.data_generation_functions.parse_seed_seq_list` for more on
56
+ specification of this parameter.
73
57
 
74
58
  """
75
59
 
76
- _mkt_sample_spec = _mkt_sample_spec or MarketSampleSpec()
60
+ nthreads: int
61
+ """number of parallel threads to use"""
77
62
 
78
- _recapture_spec = _mkt_sample_spec.share_spec.recapture_spec
79
- _dist_type_mktshr = _mkt_sample_spec.share_spec.dist_type
80
- _dist_firm2_pcm = _mkt_sample_spec.pcm_spec.firm2_pcm_constraint
81
- _hsr_filing_test_type = _mkt_sample_spec.hsr_filing_test_type
63
+ save_data_to_file: SaveData
64
+ """optionally save data to HDF5 file"""
82
65
 
83
- (
84
- _mktshr_rng_seed_seq,
85
- _pcm_rng_seed_seq,
86
- _fcount_rng_seed_seq,
87
- _pr_rng_seed_seq,
88
- ) = parse_seed_seq_list(
89
- seed_seq_list, _dist_type_mktshr, _mkt_sample_spec.pr_sym_spec
90
- )
66
+ saved_array_name_suffix: str
67
+ """optionally specify a suffix for the HDF5 array names"""
91
68
 
92
- _shr_sample_size = 1.0 * _mkt_sample_spec.sample_size
93
- # Scale up sample size to offset discards based on specified criteria
94
- _shr_sample_size *= _hsr_filing_test_type
95
- if _dist_firm2_pcm == FM2Constants.MNL:
96
- _shr_sample_size *= SSZConstants.MNL_DEP
97
- _mkt_sample_spec_here = attrs.evolve(
98
- _mkt_sample_spec, sample_size=int(_shr_sample_size)
99
- )
100
- del _shr_sample_size
101
69
 
102
- # Generate share data
103
- _mktshr_data = _gen_share_data(
104
- _mkt_sample_spec_here, _fcount_rng_seed_seq, _mktshr_rng_seed_seq, nthreads
105
- )
70
+ @define
71
+ class MarketSample:
72
+ """Parameter specification for market data generation."""
106
73
 
107
- _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
108
- getattr(_mktshr_data, _f)
109
- for _f in (
110
- "mktshr_array",
111
- "fcounts",
112
- "aggregate_purchase_prob",
113
- "nth_firm_share",
114
- )
74
+ share_spec: ShareSpec = field(
75
+ kw_only=True,
76
+ default=ShareSpec(
77
+ SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
78
+ ),
79
+ validator=validators.instance_of(ShareSpec),
115
80
  )
81
+ """Market-share specification, see :class:`ShareSpec`"""
116
82
 
117
- # Generate merging-firm price data
118
- _price_data = _gen_pr_data(
119
- _mktshr_array[:, :2], _nth_firm_share, _mkt_sample_spec_here, _pr_rng_seed_seq
83
+ pcm_spec: PCMSpec = field(
84
+ kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
120
85
  )
86
+ """Margin specification, see :class:`PCMSpec`"""
87
+
88
+ @pcm_spec.validator # pyright: ignore
89
+ def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
90
+ if (
91
+ self.share_spec.recapture_form == RECForm.FIXED
92
+ and _v.firm2_pcm_constraint == FM2Constraint.MNL
93
+ ):
94
+ raise ValueError(
95
+ f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
96
+ "requires Firm 2 margin must have property, "
97
+ f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
98
+ )
121
99
 
122
- _price_array, _hsr_filing_test = (
123
- getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
100
+ price_spec: PriceSpec = field(
101
+ kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
124
102
  )
103
+ """Price specification, see :class:`PriceSpec`"""
125
104
 
126
- if _hsr_filing_test_type != SSZConstants.ONE:
127
- _mktshr_array = _mktshr_array[_hsr_filing_test]
128
- _fcounts = _fcounts[_hsr_filing_test]
129
- _aggregate_purchase_prob = _aggregate_purchase_prob[_hsr_filing_test]
130
- _nth_firm_share = _nth_firm_share[_hsr_filing_test]
131
- _price_array = _price_array[_hsr_filing_test]
132
-
133
- # Calculate diversion ratios
134
- _divr_array = gen_divr_array(
135
- _mktshr_array[:, :2],
136
- _mkt_sample_spec_here.recapture_rate or 0.8,
137
- _recapture_spec,
138
- _aggregate_purchase_prob,
105
+ hsr_filing_test_type: SSZConstant = field(
106
+ kw_only=True,
107
+ default=SSZConstant.ONE,
108
+ validator=validators.instance_of(SSZConstant),
139
109
  )
110
+ """Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
111
+
112
+ data: MarketDataSample = field(default=None)
113
+
114
+ enf_counts: UPPTestsCounts = field(default=None)
115
+
116
+ def __gen_market_sample(
117
+ self,
118
+ /,
119
+ *,
120
+ sample_size: int,
121
+ seed_seq_list: Sequence[SeedSequence] | None,
122
+ nthreads: int,
123
+ ) -> MarketDataSample:
124
+ """
125
+ Generate share, diversion ratio, price, and margin data for MarketSpec.
126
+
127
+ see :attr:`SamplingFunctionKWArgs` for description of keyword parameters
128
+
129
+ Returns
130
+ -------
131
+ Merging firms' shares, margins, etc. for each hypothetical merger
132
+ in the sample
133
+
134
+ """
135
+
136
+ _recapture_form = self.share_spec.recapture_form
137
+ _recapture_ratio = self.share_spec.recapture_ratio
138
+ _dist_type_mktshr = self.share_spec.dist_type
139
+ _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
140
+ _hsr_filing_test_type = self.hsr_filing_test_type
141
+
142
+ (
143
+ _mktshr_rng_seed_seq,
144
+ _pcm_rng_seed_seq,
145
+ _fcount_rng_seed_seq,
146
+ _pr_rng_seed_seq,
147
+ ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
148
+
149
+ _shr_sample_size = 1.0 * sample_size
150
+ # Scale up sample size to offset discards based on specified criteria
151
+ _shr_sample_size *= _hsr_filing_test_type
152
+ if _dist_firm2_pcm == FM2Constraint.MNL:
153
+ _shr_sample_size *= SSZConstant.MNL_DEP
154
+ _shr_sample_size = int(_shr_sample_size)
155
+
156
+ # Generate share data
157
+ _mktshr_data = gen_share_data(
158
+ _shr_sample_size,
159
+ self.share_spec,
160
+ _fcount_rng_seed_seq,
161
+ _mktshr_rng_seed_seq,
162
+ nthreads,
163
+ )
140
164
 
141
- # Generate margin data
142
- _pcm_data = _gen_pcm_data(
143
- _mktshr_array[:, :2],
144
- _mkt_sample_spec_here,
145
- _price_array,
146
- _aggregate_purchase_prob,
147
- _pcm_rng_seed_seq,
148
- nthreads,
149
- )
150
- _pcm_array, _mnl_test_rows = (
151
- getattr(_pcm_data, _f) for _f in ("pcm_array", "mnl_test_array")
152
- )
165
+ _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
166
+ getattr(_mktshr_data, _f)
167
+ for _f in (
168
+ "mktshr_array",
169
+ "fcounts",
170
+ "aggregate_purchase_prob",
171
+ "nth_firm_share",
172
+ )
173
+ )
153
174
 
154
- _s_size = _mkt_sample_spec.sample_size # originally-specified sample size
155
- if _dist_firm2_pcm == FM2Constants.MNL:
156
- _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
157
- _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
158
- _price_array = _price_array[_mnl_test_rows][:_s_size]
159
- _fcounts = _fcounts[_mnl_test_rows][:_s_size]
160
- _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][:_s_size]
161
- _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
162
- _divr_array = _divr_array[_mnl_test_rows][:_s_size]
175
+ # Generate merging-firm price and PCM data
176
+ _margin_data, _price_data = gen_margin_price_data(
177
+ _mktshr_array[:, :2],
178
+ _nth_firm_share,
179
+ _aggregate_purchase_prob,
180
+ self.pcm_spec,
181
+ self.price_spec,
182
+ self.hsr_filing_test_type,
183
+ _pcm_rng_seed_seq,
184
+ _pr_rng_seed_seq,
185
+ nthreads,
186
+ )
163
187
 
164
- del _mnl_test_rows, _s_size
188
+ _price_array, _hsr_filing_test = (
189
+ getattr(_price_data, _f) for _f in ("price_array", "hsr_filing_test")
190
+ )
165
191
 
166
- _frmshr_array = _mktshr_array[:, :2]
167
- _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[:, None]
192
+ _pcm_array, _mnl_test_rows = (
193
+ getattr(_margin_data, _f) for _f in ("pcm_array", "mnl_test_array")
194
+ )
168
195
 
169
- _hhi_post = (
170
- _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
171
- )
196
+ _mnl_test_rows = _mnl_test_rows * _hsr_filing_test
197
+ _s_size = sample_size # originally-specified sample size
198
+ if _dist_firm2_pcm == FM2Constraint.MNL:
199
+ _mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
200
+ _pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
201
+ _price_array = _price_array[_mnl_test_rows][:_s_size]
202
+ _fcounts = _fcounts[_mnl_test_rows][:_s_size]
203
+ _aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
204
+ :_s_size
205
+ ]
206
+ _nth_firm_share = _nth_firm_share[_mnl_test_rows][:_s_size]
207
+
208
+ # Calculate diversion ratios
209
+ _divr_array = gen_divr_array(
210
+ _recapture_form,
211
+ _recapture_ratio,
212
+ _mktshr_array[:, :2],
213
+ _aggregate_purchase_prob,
214
+ )
172
215
 
173
- return MarketDataSample(
174
- _frmshr_array,
175
- _pcm_array,
176
- _price_array,
177
- _fcounts,
178
- _aggregate_purchase_prob,
179
- _nth_firm_share,
180
- _divr_array,
181
- _hhi_post,
182
- _hhi_delta,
183
- )
216
+ del _mnl_test_rows, _s_size
184
217
 
218
+ _frmshr_array = _mktshr_array[:, :2]
219
+ _hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
220
+ :, None
221
+ ]
185
222
 
186
- def parse_seed_seq_list(
187
- _sseq_list: list[SeedSequence] | None,
188
- _dist_type_mktshr: SHRConstants,
189
- _pr_sym_spec: PRIConstants,
190
- /,
191
- ) -> tuple[SeedSequence, SeedSequence, SeedSequence | None, SeedSequence | None]:
192
- """Initialize RNG seed sequences to ensure independence of distinct random streams."""
193
- _fcount_rng_seed_seq: SeedSequence | None = None
194
- _pr_rng_seed_seq: SeedSequence | None = None
195
-
196
- if _pr_sym_spec == PRIConstants.ZERO:
197
- _pr_rng_seed_seq = _sseq_list.pop() if _sseq_list else SeedSequence(pool_size=8)
198
-
199
- if _dist_type_mktshr == SHRConstants.UNI:
200
- _fcount_rng_seed_seq = None
201
- _seed_count = 2
202
- _mktshr_rng_seed_seq, _pcm_rng_seed_seq = (
203
- _sseq_list[:_seed_count]
204
- if _sseq_list
205
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
223
+ _hhi_post = (
224
+ _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
206
225
  )
207
- else:
208
- _seed_count = 3
209
- (_mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq) = (
210
- _sseq_list[:_seed_count]
211
- if _sseq_list
212
- else (SeedSequence(pool_size=8) for _ in range(_seed_count))
226
+
227
+ return MarketDataSample(
228
+ _frmshr_array,
229
+ _pcm_array,
230
+ _price_array,
231
+ _fcounts,
232
+ _aggregate_purchase_prob,
233
+ _nth_firm_share,
234
+ _divr_array,
235
+ _hhi_post,
236
+ _hhi_delta,
213
237
  )
214
238
 
215
- return (
216
- _mktshr_rng_seed_seq,
217
- _pcm_rng_seed_seq,
218
- _fcount_rng_seed_seq,
219
- _pr_rng_seed_seq,
220
- )
239
+ def generate_sample(
240
+ self,
241
+ /,
242
+ *,
243
+ sample_size: int = 10**6,
244
+ seed_seq_list: Sequence[SeedSequence] | None = None,
245
+ nthreads: int = 16,
246
+ save_data_to_file: SaveData = False,
247
+ saved_array_name_suffix: str = "",
248
+ ) -> None:
249
+ """Populate :attr:`data` with generated data
221
250
 
251
+ see :attr:`SamplingFunctionKWArgs` for description of keyword parameters
222
252
 
223
- def gen_divr_array(
224
- _frmshr_array: NDArray[np.floating[TF]],
225
- _r_bar: float,
226
- _recapture_spec: RECConstants = RECConstants.INOUT,
227
- _aggregate_purchase_prob: NDArray[np.floating[TF]] = EMPTY_ARRAY_DEFAULT,
228
- /,
229
- ) -> NDArray[np.float64]:
230
- """
231
- Given merging-firm shares and related parameters, return diversion ratios.
253
+ Returns
254
+ -------
255
+ None
256
+
257
+ """
258
+
259
+ self.data = self.__gen_market_sample(
260
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
261
+ )
232
262
 
233
- If recapture is specified as "Outside-in" (RECConstants.OUTIN), then the
234
- choice-probability for the outside good must be supplied.
263
+ _invalid_array_names = (
264
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
265
+ if self.share_spec.dist_type == "Uniform"
266
+ else ()
267
+ )
268
+
269
+ save_data_to_hdf5(
270
+ self.data,
271
+ saved_array_name_suffix=saved_array_name_suffix,
272
+ excluded_attrs=_invalid_array_names,
273
+ save_data_to_file=save_data_to_file,
274
+ )
235
275
 
236
- Parameters
237
- ----------
238
- _frmshr_array
239
- Merging-firm shares.
276
+ def __sim_enf_cnts(
277
+ self,
278
+ _upp_test_parms: gbl.HMGThresholds,
279
+ _sim_test_regime: UPPTestRegime,
280
+ /,
281
+ *,
282
+ sample_size: int = 10**6,
283
+ seed_seq_list: Sequence[SeedSequence] | None = None,
284
+ nthreads: int = 16,
285
+ save_data_to_file: SaveData = False,
286
+ saved_array_name_suffix: str = "",
287
+ ) -> UPPTestsCounts:
288
+ """Generate market data and estimate UPP test counts on same.
240
289
 
241
- _r_bar
242
- If recapture is proportional or inside-out, the recapture rate
243
- for the firm with the smaller share.
290
+ Parameters
291
+ ----------
244
292
 
245
- _aggregate_purchase_prob
246
- 1 minus probability that the outside good is chosen; converts
247
- market shares to choice probabilities by multiplication.
293
+ _upp_test_parms
294
+ Guidelines thresholds for testing UPP and related statistics
248
295
 
249
- _recapture_spec
250
- Enum specifying Fixed (proportional), Inside-out, or Outside-in
296
+ _sim_test_regime
297
+ Configuration to use for testing; UPPTestRegime object
298
+ specifying whether investigation results in enforcement, clearance,
299
+ or both; and aggregation methods used for GUPPI and diversion ratio
300
+ measures
251
301
 
252
- Returns
253
- -------
254
- Merging-firm diversion ratios for mergers in the sample.
302
+ sample_size
303
+ Number of draws to generate
255
304
 
256
- """
305
+ seed_seq_list
306
+ List of seed sequences, to assure independent samples in each thread
307
+
308
+ nthreads
309
+ Number of parallel processes to use
310
+
311
+ save_data_to_file
312
+ Whether to save data to an HDF5 file, and where to save it
313
+
314
+ saved_array_name_suffix
315
+ Suffix to add to the array names in the HDF5 file
257
316
 
258
- _divr_array: NDArray[np.float64]
259
- if _recapture_spec == RECConstants.FIXED:
260
- _divr_array = _r_bar * _frmshr_array[:, ::-1] / (1 - _frmshr_array)
261
-
262
- else:
263
- _purchprob_array = _aggregate_purchase_prob * _frmshr_array
264
- _divr_array = _purchprob_array[:, ::-1] / (1 - _purchprob_array)
265
-
266
- _divr_assert_test = (
267
- (np.round(np.einsum("ij->i", _frmshr_array), 15) == 1)
268
- | (np.argmin(_frmshr_array, axis=1) == np.argmax(_divr_array, axis=1))
269
- )[:, None]
270
- if not all(_divr_assert_test):
271
- raise ValueError(
272
- "{} {} {} {}".format(
273
- "Data construction fails tests:",
274
- "the index of min(s_1, s_2) must equal",
275
- "the index of max(d_12, d_21), for all draws.",
276
- "unless frmshr_array sums to 1.00.",
317
+ Returns
318
+ -------
319
+ UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
320
+
321
+ """
322
+
323
+ _market_data_sample = self.__gen_market_sample(
324
+ sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
325
+ )
326
+
327
+ _invalid_array_names = (
328
+ ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
329
+ if self.share_spec.dist_type == "Uniform"
330
+ else ()
331
+ )
332
+
333
+ save_data_to_hdf5(
334
+ _market_data_sample,
335
+ saved_array_name_suffix=saved_array_name_suffix,
336
+ excluded_attrs=_invalid_array_names,
337
+ save_data_to_file=save_data_to_file,
338
+ )
339
+
340
+ _upp_test_arrays = compute_upp_test_counts(
341
+ _market_data_sample, _upp_test_parms, _sim_test_regime
342
+ )
343
+
344
+ save_data_to_hdf5(
345
+ _upp_test_arrays,
346
+ saved_array_name_suffix=saved_array_name_suffix,
347
+ save_data_to_file=save_data_to_file,
348
+ )
349
+
350
+ return _upp_test_arrays
351
+
352
+ def __sim_enf_cnts_ll(
353
+ self,
354
+ _enf_parm_vec: gbl.HMGThresholds,
355
+ _sim_test_regime: UPPTestRegime,
356
+ /,
357
+ *,
358
+ sample_size: int = 10**6,
359
+ seed_seq_list: Sequence[SeedSequence] | None = None,
360
+ nthreads: int = 16,
361
+ save_data_to_file: SaveData = False,
362
+ saved_array_name_suffix: str = "",
363
+ ) -> UPPTestsCounts:
364
+ """A function to parallelize data-generation and testing
365
+
366
+ The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
367
+ the parent function, `sim_enf_cnts()`, except that, if provided,
368
+ `seed_seq_list` is used to spawn a seed sequence for each thread,
369
+ to assure independent samples in each thread, and `nthreads` defines
370
+ the number of parallel processes used. The number of draws in
371
+ each thread may be tuned, by trial and error, to the amount of
372
+ memory (RAM) available.
373
+
374
+ Parameters
375
+ ----------
376
+
377
+ _enf_parm_vec
378
+ Guidelines thresholds to test against
379
+
380
+ _sim_test_regime
381
+ Configuration to use for testing
382
+
383
+ sample_size
384
+ Number of draws to simulate
385
+
386
+ seed_seq_list
387
+ List of seed sequences, to assure independent samples in each thread
388
+
389
+ nthreads
390
+ Number of parallel processes to use
391
+
392
+ save_data_to_file
393
+ Whether to save data to an HDF5 file, and where to save it
394
+
395
+ saved_array_name_suffix
396
+ Suffix to add to the array names in the HDF5 file
397
+
398
+ Returns
399
+ -------
400
+ Arrays of enforcement counts or clearance counts by firm count,
401
+ ΔHHI and concentration zone
402
+
403
+ """
404
+ _sample_sz = sample_size
405
+ _subsample_sz = 10**6
406
+ _iter_count = (
407
+ int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
408
+ )
409
+ _thread_count = cpu_count()
410
+
411
+ if (
412
+ self.share_spec.recapture_form != RECForm.OUTIN
413
+ and self.share_spec.recapture_ratio != _enf_parm_vec.rec
414
+ ):
415
+ raise ValueError(
416
+ "{} {} {}".format(
417
+ f"Recapture ratio from market sample spec, {self.share_spec.recapture_ratio}",
418
+ f"must match the value, {_enf_parm_vec.rec}",
419
+ "the guidelines thresholds vector.",
420
+ )
277
421
  )
422
+
423
+ _rng_seed_seq_list = [None] * _iter_count
424
+ if seed_seq_list:
425
+ _rng_seed_seq_list = list(
426
+ zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
427
+ )
428
+
429
+ _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
430
+ "sample_size": _subsample_sz,
431
+ "save_data_to_file": save_data_to_file,
432
+ "nthreads": nthreads,
433
+ })
434
+
435
+ _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
436
+ delayed(self.__sim_enf_cnts)(
437
+ _enf_parm_vec,
438
+ _sim_test_regime,
439
+ **_sim_enf_cnts_kwargs,
440
+ saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}", # pyright: ignore
441
+ seed_seq_list=_rng_seed_seq_list_ch, # pyright: ignore
442
+ )
443
+ for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
278
444
  )
279
445
 
280
- return _divr_array
446
+ _res_list_stacks = UPPTestsCounts(*[
447
+ np.stack([getattr(_j, _k) for _j in _res_list])
448
+ for _k in ("by_firm_count", "by_delta", "by_conczone")
449
+ ])
450
+ upp_test_results = UPPTestsCounts(*[
451
+ np.column_stack((
452
+ (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
453
+ np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
454
+ ))
455
+ for _g, _h in zip(
456
+ _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
457
+ )
458
+ ])
459
+ del _res_list, _res_list_stacks
460
+
461
+ return upp_test_results
462
+
463
+ def estimate_enf_counts(
464
+ self,
465
+ _enf_parm_vec: HMGThresholds,
466
+ _upp_test_regime: UPPTestRegime,
467
+ /,
468
+ *,
469
+ sample_size: int = 10**6,
470
+ seed_seq_list: Sequence[SeedSequence] | None = None,
471
+ nthreads: int = 16,
472
+ save_data_to_file: SaveData = False,
473
+ saved_array_name_suffix: str = "",
474
+ ) -> None:
475
+ """Populate :attr:`enf_counts` with estimated UPP test counts.
476
+
477
+ Parameters
478
+ ----------
479
+ _enf_parm_vec
480
+ Threshold values for various Guidelines criteria
481
+
482
+ _upp_test_regime
483
+ Specifies whether to analyze enforcement, clearance, or both
484
+ and the GUPPI and diversion ratio aggregators employed, with
485
+ default being to analyze enforcement based on the maximum
486
+ merging-firm GUPPI and maximum diversion ratio between the
487
+ merging firms
488
+
489
+ sample_size
490
+ Number of draws to simulate
491
+
492
+ seed_seq_list
493
+ List of seed sequences, to assure independent samples in each thread
494
+
495
+ nthreads
496
+ Number of parallel processes to use
497
+
498
+ save_data_to_file
499
+ Whether to save data to an HDF5 file, and where to save it
500
+
501
+ saved_array_name_suffix
502
+ Suffix to add to the array names in the HDF5 file
503
+
504
+ Returns
505
+ -------
506
+ None
507
+
508
+ """
509
+
510
+ if self.data is None:
511
+ self.enf_counts = self.__sim_enf_cnts_ll(
512
+ _enf_parm_vec,
513
+ _upp_test_regime,
514
+ sample_size=sample_size,
515
+ seed_seq_list=seed_seq_list,
516
+ nthreads=nthreads,
517
+ save_data_to_file=save_data_to_file,
518
+ saved_array_name_suffix=saved_array_name_suffix,
519
+ )
520
+ else:
521
+ self.enf_counts = compute_upp_test_counts(
522
+ self.data, _enf_parm_vec, _upp_test_regime
523
+ )
524
+ if save_data_to_file:
525
+ save_data_to_hdf5(
526
+ self.enf_counts,
527
+ save_data_to_file=save_data_to_file,
528
+ saved_array_name_suffix=saved_array_name_suffix,
529
+ )