mergeron 2024.739099.2__py3-none-any.whl → 2024.739105.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

@@ -21,7 +21,6 @@ from scipy.stats import beta, norm # type: ignore
21
21
  from .. import ( # noqa: TID252
22
22
  _PKG_NAME,
23
23
  DATA_DIR,
24
- TI,
25
24
  VERSION,
26
25
  ArrayBIGINT,
27
26
  ArrayDouble,
@@ -34,7 +33,7 @@ __version__ = VERSION
34
33
 
35
34
 
36
35
  @enum.unique
37
- class INDGRPConstants(enum.StrEnum):
36
+ class IndustryGroup(enum.StrEnum):
38
37
  ALL = "All Markets"
39
38
  GRO = "Grocery Markets"
40
39
  OIL = "Oil Markets"
@@ -48,7 +47,7 @@ class INDGRPConstants(enum.StrEnum):
48
47
 
49
48
 
50
49
  @enum.unique
51
- class EVIDENConstants(enum.StrEnum):
50
+ class OtherEvidence(enum.StrEnum):
52
51
  HD = "Hot Documents Identified"
53
52
  CC = "Strong Customer Complaints"
54
53
  NE = "No Entry Evidence"
@@ -165,32 +164,6 @@ hhi_delta_ranger, hhi_zone_post_ranger = (
165
164
  for _f in (HHI_DELTA_KNOTS, HHI_POST_ZONE_KNOTS)
166
165
  )
167
166
 
168
- HMG_PRESUMPTION_ZONE_DICT = {
169
- HHI_POST_ZONE_KNOTS[0]: {
170
- HHI_DELTA_KNOTS[0]: (0, 0, 0),
171
- HHI_DELTA_KNOTS[1]: (0, 0, 0),
172
- HHI_DELTA_KNOTS[2]: (0, 0, 0),
173
- },
174
- HHI_POST_ZONE_KNOTS[1]: {
175
- HHI_DELTA_KNOTS[0]: (0, 1, 1),
176
- HHI_DELTA_KNOTS[1]: (1, 1, 2),
177
- HHI_DELTA_KNOTS[2]: (1, 1, 2),
178
- },
179
- HHI_POST_ZONE_KNOTS[2]: {
180
- HHI_DELTA_KNOTS[0]: (0, 2, 1),
181
- HHI_DELTA_KNOTS[1]: (1, 2, 3),
182
- HHI_DELTA_KNOTS[2]: (2, 2, 4),
183
- },
184
- }
185
-
186
- ZONE_VALS = np.unique(
187
- np.vstack([
188
- tuple(HMG_PRESUMPTION_ZONE_DICT[_k].values())
189
- for _k in HMG_PRESUMPTION_ZONE_DICT
190
- ]),
191
- axis=0,
192
- )
193
-
194
167
  ZONE_STRINGS = {
195
168
  0: R"Green Zone (Safeharbor)",
196
169
  1: R"Yellow Zone",
@@ -234,12 +207,37 @@ ZONE_DETAIL_STRINGS_DELTA_LATEX = {
234
207
  4: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[2]} pts.}}",
235
208
  }
236
209
 
210
+ HMG_PRESUMPTION_ZONE_MAP = {
211
+ HHI_POST_ZONE_KNOTS[0]: {
212
+ HHI_DELTA_KNOTS[0]: (0, 0, 0),
213
+ HHI_DELTA_KNOTS[1]: (0, 0, 0),
214
+ HHI_DELTA_KNOTS[2]: (0, 0, 0),
215
+ },
216
+ HHI_POST_ZONE_KNOTS[1]: {
217
+ HHI_DELTA_KNOTS[0]: (0, 1, 1),
218
+ HHI_DELTA_KNOTS[1]: (1, 1, 2),
219
+ HHI_DELTA_KNOTS[2]: (1, 1, 2),
220
+ },
221
+ HHI_POST_ZONE_KNOTS[2]: {
222
+ HHI_DELTA_KNOTS[0]: (0, 2, 1),
223
+ HHI_DELTA_KNOTS[1]: (1, 2, 3),
224
+ HHI_DELTA_KNOTS[2]: (2, 2, 4),
225
+ },
226
+ }
227
+
228
+ ZONE_VALS = np.unique(
229
+ np.vstack([
230
+ tuple(HMG_PRESUMPTION_ZONE_MAP[_k].values()) for _k in HMG_PRESUMPTION_ZONE_MAP
231
+ ]),
232
+ axis=0,
233
+ )
234
+
237
235
 
238
236
  def enf_stats_output(
239
237
  _data_array_dict: fid.INVData,
240
238
  _data_period: str = "1996-2003",
241
- _table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
242
- _table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
239
+ _table_ind_group: IndustryGroup = IndustryGroup.ALL,
240
+ _table_evid_cond: OtherEvidence = OtherEvidence.UR,
243
241
  _stats_group: StatsGrpSelector = StatsGrpSelector.FC,
244
242
  _enf_spec: INVResolution = INVResolution.CLRN,
245
243
  /,
@@ -295,8 +293,8 @@ def enf_stats_output(
295
293
  def enf_stats_listing_by_group(
296
294
  _invdata_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
297
295
  _study_period: str,
298
- _table_ind_grp: INDGRPConstants,
299
- _table_evid_cond: EVIDENConstants,
296
+ _table_ind_grp: IndustryGroup,
297
+ _table_evid_cond: OtherEvidence,
300
298
  _stats_group: StatsGrpSelector,
301
299
  _enf_spec: INVResolution,
302
300
  /,
@@ -331,8 +329,8 @@ def enf_stats_listing_by_group(
331
329
  def enf_cnts_listing_byfirmcount(
332
330
  _data_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
333
331
  _data_period: str = "1996-2003",
334
- _table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
335
- _table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
332
+ _table_ind_group: IndustryGroup = IndustryGroup.ALL,
333
+ _table_evid_cond: OtherEvidence = OtherEvidence.UR,
336
334
  _enf_spec: INVResolution = INVResolution.CLRN,
337
335
  /,
338
336
  ) -> ArrayBIGINT:
@@ -367,8 +365,8 @@ def enf_cnts_listing_byfirmcount(
367
365
  def enf_cnts_listing_byhhianddelta(
368
366
  _data_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
369
367
  _data_period: str = "1996-2003",
370
- _table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
371
- _table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
368
+ _table_ind_group: IndustryGroup = IndustryGroup.ALL,
369
+ _table_evid_cond: OtherEvidence = OtherEvidence.UR,
372
370
  _enf_spec: INVResolution = INVResolution.CLRN,
373
371
  /,
374
372
  ) -> ArrayBIGINT:
@@ -402,8 +400,8 @@ def enf_cnts_listing_byhhianddelta(
402
400
 
403
401
  def table_no_lku(
404
402
  _data_array_dict_sub: Mapping[str, fid.INVTableData],
405
- _table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
406
- _table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
403
+ _table_ind_group: IndustryGroup = IndustryGroup.ALL,
404
+ _table_evid_cond: OtherEvidence = OtherEvidence.UR,
407
405
  /,
408
406
  ) -> str:
409
407
  if _table_ind_group not in (
@@ -475,7 +473,7 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
475
473
  else (_hhi_delta_ranged == _hhi_zone_delta_lim)
476
474
  )
477
475
 
478
- _zone_val = HMG_PRESUMPTION_ZONE_DICT[_hhi_zone_post_lim][
476
+ _zone_val = HMG_PRESUMPTION_ZONE_MAP[_hhi_zone_post_lim][
479
477
  _hhi_zone_delta_lim
480
478
  ]
481
479
 
@@ -723,8 +721,8 @@ def stats_print_rows(
723
721
 
724
722
 
725
723
  def propn_ci(
726
- _npos: ArrayINT[TI] | int = 4,
727
- _nobs: ArrayINT[TI] | int = 10,
724
+ _npos: ArrayINT | int = 4,
725
+ _nobs: ArrayINT | int = 10,
728
726
  /,
729
727
  *,
730
728
  alpha: float = 0.05,
mergeron/gen/upp_tests.py CHANGED
@@ -11,19 +11,16 @@ from typing import Literal, TypeAlias, TypedDict
11
11
 
12
12
  import numpy as np
13
13
  import tables as ptb # type: ignore
14
- from joblib import Parallel, cpu_count, delayed # type: ignore
15
14
  from numpy.random import SeedSequence
16
15
 
17
- from .. import ( # noqa: TID252
18
- TF,
19
- TI,
16
+ from .. import ( # noqa
20
17
  VERSION,
21
18
  ArrayBIGINT,
22
19
  ArrayBoolean,
23
20
  ArrayDouble,
24
21
  ArrayFloat,
25
22
  ArrayINT,
26
- RECConstants,
23
+ RECForm,
27
24
  UPPAggrSelector,
28
25
  )
29
26
  from ..core import guidelines_boundaries as gbl # noqa: TID252
@@ -32,12 +29,10 @@ from . import (
32
29
  DataclassInstance,
33
30
  INVResolution,
34
31
  MarketDataSample,
35
- MarketSpec,
36
32
  UPPTestRegime,
37
33
  UPPTestsCounts,
38
34
  UPPTestsRaw,
39
35
  )
40
- from . import data_generation as dgl
41
36
  from . import enforcement_stats as esl
42
37
 
43
38
  __version__ = VERSION
@@ -58,169 +53,37 @@ class INVRESCntsArgs(TypedDict, total=False):
58
53
  saved_array_name_suffix: str
59
54
 
60
55
 
61
- def sim_enf_cnts_ll(
62
- _mkt_sample_spec: MarketSpec,
63
- _enf_parm_vec: gbl.HMGThresholds,
64
- _sim_test_regime: UPPTestRegime,
56
+ def enf_cnts(
57
+ _market_data_sample: MarketDataSample,
58
+ _upp_test_parms: gbl.HMGThresholds,
59
+ _upp_test_regime: UPPTestRegime,
65
60
  /,
66
- *,
67
- sample_size: int = 10**6,
68
- seed_seq_list: list[SeedSequence] | None = None,
69
- nthreads: int = 16,
70
- save_data_to_file: SaveData = False,
71
- saved_array_name_suffix: str = "",
72
61
  ) -> UPPTestsCounts:
73
- """A function to parallelize data-generation and testing
74
-
75
- The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
76
- the parent function, `sim_enf_cnts()`, except that, if provided,
77
- `seed_seq_list` is used to spawn a seed sequence for each thread,
78
- to assure independent samples in each thread, and `nthreads` defines
79
- the number of parallel processes used. The number of draws in
80
- each thread may be tuned, by trial and error, to the amount of
81
- memory (RAM) available.
62
+ """Estimate enforcement and clearance counts from market data sample
82
63
 
83
64
  Parameters
84
65
  ----------
66
+ _market_data_sample
67
+ Market data sample
85
68
 
86
- _enf_parm_vec
87
- Guidelines thresholds to test against
88
-
89
- _mkt_sample_spec
90
- Configuration to use for generating sample data to test
91
-
92
- _sim_test_regime
93
- Configuration to use for testing
94
-
95
- saved_array_name_suffix
96
- Suffix to add to the array names in the HDF5 file
97
-
98
- save_data_to_file
99
- Whether to save data to an HDF5 file, and where to save it
100
-
101
- sample_size
102
- Number of draws to simulate
103
-
104
- seed_seq_list
105
- List of seed sequences, to assure independent samples in each thread
69
+ _upp_test_parms
70
+ Threshold values for various Guidelines criteria
106
71
 
107
- nthreads
108
- Number of parallel processes to use
72
+ _upp_test_regime
73
+ Specifies whether to analyze enforcement, clearance, or both
74
+ and the GUPPI and diversion ratio aggregators employed, with
75
+ default being to analyze enforcement based on the maximum
76
+ merging-firm GUPPI and maximum diversion ratio between the
77
+ merging firms
109
78
 
110
79
  Returns
111
80
  -------
112
- Arrays of UPPTestCounts
81
+ UPPTestsCounts
82
+ Enforced and cleared counts
113
83
 
114
84
  """
115
- _sample_sz = sample_size
116
- _subsample_sz = 10**6
117
- _iter_count = int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
118
- _thread_count = cpu_count()
119
-
120
- if (
121
- _mkt_sample_spec.share_spec.recapture_form != RECConstants.OUTIN
122
- and _mkt_sample_spec.share_spec.recapture_rate != _enf_parm_vec.rec
123
- ):
124
- raise ValueError(
125
- "{} {} {}".format(
126
- f"Recapture rate from market sample spec, {_mkt_sample_spec.share_spec.recapture_rate}",
127
- f"must match the value, {_enf_parm_vec.rec}",
128
- "the guidelines thresholds vector.",
129
- )
130
- )
131
-
132
- _rng_seed_seq_list = [None] * _iter_count
133
- if seed_seq_list:
134
- _rng_seed_seq_list = list(
135
- zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
136
- )
137
-
138
- _sim_enf_cnts_kwargs: INVRESCntsArgs = INVRESCntsArgs({
139
- "sample_size": _subsample_sz,
140
- "save_data_to_file": save_data_to_file,
141
- "nthreads": nthreads,
142
- })
143
-
144
- _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
145
- delayed(sim_enf_cnts)(
146
- _mkt_sample_spec,
147
- _enf_parm_vec,
148
- _sim_test_regime,
149
- **_sim_enf_cnts_kwargs,
150
- saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
151
- seed_seq_list=_rng_seed_seq_list_ch,
152
- )
153
- for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
154
- )
155
-
156
- _res_list_stacks = UPPTestsCounts(*[
157
- np.stack([getattr(_j, _k) for _j in _res_list])
158
- for _k in ("by_firm_count", "by_delta", "by_conczone")
159
- ])
160
- upp_test_results = UPPTestsCounts(*[
161
- np.column_stack((
162
- (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
163
- np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
164
- ))
165
- for _g, _h in zip(
166
- _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
167
- )
168
- ])
169
- del _res_list, _res_list_stacks
170
-
171
- return upp_test_results
172
-
173
-
174
- def sim_enf_cnts(
175
- _mkt_sample_spec: MarketSpec,
176
- _upp_test_parms: gbl.HMGThresholds,
177
- _sim_test_regime: UPPTestRegime,
178
- /,
179
- *,
180
- sample_size: int = 10**6,
181
- seed_seq_list: list[SeedSequence] | None = None,
182
- nthreads: int = 16,
183
- save_data_to_file: SaveData = False,
184
- saved_array_name_suffix: str = "",
185
- ) -> UPPTestsCounts:
186
- # Generate market data
187
- _market_data_sample = dgl.gen_market_sample(
188
- _mkt_sample_spec,
189
- sample_size=sample_size,
190
- seed_seq_list=seed_seq_list,
191
- nthreads=nthreads,
192
- )
193
-
194
- _invalid_array_names = (
195
- ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
196
- if _mkt_sample_spec.share_spec.dist_type == "Uniform"
197
- else ()
198
- )
199
-
200
- save_data_to_hdf5(
201
- _market_data_sample,
202
- saved_array_name_suffix=saved_array_name_suffix,
203
- excluded_attrs=_invalid_array_names,
204
- save_data_to_file=save_data_to_file,
205
- )
206
-
207
- _upp_test_arrays = enf_cnts(_market_data_sample, _upp_test_parms, _sim_test_regime)
208
-
209
- save_data_to_hdf5(
210
- _upp_test_arrays,
211
- saved_array_name_suffix=saved_array_name_suffix,
212
- save_data_to_file=save_data_to_file,
213
- )
214
-
215
- return _upp_test_arrays
216
-
217
85
 
218
- def enf_cnts(
219
- _market_data_sample: MarketDataSample,
220
- _upp_test_parms: gbl.HMGThresholds,
221
- _upp_test_regime: UPPTestRegime,
222
- /,
223
- ) -> UPPTestsCounts:
86
+ _enf_cnts_sim_array = -1 * np.ones((6, 2), np.int64)
224
87
  _upp_test_arrays = gen_upp_test_arrays(
225
88
  _market_data_sample, _upp_test_parms, _upp_test_regime
226
89
  )
@@ -231,23 +94,23 @@ def enf_cnts(
231
94
 
232
95
  _stats_rowlen = 6
233
96
  # Clearance/enforcement counts --- by firm count
234
- _firm_counts_weights = np.unique(_fcounts)
235
- if _firm_counts_weights is not None and np.all(_firm_counts_weights >= 0):
236
- _max_firm_count = len(_firm_counts_weights)
97
+ _firmcounts_list = np.unique(_fcounts)
98
+ if _firmcounts_list is not None and np.all(_firmcounts_list >= 0):
99
+ _max_firmcount = max(_firmcounts_list)
237
100
 
238
101
  _enf_cnts_sim_byfirmcount_array = -1 * np.ones(_stats_rowlen, np.int64)
239
- for _firm_cnt in 2 + np.arange(_max_firm_count):
240
- _firm_count_test = _fcounts == _firm_cnt
102
+ for _firmcount in np.arange(2, _max_firmcount + 1):
103
+ _firmcount_test = _fcounts == _firmcount
241
104
 
242
105
  _enf_cnts_sim_byfirmcount_array = np.vstack((
243
106
  _enf_cnts_sim_byfirmcount_array,
244
107
  np.array([
245
- _firm_cnt,
246
- np.einsum("ij->", 1 * _firm_count_test),
108
+ _firmcount,
109
+ np.einsum("ij->", 1 * _firmcount_test),
247
110
  *[
248
111
  np.einsum(
249
112
  "ij->",
250
- 1 * (_firm_count_test & getattr(_upp_test_arrays, _f)),
113
+ 1 * (_firmcount_test & getattr(_upp_test_arrays, _f)),
251
114
  )
252
115
  for _f in _upp_test_arrays.__dataclass_fields__
253
116
  ],
@@ -353,12 +216,10 @@ def gen_upp_test_arrays(
353
216
  getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr")
354
217
  )
355
218
 
356
- _enf_resolution, _guppi_aggregator, _divr_aggregator = (
357
- getattr(_sim_test_regime, _f)
358
- for _f in ("resolution", "guppi_aggregator", "divr_aggregator")
219
+ _guppi_array, _ipr_array, _cmcr_array = (
220
+ np.empty_like(_market_data.price_array) for _ in range(3)
359
221
  )
360
222
 
361
- _guppi_array = np.empty_like(_market_data.divr_array)
362
223
  np.einsum(
363
224
  "ij,ij,ij->ij",
364
225
  _market_data.divr_array,
@@ -367,18 +228,53 @@ def gen_upp_test_arrays(
367
228
  out=_guppi_array,
368
229
  )
369
230
 
370
- _ipr_array = np.empty_like(_market_data.divr_array)
371
- np.divide(_guppi_array, (1 - _market_data.divr_array[:, ::-1]), out=_ipr_array)
231
+ np.divide(
232
+ np.einsum("ij,ij->ij", _market_data.pcm_array, _market_data.divr_array),
233
+ 1 - _market_data.divr_array,
234
+ out=_ipr_array,
235
+ )
372
236
 
373
- _cmcr_array = np.empty_like(_market_data.divr_array)
374
237
  np.divide(_ipr_array, 1 - _market_data.pcm_array, out=_cmcr_array)
375
238
 
376
- _test_measure_seq = (_market_data.divr_array, _guppi_array, _cmcr_array, _ipr_array)
239
+ (_divr_test_vector,) = _compute_test_value_seq(
240
+ (_market_data.divr_array,),
241
+ _market_data.frmshr_array,
242
+ _sim_test_regime.divr_aggregator,
243
+ )
244
+
245
+ (_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_value_seq(
246
+ (_guppi_array, _cmcr_array, _ipr_array),
247
+ _market_data.frmshr_array,
248
+ _sim_test_regime.guppi_aggregator,
249
+ )
250
+ del _cmcr_array, _ipr_array, _guppi_array
377
251
 
252
+ if _sim_test_regime.resolution == INVResolution.ENFT:
253
+ _upp_test_arrays = UPPTestsRaw(
254
+ _guppi_test_vector >= _g_bar,
255
+ (_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
256
+ _cmcr_test_vector >= _cmcr_bar,
257
+ _ipr_test_vector >= _ipr_bar,
258
+ )
259
+ else:
260
+ _upp_test_arrays = UPPTestsRaw(
261
+ _guppi_test_vector < _g_bar,
262
+ (_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
263
+ _cmcr_test_vector < _cmcr_bar,
264
+ _ipr_test_vector < _ipr_bar,
265
+ )
266
+
267
+ return _upp_test_arrays
268
+
269
+
270
+ def _compute_test_value_seq(
271
+ _test_measure_seq: tuple[ArrayDouble, ...],
272
+ _wt_array: ArrayDouble,
273
+ _aggregator: UPPAggrSelector,
274
+ ) -> tuple[ArrayDouble, ...]:
378
275
  _wt_array = (
379
- _market_data.frmshr_array
380
- / np.einsum("ij->i", _market_data.frmshr_array)[:, None]
381
- if _guppi_aggregator
276
+ _wt_array / np.einsum("ij->i", _wt_array)[:, None]
277
+ if _aggregator
382
278
  in (
383
279
  UPPAggrSelector.CPA,
384
280
  UPPAggrSelector.CPD,
@@ -388,7 +284,7 @@ def gen_upp_test_arrays(
388
284
  else EMPTY_ARRAY_DEFAULT
389
285
  )
390
286
 
391
- match _guppi_aggregator:
287
+ match _aggregator:
392
288
  case UPPAggrSelector.AVG:
393
289
  _test_value_seq = (
394
290
  1 / 2 * np.einsum("ij->i", _g)[:, None] for _g in _test_measure_seq
@@ -428,30 +324,7 @@ def gen_upp_test_arrays(
428
324
  )
429
325
  case _:
430
326
  raise ValueError("GUPPI/diversion ratio aggregation method is invalid.")
431
- del _cmcr_array, _guppi_array
432
- (_divr_test_vector, _guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = (
433
- _test_value_seq
434
- )
435
-
436
- if _divr_aggregator == UPPAggrSelector.MAX:
437
- _divr_test_vector = _market_data.divr_array.max(axis=1, keepdims=True)
438
-
439
- if _enf_resolution == INVResolution.ENFT:
440
- _upp_test_arrays = UPPTestsRaw(
441
- _guppi_test_vector >= _g_bar,
442
- (_guppi_test_vector >= _g_bar) | (_divr_test_vector >= _divr_bar),
443
- _cmcr_test_vector >= _cmcr_bar,
444
- _ipr_test_vector >= _ipr_bar,
445
- )
446
- else:
447
- _upp_test_arrays = UPPTestsRaw(
448
- _guppi_test_vector < _g_bar,
449
- (_guppi_test_vector < _g_bar) & (_divr_test_vector < _divr_bar),
450
- _cmcr_test_vector < _cmcr_bar,
451
- _ipr_test_vector < _ipr_bar,
452
- )
453
-
454
- return _upp_test_arrays
327
+ return tuple(_test_value_seq)
455
328
 
456
329
 
457
330
  def initialize_hd5(
@@ -494,11 +367,7 @@ def save_data_to_hdf5(
494
367
 
495
368
 
496
369
  def save_array_to_hdf5(
497
- _array_obj: ArrayFloat[TF]
498
- | ArrayINT[TI]
499
- | ArrayDouble
500
- | ArrayBIGINT
501
- | ArrayBoolean,
370
+ _array_obj: ArrayFloat | ArrayINT | ArrayDouble | ArrayBIGINT | ArrayBoolean,
502
371
  _array_name: str,
503
372
  _h5_group: ptb.Group,
504
373
  _h5_file: ptb.File,
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.1
2
+ Name: mergeron
3
+ Version: 2024.739105.2
4
+ Summary: Merger Policy Analysis using Python
5
+ License: MIT
6
+ Keywords: merger policy analysis,merger guidelines,merger screening,policy presumptions,concentration standards,upward pricing pressure,GUPPI
7
+ Author: Murthy Kambhampaty
8
+ Author-email: smk@capeconomics.com
9
+ Requires-Python: >=3.12,<4.0
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: End Users/Desktop
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3 :: Only
20
+ Classifier: Programming Language :: Python :: Implementation :: CPython
21
+ Requires-Dist: aenum (>=3.1.15,<4.0.0)
22
+ Requires-Dist: attrs (>=23.2)
23
+ Requires-Dist: bs4 (>=0.0.1)
24
+ Requires-Dist: certifi (>=2023.11.17)
25
+ Requires-Dist: google-re2 (>=1.1)
26
+ Requires-Dist: jinja2 (>=3.1)
27
+ Requires-Dist: joblib (>=1.3)
28
+ Requires-Dist: matplotlib (>=3.8)
29
+ Requires-Dist: mpmath (>=1.3)
30
+ Requires-Dist: msgpack (>=1.0)
31
+ Requires-Dist: msgpack-numpy (>=0.4)
32
+ Requires-Dist: numpy (>=1.26,<2)
33
+ Requires-Dist: scipy (>=1.12)
34
+ Requires-Dist: sympy (>=1.12)
35
+ Requires-Dist: tables (>=3.8)
36
+ Requires-Dist: types-beautifulsoup4 (>=4.11.2)
37
+ Requires-Dist: urllib3 (>=2.2.2,<3.0.0)
38
+ Requires-Dist: xlrd (>=2.0.1,<3.0.0)
39
+ Requires-Dist: xlsxwriter (>=3.1)
40
+ Description-Content-Type: text/x-rst
41
+
42
+ mergeron: Merger Policy Analysis using Python
43
+ =============================================
44
+
45
+ Analyze the sets of mergers conforming to concentration and diversion ratio bounds. Analyze intrinsic enforcement rates, and intrinsic clearance rates, under concentration, diversion ratio, GUPPI, CMCR, and IPR bounds using generated data with specified distributions of market shares, price-cost margins, firm counts, and prices, optionally imposing restrictions implied by statutory filing thresholds and/or Bertrand-Nash oligopoly with MNL demand. Download and analyze merger investigations data published by the U.S. Federal Trade Commission in various reports on extended merger investigations (Second Requests) during 1996 to 2011.
46
+
47
+ Here, enforcement rates derived by treating merger enforcement as exogenous to firm conduct are defined as intrinsic enforcement rates; intrinsic clearance rates are defined similarly. Depending on the merger enforcement regime, or merger control regime, intrinsic enforcement rates may also not be the complement of intrinsic clearance rates, i.e., it is not necessarily true that the intrinsic clearance rate estimate for a given enforcement regime is 1 minus the intrinsic enforcement rate. In contrast, observed enforcement rates reflect the deterrent effects of merger enforcement on firm conduct as well as the effects of merger screening on the level of enforcement; and, by definition, the observed clearance rate is 1 minus the observed enforcement rate.
48
+
49
+ Introduction
50
+ ------------
51
+
52
+ Module :code:`.core.guidelines_boundaries` includes classes for specifying concentration bounds (:code:`..core.guidelines_boundaries.ConcentrationBoundary`) and diversion-ratio bounds (:code:`..core.guidelines_boundaries.DiversionRatioBoundary`), with automatic generation of boundary (as an array of share-pairs) and area. This module also includes a function for generating plots of concentration and diversion-ratio boundaries, and functions for mapping GUPPI standards to concentration (ΔHHI) standards, and vice-versa.
53
+
54
+ Module :code:`.gen.data_generation` includes the class :code:`.gen.data_generation.MarketSample`, which provides for a rich specification of shares and diversion ratios (:code:`.gen.data_generation.MarketSample.share_spec`), margins (:code:`.gen.data_generation.MarketSample.pcm_spec`), prices (:code:`.gen.data_generation.MarketSample.price_spec`), and HSR filing requirements (:code:`.gen.data_generation.MarketSample.hsr_filing_test_type`), and with methods for (i) generating sample data (:code:`.gen.data_generation.MarketSample.generate_sample`), and (ii) estimating enforcement or clearance rates under specified enforcement regimes given a method of aggregating diversion ratio or GUPPI estimates for the firms in a merger (:code:`.gen.data_generation.MarketSample.estimate_enf_counts`). While the latter populate the properties, :code:`.gen.data_generation.MarketSample.data`
55
+ and :code:`.gen.data_generation.MarketSample.enf_counts`, respectively, the underlying methods for generating standalone :code:`MarketDataSample` and :code:`UPPTestsCounts` objects are included in the class definition, with helper functions defined in the modules, :code:`.gen.data_generation_functions` and :code:`.gen.upp_tests`. Notably, market shares are generated for a sample of markets with firm-count distributed as specified in :code:`.gen.data_generation.MarketSample.ShareSpec.firm_count_weights`, with defaults as discussed below (also see, :code:`.gen.ShareSpec.firm_count_weights`).
56
+
57
+ By default, merging-firm shares are drawn with uniform distribution over the space :math:`s_1 + s_2 \leqslant 1` for an unspecified number of firms. Alternatively, shares may be drawn from the Dirichlet distribution, with specified shape parameters (see :code:`.gen.data_generation.MarketSample.ShareSpec`, and, specifically, :code:`.gen.SHRDistribution`). When drawing shares from the Dirichlet distribution, the user passes, using :code:`.gen.data_generation.MarketSample.ShareSpec.firm_count_weights`, a vector of weights specifying the frequency distribution over sequential firm counts, e.g., :code:`[133, 184, 134, 52, 32, 10, 12, 4, 3]` to specify shares drawn from Dirichlet distributions with 2 to 10 pre-merger firms distributed as in data for FTC merger investigations during 1996--2003 (See, for example, Table 4.1 of `FTC, Horizontal Merger Investigations Data, Fiscal Years 1996--2003 (Revised: August 31, 2004) <https://www.ftc.gov/sites/default/files/documents/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003/040831horizmergersdata96-03.pdf>`_). If :code:`.gen.data_generation.MarketSample.ShareSpec.firm_count_weights` is not explicitly assigned a value when defining :code:`.gen.data_generation.MarketSample.ShareSpec`, the default value is used, which results in a sample of markets with 2 to 7 firms with relative frequency in inverse proportion to firm-count, with 2-firm markets being 6 times as likely to be drawn as 7-firm markets.
58
+
59
+ Recapture rates can be specified as, "proportional", "inside-out", "outside-in" (see :code:`.RECForm`). The "inside-out" specification (:code:`.gen.data_generation.MarketSample.ShareSpec.recapture_form`:code:` = `:code:`.RECForm.INOUT`) results in recapture ratios consistent with merging-firms' in-market shares and a default recapture rate. The "outside-in" specification (:code:`.gen.data_generation.MarketSample.ShareSpec.recapture_form`:code:` = `:code:`.RECForm.OUTIN`) yields diversion ratios from purchase probabilities drawn at random for :math:`N+1` goods, from which are derived market shares and recapture rates for the :math:`N` goods in the putative market (see, :code:`.gen.ShareSpec`). The "outside-in" specification is invalid when the distribution of markets over firm-count is unspecified, i.e., when :code:`.gen.data_generation.MarketSample.ShareSpec.dist_type`:code:` ==`:code:`.gen.ShareDistributions.UNI`, thus raising a :code:`ValueError` exception. The "proportional" form (:code:`.gen.data_generation.MarketSample.ShareSpec.recapture_form`:code:` = `:code:`.RECForm.FIXED`) is often used in the literature, as an approximation to the "inside-out" form. See, for example, Coate (2011).
60
+
61
+ Price-cost-margins may be specified as having uniform distribution, Beta distribution (including a bounded Beta distribution with specified mean and variance), or an empirical distribution (see, :code:`.gen.PCMSpec`). The empirical margin distribution is based on resampling margin data published by Prof. Damodaran of NYU Stern School of Business (see Notes), using an estimated Gaussian KDE. The second merging firm's margin (:code:`.gen.data_generation.MarketSample.PCMSpec.firm2_pcm_constraint`) may be specified as symmetric, i.i.d., or subject to equilibrium conditions for (profit-maximization in) Bertrand-Nash oligopoly with MNL demand (:code:`.gen.FM2Constraint`).
62
+
63
+ Prices may be specified as symmetric or asymmetric, and in the latter case, the direction of correlation between merging firm prices, if any, can also be specified (see, :code:`.gen.PriceSpec`). Prices may also be defined by imposing cost symmetry on firms in the sample, with fixed unit marginal costs normalized to 1 unit, such that prices equal :math:`1 / (1 - \pmb{m})`, where :math:`\pmb{m}` represents the array of margins for firms in the sample.
64
+
65
+ The market sample may be restricted to mergers meeting the HSR filing requirement under two alternative approaches: in the one, the smaller of the two merging firms meets the lower HSR size threshold ($10 million, as adjusted) and the larger of the two merging firms meets the size test if its share is no less than 10 times the share of the smaller firm. In the other, the :math:`n`-th firm's size is maintained as $10 million, as adjusted (see, :code:`.gen.SSZConstant`), and a merger meets the HSR filing test if either, (a.) the smaller merging firm is no smaller than the n-th firm and the larger merging firm is at least 10 times as large as the n-th firm, or (b.) the smaller merging firm's market share is in excess of 10%; in effect this version of the test maintains that if the smaller merging firm's market share exceeds 10%, the value of the transaction exceeds $200 million, as adjusted, and the size-of-person test is eliminated (see, FTC (2008, p. 12); the above are simplifications of the statutory HSR filing requirements). The second assumption avoids the unfortunate assumption in the first that, within the resulting sample, the larger merging firm be at least 10 times as large as the smaller merging firm, as a consequence of the full definition of the HSR filing requirement.
66
+
67
+ The full specification of a market sample is given in a :code:`.gen.data_generation.MarketSample` object, including the above parameters. Data are drawn by invoking :code:`.gen.data_generation.MarketSample.generate_sample` which adds a :code:`data` property of class, :code:`.gen.MarketDataSample`. Enforcement or clearance counts are computed by invoking :code:`.gen.data_generation.MarketSample.estimate_enf_counts`, which adds an :code:`enf_counts` property of class :code:`.gen.UPPTestsCounts`. For fast, parallel generation of enforcement or clearance counts over large market data samples that ordinarily would exceed available limits on machine memory, the user can invoke the method :code:`.gen.data_generation.MarketSample.estimate_enf_counts` on a :code:`.gen.data_generation.MarketSample` object without first invoking :code:`.gen.data_generation.MarketSample.generate_sample`. Note, however, that this strategy does not retain the market sample in memory in the interests of conserving memory and maintaining high performance (the user can specify that the market sample and enforcement statistics be stored to permanent storage; when saving to current PCIe NVMe storage, the performance penalty is slight, but can be considerable if saving to SATA storage).
68
+
69
+ Enforcement statistics based on FTC investigations data and test data are printed to screen or rendered to LaTeX files (for processing into publication-quality tables) using methods provided in :code:`.gen.enforcement_stats`.
70
+
71
+ Programs demonstrating the use of this package are included in the sub-package, :code:`.demo`.
72
+
73
+ This package includes a class, :code:`.core.pseudorandom_numbers.MultithreadedRNG` for generating random numbers with selected continuous distribution over specified parameters, and with CPU multithreading on machines with multiple virtual, logical, or physical CPU cores. This class is an adaptation from the documentation of the :code:`numpy` package, from the discussion on `multithreaded random-number generation <https://numpy.org/doc/stable/reference/random/multithreading.html>`_; the version included here permits selection of the distribution with pre-tests to catch and inform on common errors. To access these directly:
74
+
75
+ .. code-block:: python
76
+
77
+ import mergeron.core.pseudorandom_numbers as prng
78
+
79
+ Documentation for this package is in the form of the API Reference. Documentation for individual functions and classes is accessible within a python shell. For example:
80
+
81
+ .. code-block:: python
82
+
83
+ import mergeron.core.market_sample as market_sample
84
+
85
+ help(market_sample.MarketSample)
86
+
87
+ .. rubric:: References
88
+
89
+ .. _coate2011:
90
+
91
+ Coate, M. B. (2011). Benchmarking the upward pricing pressure model with Federal Trade
92
+ Commission evidence. Journal of Competition Law & Economics, 7(4), 825--846. URL: https://doi.org/10.1093/joclec/nhr014.
93
+
94
+ .. _ftc_premerger_guide2:
95
+
96
+ FTC Premerger Notification Office. “To File or Not to File: When You Must File a Premerger Notification Report Form”. 2008 (September, revised). URL: https://www.ftc.gov/sites/default/files/attachments/premerger-introductory-guides/guide2.pdf
97
+
98
+
99
+ .. image:: https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json
100
+ :alt: Poetry
101
+ :target: https://python-poetry.org/
102
+
103
+ .. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
104
+ :alt: Ruff
105
+ :target: https://github.com/astral-sh/ruff
106
+
107
+ .. image:: https://www.mypy-lang.org/static/mypy_badge.svg
108
+ :alt: Checked with mypy
109
+ :target: https://mypy-lang.org/
110
+
111
+ .. image:: https://img.shields.io/badge/License-MIT-yellow.svg
112
+ :alt: License: MIT
113
+ :target: https://opensource.org/licenses/MIT
114
+
115
+