mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

@@ -5,23 +5,24 @@ Methods to generate data for analyzing merger enforcement policy.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- from collections.abc import Sequence
9
8
  from typing import TypedDict
10
9
 
11
10
  import numpy as np
12
- from attrs import Attribute, define, field, validators
11
+ from attrs import Attribute, Converter, define, field, validators
13
12
  from joblib import Parallel, cpu_count, delayed # type: ignore
14
13
  from numpy.random import SeedSequence
14
+ from ruamel import yaml
15
15
 
16
- from .. import DEFAULT_REC_RATIO, VERSION, RECForm # noqa: TID252 # noqa
16
+ from .. import NTHREADS, VERSION, RECForm, this_yaml # noqa: TID252 # noqa
17
17
  from ..core import guidelines_boundaries as gbl # noqa: TID252
18
18
  from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
19
19
  from . import (
20
20
  FM2Constraint,
21
- MarketDataSample,
21
+ MarketSampleData,
22
22
  PCMDistribution,
23
23
  PCMSpec,
24
24
  PriceSpec,
25
+ SeedSequenceData,
25
26
  ShareSpec,
26
27
  SHRDistribution,
27
28
  SSZConstant,
@@ -32,9 +33,8 @@ from .data_generation_functions import (
32
33
  gen_divr_array,
33
34
  gen_margin_price_data,
34
35
  gen_share_data,
35
- parse_seed_seq_list,
36
36
  )
37
- from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
37
+ from .upp_tests import SaveData, compute_upp_test_counts
38
38
 
39
39
  __version__ = VERSION
40
40
 
@@ -45,17 +45,8 @@ class SamplingFunctionKWArgs(TypedDict, total=False):
45
45
  sample_size: int
46
46
  """number of draws to generate"""
47
47
 
48
- seed_seq_list: Sequence[SeedSequence] | None
49
- """sequence of SeedSequences to ensure replicable data generation with
50
- appropriately independent random streams
51
-
52
- NOTES
53
- -----
54
-
55
- See, :func:`.data_generation_functions.parse_seed_seq_list` for more on
56
- specification of this parameter.
57
-
58
- """
48
+ seed_data: SeedSequenceData | None
49
+ """seed data to ensure independent and replicable draws"""
59
50
 
60
51
  nthreads: int
61
52
  """number of parallel threads to use"""
@@ -67,26 +58,47 @@ class SamplingFunctionKWArgs(TypedDict, total=False):
67
58
  """optionally specify a suffix for the HDF5 array names"""
68
59
 
69
60
 
70
- @define
61
+ def _seed_data_conv(_v: SeedSequenceData | None, _i: MarketSample) -> SeedSequenceData:
62
+ if isinstance(_v, SeedSequenceData):
63
+ return _v
64
+
65
+ _mktshr_dist_type = _i.share_spec.dist_type
66
+ _price_spec = _i.price_spec
67
+
68
+ _seed_count = 2 if _mktshr_dist_type == SHRDistribution.UNI else 3
69
+ _seed_count += 1 if _price_spec == PriceSpec.ZERO else 0
70
+
71
+ _sseq_list = tuple(SeedSequence(pool_size=8) for _ in range(_seed_count))
72
+
73
+ _mktshr_rng_seed_seq, _pcm_rng_seed_seq = _sseq_list[:2]
74
+ _fcount_rng_seed_seq = (
75
+ None if _mktshr_dist_type == SHRDistribution.UNI else _sseq_list[2]
76
+ )
77
+ _pr_rng_seed_seq = _sseq_list[-1] if _price_spec == PriceSpec.ZERO else None
78
+
79
+ return SeedSequenceData(
80
+ _mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq, _pr_rng_seed_seq
81
+ )
82
+
83
+
84
+ @this_yaml.register_class
85
+ @define(kw_only=True)
71
86
  class MarketSample:
72
87
  """Parameter specification for market data generation."""
73
88
 
74
89
  share_spec: ShareSpec = field(
75
- kw_only=True,
76
- default=ShareSpec(
77
- SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
78
- ),
90
+ default=ShareSpec(SHRDistribution.UNI),
79
91
  validator=validators.instance_of(ShareSpec),
80
92
  )
81
93
  """Market-share specification, see :class:`ShareSpec`"""
82
94
 
83
95
  pcm_spec: PCMSpec = field(
84
- kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
96
+ default=PCMSpec(PCMDistribution.UNI), validator=validators.instance_of(PCMSpec)
85
97
  )
86
98
  """Margin specification, see :class:`PCMSpec`"""
87
99
 
88
100
  @pcm_spec.validator
89
- def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
101
+ def _psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
90
102
  if (
91
103
  self.share_spec.recapture_form == RECForm.FIXED
92
104
  and _v.firm2_pcm_constraint == FM2Constraint.MNL
@@ -98,29 +110,60 @@ class MarketSample:
98
110
  )
99
111
 
100
112
  price_spec: PriceSpec = field(
101
- kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
113
+ default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
102
114
  )
103
115
  """Price specification, see :class:`PriceSpec`"""
104
116
 
105
117
  hsr_filing_test_type: SSZConstant = field(
106
- kw_only=True,
107
- default=SSZConstant.ONE,
108
- validator=validators.instance_of(SSZConstant),
118
+ default=SSZConstant.ONE, validator=validators.instance_of(SSZConstant)
109
119
  )
110
120
  """Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
111
121
 
112
- data: MarketDataSample = field(default=None)
122
+ sample_size: int = field(default=10**6, validator=validators.instance_of(int))
123
+ """number of draws to simulate"""
113
124
 
114
- enf_counts: UPPTestsCounts = field(default=None)
125
+ seed_data: SeedSequenceData = field(
126
+ converter=Converter(_seed_data_conv, takes_self=True) # type: ignore
127
+ )
128
+ """sequence of SeedSequences to ensure replicable data generation with
129
+ appropriately independent random streams
130
+ """
115
131
 
116
- def __gen_market_sample(
117
- self,
118
- /,
119
- *,
120
- sample_size: int,
121
- seed_seq_list: Sequence[SeedSequence] | None,
122
- nthreads: int,
123
- ) -> MarketDataSample:
132
+ @seed_data.default
133
+ def __dsd(self) -> SeedSequenceData | None:
134
+ return _seed_data_conv(None, self)
135
+
136
+ @seed_data.validator
137
+ def _sdv(
138
+ _i: MarketSample, _a: Attribute[SeedSequenceData], _v: SeedSequenceData, /
139
+ ) -> None:
140
+ if _i.share_spec.dist_type == SHRDistribution.UNI and any((
141
+ _v.fcounts,
142
+ _v.price,
143
+ )):
144
+ raise ValueError(
145
+ "Attribute, seed_data.fcounts is ignored as irrelevant when "
146
+ "market shares are drawn with Uniform distribution. "
147
+ "Set seed_data.fcounts to None and retry."
148
+ )
149
+
150
+ if _i.price_spec != PriceSpec.ZERO and _v.price is not None:
151
+ raise ValueError(
152
+ "Attribute, seed_data.price is ignored as irrelevant unless "
153
+ "prices are asymmetric and uncorrelated and price-cost margins "
154
+ "are also not symmetric. Set seed_data.price to None and retry."
155
+ )
156
+
157
+ nthreads: int = field(default=NTHREADS, validator=validators.instance_of(int))
158
+ """number of parallel threads to use"""
159
+
160
+ data: MarketSampleData | None = field(default=None)
161
+
162
+ enf_counts: UPPTestsCounts | None = field(default=None)
163
+
164
+ def _gen_market_sample(
165
+ self, /, *, sample_size: int, seed_data: SeedSequenceData | None, nthreads: int
166
+ ) -> MarketSampleData:
124
167
  """
125
168
  Generate share, diversion ratio, price, and margin data for MarketSpec.
126
169
 
@@ -139,14 +182,15 @@ class MarketSample:
139
182
  _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
140
183
  _hsr_filing_test_type = self.hsr_filing_test_type
141
184
 
185
+ _seed_data = seed_data or self.seed_data
142
186
  (
143
187
  _mktshr_rng_seed_seq,
144
188
  _pcm_rng_seed_seq,
145
189
  _fcount_rng_seed_seq,
146
190
  _pr_rng_seed_seq,
147
- ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
191
+ ) = (getattr(_seed_data, _a) for _a in _seed_data.__dataclass_fields__)
192
+ _shr_sample_size = 1.0 * (sample_size or self.sample_size)
148
193
 
149
- _shr_sample_size = 1.0 * sample_size
150
194
  # Scale up sample size to offset discards based on specified criteria
151
195
  _shr_sample_size *= _hsr_filing_test_type
152
196
  if _dist_firm2_pcm == FM2Constraint.MNL:
@@ -159,7 +203,7 @@ class MarketSample:
159
203
  self.share_spec,
160
204
  _fcount_rng_seed_seq,
161
205
  _mktshr_rng_seed_seq,
162
- nthreads,
206
+ nthreads or self.nthreads,
163
207
  )
164
208
 
165
209
  _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
@@ -224,7 +268,7 @@ class MarketSample:
224
268
  _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
225
269
  )
226
270
 
227
- return MarketDataSample(
271
+ return MarketSampleData(
228
272
  _frmshr_array,
229
273
  _pcm_array,
230
274
  _price_array,
@@ -236,41 +280,19 @@ class MarketSample:
236
280
  _hhi_delta,
237
281
  )
238
282
 
239
- def generate_sample(
240
- self,
241
- /,
242
- *,
243
- sample_size: int = 10**6,
244
- seed_seq_list: Sequence[SeedSequence] | None = None,
245
- nthreads: int = 16,
246
- save_data_to_file: SaveData = False,
247
- saved_array_name_suffix: str = "",
248
- ) -> None:
283
+ def generate_sample(self, /) -> None:
249
284
  """Populate :attr:`data` with generated data
250
285
 
251
- see :attr:`SamplingFunctionKWArgs` for description of keyord parameters
252
-
253
286
  Returns
254
287
  -------
255
288
  None
256
289
 
257
290
  """
258
291
 
259
- self.data = self.__gen_market_sample(
260
- sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
261
- )
262
-
263
- _invalid_array_names = (
264
- ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
265
- if self.share_spec.dist_type == "Uniform"
266
- else ()
267
- )
268
-
269
- save_data_to_hdf5(
270
- self.data,
271
- saved_array_name_suffix=saved_array_name_suffix,
272
- excluded_attrs=_invalid_array_names,
273
- save_data_to_file=save_data_to_file,
292
+ self.data = self._gen_market_sample(
293
+ seed_data=self.seed_data,
294
+ sample_size=self.sample_size,
295
+ nthreads=self.nthreads,
274
296
  )
275
297
 
276
298
  def __sim_enf_cnts(
@@ -279,11 +301,9 @@ class MarketSample:
279
301
  _sim_test_regime: UPPTestRegime,
280
302
  /,
281
303
  *,
304
+ seed_data: SeedSequenceData,
282
305
  sample_size: int = 10**6,
283
- seed_seq_list: Sequence[SeedSequence] | None = None,
284
- nthreads: int = 16,
285
- save_data_to_file: SaveData = False,
286
- saved_array_name_suffix: str = "",
306
+ nthreads: int = NTHREADS,
287
307
  ) -> UPPTestsCounts:
288
308
  """Generate market data and estimate UPP test counts on same.
289
309
 
@@ -302,26 +322,20 @@ class MarketSample:
302
322
  sample_size
303
323
  Number of draws to generate
304
324
 
305
- seed_seq_list
325
+ seed_data
306
326
  List of seed sequences, to assure independent samples in each thread
307
327
 
308
328
  nthreads
309
329
  Number of parallel processes to use
310
330
 
311
- save_data_to_file
312
- Whether to save data to an HDF5 file, and where to save it
313
-
314
- saved_array_name_suffix
315
- Suffix to add to the array names in the HDF5 file
316
-
317
331
  Returns
318
332
  -------
319
333
  UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
320
334
 
321
335
  """
322
336
 
323
- _market_data_sample = self.__gen_market_sample(
324
- sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
337
+ _market_data_sample = self._gen_market_sample(
338
+ sample_size=sample_size, seed_data=seed_data, nthreads=nthreads
325
339
  )
326
340
 
327
341
  _invalid_array_names = (
@@ -330,42 +344,20 @@ class MarketSample:
330
344
  else ()
331
345
  )
332
346
 
333
- save_data_to_hdf5(
334
- _market_data_sample,
335
- saved_array_name_suffix=saved_array_name_suffix,
336
- excluded_attrs=_invalid_array_names,
337
- save_data_to_file=save_data_to_file,
338
- )
339
-
340
347
  _upp_test_arrays = compute_upp_test_counts(
341
348
  _market_data_sample, _upp_test_parms, _sim_test_regime
342
349
  )
343
350
 
344
- save_data_to_hdf5(
345
- _upp_test_arrays,
346
- saved_array_name_suffix=saved_array_name_suffix,
347
- save_data_to_file=save_data_to_file,
348
- )
349
-
350
351
  return _upp_test_arrays
351
352
 
352
353
  def __sim_enf_cnts_ll(
353
- self,
354
- _enf_parm_vec: gbl.HMGThresholds,
355
- _sim_test_regime: UPPTestRegime,
356
- /,
357
- *,
358
- sample_size: int = 10**6,
359
- seed_seq_list: Sequence[SeedSequence] | None = None,
360
- nthreads: int = 16,
361
- save_data_to_file: SaveData = False,
362
- saved_array_name_suffix: str = "",
354
+ self, _enf_parm_vec: gbl.HMGThresholds, _sim_test_regime: UPPTestRegime, /
363
355
  ) -> UPPTestsCounts:
364
356
  """A function to parallelize data-generation and testing
365
357
 
366
358
  The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
367
359
  the parent function, `sim_enf_cnts()`, except that, if provided,
368
- `seed_seq_list` is used to spawn a seed sequence for each thread,
360
+ `seed_data` is used to spawn a seed sequence for each thread,
369
361
  to assure independent samples in each thread, and `nthreads` defines
370
362
  the number of parallel processes used. The number of draws in
371
363
  each thread may be tuned, by trial and error, to the amount of
@@ -380,33 +372,18 @@ class MarketSample:
380
372
  _sim_test_regime
381
373
  Configuration to use for testing
382
374
 
383
- sample_size
384
- Number of draws to simulate
385
-
386
- seed_seq_list
387
- List of seed sequences, to assure independent samples in each thread
388
-
389
- nthreads
390
- Number of parallel processes to use
391
-
392
- save_data_to_file
393
- Whether to save data to an HDF5 file, and where to save it
394
-
395
- saved_array_name_suffix
396
- Suffix to add to the array names in the HDF5 file
397
-
398
375
  Returns
399
376
  -------
400
377
  Arrays of enforcement counts or clearance counts by firm count,
401
378
  ΔHHI and concentration zone
402
379
 
403
380
  """
404
- _sample_sz = sample_size
381
+ _sample_sz = self.sample_size
405
382
  _subsample_sz = 10**6
406
383
  _iter_count = (
407
384
  int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
408
385
  )
409
- _thread_count = cpu_count()
386
+ _thread_count = self.nthreads or cpu_count()
410
387
 
411
388
  if (
412
389
  self.share_spec.recapture_form != RECForm.OUTIN
@@ -420,27 +397,35 @@ class MarketSample:
420
397
  )
421
398
  )
422
399
 
423
- _rng_seed_seq_list = [None] * _iter_count
424
- if seed_seq_list:
425
- _rng_seed_seq_list = list(
426
- zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
400
+ _rng_seed_data = [
401
+ SeedSequenceData(*_z)
402
+ for _z in (
403
+ zip(
404
+ *[
405
+ _s.spawn(_iter_count) if _s else [None] * _iter_count
406
+ for _s in (
407
+ getattr(self.seed_data, _a)
408
+ for _a in self.seed_data.__dataclass_fields__
409
+ )
410
+ ],
411
+ strict=True,
412
+ )
427
413
  )
414
+ ]
428
415
 
429
- _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
416
+ __sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
430
417
  "sample_size": _subsample_sz,
431
- "save_data_to_file": save_data_to_file,
432
- "nthreads": nthreads,
418
+ "nthreads": self.nthreads,
433
419
  })
434
420
 
435
421
  _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
436
422
  delayed(self.__sim_enf_cnts)(
437
423
  _enf_parm_vec,
438
424
  _sim_test_regime,
439
- **_sim_enf_cnts_kwargs,
440
- saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
441
- seed_seq_list=_rng_seed_seq_list_ch,
425
+ **__sim_enf_cnts_kwargs,
426
+ seed_data=_rng_seed_data_ch,
442
427
  )
443
- for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
428
+ for _iter_id, _rng_seed_data_ch in enumerate(_rng_seed_data)
444
429
  )
445
430
 
446
431
  _res_list_stacks = UPPTestsCounts(*[
@@ -450,10 +435,10 @@ class MarketSample:
450
435
  upp_test_results = UPPTestsCounts(*[
451
436
  np.column_stack((
452
437
  (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
453
- np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
438
+ np.einsum("ijk->jk", _gv[:, :, _h:], dtype=np.int64),
454
439
  ))
455
440
  for _g, _h in zip(
456
- _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
441
+ _res_list_stacks.__dataclass_fields__, [1, 1, 3], strict=True
457
442
  )
458
443
  ])
459
444
  del _res_list, _res_list_stacks
@@ -461,16 +446,7 @@ class MarketSample:
461
446
  return upp_test_results
462
447
 
463
448
  def estimate_enf_counts(
464
- self,
465
- _enf_parm_vec: HMGThresholds,
466
- _upp_test_regime: UPPTestRegime,
467
- /,
468
- *,
469
- sample_size: int = 10**6,
470
- seed_seq_list: Sequence[SeedSequence] | None = None,
471
- nthreads: int = 16,
472
- save_data_to_file: SaveData = False,
473
- saved_array_name_suffix: str = "",
449
+ self, _enf_parm_vec: HMGThresholds, _upp_test_regime: UPPTestRegime, /
474
450
  ) -> None:
475
451
  """Populate :attr:`enf_counts` with estimated UPP test counts.
476
452
 
@@ -486,21 +462,6 @@ class MarketSample:
486
462
  merging-firm GUPPI and maximum diversion ratio between the
487
463
  merging firms
488
464
 
489
- sample_size
490
- Number of draws to simulate
491
-
492
- seed_seq_list
493
- List of seed sequences, to assure independent samples in each thread
494
-
495
- nthreads
496
- Number of parallel processes to use
497
-
498
- save_data_to_file
499
- Whether to save data to an HDF5 file, and where to save it
500
-
501
- saved_array_name_suffix
502
- Suffix to add to the array names in the HDF5 file
503
-
504
465
  Returns
505
466
  -------
506
467
  None
@@ -508,22 +469,28 @@ class MarketSample:
508
469
  """
509
470
 
510
471
  if self.data is None:
511
- self.enf_counts = self.__sim_enf_cnts_ll(
512
- _enf_parm_vec,
513
- _upp_test_regime,
514
- sample_size=sample_size,
515
- seed_seq_list=seed_seq_list,
516
- nthreads=nthreads,
517
- save_data_to_file=save_data_to_file,
518
- saved_array_name_suffix=saved_array_name_suffix,
519
- )
472
+ self.enf_counts = self.__sim_enf_cnts_ll(_enf_parm_vec, _upp_test_regime)
520
473
  else:
521
474
  self.enf_counts = compute_upp_test_counts(
522
475
  self.data, _enf_parm_vec, _upp_test_regime
523
476
  )
524
- if save_data_to_file:
525
- save_data_to_hdf5(
526
- self.enf_counts,
527
- save_data_to_file=save_data_to_file,
528
- saved_array_name_suffix=saved_array_name_suffix,
529
- )
477
+
478
+ @classmethod
479
+ def to_yaml(
480
+ cls, _r: yaml.representer.SafeRepresenter, _d: MarketSample
481
+ ) -> yaml.MappingNode:
482
+ _ret: yaml.MappingNode = _r.represent_mapping(
483
+ f"!{cls.__name__}",
484
+ {
485
+ _a.name: getattr(_d, _a.name)
486
+ for _a in _d.__attrs_attrs__
487
+ if _a.type not in (MarketSampleData, UPPTestsCounts)
488
+ },
489
+ )
490
+ return _ret
491
+
492
+ @classmethod
493
+ def from_yaml(
494
+ cls, _c: yaml.constructor.SafeConstructor, _n: yaml.MappingNode
495
+ ) -> MarketSample:
496
+ return cls(**_c.construct_mapping(_n))