mergeron 2025.739265.2__py3-none-any.whl → 2025.739290.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

@@ -5,23 +5,24 @@ Methods to generate data for analyzing merger enforcement policy.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- from collections.abc import Sequence
9
8
  from typing import TypedDict
10
9
 
11
10
  import numpy as np
12
- from attrs import Attribute, define, field, validators
11
+ from attrs import Attribute, Converter, define, field, validators
13
12
  from joblib import Parallel, cpu_count, delayed # type: ignore
14
13
  from numpy.random import SeedSequence
14
+ from ruamel import yaml
15
15
 
16
- from .. import DEFAULT_REC_RATIO, VERSION, RECForm # noqa: TID252 # noqa
16
+ from .. import NTHREADS, VERSION, RECForm, this_yaml # noqa: TID252 # noqa
17
17
  from ..core import guidelines_boundaries as gbl # noqa: TID252
18
18
  from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
19
19
  from . import (
20
20
  FM2Constraint,
21
- MarketDataSample,
21
+ MarketSampleData,
22
22
  PCMDistribution,
23
23
  PCMSpec,
24
24
  PriceSpec,
25
+ SeedSequenceData,
25
26
  ShareSpec,
26
27
  SHRDistribution,
27
28
  SSZConstant,
@@ -32,9 +33,8 @@ from .data_generation_functions import (
32
33
  gen_divr_array,
33
34
  gen_margin_price_data,
34
35
  gen_share_data,
35
- parse_seed_seq_list,
36
36
  )
37
- from .upp_tests import SaveData, compute_upp_test_counts, save_data_to_hdf5
37
+ from .upp_tests import compute_upp_test_counts
38
38
 
39
39
  __version__ = VERSION
40
40
 
@@ -45,82 +45,119 @@ class SamplingFunctionKWArgs(TypedDict, total=False):
45
45
  sample_size: int
46
46
  """number of draws to generate"""
47
47
 
48
- seed_seq_list: Sequence[SeedSequence] | None
49
- """sequence of SeedSequences to ensure replicable data generation with
50
- appropriately independent random streams
48
+ seed_data: SeedSequenceData | None
49
+ """seed data to ensure independent and replicable draws"""
51
50
 
52
- NOTES
53
- -----
51
+ nthreads: int
52
+ """number of parallel threads to use"""
54
53
 
55
- See, :func:`.data_generation_functions.parse_seed_seq_list` for more on
56
- specification of this parameter.
57
54
 
58
- """
55
+ def _seed_data_conv(_v: SeedSequenceData | None, _i: MarketSample) -> SeedSequenceData:
56
+ if isinstance(_v, SeedSequenceData):
57
+ return _v
59
58
 
60
- nthreads: int
61
- """number of parallel threads to use"""
59
+ _mktshr_dist_type = _i.share_spec.dist_type
60
+ _price_spec = _i.price_spec
61
+
62
+ _seed_count = 2 if _mktshr_dist_type == SHRDistribution.UNI else 3
63
+ _seed_count += 1 if _price_spec == PriceSpec.ZERO else 0
62
64
 
63
- save_data_to_file: SaveData
64
- """optionally save data to HDF5 file"""
65
+ _sseq_list = tuple(SeedSequence(pool_size=8) for _ in range(_seed_count))
65
66
 
66
- saved_array_name_suffix: str
67
- """optionally specify a suffix for the HDF5 array names"""
67
+ _mktshr_rng_seed_seq, _pcm_rng_seed_seq = _sseq_list[:2]
68
+ _fcount_rng_seed_seq = (
69
+ None if _mktshr_dist_type == SHRDistribution.UNI else _sseq_list[2]
70
+ )
71
+ _pr_rng_seed_seq = _sseq_list[-1] if _price_spec == PriceSpec.ZERO else None
72
+
73
+ return SeedSequenceData(
74
+ _mktshr_rng_seed_seq, _pcm_rng_seed_seq, _fcount_rng_seed_seq, _pr_rng_seed_seq
75
+ )
68
76
 
69
77
 
70
- @define
78
+ @this_yaml.register_class
79
+ @define(kw_only=True)
71
80
  class MarketSample:
72
81
  """Parameter specification for market data generation."""
73
82
 
74
83
  share_spec: ShareSpec = field(
75
- kw_only=True,
76
- default=ShareSpec(
77
- SHRDistribution.UNI, None, None, RECForm.INOUT, DEFAULT_REC_RATIO
78
- ),
84
+ default=ShareSpec(SHRDistribution.UNI),
79
85
  validator=validators.instance_of(ShareSpec),
80
86
  )
81
87
  """Market-share specification, see :class:`ShareSpec`"""
82
88
 
83
89
  pcm_spec: PCMSpec = field(
84
- kw_only=True, default=PCMSpec(PCMDistribution.UNI, None, FM2Constraint.IID)
90
+ default=PCMSpec(PCMDistribution.UNI), validator=validators.instance_of(PCMSpec)
85
91
  )
86
92
  """Margin specification, see :class:`PCMSpec`"""
87
93
 
88
94
  @pcm_spec.validator
89
- def __psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
95
+ def _psv(self, _a: Attribute[PCMSpec], _v: PCMSpec, /) -> None:
90
96
  if (
91
97
  self.share_spec.recapture_form == RECForm.FIXED
92
98
  and _v.firm2_pcm_constraint == FM2Constraint.MNL
93
99
  ):
94
100
  raise ValueError(
95
- f'Specification of "recapture_form", "{self.share_spec.recapture_form}" '
96
- "requires Firm 2 margin must have property, "
97
- f'"{FM2Constraint.IID}" or "{FM2Constraint.SYM}".'
101
+ f'Specification of "PCMSpec.firm2_pcm_constraint", as {FM2Constraint.MNL!r} '
102
+ f'requires that "ShareSpec.recapture_form" be {RECForm.INOUT!r} '
103
+ f"or {RECForm.OUTIN!r}, not {RECForm.FIXED!r} as presently specified"
98
104
  )
99
105
 
100
106
  price_spec: PriceSpec = field(
101
- kw_only=True, default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
107
+ default=PriceSpec.SYM, validator=validators.instance_of(PriceSpec)
102
108
  )
103
109
  """Price specification, see :class:`PriceSpec`"""
104
110
 
105
111
  hsr_filing_test_type: SSZConstant = field(
106
- kw_only=True,
107
- default=SSZConstant.ONE,
108
- validator=validators.instance_of(SSZConstant),
112
+ default=SSZConstant.ONE, validator=validators.instance_of(SSZConstant)
109
113
  )
110
114
  """Method for modeling HSR filing thresholds, see :class:`SSZConstant`"""
111
115
 
112
- data: MarketDataSample = field(default=None)
116
+ sample_size: int = field(default=10**6, validator=validators.instance_of(int))
117
+ """number of draws to simulate"""
113
118
 
114
- enf_counts: UPPTestsCounts = field(default=None)
119
+ seed_data: SeedSequenceData = field(
120
+ converter=Converter(_seed_data_conv, takes_self=True) # type: ignore
121
+ )
122
+ """sequence of SeedSequences to ensure replicable data generation with
123
+ appropriately independent random streams
124
+ """
115
125
 
116
- def __gen_market_sample(
117
- self,
118
- /,
119
- *,
120
- sample_size: int,
121
- seed_seq_list: Sequence[SeedSequence] | None,
122
- nthreads: int,
123
- ) -> MarketDataSample:
126
+ @seed_data.default
127
+ def __dsd(self) -> SeedSequenceData | None:
128
+ return _seed_data_conv(None, self)
129
+
130
+ @seed_data.validator
131
+ def _sdv(
132
+ _i: MarketSample, _a: Attribute[SeedSequenceData], _v: SeedSequenceData, /
133
+ ) -> None:
134
+ if _i.share_spec.dist_type == SHRDistribution.UNI and any((
135
+ _v.fcounts,
136
+ _v.price,
137
+ )):
138
+ raise ValueError(
139
+ "Attribute, seed_data.fcounts is ignored as irrelevant when "
140
+ "market shares are drawn with Uniform distribution. "
141
+ "Set seed_data.fcounts to None and retry."
142
+ )
143
+
144
+ if _i.price_spec != PriceSpec.ZERO and _v.price is not None:
145
+ raise ValueError(
146
+ "Attribute, seed_data.price is ignored as irrelevant unless "
147
+ "prices are asymmetric and uncorrelated and price-cost margins "
148
+ "are also not symmetric. Set seed_data.price to None and retry."
149
+ )
150
+
151
+ nthreads: int = field(default=NTHREADS, validator=validators.instance_of(int))
152
+ """number of parallel threads to use"""
153
+
154
+ data: MarketSampleData | None = field(default=None)
155
+
156
+ enf_counts: UPPTestsCounts | None = field(default=None)
157
+
158
+ def _gen_market_sample(
159
+ self, /, *, sample_size: int, seed_data: SeedSequenceData | None, nthreads: int
160
+ ) -> MarketSampleData:
124
161
  """
125
162
  Generate share, diversion ratio, price, and margin data for MarketSpec.
126
163
 
@@ -139,14 +176,15 @@ class MarketSample:
139
176
  _dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
140
177
  _hsr_filing_test_type = self.hsr_filing_test_type
141
178
 
179
+ _seed_data = seed_data or self.seed_data
142
180
  (
143
181
  _mktshr_rng_seed_seq,
144
182
  _pcm_rng_seed_seq,
145
183
  _fcount_rng_seed_seq,
146
184
  _pr_rng_seed_seq,
147
- ) = parse_seed_seq_list(seed_seq_list, _dist_type_mktshr, self.price_spec)
185
+ ) = (getattr(_seed_data, _a) for _a in _seed_data.__dataclass_fields__)
186
+ _shr_sample_size = 1.0 * (sample_size or self.sample_size)
148
187
 
149
- _shr_sample_size = 1.0 * sample_size
150
188
  # Scale up sample size to offset discards based on specified criteria
151
189
  _shr_sample_size *= _hsr_filing_test_type
152
190
  if _dist_firm2_pcm == FM2Constraint.MNL:
@@ -159,7 +197,7 @@ class MarketSample:
159
197
  self.share_spec,
160
198
  _fcount_rng_seed_seq,
161
199
  _mktshr_rng_seed_seq,
162
- nthreads,
200
+ nthreads or self.nthreads,
163
201
  )
164
202
 
165
203
  _mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
@@ -224,7 +262,7 @@ class MarketSample:
224
262
  _hhi_delta + np.einsum("ij,ij->i", _mktshr_array, _mktshr_array)[:, None]
225
263
  )
226
264
 
227
- return MarketDataSample(
265
+ return MarketSampleData(
228
266
  _frmshr_array,
229
267
  _pcm_array,
230
268
  _price_array,
@@ -236,41 +274,19 @@ class MarketSample:
236
274
  _hhi_delta,
237
275
  )
238
276
 
239
- def generate_sample(
240
- self,
241
- /,
242
- *,
243
- sample_size: int = 10**6,
244
- seed_seq_list: Sequence[SeedSequence] | None = None,
245
- nthreads: int = 16,
246
- save_data_to_file: SaveData = False,
247
- saved_array_name_suffix: str = "",
248
- ) -> None:
277
+ def generate_sample(self, /) -> None:
249
278
  """Populate :attr:`data` with generated data
250
279
 
251
- see :attr:`SamplingFunctionKWArgs` for description of keyord parameters
252
-
253
280
  Returns
254
281
  -------
255
282
  None
256
283
 
257
284
  """
258
285
 
259
- self.data = self.__gen_market_sample(
260
- sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
261
- )
262
-
263
- _invalid_array_names = (
264
- ("fcounts", "choice_prob_outgd", "nth_firm_share", "hhi_post")
265
- if self.share_spec.dist_type == "Uniform"
266
- else ()
267
- )
268
-
269
- save_data_to_hdf5(
270
- self.data,
271
- saved_array_name_suffix=saved_array_name_suffix,
272
- excluded_attrs=_invalid_array_names,
273
- save_data_to_file=save_data_to_file,
286
+ self.data = self._gen_market_sample(
287
+ seed_data=self.seed_data,
288
+ sample_size=self.sample_size,
289
+ nthreads=self.nthreads,
274
290
  )
275
291
 
276
292
  def __sim_enf_cnts(
@@ -279,11 +295,9 @@ class MarketSample:
279
295
  _sim_test_regime: UPPTestRegime,
280
296
  /,
281
297
  *,
298
+ seed_data: SeedSequenceData,
282
299
  sample_size: int = 10**6,
283
- seed_seq_list: Sequence[SeedSequence] | None = None,
284
- nthreads: int = 16,
285
- save_data_to_file: SaveData = False,
286
- saved_array_name_suffix: str = "",
300
+ nthreads: int = NTHREADS,
287
301
  ) -> UPPTestsCounts:
288
302
  """Generate market data and estimate UPP test counts on same.
289
303
 
@@ -302,26 +316,20 @@ class MarketSample:
302
316
  sample_size
303
317
  Number of draws to generate
304
318
 
305
- seed_seq_list
319
+ seed_data
306
320
  List of seed sequences, to assure independent samples in each thread
307
321
 
308
322
  nthreads
309
323
  Number of parallel processes to use
310
324
 
311
- save_data_to_file
312
- Whether to save data to an HDF5 file, and where to save it
313
-
314
- saved_array_name_suffix
315
- Suffix to add to the array names in the HDF5 file
316
-
317
325
  Returns
318
326
  -------
319
327
  UPPTestsCounts object with test counts by firm count, ΔHHI and concentration zone
320
328
 
321
329
  """
322
330
 
323
- _market_data_sample = self.__gen_market_sample(
324
- sample_size=sample_size, seed_seq_list=seed_seq_list, nthreads=nthreads
331
+ _market_data_sample = self._gen_market_sample(
332
+ sample_size=sample_size, seed_data=seed_data, nthreads=nthreads
325
333
  )
326
334
 
327
335
  _invalid_array_names = (
@@ -330,42 +338,20 @@ class MarketSample:
330
338
  else ()
331
339
  )
332
340
 
333
- save_data_to_hdf5(
334
- _market_data_sample,
335
- saved_array_name_suffix=saved_array_name_suffix,
336
- excluded_attrs=_invalid_array_names,
337
- save_data_to_file=save_data_to_file,
338
- )
339
-
340
341
  _upp_test_arrays = compute_upp_test_counts(
341
342
  _market_data_sample, _upp_test_parms, _sim_test_regime
342
343
  )
343
344
 
344
- save_data_to_hdf5(
345
- _upp_test_arrays,
346
- saved_array_name_suffix=saved_array_name_suffix,
347
- save_data_to_file=save_data_to_file,
348
- )
349
-
350
345
  return _upp_test_arrays
351
346
 
352
347
  def __sim_enf_cnts_ll(
353
- self,
354
- _enf_parm_vec: gbl.HMGThresholds,
355
- _sim_test_regime: UPPTestRegime,
356
- /,
357
- *,
358
- sample_size: int = 10**6,
359
- seed_seq_list: Sequence[SeedSequence] | None = None,
360
- nthreads: int = 16,
361
- save_data_to_file: SaveData = False,
362
- saved_array_name_suffix: str = "",
348
+ self, _enf_parm_vec: gbl.HMGThresholds, _sim_test_regime: UPPTestRegime, /
363
349
  ) -> UPPTestsCounts:
364
350
  """A function to parallelize data-generation and testing
365
351
 
366
352
  The parameters `_sim_enf_cnts_kwargs` are passed unaltered to
367
353
  the parent function, `sim_enf_cnts()`, except that, if provided,
368
- `seed_seq_list` is used to spawn a seed sequence for each thread,
354
+ `seed_data` is used to spawn a seed sequence for each thread,
369
355
  to assure independent samples in each thread, and `nthreads` defines
370
356
  the number of parallel processes used. The number of draws in
371
357
  each thread may be tuned, by trial and error, to the amount of
@@ -380,33 +366,18 @@ class MarketSample:
380
366
  _sim_test_regime
381
367
  Configuration to use for testing
382
368
 
383
- sample_size
384
- Number of draws to simulate
385
-
386
- seed_seq_list
387
- List of seed sequences, to assure independent samples in each thread
388
-
389
- nthreads
390
- Number of parallel processes to use
391
-
392
- save_data_to_file
393
- Whether to save data to an HDF5 file, and where to save it
394
-
395
- saved_array_name_suffix
396
- Suffix to add to the array names in the HDF5 file
397
-
398
369
  Returns
399
370
  -------
400
371
  Arrays of enforcement counts or clearance counts by firm count,
401
372
  ΔHHI and concentration zone
402
373
 
403
374
  """
404
- _sample_sz = sample_size
375
+ _sample_sz = self.sample_size
405
376
  _subsample_sz = 10**6
406
377
  _iter_count = (
407
378
  int(_sample_sz / _subsample_sz) if _subsample_sz < _sample_sz else 1
408
379
  )
409
- _thread_count = cpu_count()
380
+ _thread_count = self.nthreads or cpu_count()
410
381
 
411
382
  if (
412
383
  self.share_spec.recapture_form != RECForm.OUTIN
@@ -420,27 +391,35 @@ class MarketSample:
420
391
  )
421
392
  )
422
393
 
423
- _rng_seed_seq_list = [None] * _iter_count
424
- if seed_seq_list:
425
- _rng_seed_seq_list = list(
426
- zip(*[g.spawn(_iter_count) for g in seed_seq_list], strict=True) # type: ignore
394
+ _rng_seed_data = [
395
+ SeedSequenceData(*_z)
396
+ for _z in (
397
+ zip(
398
+ *[
399
+ _s.spawn(_iter_count) if _s else [None] * _iter_count
400
+ for _s in (
401
+ getattr(self.seed_data, _a)
402
+ for _a in self.seed_data.__dataclass_fields__
403
+ )
404
+ ],
405
+ strict=True,
406
+ )
427
407
  )
408
+ ]
428
409
 
429
- _sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
410
+ __sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
430
411
  "sample_size": _subsample_sz,
431
- "save_data_to_file": save_data_to_file,
432
- "nthreads": nthreads,
412
+ "nthreads": self.nthreads,
433
413
  })
434
414
 
435
415
  _res_list = Parallel(n_jobs=_thread_count, prefer="threads")(
436
416
  delayed(self.__sim_enf_cnts)(
437
417
  _enf_parm_vec,
438
418
  _sim_test_regime,
439
- **_sim_enf_cnts_kwargs,
440
- saved_array_name_suffix=f"{saved_array_name_suffix}_{_iter_id:0{2 + int(np.ceil(np.log10(_iter_count)))}d}",
441
- seed_seq_list=_rng_seed_seq_list_ch,
419
+ **__sim_enf_cnts_kwargs,
420
+ seed_data=_rng_seed_data_ch,
442
421
  )
443
- for _iter_id, _rng_seed_seq_list_ch in enumerate(_rng_seed_seq_list)
422
+ for _iter_id, _rng_seed_data_ch in enumerate(_rng_seed_data)
444
423
  )
445
424
 
446
425
  _res_list_stacks = UPPTestsCounts(*[
@@ -450,10 +429,10 @@ class MarketSample:
450
429
  upp_test_results = UPPTestsCounts(*[
451
430
  np.column_stack((
452
431
  (_gv := getattr(_res_list_stacks, _g))[0, :, :_h],
453
- np.einsum("ijk->jk", np.int64(1) * _gv[:, :, _h:]),
432
+ np.einsum("ijk->jk", _gv[:, :, _h:], dtype=np.int64),
454
433
  ))
455
434
  for _g, _h in zip(
456
- _res_list_stacks.__dataclass_fields__.keys(), [1, 1, 3], strict=True
435
+ _res_list_stacks.__dataclass_fields__, [1, 1, 3], strict=True
457
436
  )
458
437
  ])
459
438
  del _res_list, _res_list_stacks
@@ -461,16 +440,7 @@ class MarketSample:
461
440
  return upp_test_results
462
441
 
463
442
  def estimate_enf_counts(
464
- self,
465
- _enf_parm_vec: HMGThresholds,
466
- _upp_test_regime: UPPTestRegime,
467
- /,
468
- *,
469
- sample_size: int = 10**6,
470
- seed_seq_list: Sequence[SeedSequence] | None = None,
471
- nthreads: int = 16,
472
- save_data_to_file: SaveData = False,
473
- saved_array_name_suffix: str = "",
443
+ self, _enf_parm_vec: HMGThresholds, _upp_test_regime: UPPTestRegime, /
474
444
  ) -> None:
475
445
  """Populate :attr:`enf_counts` with estimated UPP test counts.
476
446
 
@@ -486,21 +456,6 @@ class MarketSample:
486
456
  merging-firm GUPPI and maximum diversion ratio between the
487
457
  merging firms
488
458
 
489
- sample_size
490
- Number of draws to simulate
491
-
492
- seed_seq_list
493
- List of seed sequences, to assure independent samples in each thread
494
-
495
- nthreads
496
- Number of parallel processes to use
497
-
498
- save_data_to_file
499
- Whether to save data to an HDF5 file, and where to save it
500
-
501
- saved_array_name_suffix
502
- Suffix to add to the array names in the HDF5 file
503
-
504
459
  Returns
505
460
  -------
506
461
  None
@@ -508,22 +463,28 @@ class MarketSample:
508
463
  """
509
464
 
510
465
  if self.data is None:
511
- self.enf_counts = self.__sim_enf_cnts_ll(
512
- _enf_parm_vec,
513
- _upp_test_regime,
514
- sample_size=sample_size,
515
- seed_seq_list=seed_seq_list,
516
- nthreads=nthreads,
517
- save_data_to_file=save_data_to_file,
518
- saved_array_name_suffix=saved_array_name_suffix,
519
- )
466
+ self.enf_counts = self.__sim_enf_cnts_ll(_enf_parm_vec, _upp_test_regime)
520
467
  else:
521
468
  self.enf_counts = compute_upp_test_counts(
522
469
  self.data, _enf_parm_vec, _upp_test_regime
523
470
  )
524
- if save_data_to_file:
525
- save_data_to_hdf5(
526
- self.enf_counts,
527
- save_data_to_file=save_data_to_file,
528
- saved_array_name_suffix=saved_array_name_suffix,
529
- )
471
+
472
+ @classmethod
473
+ def to_yaml(
474
+ cls, _r: yaml.representer.SafeRepresenter, _d: MarketSample
475
+ ) -> yaml.MappingNode:
476
+ _ret: yaml.MappingNode = _r.represent_mapping(
477
+ f"!{cls.__name__}",
478
+ {
479
+ _a.name: getattr(_d, _a.name)
480
+ for _a in _d.__attrs_attrs__
481
+ if _a.name not in ("data", "enf_counts")
482
+ },
483
+ )
484
+ return _ret
485
+
486
+ @classmethod
487
+ def from_yaml(
488
+ cls, _c: yaml.constructor.SafeConstructor, _n: yaml.MappingNode
489
+ ) -> MarketSample:
490
+ return cls(**_c.construct_mapping(_n))