mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +74 -48
- mergeron/core/__init__.py +105 -4
- mergeron/core/empirical_margin_distribution.py +100 -78
- mergeron/core/ftc_merger_investigations_data.py +309 -316
- mergeron/core/guidelines_boundaries.py +62 -121
- mergeron/core/guidelines_boundary_functions.py +207 -384
- mergeron/core/guidelines_boundary_functions_extra.py +264 -104
- mergeron/core/pseudorandom_numbers.py +76 -67
- mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- mergeron/data/ftc_invdata.zip +0 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +9 -7
- mergeron/gen/__init__.py +123 -161
- mergeron/gen/data_generation.py +183 -149
- mergeron/gen/data_generation_functions.py +220 -237
- mergeron/gen/enforcement_stats.py +83 -115
- mergeron/gen/upp_tests.py +118 -193
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.4.dist-info}/METADATA +2 -3
- mergeron-2025.739290.4.dist-info/RECORD +24 -0
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.4.dist-info}/WHEEL +1 -1
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron-2025.739290.3.dist-info/RECORD +0 -23
mergeron/gen/data_generation.py
CHANGED
|
@@ -5,15 +5,26 @@ Methods to generate data for analyzing merger enforcement policy.
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
+
import io
|
|
9
|
+
import zipfile
|
|
10
|
+
from itertools import starmap
|
|
8
11
|
from typing import TypedDict
|
|
9
12
|
|
|
13
|
+
import h5py # type: ignore
|
|
10
14
|
import numpy as np
|
|
11
15
|
from attrs import Attribute, Converter, define, field, validators
|
|
12
16
|
from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
13
17
|
from numpy.random import SeedSequence
|
|
14
18
|
from ruamel import yaml
|
|
15
19
|
|
|
16
|
-
from .. import
|
|
20
|
+
from .. import ( # noqa: TID252 # noqa
|
|
21
|
+
_PKG_NAME,
|
|
22
|
+
NTHREADS,
|
|
23
|
+
VERSION,
|
|
24
|
+
RECForm,
|
|
25
|
+
this_yaml,
|
|
26
|
+
yaml_rt_mapper,
|
|
27
|
+
)
|
|
17
28
|
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
18
29
|
from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
|
|
19
30
|
from . import (
|
|
@@ -34,10 +45,12 @@ from .data_generation_functions import (
|
|
|
34
45
|
gen_margin_price_data,
|
|
35
46
|
gen_share_data,
|
|
36
47
|
)
|
|
37
|
-
from .upp_tests import compute_upp_test_counts
|
|
48
|
+
from .upp_tests import compute_upp_test_counts # type: ignore # has pytypes marker ...
|
|
38
49
|
|
|
39
50
|
__version__ = VERSION
|
|
40
51
|
|
|
52
|
+
H5_CHUNK_SIZE = 10**6
|
|
53
|
+
|
|
41
54
|
|
|
42
55
|
class SamplingFunctionKWArgs(TypedDict, total=False):
|
|
43
56
|
"Keyword arguments of sampling methods defined below"
|
|
@@ -56,22 +69,15 @@ def _seed_data_conv(_v: SeedSequenceData | None, _i: MarketSample) -> SeedSequen
|
|
|
56
69
|
if isinstance(_v, SeedSequenceData):
|
|
57
70
|
return _v
|
|
58
71
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
_seed_count = 2 if _mktshr_dist_type == SHRDistribution.UNI else 3
|
|
63
|
-
_seed_count += 1 if _price_spec == PriceSpec.ZERO else 0
|
|
64
|
-
|
|
65
|
-
_sseq_list = tuple(SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
66
|
-
|
|
67
|
-
_mktshr_rng_seed_seq, _pcm_rng_seed_seq = _sseq_list[:2]
|
|
68
|
-
_fcount_rng_seed_seq = (
|
|
69
|
-
None if _mktshr_dist_type == SHRDistribution.UNI else _sseq_list[2]
|
|
70
|
-
)
|
|
71
|
-
_pr_rng_seed_seq = _sseq_list[-1] if _price_spec == PriceSpec.ZERO else None
|
|
72
|
+
_sseq = tuple(SeedSequence(pool_size=8) for _ in range(4))
|
|
73
|
+
_sdtt = _i.share_spec.dist_type == SHRDistribution.UNI
|
|
74
|
+
_pst = _i.price_spec == PriceSpec.RNG
|
|
72
75
|
|
|
73
76
|
return SeedSequenceData(
|
|
74
|
-
|
|
77
|
+
share=_sseq[0],
|
|
78
|
+
pcm=_sseq[1],
|
|
79
|
+
fcounts=(None if _sdtt else _sseq[2]),
|
|
80
|
+
price=(None if not _pst else (_sseq[2] if _sdtt else _sseq[3])),
|
|
75
81
|
)
|
|
76
82
|
|
|
77
83
|
|
|
@@ -141,7 +147,7 @@ class MarketSample:
|
|
|
141
147
|
"Set seed_data.fcounts to None and retry."
|
|
142
148
|
)
|
|
143
149
|
|
|
144
|
-
if _i.price_spec != PriceSpec.
|
|
150
|
+
if _i.price_spec != PriceSpec.RNG and _v.price is not None:
|
|
145
151
|
raise ValueError(
|
|
146
152
|
"Attribute, seed_data.price is ignored as irrelevant unless "
|
|
147
153
|
"prices are asymmetric and uncorrelated and price-cost margins "
|
|
@@ -151,12 +157,12 @@ class MarketSample:
|
|
|
151
157
|
nthreads: int = field(default=NTHREADS, validator=validators.instance_of(int))
|
|
152
158
|
"""number of parallel threads to use"""
|
|
153
159
|
|
|
154
|
-
|
|
160
|
+
dataset: MarketSampleData | None = field(default=None, init=False)
|
|
155
161
|
|
|
156
|
-
enf_counts: UPPTestsCounts | None = field(default=None)
|
|
162
|
+
enf_counts: UPPTestsCounts | None = field(default=None, init=False)
|
|
157
163
|
|
|
158
164
|
def _gen_market_sample(
|
|
159
|
-
self, /, *, sample_size: int, seed_data: SeedSequenceData
|
|
165
|
+
self, /, *, sample_size: int, seed_data: SeedSequenceData, nthreads: int
|
|
160
166
|
) -> MarketSampleData:
|
|
161
167
|
"""
|
|
162
168
|
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
@@ -170,108 +176,81 @@ class MarketSample:
|
|
|
170
176
|
|
|
171
177
|
"""
|
|
172
178
|
|
|
173
|
-
_recapture_form = self.share_spec.recapture_form
|
|
174
|
-
_recapture_ratio = self.share_spec.recapture_ratio
|
|
175
|
-
_dist_type_mktshr = self.share_spec.dist_type
|
|
176
|
-
_dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
|
|
177
|
-
_hsr_filing_test_type = self.hsr_filing_test_type
|
|
178
|
-
|
|
179
|
-
_seed_data = seed_data or self.seed_data
|
|
180
|
-
(
|
|
181
|
-
_mktshr_rng_seed_seq,
|
|
182
|
-
_pcm_rng_seed_seq,
|
|
183
|
-
_fcount_rng_seed_seq,
|
|
184
|
-
_pr_rng_seed_seq,
|
|
185
|
-
) = (getattr(_seed_data, _a) for _a in _seed_data.__dataclass_fields__)
|
|
186
|
-
_shr_sample_size = 1.0 * (sample_size or self.sample_size)
|
|
187
|
-
|
|
188
179
|
# Scale up sample size to offset discards based on specified criteria
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
180
|
+
shr_sample_size = sample_size * self.hsr_filing_test_type
|
|
181
|
+
shr_sample_size *= (
|
|
182
|
+
SSZConstant.MNL_DEP
|
|
183
|
+
if self.pcm_spec.firm2_pcm_constraint == FM2Constraint.MNL
|
|
184
|
+
else 1
|
|
185
|
+
)
|
|
186
|
+
shr_sample_size = int(shr_sample_size)
|
|
193
187
|
|
|
194
188
|
# Generate share data
|
|
195
|
-
|
|
196
|
-
|
|
189
|
+
mktshr_data = gen_share_data(
|
|
190
|
+
shr_sample_size,
|
|
197
191
|
self.share_spec,
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
nthreads
|
|
201
|
-
)
|
|
202
|
-
|
|
203
|
-
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
204
|
-
getattr(_mktshr_data, _f)
|
|
205
|
-
for _f in (
|
|
206
|
-
"mktshr_array",
|
|
207
|
-
"fcounts",
|
|
208
|
-
"aggregate_purchase_prob",
|
|
209
|
-
"nth_firm_share",
|
|
210
|
-
)
|
|
192
|
+
seed_data.fcounts,
|
|
193
|
+
seed_data.share,
|
|
194
|
+
nthreads,
|
|
211
195
|
)
|
|
196
|
+
mktshr_array_ = mktshr_data.mktshr_array
|
|
197
|
+
fcounts_ = mktshr_data.fcounts
|
|
198
|
+
aggregate_purchase_prob_ = mktshr_data.aggregate_purchase_prob
|
|
199
|
+
nth_firm_share_ = mktshr_data.nth_firm_share
|
|
200
|
+
del mktshr_data
|
|
212
201
|
|
|
213
202
|
# Generate merging-firm price and PCM data
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
203
|
+
margin_data, price_data = gen_margin_price_data(
|
|
204
|
+
mktshr_array_[:, :2],
|
|
205
|
+
nth_firm_share_,
|
|
206
|
+
aggregate_purchase_prob_,
|
|
218
207
|
self.pcm_spec,
|
|
219
208
|
self.price_spec,
|
|
220
209
|
self.hsr_filing_test_type,
|
|
221
|
-
|
|
222
|
-
|
|
210
|
+
seed_data.pcm,
|
|
211
|
+
seed_data.price,
|
|
223
212
|
nthreads,
|
|
224
213
|
)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
238
|
-
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
239
|
-
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
240
|
-
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
241
|
-
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
|
|
242
|
-
:_s_size
|
|
214
|
+
pcm_array_ = margin_data.pcm_array
|
|
215
|
+
price_array_ = price_data.price_array
|
|
216
|
+
|
|
217
|
+
if shr_sample_size > sample_size:
|
|
218
|
+
mnl_test_rows = margin_data.mnl_test_array * price_data.hsr_filing_test
|
|
219
|
+
|
|
220
|
+
mktshr_array_ = mktshr_array_[mnl_test_rows][:sample_size]
|
|
221
|
+
pcm_array_ = margin_data.pcm_array[mnl_test_rows][:sample_size]
|
|
222
|
+
price_array_ = price_data.price_array[mnl_test_rows][:sample_size]
|
|
223
|
+
fcounts_ = fcounts_[mnl_test_rows][:sample_size]
|
|
224
|
+
aggregate_purchase_prob_ = aggregate_purchase_prob_[mnl_test_rows][
|
|
225
|
+
:sample_size
|
|
243
226
|
]
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
# Calculate diversion ratios
|
|
247
|
-
_divr_array = gen_divr_array(
|
|
248
|
-
_recapture_form,
|
|
249
|
-
_recapture_ratio,
|
|
250
|
-
_mktshr_array[:, :2],
|
|
251
|
-
_aggregate_purchase_prob,
|
|
252
|
-
)
|
|
253
|
-
|
|
254
|
-
del _mnl_test_rows, _s_size
|
|
227
|
+
nth_firm_share_ = nth_firm_share_[mnl_test_rows][:sample_size]
|
|
255
228
|
|
|
256
|
-
|
|
257
|
-
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
|
|
258
|
-
:, None
|
|
259
|
-
]
|
|
229
|
+
del mnl_test_rows
|
|
260
230
|
|
|
261
|
-
|
|
262
|
-
|
|
231
|
+
# Calculate diversion ratios
|
|
232
|
+
divr_array = gen_divr_array(
|
|
233
|
+
self.share_spec.recapture_form,
|
|
234
|
+
self.share_spec.recapture_ratio,
|
|
235
|
+
mktshr_array_[:, :2],
|
|
236
|
+
aggregate_purchase_prob_,
|
|
263
237
|
)
|
|
264
238
|
|
|
265
239
|
return MarketSampleData(
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
240
|
+
mktshr_array_[:, :2],
|
|
241
|
+
pcm_array_,
|
|
242
|
+
price_array_,
|
|
243
|
+
divr_array,
|
|
244
|
+
np.einsum("ij,ij->i", mktshr_array_[:, :2], mktshr_array_[:, [1, 0]])[
|
|
245
|
+
:, None
|
|
246
|
+
],
|
|
247
|
+
aggregate_purchase_prob_,
|
|
248
|
+
fcounts_,
|
|
249
|
+
nth_firm_share_,
|
|
250
|
+
(
|
|
251
|
+
np.einsum("ij,ij->i", mktshr_array_[:, :2], mktshr_array_[:, [1, 0]])
|
|
252
|
+
+ np.einsum("ij,ij->i", mktshr_array_, mktshr_array_)
|
|
253
|
+
)[:, None],
|
|
275
254
|
)
|
|
276
255
|
|
|
277
256
|
def generate_sample(self, /) -> None:
|
|
@@ -283,7 +262,7 @@ class MarketSample:
|
|
|
283
262
|
|
|
284
263
|
"""
|
|
285
264
|
|
|
286
|
-
self.
|
|
265
|
+
self.dataset = self._gen_market_sample(
|
|
287
266
|
seed_data=self.seed_data,
|
|
288
267
|
sample_size=self.sample_size,
|
|
289
268
|
nthreads=self.nthreads,
|
|
@@ -328,21 +307,15 @@ class MarketSample:
|
|
|
328
307
|
|
|
329
308
|
"""
|
|
330
309
|
|
|
331
|
-
|
|
310
|
+
market_data_sample = self._gen_market_sample(
|
|
332
311
|
sample_size=sample_size, seed_data=seed_data, nthreads=nthreads
|
|
333
312
|
)
|
|
334
313
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
if self.share_spec.dist_type == "Uniform"
|
|
338
|
-
else ()
|
|
339
|
-
)
|
|
340
|
-
|
|
341
|
-
_upp_test_arrays = compute_upp_test_counts(
|
|
342
|
-
_market_data_sample, _upp_test_parms, _sim_test_regime
|
|
314
|
+
upp_test_arrays: UPPTestsCounts = compute_upp_test_counts(
|
|
315
|
+
market_data_sample, _upp_test_parms, _sim_test_regime
|
|
343
316
|
)
|
|
344
317
|
|
|
345
|
-
return
|
|
318
|
+
return upp_test_arrays
|
|
346
319
|
|
|
347
320
|
def __sim_enf_cnts_ll(
|
|
348
321
|
self, _enf_parm_vec: gbl.HMGThresholds, _sim_test_regime: UPPTestRegime, /
|
|
@@ -372,12 +345,10 @@ class MarketSample:
|
|
|
372
345
|
ΔHHI and concentration zone
|
|
373
346
|
|
|
374
347
|
"""
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
)
|
|
380
|
-
_thread_count = self.nthreads or cpu_count()
|
|
348
|
+
sample_sz = self.sample_size
|
|
349
|
+
subsample_sz = H5_CHUNK_SIZE
|
|
350
|
+
iter_count = (sample_sz / subsample_sz).__ceil__() # noqa: PLC2801
|
|
351
|
+
thread_count = self.nthreads or cpu_count()
|
|
381
352
|
|
|
382
353
|
if (
|
|
383
354
|
self.share_spec.recapture_form != RECForm.OUTIN
|
|
@@ -391,51 +362,49 @@ class MarketSample:
|
|
|
391
362
|
)
|
|
392
363
|
)
|
|
393
364
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
365
|
+
rng_seed_data = list(
|
|
366
|
+
starmap(
|
|
367
|
+
SeedSequenceData,
|
|
397
368
|
zip(
|
|
398
369
|
*[
|
|
399
|
-
_s.spawn(
|
|
370
|
+
_s.spawn(iter_count) if _s else [None] * iter_count
|
|
400
371
|
for _s in (
|
|
401
|
-
getattr(self.seed_data, _a)
|
|
402
|
-
for _a in self.seed_data.
|
|
372
|
+
getattr(self.seed_data, _a.name)
|
|
373
|
+
for _a in self.seed_data.__attrs_attrs__
|
|
403
374
|
)
|
|
404
375
|
],
|
|
405
376
|
strict=True,
|
|
406
|
-
)
|
|
377
|
+
),
|
|
407
378
|
)
|
|
408
|
-
|
|
379
|
+
)
|
|
409
380
|
|
|
410
|
-
|
|
411
|
-
"sample_size":
|
|
381
|
+
sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
|
|
382
|
+
"sample_size": subsample_sz,
|
|
412
383
|
"nthreads": self.nthreads,
|
|
413
384
|
})
|
|
414
385
|
|
|
415
|
-
|
|
386
|
+
res_list = Parallel(n_jobs=thread_count, prefer="threads")(
|
|
416
387
|
delayed(self.__sim_enf_cnts)(
|
|
417
388
|
_enf_parm_vec,
|
|
418
389
|
_sim_test_regime,
|
|
419
|
-
**
|
|
390
|
+
**sim_enf_cnts_kwargs,
|
|
420
391
|
seed_data=_rng_seed_data_ch,
|
|
421
392
|
)
|
|
422
|
-
for _iter_id, _rng_seed_data_ch in enumerate(
|
|
393
|
+
for _iter_id, _rng_seed_data_ch in enumerate(rng_seed_data)
|
|
423
394
|
)
|
|
424
395
|
|
|
425
|
-
|
|
426
|
-
np.stack([getattr(_j, _k) for _j in
|
|
396
|
+
res_list_stacks = UPPTestsCounts(*[
|
|
397
|
+
np.stack([getattr(_j, _k) for _j in res_list])
|
|
427
398
|
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
428
399
|
])
|
|
429
400
|
upp_test_results = UPPTestsCounts(*[
|
|
430
401
|
np.column_stack((
|
|
431
|
-
(_gv := getattr(
|
|
402
|
+
(_gv := getattr(res_list_stacks, _g.name))[0, :, :_h],
|
|
432
403
|
np.einsum("ijk->jk", _gv[:, :, _h:], dtype=np.int64),
|
|
433
404
|
))
|
|
434
|
-
for _g, _h in zip(
|
|
435
|
-
_res_list_stacks.__dataclass_fields__, [1, 1, 3], strict=True
|
|
436
|
-
)
|
|
405
|
+
for _g, _h in zip(res_list_stacks.__attrs_attrs__, [1, 1, 3], strict=True)
|
|
437
406
|
])
|
|
438
|
-
del
|
|
407
|
+
del res_list, res_list_stacks
|
|
439
408
|
|
|
440
409
|
return upp_test_results
|
|
441
410
|
|
|
@@ -462,29 +431,94 @@ class MarketSample:
|
|
|
462
431
|
|
|
463
432
|
"""
|
|
464
433
|
|
|
465
|
-
if self.
|
|
434
|
+
if self.dataset is None:
|
|
466
435
|
self.enf_counts = self.__sim_enf_cnts_ll(_enf_parm_vec, _upp_test_regime)
|
|
467
436
|
else:
|
|
468
437
|
self.enf_counts = compute_upp_test_counts(
|
|
469
|
-
self.
|
|
438
|
+
self.dataset, _enf_parm_vec, _upp_test_regime
|
|
470
439
|
)
|
|
471
440
|
|
|
441
|
+
def to_archive(
|
|
442
|
+
self, zip_: zipfile.ZipFile, _subdir: str = "", /, *, save_dataset: bool = False
|
|
443
|
+
) -> None:
|
|
444
|
+
zpath = zipfile.Path(zip_, at=_subdir)
|
|
445
|
+
name_root = f"{_PKG_NAME}_market_sample"
|
|
446
|
+
|
|
447
|
+
with (zpath / f"{name_root}.yaml").open("w") as _yfh:
|
|
448
|
+
this_yaml.dump(self, _yfh)
|
|
449
|
+
|
|
450
|
+
if save_dataset:
|
|
451
|
+
if all((_dt := self.dataset is None, _et := self.enf_counts is None)):
|
|
452
|
+
raise ValueError(
|
|
453
|
+
"No dataset and/or enforcement counts available for saving. "
|
|
454
|
+
"Generate some data or set save_dataset to False to poceed."
|
|
455
|
+
)
|
|
456
|
+
|
|
457
|
+
if not _dt:
|
|
458
|
+
byte_stream = io.BytesIO()
|
|
459
|
+
with h5py.File(byte_stream, "w") as h5f:
|
|
460
|
+
for _a in self.dataset.__attrs_attrs__:
|
|
461
|
+
if all((
|
|
462
|
+
(_arr := getattr(self.dataset, _a.name)).any(),
|
|
463
|
+
not np.isnan(_arr).all(),
|
|
464
|
+
)):
|
|
465
|
+
h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
|
|
466
|
+
|
|
467
|
+
with (zpath / f"{name_root}_dataset.h5").open("wb") as _hfh:
|
|
468
|
+
_hfh.write(byte_stream.getvalue())
|
|
469
|
+
|
|
470
|
+
if not _et:
|
|
471
|
+
with (zpath / f"{name_root}_enf_counts.yaml").open("w") as _yfh:
|
|
472
|
+
this_yaml.dump(self.enf_counts, _yfh)
|
|
473
|
+
|
|
474
|
+
def from_archive(
|
|
475
|
+
zip_: zipfile.ZipFile, _subdir: str = "", /, *, restore_dataset: bool = False
|
|
476
|
+
) -> MarketSample:
|
|
477
|
+
zpath = zipfile.Path(zip_, at=_subdir)
|
|
478
|
+
name_root = f"{_PKG_NAME}_market_sample"
|
|
479
|
+
|
|
480
|
+
market_sample_ = this_yaml.load((zpath / f"{name_root}.yaml").read_text())
|
|
481
|
+
|
|
482
|
+
if restore_dataset:
|
|
483
|
+
if not any((
|
|
484
|
+
(_dt := (_dp := zpath / f"{name_root}_dataset.h5").is_file()),
|
|
485
|
+
(_et := (_ep := zpath / f"{name_root}_enf_counts.yaml").is_file()),
|
|
486
|
+
)):
|
|
487
|
+
raise ValueError(
|
|
488
|
+
"Archive has no sample data to restore. "
|
|
489
|
+
"Delete second argument, or set it False, and rerun."
|
|
490
|
+
)
|
|
491
|
+
|
|
492
|
+
if _dt:
|
|
493
|
+
with _dp.open("rb") as _hfh:
|
|
494
|
+
h5f = h5py.File(_hfh)
|
|
495
|
+
object.__setattr__( # noqa: PLC2801
|
|
496
|
+
market_sample_,
|
|
497
|
+
"dataset",
|
|
498
|
+
MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
|
|
499
|
+
)
|
|
500
|
+
if _et:
|
|
501
|
+
object.__setattr__( # noqa: PLC2801
|
|
502
|
+
market_sample_, "enf_counts", this_yaml.load(_ep.read_text())
|
|
503
|
+
)
|
|
504
|
+
return market_sample_
|
|
505
|
+
|
|
472
506
|
@classmethod
|
|
473
507
|
def to_yaml(
|
|
474
|
-
cls, _r: yaml.representer.
|
|
508
|
+
cls, _r: yaml.representer.RoundTripRepresenter, _d: MarketSample
|
|
475
509
|
) -> yaml.MappingNode:
|
|
476
|
-
|
|
510
|
+
retval: yaml.MappingNode = _r.represent_mapping(
|
|
477
511
|
f"!{cls.__name__}",
|
|
478
512
|
{
|
|
479
513
|
_a.name: getattr(_d, _a.name)
|
|
480
514
|
for _a in _d.__attrs_attrs__
|
|
481
|
-
if _a.name not in
|
|
515
|
+
if _a.name not in {"dataset", "enf_counts"}
|
|
482
516
|
},
|
|
483
517
|
)
|
|
484
|
-
return
|
|
518
|
+
return retval
|
|
485
519
|
|
|
486
520
|
@classmethod
|
|
487
521
|
def from_yaml(
|
|
488
|
-
cls, _c: yaml.constructor.
|
|
522
|
+
cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
|
|
489
523
|
) -> MarketSample:
|
|
490
|
-
return cls(**_c
|
|
524
|
+
return cls(**yaml_rt_mapper(_c, _n))
|