mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +103 -48
- mergeron/core/__init__.py +105 -4
- mergeron/core/empirical_margin_distribution.py +100 -78
- mergeron/core/ftc_merger_investigations_data.py +309 -316
- mergeron/core/guidelines_boundaries.py +67 -138
- mergeron/core/guidelines_boundary_functions.py +202 -379
- mergeron/core/guidelines_boundary_functions_extra.py +264 -106
- mergeron/core/pseudorandom_numbers.py +73 -64
- mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- mergeron/data/ftc_invdata.zip +0 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +9 -7
- mergeron/gen/__init__.py +138 -161
- mergeron/gen/data_generation.py +181 -149
- mergeron/gen/data_generation_functions.py +220 -237
- mergeron/gen/enforcement_stats.py +78 -109
- mergeron/gen/upp_tests.py +119 -194
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/METADATA +2 -3
- mergeron-2025.739290.5.dist-info/RECORD +24 -0
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/WHEEL +1 -1
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron-2025.739290.3.dist-info/RECORD +0 -23
mergeron/gen/data_generation.py
CHANGED
|
@@ -5,6 +5,8 @@ Methods to generate data for analyzing merger enforcement policy.
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
+
import zipfile
|
|
9
|
+
from itertools import starmap
|
|
8
10
|
from typing import TypedDict
|
|
9
11
|
|
|
10
12
|
import numpy as np
|
|
@@ -13,7 +15,14 @@ from joblib import Parallel, cpu_count, delayed # type: ignore
|
|
|
13
15
|
from numpy.random import SeedSequence
|
|
14
16
|
from ruamel import yaml
|
|
15
17
|
|
|
16
|
-
from .. import
|
|
18
|
+
from .. import ( # noqa: TID252 # noqa
|
|
19
|
+
_PKG_NAME,
|
|
20
|
+
NTHREADS,
|
|
21
|
+
VERSION,
|
|
22
|
+
RECForm,
|
|
23
|
+
this_yaml,
|
|
24
|
+
yaml_rt_mapper,
|
|
25
|
+
)
|
|
17
26
|
from ..core import guidelines_boundaries as gbl # noqa: TID252
|
|
18
27
|
from ..core.guidelines_boundaries import HMGThresholds # noqa: TID252
|
|
19
28
|
from . import (
|
|
@@ -34,10 +43,12 @@ from .data_generation_functions import (
|
|
|
34
43
|
gen_margin_price_data,
|
|
35
44
|
gen_share_data,
|
|
36
45
|
)
|
|
37
|
-
from .upp_tests import compute_upp_test_counts
|
|
46
|
+
from .upp_tests import compute_upp_test_counts # type: ignore # has pytypes marker ...
|
|
38
47
|
|
|
39
48
|
__version__ = VERSION
|
|
40
49
|
|
|
50
|
+
H5_CHUNK_SIZE = 10**6
|
|
51
|
+
|
|
41
52
|
|
|
42
53
|
class SamplingFunctionKWArgs(TypedDict, total=False):
|
|
43
54
|
"Keyword arguments of sampling methods defined below"
|
|
@@ -56,22 +67,15 @@ def _seed_data_conv(_v: SeedSequenceData | None, _i: MarketSample) -> SeedSequen
|
|
|
56
67
|
if isinstance(_v, SeedSequenceData):
|
|
57
68
|
return _v
|
|
58
69
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
_seed_count = 2 if _mktshr_dist_type == SHRDistribution.UNI else 3
|
|
63
|
-
_seed_count += 1 if _price_spec == PriceSpec.ZERO else 0
|
|
64
|
-
|
|
65
|
-
_sseq_list = tuple(SeedSequence(pool_size=8) for _ in range(_seed_count))
|
|
66
|
-
|
|
67
|
-
_mktshr_rng_seed_seq, _pcm_rng_seed_seq = _sseq_list[:2]
|
|
68
|
-
_fcount_rng_seed_seq = (
|
|
69
|
-
None if _mktshr_dist_type == SHRDistribution.UNI else _sseq_list[2]
|
|
70
|
-
)
|
|
71
|
-
_pr_rng_seed_seq = _sseq_list[-1] if _price_spec == PriceSpec.ZERO else None
|
|
70
|
+
_sseq = tuple(SeedSequence(pool_size=8) for _ in range(4))
|
|
71
|
+
_sdtt = _i.share_spec.dist_type == SHRDistribution.UNI
|
|
72
|
+
_pst = _i.price_spec == PriceSpec.RNG
|
|
72
73
|
|
|
73
74
|
return SeedSequenceData(
|
|
74
|
-
|
|
75
|
+
share=_sseq[0],
|
|
76
|
+
pcm=_sseq[1],
|
|
77
|
+
fcounts=(None if _sdtt else _sseq[2]),
|
|
78
|
+
price=(None if not _pst else (_sseq[2] if _sdtt else _sseq[3])),
|
|
75
79
|
)
|
|
76
80
|
|
|
77
81
|
|
|
@@ -141,7 +145,7 @@ class MarketSample:
|
|
|
141
145
|
"Set seed_data.fcounts to None and retry."
|
|
142
146
|
)
|
|
143
147
|
|
|
144
|
-
if _i.price_spec != PriceSpec.
|
|
148
|
+
if _i.price_spec != PriceSpec.RNG and _v.price is not None:
|
|
145
149
|
raise ValueError(
|
|
146
150
|
"Attribute, seed_data.price is ignored as irrelevant unless "
|
|
147
151
|
"prices are asymmetric and uncorrelated and price-cost margins "
|
|
@@ -151,12 +155,12 @@ class MarketSample:
|
|
|
151
155
|
nthreads: int = field(default=NTHREADS, validator=validators.instance_of(int))
|
|
152
156
|
"""number of parallel threads to use"""
|
|
153
157
|
|
|
154
|
-
|
|
158
|
+
dataset: MarketSampleData | None = field(default=None, init=False)
|
|
155
159
|
|
|
156
|
-
enf_counts: UPPTestsCounts | None = field(default=None)
|
|
160
|
+
enf_counts: UPPTestsCounts | None = field(default=None, init=False)
|
|
157
161
|
|
|
158
162
|
def _gen_market_sample(
|
|
159
|
-
self, /, *, sample_size: int, seed_data: SeedSequenceData
|
|
163
|
+
self, /, *, sample_size: int, seed_data: SeedSequenceData, nthreads: int
|
|
160
164
|
) -> MarketSampleData:
|
|
161
165
|
"""
|
|
162
166
|
Generate share, diversion ratio, price, and margin data for MarketSpec.
|
|
@@ -170,108 +174,81 @@ class MarketSample:
|
|
|
170
174
|
|
|
171
175
|
"""
|
|
172
176
|
|
|
173
|
-
_recapture_form = self.share_spec.recapture_form
|
|
174
|
-
_recapture_ratio = self.share_spec.recapture_ratio
|
|
175
|
-
_dist_type_mktshr = self.share_spec.dist_type
|
|
176
|
-
_dist_firm2_pcm = self.pcm_spec.firm2_pcm_constraint
|
|
177
|
-
_hsr_filing_test_type = self.hsr_filing_test_type
|
|
178
|
-
|
|
179
|
-
_seed_data = seed_data or self.seed_data
|
|
180
|
-
(
|
|
181
|
-
_mktshr_rng_seed_seq,
|
|
182
|
-
_pcm_rng_seed_seq,
|
|
183
|
-
_fcount_rng_seed_seq,
|
|
184
|
-
_pr_rng_seed_seq,
|
|
185
|
-
) = (getattr(_seed_data, _a) for _a in _seed_data.__dataclass_fields__)
|
|
186
|
-
_shr_sample_size = 1.0 * (sample_size or self.sample_size)
|
|
187
|
-
|
|
188
177
|
# Scale up sample size to offset discards based on specified criteria
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
178
|
+
shr_sample_size = sample_size * self.hsr_filing_test_type
|
|
179
|
+
shr_sample_size *= (
|
|
180
|
+
SSZConstant.MNL_DEP
|
|
181
|
+
if self.pcm_spec.firm2_pcm_constraint == FM2Constraint.MNL
|
|
182
|
+
else 1
|
|
183
|
+
)
|
|
184
|
+
shr_sample_size = int(shr_sample_size)
|
|
193
185
|
|
|
194
186
|
# Generate share data
|
|
195
|
-
|
|
196
|
-
|
|
187
|
+
mktshr_data = gen_share_data(
|
|
188
|
+
shr_sample_size,
|
|
197
189
|
self.share_spec,
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
nthreads
|
|
201
|
-
)
|
|
202
|
-
|
|
203
|
-
_mktshr_array, _fcounts, _aggregate_purchase_prob, _nth_firm_share = (
|
|
204
|
-
getattr(_mktshr_data, _f)
|
|
205
|
-
for _f in (
|
|
206
|
-
"mktshr_array",
|
|
207
|
-
"fcounts",
|
|
208
|
-
"aggregate_purchase_prob",
|
|
209
|
-
"nth_firm_share",
|
|
210
|
-
)
|
|
190
|
+
seed_data.fcounts,
|
|
191
|
+
seed_data.share,
|
|
192
|
+
nthreads,
|
|
211
193
|
)
|
|
194
|
+
mktshr_array_ = mktshr_data.mktshr_array
|
|
195
|
+
fcounts_ = mktshr_data.fcounts
|
|
196
|
+
aggregate_purchase_prob_ = mktshr_data.aggregate_purchase_prob
|
|
197
|
+
nth_firm_share_ = mktshr_data.nth_firm_share
|
|
198
|
+
del mktshr_data
|
|
212
199
|
|
|
213
200
|
# Generate merging-firm price and PCM data
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
201
|
+
margin_data, price_data = gen_margin_price_data(
|
|
202
|
+
mktshr_array_[:, :2],
|
|
203
|
+
nth_firm_share_,
|
|
204
|
+
aggregate_purchase_prob_,
|
|
218
205
|
self.pcm_spec,
|
|
219
206
|
self.price_spec,
|
|
220
207
|
self.hsr_filing_test_type,
|
|
221
|
-
|
|
222
|
-
|
|
208
|
+
seed_data.pcm,
|
|
209
|
+
seed_data.price,
|
|
223
210
|
nthreads,
|
|
224
211
|
)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
_mktshr_array = _mktshr_array[_mnl_test_rows][:_s_size]
|
|
238
|
-
_pcm_array = _pcm_array[_mnl_test_rows][:_s_size]
|
|
239
|
-
_price_array = _price_array[_mnl_test_rows][:_s_size]
|
|
240
|
-
_fcounts = _fcounts[_mnl_test_rows][:_s_size]
|
|
241
|
-
_aggregate_purchase_prob = _aggregate_purchase_prob[_mnl_test_rows][
|
|
242
|
-
:_s_size
|
|
212
|
+
pcm_array_ = margin_data.pcm_array
|
|
213
|
+
price_array_ = price_data.price_array
|
|
214
|
+
|
|
215
|
+
if shr_sample_size > sample_size:
|
|
216
|
+
mnl_test_rows = margin_data.mnl_test_array * price_data.hsr_filing_test
|
|
217
|
+
|
|
218
|
+
mktshr_array_ = mktshr_array_[mnl_test_rows][:sample_size]
|
|
219
|
+
pcm_array_ = margin_data.pcm_array[mnl_test_rows][:sample_size]
|
|
220
|
+
price_array_ = price_data.price_array[mnl_test_rows][:sample_size]
|
|
221
|
+
fcounts_ = fcounts_[mnl_test_rows][:sample_size]
|
|
222
|
+
aggregate_purchase_prob_ = aggregate_purchase_prob_[mnl_test_rows][
|
|
223
|
+
:sample_size
|
|
243
224
|
]
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
# Calculate diversion ratios
|
|
247
|
-
_divr_array = gen_divr_array(
|
|
248
|
-
_recapture_form,
|
|
249
|
-
_recapture_ratio,
|
|
250
|
-
_mktshr_array[:, :2],
|
|
251
|
-
_aggregate_purchase_prob,
|
|
252
|
-
)
|
|
253
|
-
|
|
254
|
-
del _mnl_test_rows, _s_size
|
|
225
|
+
nth_firm_share_ = nth_firm_share_[mnl_test_rows][:sample_size]
|
|
255
226
|
|
|
256
|
-
|
|
257
|
-
_hhi_delta = np.einsum("ij,ij->i", _frmshr_array, _frmshr_array[:, ::-1])[
|
|
258
|
-
:, None
|
|
259
|
-
]
|
|
227
|
+
del mnl_test_rows
|
|
260
228
|
|
|
261
|
-
|
|
262
|
-
|
|
229
|
+
# Calculate diversion ratios
|
|
230
|
+
divr_array = gen_divr_array(
|
|
231
|
+
self.share_spec.recapture_form,
|
|
232
|
+
self.share_spec.recapture_ratio,
|
|
233
|
+
mktshr_array_[:, :2],
|
|
234
|
+
aggregate_purchase_prob_,
|
|
263
235
|
)
|
|
264
236
|
|
|
265
237
|
return MarketSampleData(
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
238
|
+
mktshr_array_[:, :2],
|
|
239
|
+
pcm_array_,
|
|
240
|
+
price_array_,
|
|
241
|
+
divr_array,
|
|
242
|
+
np.einsum("ij,ij->i", mktshr_array_[:, :2], mktshr_array_[:, [1, 0]])[
|
|
243
|
+
:, None
|
|
244
|
+
],
|
|
245
|
+
aggregate_purchase_prob_,
|
|
246
|
+
fcounts_,
|
|
247
|
+
nth_firm_share_,
|
|
248
|
+
(
|
|
249
|
+
np.einsum("ij,ij->i", mktshr_array_[:, :2], mktshr_array_[:, [1, 0]])
|
|
250
|
+
+ np.einsum("ij,ij->i", mktshr_array_, mktshr_array_)
|
|
251
|
+
)[:, None],
|
|
275
252
|
)
|
|
276
253
|
|
|
277
254
|
def generate_sample(self, /) -> None:
|
|
@@ -283,7 +260,7 @@ class MarketSample:
|
|
|
283
260
|
|
|
284
261
|
"""
|
|
285
262
|
|
|
286
|
-
self.
|
|
263
|
+
self.dataset = self._gen_market_sample(
|
|
287
264
|
seed_data=self.seed_data,
|
|
288
265
|
sample_size=self.sample_size,
|
|
289
266
|
nthreads=self.nthreads,
|
|
@@ -328,21 +305,15 @@ class MarketSample:
|
|
|
328
305
|
|
|
329
306
|
"""
|
|
330
307
|
|
|
331
|
-
|
|
308
|
+
market_data_sample = self._gen_market_sample(
|
|
332
309
|
sample_size=sample_size, seed_data=seed_data, nthreads=nthreads
|
|
333
310
|
)
|
|
334
311
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
if self.share_spec.dist_type == "Uniform"
|
|
338
|
-
else ()
|
|
339
|
-
)
|
|
340
|
-
|
|
341
|
-
_upp_test_arrays = compute_upp_test_counts(
|
|
342
|
-
_market_data_sample, _upp_test_parms, _sim_test_regime
|
|
312
|
+
upp_test_arrays: UPPTestsCounts = compute_upp_test_counts(
|
|
313
|
+
market_data_sample, _upp_test_parms, _sim_test_regime
|
|
343
314
|
)
|
|
344
315
|
|
|
345
|
-
return
|
|
316
|
+
return upp_test_arrays
|
|
346
317
|
|
|
347
318
|
def __sim_enf_cnts_ll(
|
|
348
319
|
self, _enf_parm_vec: gbl.HMGThresholds, _sim_test_regime: UPPTestRegime, /
|
|
@@ -372,12 +343,10 @@ class MarketSample:
|
|
|
372
343
|
ΔHHI and concentration zone
|
|
373
344
|
|
|
374
345
|
"""
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
)
|
|
380
|
-
_thread_count = self.nthreads or cpu_count()
|
|
346
|
+
sample_sz = self.sample_size
|
|
347
|
+
subsample_sz = H5_CHUNK_SIZE
|
|
348
|
+
iter_count = (sample_sz / subsample_sz).__ceil__() # noqa: PLC2801
|
|
349
|
+
thread_count = self.nthreads or cpu_count()
|
|
381
350
|
|
|
382
351
|
if (
|
|
383
352
|
self.share_spec.recapture_form != RECForm.OUTIN
|
|
@@ -391,51 +360,49 @@ class MarketSample:
|
|
|
391
360
|
)
|
|
392
361
|
)
|
|
393
362
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
363
|
+
rng_seed_data = list(
|
|
364
|
+
starmap(
|
|
365
|
+
SeedSequenceData,
|
|
397
366
|
zip(
|
|
398
367
|
*[
|
|
399
|
-
_s.spawn(
|
|
368
|
+
_s.spawn(iter_count) if _s else [None] * iter_count
|
|
400
369
|
for _s in (
|
|
401
|
-
getattr(self.seed_data, _a)
|
|
402
|
-
for _a in self.seed_data.
|
|
370
|
+
getattr(self.seed_data, _a.name)
|
|
371
|
+
for _a in self.seed_data.__attrs_attrs__
|
|
403
372
|
)
|
|
404
373
|
],
|
|
405
374
|
strict=True,
|
|
406
|
-
)
|
|
375
|
+
),
|
|
407
376
|
)
|
|
408
|
-
|
|
377
|
+
)
|
|
409
378
|
|
|
410
|
-
|
|
411
|
-
"sample_size":
|
|
379
|
+
sim_enf_cnts_kwargs: SamplingFunctionKWArgs = SamplingFunctionKWArgs({
|
|
380
|
+
"sample_size": subsample_sz,
|
|
412
381
|
"nthreads": self.nthreads,
|
|
413
382
|
})
|
|
414
383
|
|
|
415
|
-
|
|
384
|
+
res_list = Parallel(n_jobs=thread_count, prefer="threads")(
|
|
416
385
|
delayed(self.__sim_enf_cnts)(
|
|
417
386
|
_enf_parm_vec,
|
|
418
387
|
_sim_test_regime,
|
|
419
|
-
**
|
|
388
|
+
**sim_enf_cnts_kwargs,
|
|
420
389
|
seed_data=_rng_seed_data_ch,
|
|
421
390
|
)
|
|
422
|
-
for _iter_id, _rng_seed_data_ch in enumerate(
|
|
391
|
+
for _iter_id, _rng_seed_data_ch in enumerate(rng_seed_data)
|
|
423
392
|
)
|
|
424
393
|
|
|
425
|
-
|
|
426
|
-
np.stack([getattr(_j, _k) for _j in
|
|
394
|
+
res_list_stacks = UPPTestsCounts(*[
|
|
395
|
+
np.stack([getattr(_j, _k) for _j in res_list])
|
|
427
396
|
for _k in ("by_firm_count", "by_delta", "by_conczone")
|
|
428
397
|
])
|
|
429
398
|
upp_test_results = UPPTestsCounts(*[
|
|
430
399
|
np.column_stack((
|
|
431
|
-
(_gv := getattr(
|
|
400
|
+
(_gv := getattr(res_list_stacks, _g.name))[0, :, :_h],
|
|
432
401
|
np.einsum("ijk->jk", _gv[:, :, _h:], dtype=np.int64),
|
|
433
402
|
))
|
|
434
|
-
for _g, _h in zip(
|
|
435
|
-
_res_list_stacks.__dataclass_fields__, [1, 1, 3], strict=True
|
|
436
|
-
)
|
|
403
|
+
for _g, _h in zip(res_list_stacks.__attrs_attrs__, [1, 1, 3], strict=True)
|
|
437
404
|
])
|
|
438
|
-
del
|
|
405
|
+
del res_list, res_list_stacks
|
|
439
406
|
|
|
440
407
|
return upp_test_results
|
|
441
408
|
|
|
@@ -462,29 +429,94 @@ class MarketSample:
|
|
|
462
429
|
|
|
463
430
|
"""
|
|
464
431
|
|
|
465
|
-
if self.
|
|
432
|
+
if self.dataset is None:
|
|
466
433
|
self.enf_counts = self.__sim_enf_cnts_ll(_enf_parm_vec, _upp_test_regime)
|
|
467
434
|
else:
|
|
468
435
|
self.enf_counts = compute_upp_test_counts(
|
|
469
|
-
self.
|
|
436
|
+
self.dataset, _enf_parm_vec, _upp_test_regime
|
|
470
437
|
)
|
|
471
438
|
|
|
439
|
+
def to_archive(
|
|
440
|
+
self, zip_: zipfile.ZipFile, _subdir: str = "", /, *, save_dataset: bool = False
|
|
441
|
+
) -> None:
|
|
442
|
+
zpath = zipfile.Path(zip_, at=_subdir)
|
|
443
|
+
name_root = f"{_PKG_NAME}_market_sample"
|
|
444
|
+
|
|
445
|
+
with (zpath / f"{name_root}.yaml").open("w") as _yfh:
|
|
446
|
+
this_yaml.dump(self, _yfh)
|
|
447
|
+
|
|
448
|
+
if save_dataset:
|
|
449
|
+
if all((_ndt := self.dataset is None, _net := self.enf_counts is None)):
|
|
450
|
+
raise ValueError(
|
|
451
|
+
"No dataset and/or enforcement counts available for saving. "
|
|
452
|
+
"Generate some data or set save_dataset to False to poceed."
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
if not _ndt:
|
|
456
|
+
# byte_stream = io.BytesIO()
|
|
457
|
+
# with h5py.File(byte_stream, "w") as h5f:
|
|
458
|
+
# for _a in self.dataset.__attrs_attrs__:
|
|
459
|
+
# if all((
|
|
460
|
+
# (_arr := getattr(self.dataset, _a.name)).any(),
|
|
461
|
+
# not np.isnan(_arr).all(),
|
|
462
|
+
# )):
|
|
463
|
+
# h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
|
|
464
|
+
|
|
465
|
+
with (zpath / f"{name_root}_dataset.h5").open("wb") as _hfh:
|
|
466
|
+
_hfh.write(self.dataset.to_h5bin())
|
|
467
|
+
|
|
468
|
+
if not _net:
|
|
469
|
+
with (zpath / f"{name_root}_enf_counts.yaml").open("w") as _yfh:
|
|
470
|
+
this_yaml.dump(self.enf_counts, _yfh)
|
|
471
|
+
|
|
472
|
+
def from_archive(
|
|
473
|
+
zip_: zipfile.ZipFile, _subdir: str = "", /, *, restore_dataset: bool = False
|
|
474
|
+
) -> MarketSample:
|
|
475
|
+
zpath = zipfile.Path(zip_, at=_subdir)
|
|
476
|
+
name_root = f"{_PKG_NAME}_market_sample"
|
|
477
|
+
|
|
478
|
+
market_sample_ = this_yaml.load((zpath / f"{name_root}.yaml").read_text())
|
|
479
|
+
|
|
480
|
+
if restore_dataset:
|
|
481
|
+
if not any((
|
|
482
|
+
(_dt := (_dp := zpath / f"{name_root}_dataset.h5").is_file()),
|
|
483
|
+
(_et := (_ep := zpath / f"{name_root}_enf_counts.yaml").is_file()),
|
|
484
|
+
)):
|
|
485
|
+
raise ValueError(
|
|
486
|
+
"Archive has no sample data to restore. "
|
|
487
|
+
"Delete second argument, or set it False, and rerun."
|
|
488
|
+
)
|
|
489
|
+
|
|
490
|
+
if _dt:
|
|
491
|
+
with _dp.open("rb") as _hfh:
|
|
492
|
+
object.__setattr__( # noqa: PLC2801
|
|
493
|
+
market_sample_,
|
|
494
|
+
"dataset",
|
|
495
|
+
# MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
|
|
496
|
+
MarketSampleData.from_h5f(_hfh),
|
|
497
|
+
)
|
|
498
|
+
if _et:
|
|
499
|
+
object.__setattr__( # noqa: PLC2801
|
|
500
|
+
market_sample_, "enf_counts", this_yaml.load(_ep.read_text())
|
|
501
|
+
)
|
|
502
|
+
return market_sample_
|
|
503
|
+
|
|
472
504
|
@classmethod
|
|
473
505
|
def to_yaml(
|
|
474
|
-
cls, _r: yaml.representer.
|
|
506
|
+
cls, _r: yaml.representer.RoundTripRepresenter, _d: MarketSample
|
|
475
507
|
) -> yaml.MappingNode:
|
|
476
|
-
|
|
508
|
+
retval: yaml.MappingNode = _r.represent_mapping(
|
|
477
509
|
f"!{cls.__name__}",
|
|
478
510
|
{
|
|
479
511
|
_a.name: getattr(_d, _a.name)
|
|
480
512
|
for _a in _d.__attrs_attrs__
|
|
481
|
-
if _a.name not in
|
|
513
|
+
if _a.name not in {"dataset", "enf_counts"}
|
|
482
514
|
},
|
|
483
515
|
)
|
|
484
|
-
return
|
|
516
|
+
return retval
|
|
485
517
|
|
|
486
518
|
@classmethod
|
|
487
519
|
def from_yaml(
|
|
488
|
-
cls, _c: yaml.constructor.
|
|
520
|
+
cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
|
|
489
521
|
) -> MarketSample:
|
|
490
|
-
return cls(**_c
|
|
522
|
+
return cls(**yaml_rt_mapper(_c, _n))
|