mergeron 2025.739290.4.tar.gz → 2025.739290.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mergeron might be problematic.

Files changed (24):
  1. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/PKG-INFO +1 -1
  2. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/pyproject.toml +1 -1
  3. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/__init__.py +60 -31
  4. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/__init__.py +1 -1
  5. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/empirical_margin_distribution.py +24 -24
  6. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/ftc_merger_investigations_data.py +8 -8
  7. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/guidelines_boundaries.py +13 -25
  8. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/guidelines_boundary_functions.py +6 -6
  9. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/guidelines_boundary_functions_extra.py +2 -4
  10. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/core/pseudorandom_numbers.py +16 -16
  11. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/__init__.py +34 -19
  12. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/data_generation.py +14 -16
  13. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/enforcement_stats.py +24 -23
  14. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/upp_tests.py +1 -1
  15. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/README.rst +0 -0
  16. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/__init__.py +0 -0
  17. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/damodaran_margin_data.xls +0 -0
  18. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
  19. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/ftc_invdata.msgpack +0 -0
  20. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/data/ftc_invdata.zip +0 -0
  21. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/demo/__init__.py +0 -0
  22. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/demo/visualize_empirical_margin_distribution.py +0 -0
  23. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/gen/data_generation_functions.py +0 -0
  24. {mergeron-2025.739290.4 → mergeron-2025.739290.5}/src/mergeron/py.typed +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739290.4
+Version: 2025.739290.5
 Summary: Analyze merger enforcement policy using Python
 License: MIT
 Keywords: merger policy analysis,merger guidelines,merger screening,policy presumptions,concentration standards,upward pricing pressure,GUPPI

pyproject.toml
@@ -13,7 +13,7 @@ keywords = [
   "upward pricing pressure",
   "GUPPI",
 ]
-version = "2025.739290.4"
+version = "2025.739290.5"

 # Classifiers list: https://pypi.org/classifiers/
 classifiers = [

src/mergeron/__init__.py
@@ -12,7 +12,7 @@ from ruamel import yaml

 _PKG_NAME: str = Path(__file__).parent.stem

-VERSION = "2025.739290.4"
+VERSION = "2025.739290.5"

 __version__ = VERSION

@@ -32,7 +32,6 @@ EMPTY_ARRAYINT = np.array([], int)

 NTHREADS = 2 * cpu_count()

-PKG_ENUMS_MAP: dict[str, object] = {}
 PKG_ATTRS_MAP: dict[str, object] = {}

 np.set_printoptions(precision=24, floatmode="fixed")
@@ -71,38 +70,27 @@ this_yaml.indent(mapping=2, sequence=4, offset=2)
 )


-@this_yaml.register_class
-class EnumYAMLized(enum.Enum):
-    @classmethod
-    def to_yaml(
-        cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
-    ) -> yaml.ScalarNode:
-        return _r.represent_scalar(
-            f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
-        )
-
-    @classmethod
-    def from_yaml(
-        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
-    ) -> object[enum.EnumType]:
-        return super().__getattribute__(cls, _n.value)
-
-
 def yaml_rt_mapper(
     _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
 ) -> Mapping[str, Any]:
+    """
+    Constructs a mapping from a mapping node with the RoundTripConstructor
+
+    """
     data_: Mapping[str, Any] = yaml.constructor.CommentedMap()
     _c.construct_mapping(_n, maptyp=data_, deep=True)
     return data_


 def yamelize_attrs(
-    _typ: object,
-    excluded_attributes: set | None = None,
-    /,
-    *,
-    attr_map: Mapping[str, object] = PKG_ATTRS_MAP,
+    _typ: object, /, *, attr_map: Mapping[str, object] = PKG_ATTRS_MAP
 ) -> None:
+    """Add yaml representer, constructor for attrs-defined class.
+
+    Applying this function, attributes with property, `init=False` are
+    not serialized to YAML.
+    """
+
     attr_map |= {_typ.__name__: _typ}

     _ = this_yaml.representer.add_representer(
@@ -112,11 +100,7 @@ def yamelize_attrs(
         # construct mapping, rather than calling attrs.asdict(),
         # to use yaml representers defined in this package for
         # "upstream" objects
-        {
-            _a.name: getattr(_d, _a.name)
-            for _a in _d.__attrs_attrs__
-            if excluded_attributes is None or _a.name not in excluded_attributes
-        },
+        {_a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init},
         ),
     )
     _ = this_yaml.constructor.add_constructor(
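
Note on this change: dropping the `excluded_attributes` parameter works because the new `if _a.init` filter skips any attrs field declared with `init=False`, so derived fields are excluded from YAML automatically instead of by name. A minimal sketch of the mechanism, using a hypothetical class that is not part of the package:

    from attrs import field, frozen

    @frozen
    class Sketch:
        threshold: float = 0.01
        area: float = field(init=False, default=0.0)  # init=False, so not serialized

    # The representer's dict comprehension keeps only init=True fields:
    {_a.name: getattr(Sketch(), _a.name) for _a in Sketch.__attrs_attrs__ if _a.init}
    # -> {'threshold': 0.01}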
@@ -125,19 +109,64 @@
     )


+@this_yaml.register_class
+class Enameled(enum.Enum):
+    """Add YAML representer, constructor for enum.Enum"""
+
+    @classmethod
+    def to_yaml(
+        cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
+    ) -> yaml.ScalarNode:
+        return _r.represent_scalar(
+            f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
+        )
+
+    @classmethod
+    def from_yaml(
+        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
+    ) -> object[enum.EnumType]:
+        return super().__getattribute__(cls, _n.value)
+
+
 @this_yaml.register_class
 @enum.unique
-class RECForm(str, EnumYAMLized):
+class RECForm(str, Enameled):
     """For derivation of recapture ratio from market shares."""

     INOUT = "inside-out"
+    R"""
+    Given, :math:`\overline{r}, s_i {\ } \forall {\ } i \in \set{1, 2, \ldots, m}`, with
+    :math:`s_{min} = \min(s_1, s_2)`,
+
+    .. math::
+
+        REC_i = \frac{(1 - s_i) \overline{r}}{(1 - s_{min}) - (s_i - s_{min}) \overline{r}}
+
+    """
+
     OUTIN = "outside-in"
+    R"""
+    Given, :math:`\pi_i {\ } \forall {\ } i \in N`,
+
+    .. math::
+
+        REC_i = \frac{\sum_{i \in M} \pi_i}{\sum_{j \in N} \pi_j}
+
+    """
+
     FIXED = "proportional"
+    R"""Given, :math:`\overline{r}`,
+
+    .. math::
+
+        REC_i = \overline{r} {\ } \forall {\ } i \in M
+
+    """


 @this_yaml.register_class
 @enum.unique
-class UPPAggrSelector(str, EnumYAMLized):
+class UPPAggrSelector(str, Enameled):
     """
     Aggregator for GUPPI and diversion ratio estimates.

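
Usage note: with `register_class`, ruamel.yaml invokes `to_yaml`/`from_yaml` during dump and load, so each member of an `Enameled` subclass round-trips as a tagged scalar. A minimal sketch, assuming the package's exported `this_yaml` instance and `RECForm` as defined above:

    import io
    from mergeron import RECForm, this_yaml

    buf = io.StringIO()
    this_yaml.dump({"recapture_form": RECForm.INOUT}, buf)
    # buf now holds: recapture_form: !RECForm INOUT
    restored = this_yaml.load(buf.getvalue())
    assert restored["recapture_form"] is RECForm.INOUT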

src/mergeron/core/__init__.py
@@ -27,7 +27,7 @@ class INVTableData:
 type INVData = MappingProxyType[
     str, MappingProxyType[str, MappingProxyType[str, INVTableData]]
 ]
-type INVData_in_ = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]
+type INVData_in = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]


 (_, _) = (

src/mergeron/core/empirical_margin_distribution.py
@@ -71,21 +71,21 @@ def margin_data_getter(  # noqa: PLR0912
         "This code is designed for parsing Prof. Damodaran's margin tables."
     )

-    data_archive_path_ = data_archive_path or MGNDATA_ARCHIVE_PATH
-    workbook_path_ = data_archive_path_.parent / f"damodaran_{_table_name}_data.xls"
-    if data_archive_path_.is_file() and not data_download_flag:
+    data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
+    workbook_path = data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
+    if data_archive_path.is_file() and not data_download_flag:
        # with data_archive_path_.open("r") as _yfh:
        #     margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
        with (
-            zipfile.ZipFile(data_archive_path_) as _yzip,
-            _yzip.open(f"{data_archive_path_.stem}.yaml") as _yfh,
+            zipfile.ZipFile(data_archive_path) as _yzip,
+            _yzip.open(f"{data_archive_path.stem}.yaml") as _yfh,
         ):
             margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
         return _mappingproxy_from_mapping(margin_data_dict)
-    elif workbook_path_.is_file():
-        workbook_path_.unlink()
-        if data_archive_path_.is_file():
-            data_archive_path_.unlink()
+    elif workbook_path.is_file():
+        workbook_path.unlink()
+        if data_archive_path.is_file():
+            data_archive_path.unlink()

     margin_urlstr = (
         f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
@@ -96,7 +96,7 @@ def margin_data_getter(  # noqa: PLR0912
             u3pm.request(
                 "GET", margin_urlstr, preload_content=False
             ) as _urlopen_handle,
-            workbook_path_.open("wb") as margin_file,
+            workbook_path.open("wb") as margin_file,
         ):
             while True:
                 data_ = _urlopen_handle.read(chunk_size_)
@@ -104,7 +104,7 @@ def margin_data_getter(  # noqa: PLR0912
                     break
                 margin_file.write(data_)

-        print(f"Downloaded {margin_urlstr} to {workbook_path_}.")
+        print(f"Downloaded {margin_urlstr} to {workbook_path}.")

     except urllib3.exceptions.MaxRetryError as error_:
         if isinstance(error_.__cause__, urllib3.exceptions.SSLError):
@@ -115,38 +115,38 @@ def margin_data_getter(  # noqa: PLR0912
                 f"WARNING: Could not establish secure connection to, {margin_urlstr}."
                 "Using bundled copy."
             )
-            if not workbook_path_.is_file():
+            if not workbook_path.is_file():
                 with resources.as_file(
                     resources.files(f"{_PKG_NAME}.data").joinpath(
                         "empirical_margin_distribution.xls"
                     )
                 ) as margin_data_archive_path:
-                    shutil.copy2(margin_data_archive_path, workbook_path_)
+                    shutil.copy2(margin_data_archive_path, workbook_path)
         else:
             raise error_

-    xl_book_ = open_workbook(workbook_path_, ragged_rows=True, on_demand=True)
-    xl_sheet_ = xl_book_.sheet_by_name("Industry Averages")
+    xl_book = open_workbook(workbook_path, ragged_rows=True, on_demand=True)
+    xl_sheet = xl_book.sheet_by_name("Industry Averages")

     margin_dict: dict[str, dict[str, float | int]] = {}
-    row_keys_: list[str] = []
+    row_keys: list[str] = []
     read_row_flag = False
-    for _ridx in range(xl_sheet_.nrows):
-        xl_row = xl_sheet_.row_values(_ridx)
+    for _ridx in range(xl_sheet.nrows):
+        xl_row = xl_sheet.row_values(_ridx)
         if xl_row[0] == "Industry Name":
             read_row_flag = True
-            row_keys_ = xl_row
+            row_keys = xl_row
             continue

         if not xl_row[0] or not read_row_flag:
             continue

         xl_row[1] = int(xl_row[1])
-        margin_dict[xl_row[0]] = dict(zip(row_keys_[1:], xl_row[1:], strict=True))
+        margin_dict[xl_row[0]] = dict(zip(row_keys[1:], xl_row[1:], strict=True))

     with (
-        zipfile.ZipFile(data_archive_path_, "w") as _yzip,
-        _yzip.open(f"{data_archive_path_.stem}.yaml", "w") as _yfh,
+        zipfile.ZipFile(data_archive_path, "w") as _yzip,
+        _yzip.open(f"{data_archive_path.stem}.yaml", "w") as _yfh,
     ):
         this_yaml.dump(margin_dict, _yfh)

@@ -240,9 +240,9 @@ def margin_data_resampler(

     seed_sequence_ = seed_sequence or SeedSequence(pool_size=8)

-    x_, w_, _ = margin_data_builder(margin_data_getter())
+    _x, _w, _ = margin_data_builder(margin_data_getter())

-    margin_kde = stats.gaussian_kde(x_, weights=w_, bw_method="silverman")
+    margin_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
     margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)

     if isinstance(_sample_size, int):
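
For context, the resampler above fits a weighted Gaussian kernel density estimate with Silverman's rule and then narrows the bandwidth to a third of the fitted factor before drawing synthetic margins. The same SciPy pattern in isolation, with illustrative stand-in data rather than the packaged margin observations:

    import numpy as np
    from scipy import stats

    x = np.random.default_rng(0).normal(0.3, 0.1, 500)  # stand-in margin data
    kde = stats.gaussian_kde(x, bw_method="silverman")
    kde.set_bandwidth(bw_method=kde.factor / 3.0)       # tighten the bandwidth
    sample = kde.resample(1_000, seed=42)               # shape (1, 1000)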

src/mergeron/core/ftc_merger_investigations_data.py
@@ -36,7 +36,7 @@ from .. import (  # noqa: TID252
 )
 from . import (
     INVData,
-    INVData_in_,
+    INVData_in,
     INVTableData,
     _dict_from_mapping,
     _mappingproxy_from_mapping,
@@ -147,8 +147,8 @@ def construct_data(
         ZipFile(_archive_path, "r") as _yzh,
         _yzh.open(f"{_archive_path.stem}.yaml", "r") as _yfh,
     ):
-        invdata_ = this_yaml.load(_yfh)
-        if isinstance(invdata_, MappingProxyType):
+        invdata_: INVData = this_yaml.load(_yfh)
+        if not isinstance(invdata_, MappingProxyType):
             invdata_ = _mappingproxy_from_mapping(invdata_)
     with (
         ZipFile(_archive_path, "w", compression=ZIP_DEFLATED) as _yzh,
@@ -157,7 +157,7 @@ def construct_data(
         this_yaml.dump(invdata_, _yfh)
         return invdata_

-    invdata: INVData_in_ = _dict_from_mapping(_parse_invdata())
+    invdata: INVData_in = _dict_from_mapping(_parse_invdata())

     # Add some data periods (
     #   only periods ending in 2011, others have few observations and
@@ -217,7 +217,7 @@ def construct_data(
     return retval


-def _construct_no_evidence_data(_invdata: INVData_in_, _data_period: str, /) -> None:
+def _construct_no_evidence_data(_invdata: INVData_in, _data_period: str, /) -> None:
     invdata_ind_grp = "All Markets"
     table_nos_map = dict(
         zip(
@@ -444,7 +444,7 @@ def _parse_invdata() -> INVData:

     invdata_docnames = _download_invdata(FTCDATA_DIR)

-    invdata: INVData_in_ = {}
+    invdata: INVData_in = {}

     for invdata_docname in invdata_docnames:
         invdata_pdf_path = FTCDATA_DIR.joinpath(invdata_docname)
@@ -513,7 +513,7 @@ def _parse_invdata() -> INVData:


 def _parse_page_blocks(
-    _invdata: INVData_in_, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
 ) -> None:
     if _data_period != "1996-2011":
         _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
@@ -540,7 +540,7 @@ def _parse_page_blocks(


 def _parse_table_blocks(
-    _invdata: INVData_in_, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
 ) -> None:
     invdata_evid_cond = "Unrestricted on additional evidence"
     table_num, table_ser, table_type = _identify_table_type(

src/mergeron/core/guidelines_boundaries.py
@@ -12,7 +12,6 @@ from typing import Literal
 import numpy as np
 from attrs import Attribute, field, frozen, validators
 from mpmath import mp  # type: ignore
-from ruamel import yaml

 from .. import (  # noqa: TID252
     DEFAULT_REC_RATIO,
@@ -23,7 +22,6 @@ from .. import (  # noqa: TID252
     UPPAggrSelector,
     this_yaml,
     yamelize_attrs,
-    yaml_rt_mapper,
 )
 from . import guidelines_boundary_functions as gbfn

@@ -147,21 +145,6 @@ class GuidelinesThresholds:
         ),
     )

-    @classmethod
-    def to_yaml(
-        cls, _r: yaml.representer.RoundTripRepresenter, _d: GuidelinesThresholds
-    ) -> yaml.MappingNode:
-        ret: yaml.MappingNode = _r.represent_mapping(
-            f"!{cls.__name__}", {"pub_year": _d.pub_year}
-        )
-        return ret
-
-    @classmethod
-    def from_yaml(
-        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
-    ) -> GuidelinesThresholds:
-        return cls(**yaml_rt_mapper(_c, _n))
-

 @frozen
 class ConcentrationBoundary:
@@ -170,8 +153,8 @@ class ConcentrationBoundary:
     measure_name: Literal[
         "ΔHHI",
         "Combined share",
-        "Pre-merger HHI Contribution",
-        "Post-merger HHI Contribution",
+        "HHI contribution, pre-merger",
+        "HHI contribution, post-merger",
     ] = field(kw_only=False, default="ΔHHI")

     @measure_name.validator
@@ -181,8 +164,8 @@ class ConcentrationBoundary:
         if _value not in {
             "ΔHHI",
             "Combined share",
-            "Pre-merger HHI Contribution",
-            "Post-merger HHI Contribution",
+            "HHI contribution, pre-merger",
+            "HHI contribution, post-merger",
         }:
             raise ValueError(f"Invalid name for a concentration measure, {_value!r}.")

@@ -211,9 +194,9 @@ class ConcentrationBoundary:
                 conc_fn = gbfn.hhi_delta_boundary
             case "Combined share":
                 conc_fn = gbfn.combined_share_boundary
-            case "Pre-merger HHI Contribution":
+            case "HHI contribution, pre-merger":
                 conc_fn = gbfn.hhi_pre_contrib_boundary
-            case "Post-merger HHI Contribution":
+            case "HHI contribution, post-merger":
                 conc_fn = gbfn.hhi_post_contrib_boundary

         boundary_ = conc_fn(self.threshold, dps=self.precision)
@@ -477,5 +460,10 @@ if __name__ == "__main__":
     )


-for _typ in (HMGThresholds, ConcentrationBoundary, DiversionRatioBoundary):
-    yamelize_attrs(_typ, {"coordinates", "area"})
+for _typ in (
+    ConcentrationBoundary,
+    DiversionRatioBoundary,
+    GuidelinesThresholds,
+    HMGThresholds,
+):
+    yamelize_attrs(_typ)

src/mergeron/core/guidelines_boundary_functions.py
@@ -699,7 +699,7 @@ def _shrratio_boundary_intcpt(


 def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](
-    _x1: LerpT, _x2: LerpT, r_: float | MPFloat = 0.25, /
+    _x1: LerpT, _x2: LerpT, _r: float | MPFloat = 0.25, /
 ) -> LerpT:
     """
     From the function of the same name in the C++ standard [2]_
@@ -711,7 +711,7 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](
     ----------
     _x1, _x2
         bounds :math:`x_1, x_2` to interpolate between.
-    r_
+    _r
         interpolation weight :math:`r` assigned to :math:`x_2`

     Returns
@@ -731,14 +731,14 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](

     """

-    if not 0 <= r_ <= 1:
+    if not 0 <= _r <= 1:
         raise ValueError("Specified interpolation weight must lie in [0, 1].")
-    elif r_ == 0:
+    elif _r == 0:
         return _x1
-    elif r_ == 1:
+    elif _r == 1:
         return _x2
     else:
-        return r_ * _x2 + (1 - r_) * _x1
+        return _r * _x2 + (1 - _r) * _x1


 def round_cust(
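
The rename to `_r` does not alter the documented behavior: `lerp` returns the convex combination `_r * _x2 + (1 - _r) * _x1`, with the endpoints returned exactly. For example:

    lerp(0.0, 10.0)        # default _r = 0.25 -> 2.5
    lerp(0.0, 10.0, 0.5)   # -> 5.0
    lerp(0.0, 10.0, 1.0)   # -> 10.0, _x2 returned exactly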

src/mergeron/core/guidelines_boundary_functions_extra.py
@@ -45,8 +45,6 @@ def dh_area_quad(_dh_val: float = 0.01, /) -> float:
     ----------
     _dh_val
         Merging-firms' ΔHHI bound.
-    dps
-        Specified precision in decimal places.

     Returns
     -------
@@ -297,10 +295,10 @@ def shrratio_boundary_distance(  # noqa: PLR0914

     weights_i = (
         (
-            w_ := mp.fdiv(
+            _w := mp.fdiv(
                 s_2 if weighting == "cross-product-share" else s_1, s_1 + s_2
             ),
-            1 - w_,
+            1 - _w,
         )
         if weighting
         else _weights_base

src/mergeron/core/pseudorandom_numbers.py
@@ -112,11 +112,11 @@ def gen_seed_seq_list_default(
         63206306147411023146090085885772240748399174641427012462446714431253444120718,
     ]

-    if _len > (lge_ := len(generated_entropy)):
+    if _len > (_lge := len(generated_entropy)):
         e_str_segs = (
             "This function can presently create SeedSequences for generating up to ",
-            f"{lge_:,d} independent random variates. If you really need to generate ",
-            f"more than {lge_:,d} seeded independent random variates, please pass a ",
+            f"{_lge:,d} independent random variates. If you really need to generate ",
+            f"more than {_lge:,d} seeded independent random variates, please pass a ",
             "sufficiently large list of seeds as generated_entropy. See,",
             "{}/{}.".format(
                 "https://numpy.org/doc/stable/reference/random",
@@ -219,13 +219,13 @@ class MultithreadedRNG:
             self.dist_parms, DEFAULT_DIST_PARMS
         ):
             if self.dist_type == "Uniform":
-                dist_type_ = "Random"
+                dist_type = "Random"
             elif self.dist_type == "Normal":
-                dist_type_ = "Gaussian"
+                dist_type = "Gaussian"
             else:
-                dist_type_ = self.dist_type
+                dist_type = self.dist_type

-        step_size = (len(self.values) / self.nthreads).__ceil__()  # noqa: PLC2801
+        step_size = (len(self.values) / self.nthreads).__ceil__()

         seed_ = (
             SeedSequence(pool_size=8)
@@ -233,7 +233,7 @@
             else self.seed_sequence
         )

-        random_generators_ = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))
+        random_generators = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))

         def _fill(
             _rng: np.random.Generator,
@@ -244,23 +244,23 @@
             _last: int,
             /,
         ) -> None:
-            sz_: tuple[int, ...] = out_[_first:_last].shape
+            _sz: tuple[int, ...] = out_[_first:_last].shape
             match _dist_type:
                 case "Beta":
                     shape_a, shape_b = _dist_parms
-                    out_[_first:_last] = _rng.beta(shape_a, shape_b, size=sz_)
+                    out_[_first:_last] = _rng.beta(shape_a, shape_b, size=_sz)
                 case "Dirichlet":
-                    out_[_first:_last] = _rng.dirichlet(_dist_parms, size=sz_[:-1])
+                    out_[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
                 case "Gaussian":
                     _rng.standard_normal(out=out_[_first:_last])
                 case "Normal":
-                    mu_, sigma_ = _dist_parms
-                    out_[_first:_last] = _rng.normal(mu_, sigma_, size=sz_)
+                    _mu, _sigma = _dist_parms
+                    out_[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
                 case "Random":
                     _rng.random(out=out_[_first:_last])
                 case "Uniform":
                     uni_l, uni_h = _dist_parms
-                    out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=sz_)
+                    out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=_sz)
                 case _:
                     "Unreachable. The validator would have rejected this as invalid."

@@ -271,8 +271,8 @@

                 executor_.submit(
                     _fill,
-                    random_generators_[_i],
-                    dist_type_,
+                    random_generators[_i],
+                    dist_type,
                     self.dist_parms,
                     self.values,
                     range_first,
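
The submit loop above is the core of the multithreaded-fill pattern: generators spawned from one `SeedSequence` each write a disjoint slice of a shared output array. A self-contained sketch of the idea (simplified; the package adds distribution dispatch and parameter validation):

    from concurrent.futures import ThreadPoolExecutor

    import numpy as np
    from numpy.random import SeedSequence, default_rng

    values = np.empty(1_000_000)
    nthreads = 4
    step = -(-len(values) // nthreads)  # ceiling division, like step_size above
    rngs = [default_rng(s) for s in SeedSequence().spawn(nthreads)]
    with ThreadPoolExecutor(nthreads) as pool:
        for i, rng in enumerate(rngs):
            pool.submit(rng.random, out=values[i * step : (i + 1) * step])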

src/mergeron/gen/__init__.py
@@ -7,9 +7,11 @@ containers for industry data generation and testing.
 from __future__ import annotations

 import enum
+import io
 from collections.abc import Sequence
 from operator import attrgetter

+import h5py
 import numpy as np
 from attrs import Attribute, Converter, cmp_using, field, frozen, validators
 from numpy.random import SeedSequence
@@ -22,7 +24,7 @@ from .. import (  # noqa: TID252
     ArrayDouble,
     ArrayFloat,
     ArrayINT,
-    EnumYAMLized,
+    Enameled,
     RECForm,
     UPPAggrSelector,
     this_yaml,
@@ -50,7 +52,7 @@ class SeedSequenceData:

 @this_yaml.register_class
 @enum.unique
-class PriceSpec(tuple[bool, str | None], EnumYAMLized):
+class PriceSpec(tuple[bool, str | None], Enameled):
     """Price specification.

     Whether prices are symmetric and, if not, the direction of correlation, if any.
@@ -65,7 +67,7 @@ class PriceSpec(tuple[bool, str | None], EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class SHRDistribution(str, EnumYAMLized):
+class SHRDistribution(str, Enameled):
     """Market share distributions."""

     UNI = "Uniform"
@@ -285,7 +287,7 @@ class ShareSpec:

 @this_yaml.register_class
 @enum.unique
-class PCMDistribution(str, EnumYAMLized):
+class PCMDistribution(str, Enameled):
     """Margin distributions."""

     UNI = "Uniform"
@@ -296,7 +298,7 @@ class PCMDistribution(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class FM2Constraint(str, EnumYAMLized):
+class FM2Constraint(str, Enameled):
     """Firm 2 margins - derivation methods."""

     IID = "i.i.d"
@@ -401,7 +403,7 @@ class PCMSpec:

 @this_yaml.register_class
 @enum.unique
-class SSZConstant(float, EnumYAMLized):
+class SSZConstant(float, Enameled):
     """
     Scale factors to offset sample size reduction.

@@ -467,10 +469,8 @@ class MarketSampleData:
     """

     @aggregate_purchase_prob.default
-    def __appd(_i: MarketSampleData) -> ArrayINT:
-        e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-        e_.fill(np.nan)
-        return e_
+    def __appd(_i: MarketSampleData) -> ArrayDouble:
+        return np.nan * np.empty_like(_i.frmshr_array[:, :1], float)

     fcounts: ArrayINT = field(eq=cmp_using(np.array_equal))
     """Number of firms in market"""
@@ -487,19 +487,34 @@ class MarketSampleData:
     """

     @nth_firm_share.default
-    def __nfsd(_i: MarketSampleData) -> ArrayINT:
-        e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-        e_.fill(np.nan)
-        return e_
+    def __nfsd(_i: MarketSampleData) -> ArrayDouble:
+        return np.nan * np.empty_like(_i.frmshr_array[:, :1], float)

     hhi_post: ArrayDouble = field(eq=cmp_using(np.array_equal))
     """Post-merger change in Herfindahl-Hirschmann Index (HHI)"""

     @hhi_post.default
-    def __hpd(_i: MarketSampleData) -> ArrayINT:
-        e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-        e_.fill(np.nan)
-        return e_
+    def __hpd(_i: MarketSampleData) -> ArrayDouble:
+        return np.nan * np.empty_like(_i.frmshr_array[:, :1], float)
+
+    def to_h5bin(self) -> bytes:
+        """Save market sample data to HDF5 file."""
+        byte_stream = io.BytesIO()
+        with h5py.File(byte_stream, "w") as _h5f:
+            for _a in self.__attrs_attrs__:
+                if all((
+                    (_arr := getattr(self, _a.name)).any(),
+                    not np.isnan(_arr).all(),
+                )):
+                    _h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
+        return byte_stream.getvalue()
+
+    @classmethod
+    def from_h5f(cls, _hfh: io.BufferedReader) -> MarketSampleData:
+        """Load market sample data from HDF5 file."""
+        with h5py.File(_hfh, "r") as _h5f:
+            _retval = cls(**{_a: _h5f[_a][:] for _a in _h5f})
+        return _retval


 @frozen
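
These methods relocate the HDF5 logic that version .4 kept inline in `data_generation.py` (see that file's hunks below). A hypothetical round trip, assuming a populated instance `mkt`; note that `h5py.File` also accepts in-memory file-like objects such as `io.BytesIO`, and that fields that are all-zero or all-NaN are skipped on write and so fall back to their defaults on load:

    import io

    h5_bytes = mkt.to_h5bin()
    restored = MarketSampleData.from_h5f(io.BytesIO(h5_bytes))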
@@ -557,7 +572,7 @@ class MarginDataSample:

 @this_yaml.register_class
 @enum.unique
-class INVResolution(str, EnumYAMLized):
+class INVResolution(str, Enameled):
     CLRN = "clearance"
     ENFT = "enforcement"
     BOTH = "investigation"

src/mergeron/gen/data_generation.py
@@ -5,12 +5,10 @@ Methods to generate data for analyzing merger enforcement policy.

 from __future__ import annotations

-import io
 import zipfile
 from itertools import starmap
 from typing import TypedDict

-import h5py  # type: ignore
 import numpy as np
 from attrs import Attribute, Converter, define, field, validators
 from joblib import Parallel, cpu_count, delayed  # type: ignore
@@ -448,26 +446,26 @@ class MarketSample:
                 this_yaml.dump(self, _yfh)

         if save_dataset:
-            if all((_dt := self.dataset is None, _et := self.enf_counts is None)):
+            if all((_ndt := self.dataset is None, _net := self.enf_counts is None)):
                 raise ValueError(
                     "No dataset and/or enforcement counts available for saving. "
                     "Generate some data or set save_dataset to False to poceed."
                 )

-            if not _dt:
-                byte_stream = io.BytesIO()
-                with h5py.File(byte_stream, "w") as h5f:
-                    for _a in self.dataset.__attrs_attrs__:
-                        if all((
-                            (_arr := getattr(self.dataset, _a.name)).any(),
-                            not np.isnan(_arr).all(),
-                        )):
-                            h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
+            if not _ndt:
+                # byte_stream = io.BytesIO()
+                # with h5py.File(byte_stream, "w") as h5f:
+                #     for _a in self.dataset.__attrs_attrs__:
+                #         if all((
+                #             (_arr := getattr(self.dataset, _a.name)).any(),
+                #             not np.isnan(_arr).all(),
+                #         )):
+                #             h5f.create_dataset(_a.name, data=_arr, fletcher32=True)

                 with (zpath / f"{name_root}_dataset.h5").open("wb") as _hfh:
-                    _hfh.write(byte_stream.getvalue())
+                    _hfh.write(self.dataset.to_h5bin())

-            if not _et:
+            if not _net:
                 with (zpath / f"{name_root}_enf_counts.yaml").open("w") as _yfh:
                     this_yaml.dump(self.enf_counts, _yfh)

@@ -491,11 +489,11 @@ class MarketSample:

         if _dt:
             with _dp.open("rb") as _hfh:
-                h5f = h5py.File(_hfh)
                 object.__setattr__(  # noqa: PLC2801
                     market_sample_,
                     "dataset",
-                    MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
+                    # MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
+                    MarketSampleData.from_h5f(_hfh),
                 )
         if _et:
             object.__setattr__(  # noqa: PLC2801

src/mergeron/gen/enforcement_stats.py
@@ -9,7 +9,7 @@ from collections.abc import Mapping
 import numpy as np
 from scipy.interpolate import interp1d  # type: ignore

-from .. import VERSION, ArrayBIGINT, EnumYAMLized, this_yaml  # noqa: TID252
+from .. import VERSION, ArrayBIGINT, Enameled, this_yaml  # noqa: TID252
 from ..core import ftc_merger_investigations_data as fid  # noqa: TID252
 from . import INVResolution

@@ -18,7 +18,7 @@ __version__ = VERSION

 @this_yaml.register_class
 @enum.unique
-class IndustryGroup(str, EnumYAMLized):
+class IndustryGroup(str, Enameled):
     ALL = "All Markets"
     GRO = "Grocery Markets"
     OIL = "Oil Markets"
@@ -33,7 +33,7 @@ class IndustryGroup(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class OtherEvidence(str, EnumYAMLized):
+class OtherEvidence(str, Enameled):
     UR = "Unrestricted on additional evidence"
     HD = "Hot Documents Identified"
     HN = "No Hot Documents Identified"
@@ -48,7 +48,7 @@ class OtherEvidence(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class StatsGrpSelector(str, EnumYAMLized):
+class StatsGrpSelector(str, Enameled):
     FC = "ByFirmCount"
     HD = "ByHHIandDelta"
     DL = "ByDelta"
@@ -57,7 +57,7 @@ class StatsGrpSelector(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class StatsReturnSelector(str, EnumYAMLized):
+class StatsReturnSelector(str, Enameled):
     CNT = "count"
     RPT = "rate, point"
     RIN = "rate, interval"
@@ -65,7 +65,7 @@ class StatsReturnSelector(str, EnumYAMLized):

 @this_yaml.register_class
 @enum.unique
-class SortSelector(str, EnumYAMLized):
+class SortSelector(str, Enameled):
     UCH = "unchanged"
     REV = "reversed"

@@ -236,19 +236,19 @@ def table_no_lku(
     /,
 ) -> str:
     if _table_ind_group not in (
-        igl_ := [_data_array_dict_sub[_v].industry_group for _v in _data_array_dict_sub]
+        _igl := [_data_array_dict_sub[_v].industry_group for _v in _data_array_dict_sub]
     ):
         raise ValueError(
             f"Invalid value for industry group, {f'"{_table_ind_group}"'}."
-            f"Must be one of {igl_!r}"
+            f"Must be one of {_igl!r}"
         )

     tno_ = next(
-        t_
-        for t_ in _data_array_dict_sub
+        _t
+        for _t in _data_array_dict_sub
         if all((
-            _data_array_dict_sub[t_].industry_group == _table_ind_group,
-            _data_array_dict_sub[t_].additional_evidence == _table_evid_cond,
+            _data_array_dict_sub[_t].industry_group == _table_ind_group,
+            _data_array_dict_sub[_t].additional_evidence == _table_evid_cond,
         ))
     )

@@ -259,10 +259,10 @@ def enf_cnts_byfirmcount(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     ndim_in = 1
     return np.vstack([
         np.concatenate([
-            (f,),
-            np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == f][:, ndim_in:]),
+            (_i,),
+            np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == _i][:, ndim_in:]),
         ])
-        for f in np.unique(_cnts_array[:, 0])
+        for _i in np.unique(_cnts_array[:, 0])
     ])

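
In these tallies, `np.einsum("ij->j", a)` sums over rows, i.e. a column-wise total equivalent to `a.sum(axis=0)`:

    import numpy as np

    a = np.array([[1, 2], [3, 4]])
    np.einsum("ij->j", a)   # -> array([4, 6])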
@@ -270,10 +270,10 @@ def enf_cnts_bydelta(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     ndim_in = 2
     return np.vstack([
         np.concatenate([
-            (f_,),
-            np.einsum("ij->j", _cnts_array[_cnts_array[:, 1] == f_][:, ndim_in:]),
+            (_k,),
+            np.einsum("ij->j", _cnts_array[_cnts_array[:, 1] == _k][:, ndim_in:]),
         ])
-        for f_ in HHI_DELTA_KNOTS[:-1]
+        for _k in HHI_DELTA_KNOTS[:-1]
     ])

@@ -286,10 +286,11 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     # aggregation reduces the footprint of this step in memory. Although this point
     # is more relevant for generated than observed data, using the same coding pattern
     # in both cases does make life easier
-    ndim_in = 2
-    nkeys_ = 3
+    _ndim_in = 2
+    _nkeys = 3
     cnts_byhhipostanddelta, cnts_byconczone = (
-        np.zeros(nkeys_ + _cnts_array.shape[1] - ndim_in, dtype=int) for _ in range(2)
+        np.zeros((1, _nkeys + _cnts_array.shape[1] - _ndim_in), dtype=int)
+        for _ in range(2)
     )

     # Prepare to tag clearance stats by presumption zone
@@ -314,7 +315,7 @@
             np.array(
                 (
                     *zone_val,
-                    *np.einsum("ij->j", _cnts_array[:, ndim_in:][conc_test]),
+                    *np.einsum("ij->j", _cnts_array[:, _ndim_in:][conc_test]),
                 ),
                 dtype=int,
             ),
@@ -337,7 +338,7 @@
                 (
                     zone_val,
                     np.einsum(
-                        "ij->j", cnts_byhhipostanddelta[hhi_zone_test][:, nkeys_:]
+                        "ij->j", cnts_byhhipostanddelta[hhi_zone_test][:, _nkeys:]
                     ),
                 ),
                 dtype=int,

src/mergeron/gen/upp_tests.py
@@ -191,7 +191,7 @@ def compute_upp_test_arrays(

     Parameters
     ----------
-    _market_data
+    _market_data_sample
         market data sample
     _upp_test_parms
         guidelines thresholds for testing UPP and related statistics