mergeron 2025.739290.4__tar.gz → 2025.739290.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mergeron might be problematic.

Files changed (26)
  1. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/PKG-INFO +1 -1
  2. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/pyproject.toml +4 -4
  3. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/__init__.py +84 -41
  4. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/__init__.py +3 -3
  5. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/empirical_margin_distribution.py +38 -38
  6. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/ftc_merger_investigations_data.py +23 -34
  7. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/guidelines_boundaries.py +27 -38
  8. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/guidelines_boundary_functions.py +6 -6
  9. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/guidelines_boundary_functions_extra.py +2 -4
  10. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/core/pseudorandom_numbers.py +16 -16
  11. mergeron-2025.739290.6/src/mergeron/data/__init__.py +57 -0
  12. mergeron-2025.739290.6/src/mergeron/data/ftc_merger_investigations_data.zip +0 -0
  13. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/demo/visualize_empirical_margin_distribution.py +5 -2
  14. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/gen/__init__.py +41 -20
  15. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/gen/data_generation.py +14 -16
  16. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/gen/enforcement_stats.py +24 -23
  17. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/gen/upp_tests.py +1 -1
  18. mergeron-2025.739290.4/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
  19. mergeron-2025.739290.4/src/mergeron/data/ftc_invdata.msgpack +0 -0
  20. mergeron-2025.739290.4/src/mergeron/data/ftc_invdata.zip +0 -0
  21. mergeron-2025.739290.4/src/mergeron/demo/__init__.py +0 -3
  22. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/README.rst +0 -0
  23. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/data/damodaran_margin_data.xls +0 -0
  24. {mergeron-2025.739290.4/src/mergeron/data → mergeron-2025.739290.6/src/mergeron/demo}/__init__.py +0 -0
  25. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/gen/data_generation_functions.py +0 -0
  26. {mergeron-2025.739290.4 → mergeron-2025.739290.6}/src/mergeron/py.typed +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: mergeron
- Version: 2025.739290.4
+ Version: 2025.739290.6
  Summary: Analyze merger enforcement policy using Python
  License: MIT
  Keywords: merger policy analysis,merger guidelines,merger screening,policy presumptions,concentration standards,upward pricing pressure,GUPPI
@@ -13,7 +13,7 @@ keywords = [
      "upward pricing pressure",
      "GUPPI",
  ]
- version = "2025.739290.4"
+ version = "2025.739290.6"

  # Classifiers list: https://pypi.org/classifiers/
  classifiers = [
@@ -68,11 +68,11 @@ pendulum = ">=3.0.0"
  ruff = ">=0.5"
  poetry-plugin-export = "^1.8.0"
  pytest = ">=8.0"
- Sphinx = ">=7.2, <8.0"
+ sphinx = ">8.2"
  semver = ">=3.0"
  sphinx-autodoc-typehints = ">=2.0.0"
- sphinx-autoapi = ">=3.0"
- sphinx-immaterial = ">=0.11"
+ sphinx-autoapi = ">=3.6.0"
+ sphinx-immaterial = ">0.11"
  pipdeptree = ">=2.15.1"
  types-openpyxl = ">=3.0.0"
  virtualenv = ">=20.28.0"
@@ -12,18 +12,20 @@ from ruamel import yaml

  _PKG_NAME: str = Path(__file__).parent.stem

- VERSION = "2025.739290.4"
+ VERSION = "2025.739290.6"

  __version__ = VERSION

- DATA_DIR: Path = Path.home() / _PKG_NAME
+ WORK_DIR = globals().get("WORK_DIR", Path.home() / _PKG_NAME)
  """
- Defines a subdirectory named for this package in the user's home path.
+ If defined, the global variable WORK_DIR is used as a data store.

- If the subdirectory doesn't exist, it is created on package invocation.
+ If the user does not define WORK_DIR, a subdirectory in
+ the user's home directory, named for this package, is
+ created/reused.
  """
- if not DATA_DIR.is_dir():
-     DATA_DIR.mkdir(parents=False)
+ if not WORK_DIR.is_dir():
+     WORK_DIR.mkdir(parents=False)

  DEFAULT_REC_RATIO = 0.85

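A minimal standalone sketch of the new `WORK_DIR` fallback, assuming only the behavior visible in this hunk (`_PKG_NAME` is stood in by a literal here):

.. code-block:: python

    from pathlib import Path

    _PKG_NAME = "mergeron"  # stand-in for Path(__file__).parent.stem

    # Reuse a pre-existing WORK_DIR global if one is defined; else default to
    # a per-user subdirectory named for the package, creating it if needed.
    WORK_DIR = globals().get("WORK_DIR", Path.home() / _PKG_NAME)
    if not WORK_DIR.is_dir():
        WORK_DIR.mkdir(parents=False)
    print(WORK_DIR)  # e.g., /home/<user>/mergeron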
@@ -32,19 +34,18 @@ EMPTY_ARRAYINT = np.array([], int)

  NTHREADS = 2 * cpu_count()

- PKG_ENUMS_MAP: dict[str, object] = {}
  PKG_ATTRS_MAP: dict[str, object] = {}

  np.set_printoptions(precision=24, floatmode="fixed")

- type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
+ type HMGPubYear = Literal[1992, 2010, 2023]

  type ArrayBoolean = NDArray[np.bool_]
  type ArrayFloat = NDArray[np.floating]
- type ArrayINT = NDArray[np.unsignedinteger]
+ type ArrayINT = NDArray[np.integer]

  type ArrayDouble = NDArray[np.float64]
- type ArrayBIGINT = NDArray[np.uint64]
+ type ArrayBIGINT = NDArray[np.int64]


  this_yaml = yaml.YAML(typ="rt")
@@ -71,38 +72,27 @@ this_yaml.indent(mapping=2, sequence=4, offset=2)
  )


- @this_yaml.register_class
- class EnumYAMLized(enum.Enum):
-     @classmethod
-     def to_yaml(
-         cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
-     ) -> yaml.ScalarNode:
-         return _r.represent_scalar(
-             f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
-         )
-
-     @classmethod
-     def from_yaml(
-         cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
-     ) -> object[enum.EnumType]:
-         return super().__getattribute__(cls, _n.value)
-
-
  def yaml_rt_mapper(
      _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
  ) -> Mapping[str, Any]:
+     """
+     Constructs a mapping from a mapping node with the RoundTripConstructor.
+     """
      data_: Mapping[str, Any] = yaml.constructor.CommentedMap()
      _c.construct_mapping(_n, maptyp=data_, deep=True)
      return data_


  def yamelize_attrs(
-     _typ: object,
-     excluded_attributes: set | None = None,
-     /,
-     *,
-     attr_map: Mapping[str, object] = PKG_ATTRS_MAP,
+     _typ: object, /, *, attr_map: Mapping[str, object] = PKG_ATTRS_MAP
  ) -> None:
+     """Add YAML representer and constructor for an attrs-defined class.
+
+     When this function is applied, attributes defined with `init=False`
+     are not serialized to YAML.
+     """
+
      attr_map |= {_typ.__name__: _typ}

      _ = this_yaml.representer.add_representer(
@@ -112,11 +102,7 @@ def yamelize_attrs(
          # construct mapping, rather than calling attrs.asdict(),
          # to use yaml representers defined in this package for
          # "upstream" objects
-         {
-             _a.name: getattr(_d, _a.name)
-             for _a in _d.__attrs_attrs__
-             if excluded_attributes is None or _a.name not in excluded_attributes
-         },
+         {_a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init},
      ),
  )
  _ = this_yaml.constructor.add_constructor(
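For context, a self-contained sketch of what `yamelize_attrs` wires up, under the assumptions visible in this hunk (the `Point` class and its `!Point` tag are hypothetical): only `init=True` attributes are represented, and the constructor rebuilds the instance from the mapped fields, so derived attributes are recomputed on load.

.. code-block:: python

    import io

    import attrs
    from ruamel import yaml

    this_yaml = yaml.YAML(typ="rt")


    @attrs.frozen
    class Point:
        x: float = 0.0
        y: float = 0.0
        norm: float = attrs.field(init=False)

        @norm.default
        def _norm_default(self) -> float:
            return (self.x**2 + self.y**2) ** 0.5


    # Represent only init=True attributes, mirroring the comprehension above.
    this_yaml.representer.add_representer(
        Point,
        lambda _r, _d: _r.represent_mapping(
            "!Point",
            {_a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init},
        ),
    )


    def _construct_point(_c, _n):
        data_ = yaml.constructor.CommentedMap()
        _c.construct_mapping(_n, maptyp=data_, deep=True)
        return Point(**data_)


    this_yaml.constructor.add_constructor("!Point", _construct_point)

    buf = io.StringIO()
    this_yaml.dump(Point(3.0, 4.0), buf)
    print(buf.getvalue())  # "norm" is omitted from the serialized mapping
    print(this_yaml.load(buf.getvalue()))  # Point(x=3.0, y=4.0, norm=5.0)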
@@ -125,19 +111,76 @@ def yamelize_attrs(
  )


+ @this_yaml.register_class
+ class Enameled(enum.Enum):
+     """Add YAML representer, constructor for enum.Enum"""
+
+     @classmethod
+     def to_yaml(
+         cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
+     ) -> yaml.ScalarNode:
+         return _r.represent_scalar(
+             f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
+         )
+
+     @classmethod
+     def from_yaml(
+         cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
+     ) -> object[enum.EnumType]:
+         return super().__getattribute__(cls, _n.value)
+
+
  @this_yaml.register_class
  @enum.unique
- class RECForm(str, EnumYAMLized):
-     """For derivation of recapture ratio from market shares."""
+ class RECForm(str, Enameled):
+     R"""For derivation of recapture ratio from market shares.
+
+     With :math:`\mathscr{N}` a set of firms, each supplying a
+     single differentiated product, and :math:`\mathscr{M} \subset \mathscr{N}`
+     a putative relevant product market, with
+     :math:`d_{ij}` denoting diversion ratio from good :math:`i` to good :math:`j`,
+     :math:`s_i` denoting market shares, and
+     :math:`\overline{r}` the default market recapture ratio,
+     market recapture ratios for the respective products may be specified
+     as having one of the following forms:
+     """
+
+     FIXED = "proportional"
+     R"""Given, :math:`\overline{r}`,
+
+     .. math::
+
+         REC_i = \overline{r} {\ } \forall {\ } i \in \mathscr{M}
+
+     """

      INOUT = "inside-out"
+     R"""
+     Given, :math:`\overline{r}, s_i {\ } \forall {\ } i \in \mathscr{M}`, with
+     :math:`s_{min} = \min(s_1, s_2)`,
+
+     .. math::
+
+         REC_i = \frac{\overline{r} (1 - s_i)}{1 - (1 - \overline{r}) s_{min} - \overline{r} s_i}
+         {\ } \forall {\ } i \in \mathscr{M}
+
+     """
+
      OUTIN = "outside-in"
-     FIXED = "proportional"
+     R"""
+     Given, :math:`d_{ij} {\ } \forall {\ } i, j \in \mathscr{M}, i \neq j`,
+
+     .. math::
+
+         REC_i = {\sum_{j \in \mathscr{M}}^{j \neq i} d_{ij}}
+         {\ } \forall {\ } i \in \mathscr{M}
+
+     """


  @this_yaml.register_class
  @enum.unique
- class UPPAggrSelector(str, EnumYAMLized):
+ class UPPAggrSelector(str, Enameled):
      """
      Aggregator for GUPPI and diversion ratio estimates.

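As a quick numerical check of the inside-out form added above (hypothetical shares): with :math:`\overline{r} = 0.85` and shares (0.20, 0.10), the smaller-share firm's recapture collapses to :math:`\overline{r}` exactly, since the :math:`s_i = s_{min}` terms cancel.

.. code-block:: python

    # Worked check of the INOUT (inside-out) recapture formula; shares are hypothetical.
    r_bar = 0.85
    s_1, s_2 = 0.20, 0.10
    s_min = min(s_1, s_2)


    def rec_inout(s_i: float) -> float:
        return (r_bar * (1 - s_i)) / (1 - (1 - r_bar) * s_min - r_bar * s_i)


    print(round(rec_inout(s_1), 4))  # 0.8344
    print(round(rec_inout(s_2), 4))  # 0.85; equals r_bar whenever s_i == s_min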
@@ -27,7 +27,7 @@ class INVTableData:
  type INVData = MappingProxyType[
      str, MappingProxyType[str, MappingProxyType[str, INVTableData]]
  ]
- type INVData_in_ = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]
+ type INVData_in = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]


  (_, _) = (
@@ -61,14 +61,14 @@ type INVData_in_ = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]


  def _dict_from_mapping(_p: Mapping[Any, Any], /) -> dict[Any, Any]:
-     retval = {}
+     retval: dict[Any, Any] = {}
      for _k, _v in _p.items():  # for subit in it:
          retval |= {_k: _dict_from_mapping(_v)} if isinstance(_v, Mapping) else {_k: _v}
      return retval


  def _mappingproxy_from_mapping(_p: Mapping[Any, Any], /) -> MappingProxyType[Any, Any]:
-     retval = {}
+     retval: dict[Any, Any] = {}
      for _k, _v in _p.items():  # for subit in it:
          retval |= (
              {_k: _mappingproxy_from_mapping(_v)}
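A sketch of the recursive conversion consistent with the visible lines; the final wrapping step (not shown in this hunk) is assumed to be `MappingProxyType(retval)`:

.. code-block:: python

    from collections.abc import Mapping
    from types import MappingProxyType


    def _mappingproxy_from_mapping(_p):
        retval = {}
        for _k, _v in _p.items():
            retval |= (
                {_k: _mappingproxy_from_mapping(_v)} if isinstance(_v, Mapping) else {_k: _v}
            )
        return MappingProxyType(retval)  # assumed final wrapping step


    nested = _mappingproxy_from_mapping({"a": {"b": 1}})
    print(type(nested["a"]))  # <class 'mappingproxy'>
    # nested["a"]["b"] = 2 would raise TypeError: the views are read-only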
@@ -39,7 +39,6 @@ price-cost margins fall in the interval :math:`[0, 1]`.
  import shutil
  import zipfile
  from collections.abc import Mapping
- from importlib import resources
  from pathlib import Path
  from types import MappingProxyType

@@ -49,12 +48,17 @@ from numpy.random import PCG64DXSM, Generator, SeedSequence
  from scipy import stats  # type: ignore
  from xlrd import open_workbook  # type: ignore

- from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble, this_yaml  # noqa: TID252
+ from .. import VERSION, ArrayDouble, this_yaml  # noqa: TID252
+ from .. import WORK_DIR as PKG_WORK_DIR  # noqa: TID252
+ from .. import data as mdat  # noqa: TID252
  from . import _mappingproxy_from_mapping

  __version__ = VERSION

- MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_serialized.zip"
+ WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+ """Redefined, in case the user defines WORK_DIR between module imports."""
+
+ MGNDATA_ARCHIVE_PATH = WORK_DIR / "damodaran_margin_data_serialized.zip"


  u3pm = urllib3.PoolManager()
@@ -71,21 +75,21 @@ def margin_data_getter( # noqa: PLR0912
          "This code is designed for parsing Prof. Damodaran's margin tables."
      )

-     data_archive_path_ = data_archive_path or MGNDATA_ARCHIVE_PATH
-     workbook_path_ = data_archive_path_.parent / f"damodaran_{_table_name}_data.xls"
-     if data_archive_path_.is_file() and not data_download_flag:
-         # with data_archive_path_.open("r") as _yfh:
-         #     margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
+     data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
+     workbook_path = data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
+     if data_archive_path.is_file() and not data_download_flag:
          with (
-             zipfile.ZipFile(data_archive_path_) as _yzip,
-             _yzip.open(f"{data_archive_path_.stem}.yaml") as _yfh,
+             zipfile.ZipFile(data_archive_path) as _yzip,
+             _yzip.open(f"{data_archive_path.stem}.yaml") as _yfh,
          ):
-             margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
-             return _mappingproxy_from_mapping(margin_data_dict)
-     elif workbook_path_.is_file():
-         workbook_path_.unlink()
-         if data_archive_path_.is_file():
-             data_archive_path_.unlink()
+             margin_data_dict: MappingProxyType[
+                 str, MappingProxyType[str, float | int]
+             ] = this_yaml.load(_yfh)
+             return margin_data_dict
+     elif workbook_path.is_file():
+         workbook_path.unlink()
+         if data_archive_path.is_file():
+             data_archive_path.unlink()

      margin_urlstr = (
          f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
@@ -96,7 +100,7 @@ def margin_data_getter( # noqa: PLR0912
              u3pm.request(
                  "GET", margin_urlstr, preload_content=False
              ) as _urlopen_handle,
-             workbook_path_.open("wb") as margin_file,
+             workbook_path.open("wb") as margin_file,
          ):
              while True:
                  data_ = _urlopen_handle.read(chunk_size_)
@@ -104,7 +108,7 @@ def margin_data_getter( # noqa: PLR0912
                      break
                  margin_file.write(data_)

-         print(f"Downloaded {margin_urlstr} to {workbook_path_}.")
+         print(f"Downloaded {margin_urlstr} to {workbook_path}.")

      except urllib3.exceptions.MaxRetryError as error_:
          if isinstance(error_.__cause__, urllib3.exceptions.SSLError):
@@ -115,42 +119,38 @@ def margin_data_getter( # noqa: PLR0912
                  f"WARNING: Could not establish secure connection to, {margin_urlstr}."
                  "Using bundled copy."
              )
-             if not workbook_path_.is_file():
-                 with resources.as_file(
-                     resources.files(f"{_PKG_NAME}.data").joinpath(
-                         "empirical_margin_distribution.xls"
-                     )
-                 ) as margin_data_archive_path:
-                     shutil.copy2(margin_data_archive_path, workbook_path_)
+             if not workbook_path.is_file():
+                 shutil.copy2(mdat.DAMODARAN_MARGIN_WORKBOOK, workbook_path)
          else:
              raise error_

-     xl_book_ = open_workbook(workbook_path_, ragged_rows=True, on_demand=True)
-     xl_sheet_ = xl_book_.sheet_by_name("Industry Averages")
+     xl_book = open_workbook(workbook_path, ragged_rows=True, on_demand=True)
+     xl_sheet = xl_book.sheet_by_name("Industry Averages")

-     margin_dict: dict[str, dict[str, float | int]] = {}
-     row_keys_: list[str] = []
+     margin_dict_in: dict[str, dict[str, float | int]] = {}
+     row_keys: list[str] = []
      read_row_flag = False
-     for _ridx in range(xl_sheet_.nrows):
-         xl_row = xl_sheet_.row_values(_ridx)
+     for _ridx in range(xl_sheet.nrows):
+         xl_row = xl_sheet.row_values(_ridx)
          if xl_row[0] == "Industry Name":
              read_row_flag = True
-             row_keys_ = xl_row
+             row_keys = xl_row
              continue

          if not xl_row[0] or not read_row_flag:
              continue

          xl_row[1] = int(xl_row[1])
-         margin_dict[xl_row[0]] = dict(zip(row_keys_[1:], xl_row[1:], strict=True))
+         margin_dict_in[xl_row[0]] = dict(zip(row_keys[1:], xl_row[1:], strict=True))

+     margin_dict = _mappingproxy_from_mapping(margin_dict_in)
      with (
-         zipfile.ZipFile(data_archive_path_, "w") as _yzip,
-         _yzip.open(f"{data_archive_path_.stem}.yaml", "w") as _yfh,
+         zipfile.ZipFile(data_archive_path, "w") as _yzip,
+         _yzip.open(f"{data_archive_path.stem}.yaml", "w") as _yfh,
      ):
          this_yaml.dump(margin_dict, _yfh)

-     return _mappingproxy_from_mapping(margin_dict)
+     return margin_dict


  def margin_data_builder(
@@ -240,9 +240,9 @@ def margin_data_resampler(

      seed_sequence_ = seed_sequence or SeedSequence(pool_size=8)

-     x_, w_, _ = margin_data_builder(margin_data_getter())
+     _x, _w, _ = margin_data_builder(margin_data_getter())

-     margin_kde = stats.gaussian_kde(x_, weights=w_, bw_method="silverman")
+     margin_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
      margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)

      if isinstance(_sample_size, int):
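A runnable miniature of the resampling step shown in this hunk, with hypothetical margin observations and weights standing in for the Damodaran data:

.. code-block:: python

    import numpy as np
    from numpy.random import PCG64DXSM, Generator, SeedSequence
    from scipy import stats

    # Hypothetical margin observations and firm-count weights.
    _x = np.array([0.15, 0.22, 0.30, 0.41, 0.55])
    _w = np.array([5.0, 12.0, 9.0, 4.0, 1.0])

    margin_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
    margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)  # narrow the kernel

    rng = Generator(PCG64DXSM(SeedSequence(pool_size=8)))
    sample = margin_kde.resample(10**4, seed=rng)  # shape (1, 10000)
    print(sample.shape, round(float(sample.mean()), 3))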
@@ -13,7 +13,6 @@ from __future__ import annotations
  import re
  import shutil
  from collections.abc import Sequence
- from importlib import resources
  from operator import itemgetter
  from pathlib import Path
  from types import MappingProxyType
@@ -26,17 +25,12 @@ import urllib3
  from bs4 import BeautifulSoup
  from numpy.testing import assert_array_equal

- from .. import (  # noqa: TID252
-     _PKG_NAME,
-     DATA_DIR,
-     EMPTY_ARRAYINT,
-     VERSION,
-     ArrayBIGINT,
-     this_yaml,
- )
+ from .. import EMPTY_ARRAYINT, VERSION, ArrayBIGINT, this_yaml  # noqa: TID252
+ from .. import WORK_DIR as PKG_WORK_DIR  # noqa: TID252
+ from .. import data as mdat  # noqa: TID252
  from . import (
      INVData,
-     INVData_in_,
+     INVData_in,
      INVTableData,
      _dict_from_mapping,
      _mappingproxy_from_mapping,
@@ -46,21 +40,16 @@ __version__ = VERSION

  m.patch()

- FTCDATA_DIR = DATA_DIR / "FTCData"
- if not FTCDATA_DIR.is_dir():
-     FTCDATA_DIR.mkdir(parents=True)
+ WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+ """Redefined, in case the user defines WORK_DIR between module imports."""

- INVDATA_ARCHIVE_PATH = DATA_DIR / "ftc_invdata.zip"
- if (
-     not INVDATA_ARCHIVE_PATH.is_file()
-     and (
-         _bundled_copy := resources.files(f"{_PKG_NAME}.data").joinpath(
-             INVDATA_ARCHIVE_PATH.name
-         )
-     ).is_file()
- ):
-     with resources.as_file(_bundled_copy) as _bundled_copy_path:
-         shutil.copy2(_bundled_copy_path, INVDATA_ARCHIVE_PATH)
+ FID_WORK_DIR = WORK_DIR / "FTCData"
+ if not FID_WORK_DIR.is_dir():
+     FID_WORK_DIR.mkdir(parents=True)
+
+ INVDATA_ARCHIVE_PATH = WORK_DIR / mdat.FTC_MERGER_INVESTIGATIONS_DATA.name
+ if not INVDATA_ARCHIVE_PATH.is_file():
+     shutil.copy2(mdat.FTC_MERGER_INVESTIGATIONS_DATA, INVDATA_ARCHIVE_PATH)

  TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
  TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
@@ -147,8 +136,8 @@ def construct_data(
          ZipFile(_archive_path, "r") as _yzh,
          _yzh.open(f"{_archive_path.stem}.yaml", "r") as _yfh,
      ):
-         invdata_ = this_yaml.load(_yfh)
-         if isinstance(invdata_, MappingProxyType):
+         invdata_: INVData = this_yaml.load(_yfh)
+         if not isinstance(invdata_, MappingProxyType):
              invdata_ = _mappingproxy_from_mapping(invdata_)
      with (
          ZipFile(_archive_path, "w", compression=ZIP_DEFLATED) as _yzh,
@@ -157,7 +146,7 @@ def construct_data(
              this_yaml.dump(invdata_, _yfh)
          return invdata_

-     invdata: INVData_in_ = _dict_from_mapping(_parse_invdata())
+     invdata: INVData_in = _dict_from_mapping(_parse_invdata())

      # Add some data periods (
      #   only periods ending in 2011, others have few observations and
@@ -217,7 +206,7 @@ def construct_data(
      return retval


- def _construct_no_evidence_data(_invdata: INVData_in_, _data_period: str, /) -> None:
+ def _construct_no_evidence_data(_invdata: INVData_in, _data_period: str, /) -> None:
      invdata_ind_grp = "All Markets"
      table_nos_map = dict(
          zip(
@@ -442,12 +431,12 @@ def _parse_invdata() -> INVData:
      # )
      import pymupdf  # type: ignore  # noqa: PLC0415

-     invdata_docnames = _download_invdata(FTCDATA_DIR)
+     invdata_docnames = _download_invdata(FID_WORK_DIR)

-     invdata: INVData_in_ = {}
+     invdata: INVData_in = {}

      for invdata_docname in invdata_docnames:
-         invdata_pdf_path = FTCDATA_DIR.joinpath(invdata_docname)
+         invdata_pdf_path = FID_WORK_DIR.joinpath(invdata_docname)

          invdata_doc = pymupdf.open(invdata_pdf_path)
          invdata_meta = invdata_doc.metadata
@@ -513,7 +502,7 @@ def _parse_invdata() -> INVData:


  def _parse_page_blocks(
-     _invdata: INVData_in_, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+     _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
  ) -> None:
      if _data_period != "1996-2011":
          _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
@@ -540,7 +529,7 @@ def _parse_page_blocks(


  def _parse_table_blocks(
-     _invdata: INVData_in_, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+     _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
  ) -> None:
      invdata_evid_cond = "Unrestricted on additional evidence"
      table_num, table_ser, table_type = _identify_table_type(
@@ -709,7 +698,7 @@ def _process_table_blks_cnt_type(
      return invdata_array[np.argsort(invdata_array[:, 0])]


- def _download_invdata(_dl_path: Path = FTCDATA_DIR) -> tuple[str, ...]:
+ def _download_invdata(_dl_path: Path = FID_WORK_DIR) -> tuple[str, ...]:
      if not _dl_path.is_dir():
          _dl_path.mkdir(parents=True)

@@ -12,7 +12,6 @@ from typing import Literal
  import numpy as np
  from attrs import Attribute, field, frozen, validators
  from mpmath import mp  # type: ignore
- from ruamel import yaml

  from .. import (  # noqa: TID252
      DEFAULT_REC_RATIO,
@@ -23,7 +22,6 @@ from .. import ( # noqa: TID252
      UPPAggrSelector,
      this_yaml,
      yamelize_attrs,
-     yaml_rt_mapper,
  )
  from . import guidelines_boundary_functions as gbfn

@@ -53,14 +51,12 @@ class GuidelinesThresholds:

      ΔHHI, Recapture Ratio, GUPPI, Diversion ratio, CMCR, and IPR thresholds
      constructed from concentration standards in Guidelines published in
-     1982, 1984, 1992, 2010, and 2023.
+     1992, 2010, and 2023.

      """

      pub_year: HMGPubYear = field(
-         kw_only=False,
-         default=2023,
-         validator=validators.in_([1982, 1984, 1992, 2010, 2023]),
+         kw_only=False, default=2023, validator=validators.in_([1992, 2010, 2023])
      )
      """
      Year of publication of the Guidelines
@@ -99,9 +95,7 @@ class GuidelinesThresholds:
      # thus, here, the tentative delta safeharbor under
      # the 2023 Guidelines is 100 points
      hhi_p, dh_s, dh_p = {
-         1982: (_s1982 := (0.18, 0.005, 0.01)),
-         1984: _s1982,
-         1992: _s1982,
+         1992: (0.18, 0.005, 0.01),
          2010: (0.25, 0.01, 0.02),
          2023: (0.18, 0.01, 0.01),
      }[self.pub_year]
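Restating the lookup above outside the class (values copied from this hunk), with shares as fractions so multiplying by 10**4 gives HHI points:

.. code-block:: python

    conc_standards = {
        1992: (0.18, 0.005, 0.01),
        2010: (0.25, 0.01, 0.02),
        2023: (0.18, 0.01, 0.01),
    }
    hhi_p, dh_s, dh_p = conc_standards[2010]
    # HHI presumption, ΔHHI safe harbor, ΔHHI presumption, in HHI points:
    print(int(hhi_p * 10**4), int(dh_s * 10**4), int(dh_p * 10**4))  # 2500 100 200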
@@ -147,21 +141,6 @@ class GuidelinesThresholds:
          ),
      )

-     @classmethod
-     def to_yaml(
-         cls, _r: yaml.representer.RoundTripRepresenter, _d: GuidelinesThresholds
-     ) -> yaml.MappingNode:
-         ret: yaml.MappingNode = _r.represent_mapping(
-             f"!{cls.__name__}", {"pub_year": _d.pub_year}
-         )
-         return ret
-
-     @classmethod
-     def from_yaml(
-         cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
-     ) -> GuidelinesThresholds:
-         return cls(**yaml_rt_mapper(_c, _n))
-

  @frozen
  class ConcentrationBoundary:
@@ -170,8 +149,8 @@ class ConcentrationBoundary:
      measure_name: Literal[
          "ΔHHI",
          "Combined share",
-         "Pre-merger HHI Contribution",
-         "Post-merger HHI Contribution",
+         "HHI contribution, pre-merger",
+         "HHI contribution, post-merger",
      ] = field(kw_only=False, default="ΔHHI")

      @measure_name.validator
@@ -181,8 +160,8 @@ class ConcentrationBoundary:
          if _value not in {
              "ΔHHI",
              "Combined share",
-             "Pre-merger HHI Contribution",
-             "Post-merger HHI Contribution",
+             "HHI contribution, pre-merger",
+             "HHI contribution, post-merger",
          }:
              raise ValueError(f"Invalid name for a concentration measure, {_value!r}.")

@@ -211,9 +190,9 @@ class ConcentrationBoundary:
              case "ΔHHI":
                  conc_fn = gbfn.hhi_delta_boundary
              case "Combined share":
                  conc_fn = gbfn.combined_share_boundary
-             case "Pre-merger HHI Contribution":
+             case "HHI contribution, pre-merger":
                  conc_fn = gbfn.hhi_pre_contrib_boundary
-             case "Post-merger HHI Contribution":
+             case "HHI contribution, post-merger":
                  conc_fn = gbfn.hhi_post_contrib_boundary

          boundary_ = conc_fn(self.threshold, dps=self.precision)
@@ -257,7 +236,7 @@ class DiversionRatioBoundary:
      )

      recapture_form: RECForm | None = field(kw_only=True, default=RECForm.INOUT)
-     """
+     R"""
      The form of the recapture ratio.

      When :attr:`mergeron.RECForm.INOUT`, the recapture ratio for
@@ -268,12 +247,17 @@ class DiversionRatioBoundary:
      constructed from the generated purchase-probabilities for products in
      the market and for the outside good, specify :attr:`mergeron.RECForm.OUTIN`.)

-     The GUPPI boundary is a continuum of diversion ratio boundaries conditional on
-     price-cost margins, :math:`d_{ij} = g_i * p_i / (m_j * p_j)`,
-     with :math:`d_{ij}` the diverion ratio from product :math:`i` to product :math:`j`;
+     The GUPPI boundary is a continuum of conditional diversion ratio boundaries,
+
+     .. math::
+
+         d_{ij} \vert_{p_i, p_j, m_j} \triangleq \frac{g_i p_i}{m_j p_j} = \overline{d}
+
+     with :math:`d_{ij}` the diversion ratio from product :math:`i` to product :math:`j`;
      :math:`g_i` the GUPPI for product :math:`i`;
-     :math:`m_j` the margin for product :math:`j`; and
-     :math:`p_i, p_j` the prices of goods :math:`i, j`, respectively.
+     :math:`m_j` the price-cost margin on product :math:`j`;
+     :math:`p_i, p_j` the prices of goods :math:`i, j`, respectively; and
+     :math:`\overline{d}` the diversion ratio threshold (i.e., bound).

      """

@@ -477,5 +461,10 @@ if __name__ == "__main__":
      )


- for _typ in (HMGThresholds, ConcentrationBoundary, DiversionRatioBoundary):
-     yamelize_attrs(_typ, {"coordinates", "area"})
+ for _typ in (
+     ConcentrationBoundary,
+     DiversionRatioBoundary,
+     GuidelinesThresholds,
+     HMGThresholds,
+ ):
+     yamelize_attrs(_typ)
@@ -699,7 +699,7 @@ def _shrratio_boundary_intcpt(


  def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](
-     _x1: LerpT, _x2: LerpT, r_: float | MPFloat = 0.25, /
+     _x1: LerpT, _x2: LerpT, _r: float | MPFloat = 0.25, /
  ) -> LerpT:
      """
      From the function of the same name in the C++ standard [2]_
@@ -711,7 +711,7 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](
      ----------
      _x1, _x2
          bounds :math:`x_1, x_2` to interpolate between.
-     r_
+     _r
          interpolation weight :math:`r` assigned to :math:`x_2`

      Returns
@@ -731,14 +731,14 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble, ArrayBIGINT)](

      """

-     if not 0 <= r_ <= 1:
+     if not 0 <= _r <= 1:
          raise ValueError("Specified interpolation weight must lie in [0, 1].")
-     elif r_ == 0:
+     elif _r == 0:
          return _x1
-     elif r_ == 1:
+     elif _r == 1:
          return _x2
      else:
-         return r_ * _x2 + (1 - r_) * _x1
+         return _r * _x2 + (1 - _r) * _x1


  def round_cust(
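Usage sketch for `lerp`, assuming the function defined above is in scope; it interpolates floats and NumPy arrays alike:

.. code-block:: python

    import numpy as np

    print(lerp(0.0, 10.0))       # 2.5 with the default weight _r = 0.25
    print(lerp(0.0, 10.0, 0.5))  # 5.0
    print(lerp(np.zeros(3), np.array([1.0, 2.0, 4.0]), 0.25))  # [0.25 0.5  1.  ]
    # lerp(0.0, 10.0, 1.5) raises ValueError: weight must lie in [0, 1]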
@@ -45,8 +45,6 @@ def dh_area_quad(_dh_val: float = 0.01, /) -> float:
      ----------
      _dh_val
          Merging-firms' ΔHHI bound.
-     dps
-         Specified precision in decimal places.

      Returns
      -------
@@ -297,10 +295,10 @@ def shrratio_boundary_distance( # noqa: PLR0914

      weights_i = (
          (
-             w_ := mp.fdiv(
+             _w := mp.fdiv(
                  s_2 if weighting == "cross-product-share" else s_1, s_1 + s_2
              ),
-             1 - w_,
+             1 - _w,
          )
          if weighting
          else _weights_base
@@ -112,11 +112,11 @@ def gen_seed_seq_list_default(
          63206306147411023146090085885772240748399174641427012462446714431253444120718,
      ]

-     if _len > (lge_ := len(generated_entropy)):
+     if _len > (_lge := len(generated_entropy)):
          e_str_segs = (
              "This function can presently create SeedSequences for generating up to ",
-             f"{lge_:,d} independent random variates. If you really need to generate ",
-             f"more than {lge_:,d} seeded independent random variates, please pass a ",
+             f"{_lge:,d} independent random variates. If you really need to generate ",
+             f"more than {_lge:,d} seeded independent random variates, please pass a ",
              "sufficiently large list of seeds as generated_entropy. See,",
              "{}/{}.".format(
                  "https://numpy.org/doc/stable/reference/random",
@@ -219,13 +219,13 @@ class MultithreadedRNG:
                  self.dist_parms, DEFAULT_DIST_PARMS
              ):
                  if self.dist_type == "Uniform":
-                     dist_type_ = "Random"
+                     dist_type = "Random"
                  elif self.dist_type == "Normal":
-                     dist_type_ = "Gaussian"
+                     dist_type = "Gaussian"
                  else:
-                     dist_type_ = self.dist_type
+                     dist_type = self.dist_type

-             step_size = (len(self.values) / self.nthreads).__ceil__()  # noqa: PLC2801
+             step_size = (len(self.values) / self.nthreads).__ceil__()

              seed_ = (
                  SeedSequence(pool_size=8)
@@ -233,7 +233,7 @@ class MultithreadedRNG:
                  else self.seed_sequence
              )

-             random_generators_ = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))
+             random_generators = tuple(prng(_t) for _t in seed_.spawn(self.nthreads))

              def _fill(
                  _rng: np.random.Generator,
@@ -244,23 +244,23 @@ class MultithreadedRNG:
                  _last: int,
                  /,
              ) -> None:
-                 sz_: tuple[int, ...] = out_[_first:_last].shape
+                 _sz: tuple[int, ...] = out_[_first:_last].shape
                  match _dist_type:
                      case "Beta":
                          shape_a, shape_b = _dist_parms
-                         out_[_first:_last] = _rng.beta(shape_a, shape_b, size=sz_)
+                         out_[_first:_last] = _rng.beta(shape_a, shape_b, size=_sz)
                      case "Dirichlet":
-                         out_[_first:_last] = _rng.dirichlet(_dist_parms, size=sz_[:-1])
+                         out_[_first:_last] = _rng.dirichlet(_dist_parms, size=_sz[:-1])
                      case "Gaussian":
                          _rng.standard_normal(out=out_[_first:_last])
                      case "Normal":
-                         mu_, sigma_ = _dist_parms
-                         out_[_first:_last] = _rng.normal(mu_, sigma_, size=sz_)
+                         _mu, _sigma = _dist_parms
+                         out_[_first:_last] = _rng.normal(_mu, _sigma, size=_sz)
                      case "Random":
                          _rng.random(out=out_[_first:_last])
                      case "Uniform":
                          uni_l, uni_h = _dist_parms
-                         out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=sz_)
+                         out_[_first:_last] = _rng.uniform(uni_l, uni_h, size=_sz)
                      case _:
                          "Unreachable. The validator would have rejected this as invalid."

@@ -271,8 +271,8 @@ class MultithreadedRNG:

                  executor_.submit(
                      _fill,
-                     random_generators_[_i],
-                     dist_type_,
+                     random_generators[_i],
+                     dist_type,
                      self.dist_parms,
                      self.values,
                      range_first,
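A compact, self-contained sketch of the multithreaded fill pattern this class uses — one generator per thread, spawned from a single `SeedSequence`, each writing its slice of a shared array in place (names here are illustrative, not the class's own):

.. code-block:: python

    import concurrent.futures

    import numpy as np
    from numpy.random import PCG64DXSM, Generator, SeedSequence

    nthreads, size = 4, 1_000_000
    values = np.empty(size)
    step_size = -(-size // nthreads)  # same ceiling as (size / nthreads).__ceil__()

    # One independent generator per thread, spawned from a single seed sequence.
    rngs = [Generator(PCG64DXSM(_s)) for _s in SeedSequence(pool_size=8).spawn(nthreads)]


    def _fill(_rng: np.random.Generator, _first: int, _last: int) -> None:
        _rng.random(out=values[_first:_last])  # uniform draws written in place


    with concurrent.futures.ThreadPoolExecutor(nthreads) as executor_:
        for _i in range(nthreads):
            executor_.submit(_fill, rngs[_i], _i * step_size, min((_i + 1) * step_size, size))

    print(values[:3])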
@@ -0,0 +1,57 @@
+ """
+ Data useful for empirical analysis of merger enforcement policy
+
+ These data are processed for further analysis within relevant
+ submodules of the parent package. Thus, direct access is
+ unnecessary in routine use of this package.
+ """
+
+ from importlib import resources
+
+ from .. import _PKG_NAME, VERSION  # noqa: TID252
+
+ __version__ = VERSION
+
+
+ DAMODARAN_MARGIN_WORKBOOK = resources.files(f"{_PKG_NAME}.data").joinpath(
+     "damodaran_margin_data.xls"
+ )
+ """
+ Python object pointing to the included copy of Prof. Damodaran's margin data
+
+ Only used as a fallback, in case direct download from source fails.
+
+ NOTES
+ -----
+ Source data are from Prof. Aswath Damodaran, Stern School of Business, NYU; available online
+ at https://pages.stern.nyu.edu/~adamodar/pc/datasets/margin.xls
+
+ Use as, for example:
+
+ .. code-block:: python
+
+     from mergeron.data import DAMODARAN_MARGIN_WORKBOOK
+
+     shutil.copy2(DAMODARAN_MARGIN_WORKBOOK, Path.home() / f"{DAMODARAN_MARGIN_WORKBOOK.name}")
+ """
+
+ FTC_MERGER_INVESTIGATIONS_DATA = resources.files(f"{_PKG_NAME}.data").joinpath(
+     "ftc_merger_investigations_data.zip"
+ )
+ """
+ FTC merger investigations data published in 2004, 2007, 2008, and 2013
+
+ NOTES
+ -----
+ Raw data tables published by the FTC are loaded into a nested dictionary, organized by
+ data period, table type, and table number. Each table is stored as a numerical array
+ (:mod:`numpy` array), with additional attributes for the industry group and additional
+ evidence noted in the source data.
+
+ Data for additional data periods (time spans) not reported in the source data,
+ e.g., 2004-2011, are constructed by subtracting counts in the base data from counts
+ in the cumulative data, by table, for "enforced" mergers and "closed" mergers, when
+ the cumulative data for the longer period are consistent with the base data for
+ a sub-period.
+ """
@@ -13,9 +13,12 @@ from numpy.random import PCG64DXSM, Generator, SeedSequence
  from scipy import stats  # type: ignore

  import mergeron.core.empirical_margin_distribution as emd
- from mergeron import DATA_DIR
+ from mergeron import WORK_DIR as PKG_WORK_DIR
  from mergeron.core.guidelines_boundary_functions import boundary_plot

+ WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+ """Redefined, in case the user defines WORK_DIR between module imports."""
+
  SAMPLE_SIZE = 10**6
  BIN_COUNT = 25
  margin_data_obs, margin_data_wts, margin_data_stats = emd.margin_data_builder()
@@ -85,4 +88,4 @@ mgn_ax.set_xlabel("Price Cost Margin", fontsize=10)
  mgn_ax.set_ylabel("Relative Frequency", fontsize=10)

  mgn_fig.tight_layout()
- plt.savefig(DATA_DIR / f"{Path(__file__).stem}.pdf")
+ plt.savefig(WORK_DIR / f"{Path(__file__).stem}.pdf")
@@ -7,9 +7,11 @@ containers for industry data generation and testing.
  from __future__ import annotations

  import enum
+ import io
  from collections.abc import Sequence
  from operator import attrgetter

+ import h5py  # type: ignore
  import numpy as np
  from attrs import Attribute, Converter, cmp_using, field, frozen, validators
  from numpy.random import SeedSequence
@@ -22,7 +24,7 @@ from .. import ( # noqa: TID252
      ArrayDouble,
      ArrayFloat,
      ArrayINT,
-     EnumYAMLized,
+     Enameled,
      RECForm,
      UPPAggrSelector,
      this_yaml,
@@ -50,7 +52,7 @@ class SeedSequenceData:

  @this_yaml.register_class
  @enum.unique
- class PriceSpec(tuple[bool, str | None], EnumYAMLized):
+ class PriceSpec(tuple[bool, str | None], Enameled):
      """Price specification.

      Whether prices are symmetric and, if not, the direction of correlation, if any.
@@ -65,7 +67,7 @@ class PriceSpec(tuple[bool, str | None], EnumYAMLized):

  @this_yaml.register_class
  @enum.unique
- class SHRDistribution(str, EnumYAMLized):
+ class SHRDistribution(str, Enameled):
      """Market share distributions."""

      UNI = "Uniform"
@@ -253,7 +255,7 @@ class ShareSpec:
      in published merger guidelines. Accordingly, the recapture ratio rounded to
      the nearest 5% is:

-     * 0.85, **7-to-6 merger from symmetry**; US Guidelines, 1982, 1984, 1992, 2023
+     * 0.85, **7-to-6 merger from symmetry**; US Guidelines, 1992, 2023
      * 0.80, 5-to-4 merger from symmetry
      * 0.80, **5-to-4 merger to symmetry**; US Guidelines, 2010

@@ -285,7 +287,7 @@ class ShareSpec:

  @this_yaml.register_class
  @enum.unique
- class PCMDistribution(str, EnumYAMLized):
+ class PCMDistribution(str, Enameled):
      """Margin distributions."""

      UNI = "Uniform"
@@ -296,7 +298,7 @@ class PCMDistribution(str, EnumYAMLized):

  @this_yaml.register_class
  @enum.unique
- class FM2Constraint(str, EnumYAMLized):
+ class FM2Constraint(str, Enameled):
      """Firm 2 margins - derivation methods."""

      IID = "i.i.d"
@@ -401,7 +403,7 @@ class PCMSpec:

  @this_yaml.register_class
  @enum.unique
- class SSZConstant(float, EnumYAMLized):
+ class SSZConstant(float, Enameled):
      """
      Scale factors to offset sample size reduction.

@@ -467,10 +469,10 @@ class MarketSampleData:
      """

      @aggregate_purchase_prob.default
-     def __appd(_i: MarketSampleData) -> ArrayINT:
-         e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-         e_.fill(np.nan)
-         return e_
+     def __appd(_i: MarketSampleData) -> ArrayDouble:
+         retval: ArrayDouble = np.empty_like(_i.frmshr_array[:, :1], float)
+         retval.fill(np.nan)
+         return retval

      fcounts: ArrayINT = field(eq=cmp_using(np.array_equal))
      """Number of firms in market"""
@@ -487,19 +489,38 @@ class MarketSampleData:
      """

      @nth_firm_share.default
-     def __nfsd(_i: MarketSampleData) -> ArrayINT:
-         e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-         e_.fill(np.nan)
-         return e_
+     def __nfsd(_i: MarketSampleData) -> ArrayDouble:
+         retval: ArrayDouble = np.empty_like(_i.frmshr_array[:, :1], float)
+         retval.fill(np.nan)
+         return retval

      hhi_post: ArrayDouble = field(eq=cmp_using(np.array_equal))
      """Post-merger change in Herfindahl-Hirschman Index (HHI)"""

      @hhi_post.default
-     def __hpd(_i: MarketSampleData) -> ArrayINT:
-         e_ = np.empty_like(_i.frmshr_array[:, :1], float)
-         e_.fill(np.nan)
-         return e_
+     def __hpd(_i: MarketSampleData) -> ArrayDouble:
+         retval: ArrayDouble = np.empty_like(_i.frmshr_array[:, :1], float)
+         retval.fill(np.nan)
+         return retval
+
+     def to_h5bin(self) -> bytes:
+         """Save market sample data to HDF5 file."""
+         byte_stream = io.BytesIO()
+         with h5py.File(byte_stream, "w") as _h5f:
+             for _a in self.__attrs_attrs__:
+                 if all((
+                     (_arr := getattr(self, _a.name)).any(),
+                     not np.isnan(_arr).all(),
+                 )):
+                     _h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
+         return byte_stream.getvalue()
+
+     @classmethod
+     def from_h5f(cls, _hfh: io.BufferedReader) -> MarketSampleData:
+         """Load market sample data from HDF5 file."""
+         with h5py.File(_hfh, "r") as _h5f:
+             _retval = cls(**{_a: _h5f[_a][:] for _a in _h5f})
+         return _retval


  @frozen
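A standalone miniature of the new `to_h5bin()`/`from_h5f()` round trip, with a hypothetical single-field dataset standing in for the attrs attributes:

.. code-block:: python

    import io

    import h5py
    import numpy as np

    arrays = {"frmshr_array": np.array([[0.20, 0.10], [0.30, 0.25]])}

    byte_stream = io.BytesIO()
    with h5py.File(byte_stream, "w") as _h5f:
        for _name, _arr in arrays.items():
            _h5f.create_dataset(_name, data=_arr, fletcher32=True)  # checksummed
    h5bin = byte_stream.getvalue()  # the bytes to_h5bin() would return

    with h5py.File(io.BytesIO(h5bin), "r") as _h5f:
        restored = {_name: _h5f[_name][:] for _name in _h5f}
    print(np.array_equal(arrays["frmshr_array"], restored["frmshr_array"]))  # True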
@@ -557,7 +578,7 @@ class MarginDataSample:

  @this_yaml.register_class
  @enum.unique
- class INVResolution(str, EnumYAMLized):
+ class INVResolution(str, Enameled):
      CLRN = "clearance"
      ENFT = "enforcement"
      BOTH = "investigation"
@@ -5,12 +5,10 @@ Methods to generate data for analyzing merger enforcement policy.

  from __future__ import annotations

- import io
  import zipfile
  from itertools import starmap
  from typing import TypedDict

- import h5py  # type: ignore
  import numpy as np
  from attrs import Attribute, Converter, define, field, validators
  from joblib import Parallel, cpu_count, delayed  # type: ignore
@@ -448,26 +446,26 @@ class MarketSample:
              this_yaml.dump(self, _yfh)

          if save_dataset:
-             if all((_dt := self.dataset is None, _et := self.enf_counts is None)):
+             if all((_ndt := self.dataset is None, _net := self.enf_counts is None)):
                  raise ValueError(
                      "No dataset and/or enforcement counts available for saving. "
                      "Generate some data or set save_dataset to False to proceed."
                  )

-             if not _dt:
-                 byte_stream = io.BytesIO()
-                 with h5py.File(byte_stream, "w") as h5f:
-                     for _a in self.dataset.__attrs_attrs__:
-                         if all((
-                             (_arr := getattr(self.dataset, _a.name)).any(),
-                             not np.isnan(_arr).all(),
-                         )):
-                             h5f.create_dataset(_a.name, data=_arr, fletcher32=True)
+             if not _ndt:
+                 # byte_stream = io.BytesIO()
+                 # with h5py.File(byte_stream, "w") as h5f:
+                 #     for _a in self.dataset.__attrs_attrs__:
+                 #         if all((
+                 #             (_arr := getattr(self.dataset, _a.name)).any(),
+                 #             not np.isnan(_arr).all(),
+                 #         )):
+                 #             h5f.create_dataset(_a.name, data=_arr, fletcher32=True)

                  with (zpath / f"{name_root}_dataset.h5").open("wb") as _hfh:
-                     _hfh.write(byte_stream.getvalue())
+                     _hfh.write(self.dataset.to_h5bin())

-             if not _et:
+             if not _net:
                  with (zpath / f"{name_root}_enf_counts.yaml").open("w") as _yfh:
                      this_yaml.dump(self.enf_counts, _yfh)

@@ -491,11 +489,11 @@ class MarketSample:

          if _dt:
              with _dp.open("rb") as _hfh:
-                 h5f = h5py.File(_hfh)
                  object.__setattr__(  # noqa: PLC2801
                      market_sample_,
                      "dataset",
-                     MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
+                     # MarketSampleData(**{_a: h5f[_a][:] for _a in h5f}),
+                     MarketSampleData.from_h5f(_hfh),
                  )
          if _et:
              object.__setattr__(  # noqa: PLC2801
@@ -9,7 +9,7 @@ from collections.abc import Mapping
  import numpy as np
  from scipy.interpolate import interp1d  # type: ignore

- from .. import VERSION, ArrayBIGINT, EnumYAMLized, this_yaml  # noqa: TID252
+ from .. import VERSION, ArrayBIGINT, Enameled, this_yaml  # noqa: TID252
  from ..core import ftc_merger_investigations_data as fid  # noqa: TID252
  from . import INVResolution

@@ -18,7 +18,7 @@ __version__ = VERSION

  @this_yaml.register_class
  @enum.unique
- class IndustryGroup(str, EnumYAMLized):
+ class IndustryGroup(str, Enameled):
      ALL = "All Markets"
      GRO = "Grocery Markets"
      OIL = "Oil Markets"
@@ -33,7 +33,7 @@ class IndustryGroup(str, EnumYAMLized):

  @this_yaml.register_class
  @enum.unique
- class OtherEvidence(str, EnumYAMLized):
+ class OtherEvidence(str, Enameled):
      UR = "Unrestricted on additional evidence"
      HD = "Hot Documents Identified"
      HN = "No Hot Documents Identified"
@@ -48,7 +48,7 @@ class OtherEvidence(str, EnumYAMLized):

  @this_yaml.register_class
  @enum.unique
- class StatsGrpSelector(str, EnumYAMLized):
+ class StatsGrpSelector(str, Enameled):
      FC = "ByFirmCount"
      HD = "ByHHIandDelta"
      DL = "ByDelta"
@@ -57,7 +57,7 @@ class StatsGrpSelector(str, EnumYAMLized):

  @this_yaml.register_class
  @enum.unique
- class StatsReturnSelector(str, EnumYAMLized):
+ class StatsReturnSelector(str, Enameled):
      CNT = "count"
      RPT = "rate, point"
      RIN = "rate, interval"
@@ -65,7 +65,7 @@ class StatsReturnSelector(str, EnumYAMLized):

  @this_yaml.register_class
  @enum.unique
- class SortSelector(str, EnumYAMLized):
+ class SortSelector(str, Enameled):
      UCH = "unchanged"
      REV = "reversed"

@@ -236,19 +236,19 @@ def table_no_lku(
      /,
  ) -> str:
      if _table_ind_group not in (
-         igl_ := [_data_array_dict_sub[_v].industry_group for _v in _data_array_dict_sub]
+         _igl := [_data_array_dict_sub[_v].industry_group for _v in _data_array_dict_sub]
      ):
          raise ValueError(
              f"Invalid value for industry group, {f'"{_table_ind_group}"'}."
-             f"Must be one of {igl_!r}"
+             f"Must be one of {_igl!r}"
          )

      tno_ = next(
-         t_
-         for t_ in _data_array_dict_sub
+         _t
+         for _t in _data_array_dict_sub
          if all((
-             _data_array_dict_sub[t_].industry_group == _table_ind_group,
-             _data_array_dict_sub[t_].additional_evidence == _table_evid_cond,
+             _data_array_dict_sub[_t].industry_group == _table_ind_group,
+             _data_array_dict_sub[_t].additional_evidence == _table_evid_cond,
          ))
      )

@@ -259,10 +259,10 @@ def enf_cnts_byfirmcount(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
      ndim_in = 1
      return np.vstack([
          np.concatenate([
-             (f,),
-             np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == f][:, ndim_in:]),
+             (_i,),
+             np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == _i][:, ndim_in:]),
          ])
-         for f in np.unique(_cnts_array[:, 0])
+         for _i in np.unique(_cnts_array[:, 0])
      ])


@@ -270,10 +270,10 @@ def enf_cnts_bydelta(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
      ndim_in = 2
      return np.vstack([
          np.concatenate([
-             (f_,),
-             np.einsum("ij->j", _cnts_array[_cnts_array[:, 1] == f_][:, ndim_in:]),
+             (_k,),
+             np.einsum("ij->j", _cnts_array[_cnts_array[:, 1] == _k][:, ndim_in:]),
          ])
-         for f_ in HHI_DELTA_KNOTS[:-1]
+         for _k in HHI_DELTA_KNOTS[:-1]
      ])


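A worked miniature of the group-sum pattern used in both functions above — the einsum spec "ij->j" sums the rows selected by each key column-wise (the toy counts are hypothetical):

.. code-block:: python

    import numpy as np

    _cnts_array = np.array([
        [2, 5, 1],  # firm count in column 0; enforcement counts in the rest
        [2, 3, 2],
        [3, 7, 0],
    ])
    grouped = np.vstack([
        np.concatenate([
            (_i,),
            np.einsum("ij->j", _cnts_array[_cnts_array[:, 0] == _i][:, 1:]),
        ])
        for _i in np.unique(_cnts_array[:, 0])
    ])
    print(grouped)  # [[2 8 3]
                    #  [3 7 0]]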
@@ -286,10 +286,11 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
      # aggregation reduces the footprint of this step in memory. Although this point
      # is more relevant for generated than observed data, using the same coding pattern
      # in both cases does make life easier
-     ndim_in = 2
-     nkeys_ = 3
+     _ndim_in = 2
+     _nkeys = 3
      cnts_byhhipostanddelta, cnts_byconczone = (
-         np.zeros(nkeys_ + _cnts_array.shape[1] - ndim_in, dtype=int) for _ in range(2)
+         np.zeros((1, _nkeys + _cnts_array.shape[1] - _ndim_in), dtype=int)
+         for _ in range(2)
      )

      # Prepare to tag clearance stats by presumption zone
@@ -314,7 +315,7 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
          np.array(
              (
                  *zone_val,
-                 *np.einsum("ij->j", _cnts_array[:, ndim_in:][conc_test]),
+                 *np.einsum("ij->j", _cnts_array[:, _ndim_in:][conc_test]),
              ),
              dtype=int,
          ),
@@ -337,7 +338,7 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
          (
              zone_val,
              np.einsum(
-                 "ij->j", cnts_byhhipostanddelta[hhi_zone_test][:, nkeys_:]
+                 "ij->j", cnts_byhhipostanddelta[hhi_zone_test][:, _nkeys:]
              ),
          ),
          dtype=int,
@@ -191,7 +191,7 @@ def compute_upp_test_arrays(

      Parameters
      ----------
-     _market_data
+     _market_data_sample
          market data sample
      _upp_test_parms
          guidelines thresholds for testing UPP and related statistics
@@ -1,3 +0,0 @@
- from .. import VERSION  # noqa: TID252
-
- __version__ = VERSION