mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +142 -93
- mergeron/core/guidelines_boundaries.py +289 -1077
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +257 -245
- mergeron/gen/data_generation.py +473 -221
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +159 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -259
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738963.0.dist-info/METADATA +0 -108
- mergeron-2024.738963.0.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/__init__.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from importlib.metadata import version
|
|
5
4
|
from pathlib import Path
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from numpy.typing import NDArray
|
|
6
9
|
|
|
7
10
|
_PKG_NAME: str = Path(__file__).parent.stem
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
VERSION = "2025.739265.0"
|
|
13
|
+
|
|
14
|
+
__version__ = VERSION
|
|
10
15
|
|
|
11
16
|
DATA_DIR: Path = Path.home() / _PKG_NAME
|
|
12
17
|
"""
|
|
@@ -14,14 +19,26 @@ Defines a subdirectory named for this package in the user's home path.
|
|
|
14
19
|
|
|
15
20
|
If the subdirectory doesn't exist, it is created on package invocation.
|
|
16
21
|
"""
|
|
17
|
-
|
|
18
22
|
if not DATA_DIR.is_dir():
|
|
19
23
|
DATA_DIR.mkdir(parents=False)
|
|
20
24
|
|
|
25
|
+
np.set_printoptions(precision=24, floatmode="fixed")
|
|
26
|
+
|
|
27
|
+
type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
|
|
28
|
+
|
|
29
|
+
type ArrayBoolean = NDArray[np.bool_]
|
|
30
|
+
type ArrayFloat = NDArray[np.float16 | np.float32 | np.float64 | np.float128]
|
|
31
|
+
type ArrayINT = NDArray[np.intp]
|
|
32
|
+
|
|
33
|
+
type ArrayDouble = NDArray[np.float64]
|
|
34
|
+
type ArrayBIGINT = NDArray[np.int64]
|
|
35
|
+
|
|
36
|
+
DEFAULT_REC_RATIO = 0.85
|
|
37
|
+
|
|
21
38
|
|
|
22
39
|
@enum.unique
|
|
23
|
-
class
|
|
24
|
-
"""
|
|
40
|
+
class RECForm(enum.StrEnum):
|
|
41
|
+
"""For derivation of recapture ratio from market shares."""
|
|
25
42
|
|
|
26
43
|
INOUT = "inside-out"
|
|
27
44
|
OUTIN = "outside-in"
|
|
@@ -31,15 +48,18 @@ class RECConstants(enum.StrEnum):
|
|
|
31
48
|
@enum.unique
|
|
32
49
|
class UPPAggrSelector(enum.StrEnum):
|
|
33
50
|
"""
|
|
34
|
-
Aggregator
|
|
51
|
+
Aggregator for GUPPI and diversion ratio estimates.
|
|
35
52
|
|
|
36
53
|
"""
|
|
37
54
|
|
|
38
55
|
AVG = "average"
|
|
39
56
|
CPA = "cross-product-share weighted average"
|
|
40
57
|
CPD = "cross-product-share weighted distance"
|
|
58
|
+
CPG = "cross-product-share weighted geometric mean"
|
|
41
59
|
DIS = "symmetrically-weighted distance"
|
|
60
|
+
GMN = "geometric mean"
|
|
42
61
|
MAX = "max"
|
|
43
62
|
MIN = "min"
|
|
44
63
|
OSA = "own-share weighted average"
|
|
45
64
|
OSD = "own-share weighted distance"
|
|
65
|
+
OSG = "own-share weighted geometric mean"
|
mergeron/core/__init__.py
CHANGED
|
@@ -1,68 +1,8 @@
|
|
|
1
|
-
from
|
|
1
|
+
from mpmath import mp # type: ignore
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from .. import VERSION # noqa: TID252
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
__version__ = VERSION
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
__version__ = version(_PKG_NAME)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _delta_value_validator(
|
|
13
|
-
_instance: UPPBoundarySpec, _attribute: Attribute[float], _value: float, /
|
|
14
|
-
) -> None:
|
|
15
|
-
if not 0 <= _value <= 1:
|
|
16
|
-
raise ValueError(
|
|
17
|
-
"Margin-adjusted benchmark share ratio must lie between 0 and 1."
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _rec_spec_validator(
|
|
22
|
-
_instance: UPPBoundarySpec,
|
|
23
|
-
_attribute: Attribute[RECConstants],
|
|
24
|
-
_value: RECConstants,
|
|
25
|
-
/,
|
|
26
|
-
) -> None:
|
|
27
|
-
if _value == RECConstants.OUTIN:
|
|
28
|
-
raise ValueError(
|
|
29
|
-
f"Invalid recapture specification, {_value!r}. "
|
|
30
|
-
"You may consider specifying RECConstants.INOUT here, and "
|
|
31
|
-
"assigning the recapture rate for the merging-firm with "
|
|
32
|
-
'the smaller market-share to the attribue, "rec" of '
|
|
33
|
-
"the UPPBoundarySpec object you pass."
|
|
34
|
-
)
|
|
35
|
-
if _value is None and _instance.agg_method != UPPAggrSelector.MAX:
|
|
36
|
-
raise ValueError(
|
|
37
|
-
f"Specified aggregation method, {_instance.agg_method} requires a recapture specification."
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
@define(slots=True, frozen=True)
|
|
42
|
-
class UPPBoundarySpec:
|
|
43
|
-
share_ratio: float = field(
|
|
44
|
-
kw_only=False,
|
|
45
|
-
default=0.075,
|
|
46
|
-
validator=(validators.instance_of(float), _delta_value_validator),
|
|
47
|
-
)
|
|
48
|
-
rec: float = field(
|
|
49
|
-
kw_only=False, default=0.80, validator=validators.instance_of(float)
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
agg_method: UPPAggrSelector = field(
|
|
53
|
-
kw_only=True,
|
|
54
|
-
default=UPPAggrSelector.MAX,
|
|
55
|
-
validator=validators.instance_of(UPPAggrSelector),
|
|
56
|
-
)
|
|
57
|
-
recapture_form: RECConstants | None = field(
|
|
58
|
-
kw_only=True,
|
|
59
|
-
default=RECConstants.INOUT,
|
|
60
|
-
validator=(
|
|
61
|
-
validators.optional(validators.instance_of(RECConstants)), # type: ignore
|
|
62
|
-
_rec_spec_validator,
|
|
63
|
-
),
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
precision: int = field(
|
|
67
|
-
kw_only=False, default=5, validator=validators.instance_of(int)
|
|
68
|
-
)
|
|
7
|
+
type MPFloat = mp.mpf # pyright: ignore
|
|
8
|
+
type MPMatrix = mp.matrix # pyright: ignore
|
|
@@ -2,12 +2,16 @@
|
|
|
2
2
|
Functions to parse margin data compiled by
|
|
3
3
|
Prof. Aswath Damodaran, Stern School of Business, NYU.
|
|
4
4
|
|
|
5
|
+
Provides :func:`mgn_data_resampler` for generating margin data
|
|
6
|
+
from a Gaussian KDE estimated on the source (margin) data.
|
|
7
|
+
|
|
5
8
|
Data are downloaded or reused from a local copy, on demand.
|
|
6
9
|
|
|
7
10
|
For terms of use of Prof. Damodaran's data, please see:
|
|
8
11
|
https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datahistory.html
|
|
9
12
|
|
|
10
|
-
|
|
13
|
+
NOTES
|
|
14
|
+
-----
|
|
11
15
|
|
|
12
16
|
Prof. Damodaran notes that the data construction may not be
|
|
13
17
|
consistent from iteration to iteration. He also notes that,
|
|
@@ -32,29 +36,29 @@ price-cost margins fall in the interval :math:`[0, 1]`.
|
|
|
32
36
|
|
|
33
37
|
"""
|
|
34
38
|
|
|
39
|
+
import shutil
|
|
35
40
|
from collections.abc import Mapping
|
|
36
|
-
from importlib
|
|
41
|
+
from importlib import resources
|
|
37
42
|
from pathlib import Path
|
|
38
43
|
from types import MappingProxyType
|
|
39
44
|
|
|
40
45
|
import msgpack # type:ignore
|
|
41
46
|
import numpy as np
|
|
42
|
-
import
|
|
47
|
+
import urllib3
|
|
43
48
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
44
|
-
from numpy.typing import NDArray
|
|
45
|
-
from requests_toolbelt.downloadutils import stream # type: ignore
|
|
46
49
|
from scipy import stats # type: ignore
|
|
47
50
|
from xlrd import open_workbook # type: ignore
|
|
48
51
|
|
|
49
|
-
from .. import _PKG_NAME, DATA_DIR # noqa: TID252
|
|
50
|
-
|
|
51
|
-
__version__ = version(_PKG_NAME)
|
|
52
|
+
from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble # noqa: TID252
|
|
52
53
|
|
|
54
|
+
__version__ = VERSION
|
|
53
55
|
|
|
54
56
|
MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_dict.msgpack"
|
|
55
57
|
|
|
58
|
+
u3pm = urllib3.PoolManager()
|
|
56
59
|
|
|
57
|
-
|
|
60
|
+
|
|
61
|
+
def mgn_data_getter( # noqa: PLR0912
|
|
58
62
|
_table_name: str = "margin",
|
|
59
63
|
*,
|
|
60
64
|
data_archive_path: Path | None = None,
|
|
@@ -68,32 +72,46 @@ def scrape_data_table(
|
|
|
68
72
|
_data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
|
|
69
73
|
|
|
70
74
|
_mgn_urlstr = f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
|
|
71
|
-
_mgn_path = _data_archive_path.parent
|
|
75
|
+
_mgn_path = _data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
|
|
72
76
|
if _data_archive_path.is_file() and not data_download_flag:
|
|
73
77
|
return MappingProxyType(msgpack.unpackb(_data_archive_path.read_bytes()))
|
|
74
78
|
elif _mgn_path.is_file():
|
|
75
79
|
_mgn_path.unlink()
|
|
76
|
-
_data_archive_path.
|
|
77
|
-
|
|
78
|
-
_REQ_TIMEOUT = (9.05, 27)
|
|
79
|
-
# When NYU eventually updates its server certificate to one signed with
|
|
80
|
-
# "InCommon RSA Server CA 2.pem", the step below will be obsolete. In
|
|
81
|
-
# the interim, it is necessary to provide the certificate chain to the
|
|
82
|
-
# root CA, so that the obsolete CA certificate is validated.
|
|
83
|
-
_INCOMMON_2014_CERT_CHAIN_PATH = (
|
|
84
|
-
Path(__file__).parent / "InCommon RSA Server CA cert chain.pem"
|
|
85
|
-
)
|
|
86
|
-
try:
|
|
87
|
-
_urlopen_handle = requests.get(_mgn_urlstr, timeout=_REQ_TIMEOUT, stream=True)
|
|
88
|
-
except requests.exceptions.SSLError:
|
|
89
|
-
_urlopen_handle = requests.get(
|
|
90
|
-
_mgn_urlstr,
|
|
91
|
-
timeout=_REQ_TIMEOUT,
|
|
92
|
-
stream=True,
|
|
93
|
-
verify=str(_INCOMMON_2014_CERT_CHAIN_PATH),
|
|
94
|
-
)
|
|
80
|
+
if _data_archive_path.is_file():
|
|
81
|
+
_data_archive_path.unlink()
|
|
95
82
|
|
|
96
|
-
|
|
83
|
+
try:
|
|
84
|
+
_chunk_size = 1024 * 1024
|
|
85
|
+
with (
|
|
86
|
+
u3pm.request("GET", _mgn_urlstr, preload_content=False) as _urlopen_handle,
|
|
87
|
+
_mgn_path.open("wb") as _mgn_file,
|
|
88
|
+
):
|
|
89
|
+
while True:
|
|
90
|
+
_data = _urlopen_handle.read(_chunk_size)
|
|
91
|
+
if not _data:
|
|
92
|
+
break
|
|
93
|
+
_mgn_file.write(_data)
|
|
94
|
+
|
|
95
|
+
print(f"Downloaded {_mgn_urlstr} to {_mgn_path}.")
|
|
96
|
+
|
|
97
|
+
except urllib3.exceptions.MaxRetryError as _err:
|
|
98
|
+
if isinstance(_err.__cause__, urllib3.exceptions.SSLError):
|
|
99
|
+
# Works fine with other sites secured with certificates
|
|
100
|
+
# from the Internet2 CA, such as,
|
|
101
|
+
# https://snap.stanford.edu/data/web-Stanford.txt.gz
|
|
102
|
+
print(
|
|
103
|
+
f"WARNING: Could not establish secure connection to, {_mgn_urlstr}."
|
|
104
|
+
"Using bundled copy."
|
|
105
|
+
)
|
|
106
|
+
if not _mgn_path.is_file():
|
|
107
|
+
with resources.as_file(
|
|
108
|
+
resources.files(f"{_PKG_NAME}.data").joinpath(
|
|
109
|
+
"empirical_margin_distribution.xls"
|
|
110
|
+
)
|
|
111
|
+
) as _mgn_data_archive_path:
|
|
112
|
+
shutil.copy2(_mgn_data_archive_path, _mgn_path)
|
|
113
|
+
else:
|
|
114
|
+
raise _err
|
|
97
115
|
|
|
98
116
|
_xl_book = open_workbook(_mgn_path, ragged_rows=True, on_demand=True)
|
|
99
117
|
_xl_sheet = _xl_book.sheet_by_name("Industry Averages")
|
|
@@ -114,16 +132,16 @@ def scrape_data_table(
|
|
|
114
132
|
_xl_row[1] = int(_xl_row[1])
|
|
115
133
|
_mgn_dict[_xl_row[0]] = dict(zip(_mgn_row_keys[1:], _xl_row[1:], strict=True))
|
|
116
134
|
|
|
117
|
-
_ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict))
|
|
135
|
+
_ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict)) # pyright: ignore
|
|
118
136
|
|
|
119
137
|
return MappingProxyType(_mgn_dict)
|
|
120
138
|
|
|
121
139
|
|
|
122
140
|
def mgn_data_builder(
|
|
123
141
|
_mgn_tbl_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
|
|
124
|
-
) -> tuple[
|
|
142
|
+
) -> tuple[ArrayDouble, ArrayDouble, ArrayDouble]:
|
|
125
143
|
if _mgn_tbl_dict is None:
|
|
126
|
-
_mgn_tbl_dict =
|
|
144
|
+
_mgn_tbl_dict = mgn_data_getter()
|
|
127
145
|
|
|
128
146
|
_mgn_data_wts, _mgn_data_obs = (
|
|
129
147
|
_f.flatten()
|
|
@@ -169,22 +187,24 @@ def mgn_data_builder(
|
|
|
169
187
|
)
|
|
170
188
|
|
|
171
189
|
|
|
172
|
-
def
|
|
173
|
-
_sample_size: int | tuple[int,
|
|
190
|
+
def mgn_data_resampler(
|
|
191
|
+
_sample_size: int | tuple[int, ...] = (10**6, 2),
|
|
174
192
|
/,
|
|
175
193
|
*,
|
|
176
194
|
seed_sequence: SeedSequence | None = None,
|
|
177
|
-
) ->
|
|
195
|
+
) -> ArrayDouble:
|
|
178
196
|
"""
|
|
179
|
-
Generate the
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
197
|
+
Generate draws from the empirical distribution based on Prof. Damodaran's margin data.
|
|
198
|
+
|
|
199
|
+
The empirical distribution is estimated using a Gaussian KDE; the bandwidth
|
|
200
|
+
selected using Silverman's rule is narrowed to reflect that the margin data
|
|
201
|
+
are multimodal. Margins for firms in finance, investment, insurance, reinsurance, and
|
|
202
|
+
REITs are excluded from the sample used to estimate the empirical distribution.
|
|
183
203
|
|
|
184
204
|
Parameters
|
|
185
205
|
----------
|
|
186
206
|
_sample_size
|
|
187
|
-
Number of draws
|
|
207
|
+
Number of draws; if tuple, (number of draws, number of columns)
|
|
188
208
|
|
|
189
209
|
seed_sequence
|
|
190
210
|
SeedSequence for seeding random-number generator when results
|
|
@@ -198,28 +218,24 @@ def resample_mgn_data(
|
|
|
198
218
|
|
|
199
219
|
_seed_sequence = seed_sequence or SeedSequence(pool_size=8)
|
|
200
220
|
|
|
201
|
-
_x, _w, _ = mgn_data_builder(
|
|
202
|
-
|
|
203
|
-
_mgn_kde = stats.gaussian_kde(_x, weights=_w)
|
|
221
|
+
_x, _w, _ = mgn_data_builder(mgn_data_getter())
|
|
204
222
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
) -> NDArray[np.float64]:
|
|
208
|
-
_seed = Generator(PCG64DXSM(_seed_sequence))
|
|
223
|
+
_mgn_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
|
|
224
|
+
_mgn_kde.set_bandwidth(bw_method=_mgn_kde.factor / 3.0) # pyright: ignore
|
|
209
225
|
|
|
210
|
-
|
|
211
|
-
# the range between [0.0, 1.0)
|
|
212
|
-
ssz_up = int(_ssz / (_mgn_kde.integrate_box_1d(0.0, 1.0) ** 2))
|
|
213
|
-
sample_1 = _mgn_kde.resample(ssz_up, seed=_seed)[0]
|
|
226
|
+
if isinstance(_sample_size, int):
|
|
214
227
|
return np.array(
|
|
215
|
-
|
|
228
|
+
_mgn_kde.resample(_sample_size, seed=Generator(PCG64DXSM(_seed_sequence)))[
|
|
229
|
+
0
|
|
230
|
+
]
|
|
216
231
|
)
|
|
217
|
-
|
|
218
|
-
if isinstance(_sample_size, int):
|
|
219
|
-
return _generate_draws(_mgn_kde, _sample_size, _seed_sequence)
|
|
220
|
-
else:
|
|
232
|
+
elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
|
|
221
233
|
_ssz, _num_cols = _sample_size
|
|
222
234
|
_ret_array = np.empty(_sample_size, np.float64)
|
|
223
235
|
for _idx, _seed_seq in enumerate(_seed_sequence.spawn(_num_cols)):
|
|
224
|
-
_ret_array[:, _idx] =
|
|
236
|
+
_ret_array[:, _idx] = _mgn_kde.resample(
|
|
237
|
+
_ssz, seed=Generator(PCG64DXSM(_seed_seq))
|
|
238
|
+
)[0]
|
|
225
239
|
return _ret_array
|
|
240
|
+
else:
|
|
241
|
+
raise ValueError(f"Invalid sample size: {_sample_size!r}")
|