mergeron 2024.738953.1-py3-none-any.whl → 2025.739265.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic.

Files changed (39)
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +147 -101
  5. mergeron/core/guidelines_boundaries.py +290 -1078
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +258 -246
  15. mergeron/gen/data_generation.py +473 -224
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +171 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -257
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738953.1.dist-info/METADATA +0 -93
  38. mergeron-2024.738953.1.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/__init__.py CHANGED
@@ -1,12 +1,17 @@
  from __future__ import annotations

  import enum
- from importlib.metadata import version
  from pathlib import Path
+ from typing import Literal
+
+ import numpy as np
+ from numpy.typing import NDArray

  _PKG_NAME: str = Path(__file__).parent.stem

- __version__ = version(_PKG_NAME)
+ VERSION = "2025.739265.0"
+
+ __version__ = VERSION

  DATA_DIR: Path = Path.home() / _PKG_NAME
  """
@@ -14,14 +19,26 @@ Defines a subdirectory named for this package in the user's home path.

  If the subdirectory doesn't exist, it is created on package invocation.
  """
-
  if not DATA_DIR.is_dir():
      DATA_DIR.mkdir(parents=False)

+ np.set_printoptions(precision=24, floatmode="fixed")
+
+ type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
+
+ type ArrayBoolean = NDArray[np.bool_]
+ type ArrayFloat = NDArray[np.float16 | np.float32 | np.float64 | np.float128]
+ type ArrayINT = NDArray[np.intp]
+
+ type ArrayDouble = NDArray[np.float64]
+ type ArrayBIGINT = NDArray[np.int64]
+
+ DEFAULT_REC_RATIO = 0.85
+

  @enum.unique
- class RECConstants(enum.StrEnum):
-     """Recapture rate - derivation methods."""
+ class RECForm(enum.StrEnum):
+     """For derivation of recapture ratio from market shares."""

      INOUT = "inside-out"
      OUTIN = "outside-in"
@@ -31,15 +48,18 @@ class RECConstants(enum.StrEnum):
  @enum.unique
  class UPPAggrSelector(enum.StrEnum):
      """
-     Aggregator selection for GUPPI and diversion ratio
+     Aggregator for GUPPI and diversion ratio estimates.

      """

      AVG = "average"
      CPA = "cross-product-share weighted average"
      CPD = "cross-product-share weighted distance"
+     CPG = "cross-product-share weighted geometric mean"
      DIS = "symmetrically-weighted distance"
+     GMN = "geometric mean"
      MAX = "max"
      MIN = "min"
      OSA = "own-share weighted average"
      OSD = "own-share weighted distance"
+     OSG = "own-share weighted geometric mean"
mergeron/core/__init__.py CHANGED
@@ -1,68 +1,8 @@
- from __future__ import annotations
+ from mpmath import mp  # type: ignore

- from importlib.metadata import version
+ from .. import VERSION  # noqa: TID252

- from attrs import Attribute, define, field, validators
+ __version__ = VERSION

- from .. import _PKG_NAME, RECConstants, UPPAggrSelector  # noqa: TID252
-
- __version__ = version(_PKG_NAME)
-
-
- def _delta_value_validator(
-     _instance: UPPBoundarySpec, _attribute: Attribute[float], _value: float, /
- ) -> None:
-     if not 0 <= _value <= 1:
-         raise ValueError(
-             "Margin-adjusted benchmark share ratio must lie between 0 and 1."
-         )
-
-
- def _rec_spec_validator(
-     _instance: UPPBoundarySpec,
-     _attribute: Attribute[RECConstants],
-     _value: RECConstants,
-     /,
- ) -> None:
-     if _value == RECConstants.OUTIN:
-         raise ValueError(
-             f"Invalid recapture specification, {_value!r}. "
-             "You may consider specifying RECConstants.INOUT here, and "
-             "assigning the recapture rate for the merging-firm with "
-             'the smaller market-share to the attribue, "rec" of '
-             "the UPPBoundarySpec object you pass."
-         )
-     if _value is None and _instance.agg_method != UPPAggrSelector.MAX:
-         raise ValueError(
-             f"Specified aggregation method, {_instance.agg_method} requires a recapture specification."
-         )
-
-
- @define(slots=True, frozen=True)
- class UPPBoundarySpec:
-     share_ratio: float = field(
-         kw_only=False,
-         default=0.075,
-         validator=(validators.instance_of(float), _delta_value_validator),
-     )
-     rec: float = field(
-         kw_only=False, default=0.80, validator=validators.instance_of(float)
-     )
-
-     agg_method: UPPAggrSelector = field(
-         kw_only=True,
-         default=UPPAggrSelector.MAX,
-         validator=validators.instance_of(UPPAggrSelector),
-     )
-     recapture_spec: RECConstants | None = field(
-         kw_only=True,
-         default=RECConstants.INOUT,
-         validator=(
-             validators.optional(validators.instance_of(RECConstants)),  # type: ignore
-             _rec_spec_validator,
-         ),
-     )
-
-     precision: int = field(
-         kw_only=False, default=5, validator=validators.instance_of(int)
-     )
+ type MPFloat = mp.mpf  # pyright: ignore
+ type MPMatrix = mp.matrix  # pyright: ignore
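
The attrs-based UPPBoundarySpec and its validators are removed from this module; what remains are aliases over mpmath's arbitrary-precision types. An illustration of what the new aliases name (assumption: MPFloat and MPMatrix are plain aliases for mp.mpf and mp.matrix, exactly as in the hunk above):

    from mpmath import mp

    mp.dps = 30                       # decimal digits of working precision
    share_ratio = mp.mpf("0.075")     # an MPFloat in the new aliases' terms
    shares = mp.matrix([[share_ratio, 1 - share_ratio]])  # an MPMatrix
    print(shares)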
mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} CHANGED
@@ -2,12 +2,16 @@
  Functions to parse margin data compiled by
  Prof. Aswath Damodaran, Stern School of Business, NYU.

+ Provides :func:`mgn_data_resampler` for generating margin data
+ from an estimated Gaussian KDE from the source (margin) data.
+
  Data are downloaded or reused from a local copy, on demand.

  For terms of use of Prof. Damodaran's data, please see:
  https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datahistory.html

- Important caveats:
+ NOTES
+ -----

  Prof. Damodaran notes that the data construction may not be
  consistent from iteration to iteration. He also notes that,
@@ -32,29 +36,29 @@ price-cost margins fall in the interval :math:`[0, 1]`.

  """

+ import shutil
  from collections.abc import Mapping
- from importlib.metadata import version
+ from importlib import resources
  from pathlib import Path
  from types import MappingProxyType

  import msgpack  # type:ignore
  import numpy as np
- import requests
+ import urllib3
  from numpy.random import PCG64DXSM, Generator, SeedSequence
- from numpy.typing import NDArray
- from requests_toolbelt.downloadutils import stream  # type: ignore
  from scipy import stats  # type: ignore
  from xlrd import open_workbook  # type: ignore

- from .. import _PKG_NAME, DATA_DIR  # noqa: TID252
-
- __version__ = version(_PKG_NAME)
+ from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble  # noqa: TID252

+ __version__ = VERSION

  MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_dict.msgpack"

+ u3pm = urllib3.PoolManager()

- def scrape_data_table(
+
+ def mgn_data_getter(  # noqa: PLR0912
      _table_name: str = "margin",
      *,
      data_archive_path: Path | None = None,
@@ -68,32 +72,46 @@ def scrape_data_table(
      _data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH

      _mgn_urlstr = f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
-     _mgn_path = _data_archive_path.parent.joinpath(f"damodaran_{_table_name}_data.xls")
+     _mgn_path = _data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
      if _data_archive_path.is_file() and not data_download_flag:
          return MappingProxyType(msgpack.unpackb(_data_archive_path.read_bytes()))
      elif _mgn_path.is_file():
          _mgn_path.unlink()
-         _data_archive_path.unlink()
-
-     _REQ_TIMEOUT = (9.05, 27)
-     # NYU will eventually updates its server certificate, to one signed with
-     # "InCommon RSA Server CA 2.pem", the step below will be obsolete. In
-     # the interim, it is necessary to provide the certificate chain to the
-     # root CA, so that the obsolete CA certificate is validated.
-     _INCOMMON_2014_CERT_CHAIN_PATH = (
-         Path(__file__).parent / "InCommon RSA Server CA cert chain.pem"
-     )
-     try:
-         _urlopen_handle = requests.get(_mgn_urlstr, timeout=_REQ_TIMEOUT, stream=True)
-     except requests.exceptions.SSLError:
-         _urlopen_handle = requests.get(
-             _mgn_urlstr,
-             timeout=_REQ_TIMEOUT,
-             stream=True,
-             verify=str(_INCOMMON_2014_CERT_CHAIN_PATH),
-         )
+         if _data_archive_path.is_file():
+             _data_archive_path.unlink()

-     _mgn_filename = stream.stream_response_to_file(_urlopen_handle, path=_mgn_path)
+     try:
+         _chunk_size = 1024 * 1024
+         with (
+             u3pm.request("GET", _mgn_urlstr, preload_content=False) as _urlopen_handle,
+             _mgn_path.open("wb") as _mgn_file,
+         ):
+             while True:
+                 _data = _urlopen_handle.read(_chunk_size)
+                 if not _data:
+                     break
+                 _mgn_file.write(_data)
+
+         print(f"Downloaded {_mgn_urlstr} to {_mgn_path}.")
+
+     except urllib3.exceptions.MaxRetryError as _err:
+         if isinstance(_err.__cause__, urllib3.exceptions.SSLError):
+             # Works fine with other sites secured with certificates
+             # from the Internet2 CA, such as,
+             # https://snap.stanford.edu/data/web-Stanford.txt.gz
+             print(
+                 f"WARNING: Could not establish secure connection to, {_mgn_urlstr}."
+                 "Using bundled copy."
+             )
+             if not _mgn_path.is_file():
+                 with resources.as_file(
+                     resources.files(f"{_PKG_NAME}.data").joinpath(
+                         "empirical_margin_distribution.xls"
+                     )
+                 ) as _mgn_data_archive_path:
+                     shutil.copy2(_mgn_data_archive_path, _mgn_path)
+         else:
+             raise _err

      _xl_book = open_workbook(_mgn_path, ragged_rows=True, on_demand=True)
      _xl_sheet = _xl_book.sheet_by_name("Industry Averages")
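
The rewritten download path streams the workbook via urllib3 and, if the TLS handshake with the NYU server fails, falls back to a copy bundled under mergeron/data. A hypothetical call, inferred from the hunk above (data_download_flag appears in the function body and is assumed here to be a keyword argument that forces a fresh download rather than reusing the msgpack cache):

    from mergeron.core.empirical_margin_distribution import mgn_data_getter

    margin_table = mgn_data_getter("margin", data_download_flag=True)
    print(len(margin_table), "industry rows")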
@@ -114,16 +132,16 @@ def scrape_data_table(
          _xl_row[1] = int(_xl_row[1])
          _mgn_dict[_xl_row[0]] = dict(zip(_mgn_row_keys[1:], _xl_row[1:], strict=True))

-     _ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict))
+     _ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict))  # pyright: ignore

      return MappingProxyType(_mgn_dict)


  def mgn_data_builder(
      _mgn_tbl_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
- ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
+ ) -> tuple[ArrayDouble, ArrayDouble, ArrayDouble]:
      if _mgn_tbl_dict is None:
-         _mgn_tbl_dict = scrape_data_table()
+         _mgn_tbl_dict = mgn_data_getter()

      _mgn_data_wts, _mgn_data_obs = (
          _f.flatten()
@@ -169,22 +187,24 @@ def mgn_data_builder(
      )


- def resample_mgn_data(
-     _sample_size: int | tuple[int, int] = (10**6, 2),
+ def mgn_data_resampler(
+     _sample_size: int | tuple[int, ...] = (10**6, 2),
      /,
      *,
      seed_sequence: SeedSequence | None = None,
- ) -> NDArray[np.float64]:
+ ) -> ArrayDouble:
      """
-     Generate the specified number of draws from the empirical distribution
-     for Prof. Damodaran's margin data using the estimated Gaussian KDE.
-     Margins for firms in finance, investment, insurance, reinsurance, and REITs
-     are excluded from the sample used to estimate the Gaussian KDE.
+     Generate draws from the empirical distribution bassed on Prof. Damodaran's margin data.
+
+     The empirical distribution is estimated using a Gaussian KDE; the bandwidth
+     selected using Silverman's rule is narrowed to reflect that the margin data
+     are multimodal. Margins for firms in finance, investment, insurance, reinsurance, and
+     REITs are excluded from the sample used to estimate the empirical distribution.

      Parameters
      ----------
      _sample_size
-         Number of draws
+         Number of draws; if tuple, (number of draws, number of columns)

      seed_sequence
          SeedSequence for seeding random-number generator when results
@@ -198,28 +218,24 @@ def resample_mgn_data(

      _seed_sequence = seed_sequence or SeedSequence(pool_size=8)

-     _x, _w, _ = mgn_data_builder(scrape_data_table())
-
-     _mgn_kde = stats.gaussian_kde(_x, weights=_w)
+     _x, _w, _ = mgn_data_builder(mgn_data_getter())

-     def _generate_draws(
-         _mgn_kde: stats.gaussian_kde, _ssz: int, _seed_seq: SeedSequence
-     ) -> NDArray[np.float64]:
-         _seed = Generator(PCG64DXSM(_seed_sequence))
+     _mgn_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
+     _mgn_kde.set_bandwidth(bw_method=_mgn_kde.factor / 3.0)  # pyright: ignore

-         # We enlarge the sample, then truncate to
-         # the range between [0.0, 1.0)
-         ssz_up = int(_ssz / (_mgn_kde.integrate_box_1d(0.0, 1.0) ** 2))
-         sample_1 = _mgn_kde.resample(ssz_up, seed=_seed)[0]
+     if isinstance(_sample_size, int):
          return np.array(
-             sample_1[(sample_1 >= 0.0) & (sample_1 <= 1)][:_ssz], np.float64
+             _mgn_kde.resample(_sample_size, seed=Generator(PCG64DXSM(_seed_sequence)))[
+                 0
+             ]
          )
-
-     if isinstance(_sample_size, int):
-         return _generate_draws(_mgn_kde, _sample_size, _seed_sequence)
-     else:
+     elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
          _ssz, _num_cols = _sample_size
          _ret_array = np.empty(_sample_size, np.float64)
          for _idx, _seed_seq in enumerate(_seed_sequence.spawn(_num_cols)):
-             _ret_array[:, _idx] = _generate_draws(_mgn_kde, _ssz, _seed_seq)
+             _ret_array[:, _idx] = _mgn_kde.resample(
+                 _ssz, seed=Generator(PCG64DXSM(_seed_seq))
+             )[0]
          return _ret_array
+     else:
+         raise ValueError(f"Invalid sample size: {_sample_size!r}")