mergeron 2025.739319.3__py3-none-any.whl → 2025.739341.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +21 -23
- mergeron/core/__init__.py +21 -5
- mergeron/core/empirical_margin_distribution.py +213 -158
- mergeron/core/ftc_merger_investigations_data.py +31 -35
- mergeron/core/guidelines_boundaries.py +27 -20
- mergeron/core/guidelines_boundary_functions.py +22 -32
- mergeron/core/guidelines_boundary_functions_extra.py +15 -30
- mergeron/core/pseudorandom_numbers.py +21 -18
- mergeron/data/__init__.py +13 -11
- mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- mergeron/gen/__init__.py +32 -41
- mergeron/gen/data_generation.py +19 -23
- mergeron/gen/data_generation_functions.py +27 -38
- mergeron/gen/enforcement_stats.py +144 -23
- mergeron/gen/upp_tests.py +4 -9
- mergeron-2025.739341.8.dist-info/METADATA +94 -0
- mergeron-2025.739341.8.dist-info/RECORD +20 -0
- {mergeron-2025.739319.3.dist-info → mergeron-2025.739341.8.dist-info}/WHEEL +1 -1
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/demo/__init__.py +0 -3
- mergeron/demo/visualize_empirical_margin_distribution.py +0 -94
- mergeron-2025.739319.3.dist-info/METADATA +0 -174
- mergeron-2025.739319.3.dist-info/RECORD +0 -22
mergeron/__init__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Variables, types, objects and functions used throughout the package."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
5
|
import enum
|
|
@@ -6,13 +8,14 @@ from multiprocessing import cpu_count
|
|
|
6
8
|
from pathlib import Path
|
|
7
9
|
from typing import Any, Literal
|
|
8
10
|
|
|
11
|
+
import attrs
|
|
9
12
|
import numpy as np
|
|
10
13
|
from numpy.typing import NDArray
|
|
11
14
|
from ruamel import yaml
|
|
12
15
|
|
|
13
|
-
_PKG_NAME: str = Path(__file__).parent.
|
|
16
|
+
_PKG_NAME: str = Path(__file__).parent.name
|
|
14
17
|
|
|
15
|
-
VERSION = "2025.
|
|
18
|
+
VERSION = "2025.739341.8"
|
|
16
19
|
|
|
17
20
|
__version__ = VERSION
|
|
18
21
|
|
|
@@ -34,7 +37,7 @@ EMPTY_ARRAYINT = np.array([], int)
|
|
|
34
37
|
|
|
35
38
|
NTHREADS = 2 * cpu_count()
|
|
36
39
|
|
|
37
|
-
PKG_ATTRS_MAP: dict[str,
|
|
40
|
+
PKG_ATTRS_MAP: dict[str, type] = {}
|
|
38
41
|
|
|
39
42
|
np.set_printoptions(precision=24, floatmode="fixed")
|
|
40
43
|
|
|
@@ -75,23 +78,21 @@ this_yaml.indent(mapping=2, sequence=4, offset=2)
|
|
|
75
78
|
def yaml_rt_mapper(
|
|
76
79
|
_c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
|
|
77
80
|
) -> Mapping[str, Any]:
|
|
78
|
-
"""
|
|
79
|
-
Constructs a mapping from a mapping node with the RoundTripConstructor
|
|
80
|
-
|
|
81
|
-
"""
|
|
81
|
+
"""Construct mapping from a mapping node with the RoundTripConstructor."""
|
|
82
82
|
data_: Mapping[str, Any] = yaml.constructor.CommentedMap()
|
|
83
83
|
_c.construct_mapping(_n, maptyp=data_, deep=True)
|
|
84
84
|
return data_
|
|
85
85
|
|
|
86
86
|
|
|
87
|
-
def yamelize_attrs(
|
|
88
|
-
_typ: object, /, *, attr_map: Mapping[str, object] = PKG_ATTRS_MAP
|
|
89
|
-
) -> None:
|
|
87
|
+
def yamelize_attrs(_typ: type, /, *, attr_map: dict[str, type] = PKG_ATTRS_MAP) -> None:
|
|
90
88
|
"""Add yaml representer, constructor for attrs-defined class.
|
|
91
89
|
|
|
92
|
-
|
|
93
|
-
|
|
90
|
+
Attributes with property, `init=False` are not serialized/deserialized
|
|
91
|
+
to YAML by the functions defined here. These attributes can, of course,
|
|
92
|
+
be dumped to stand-alone (YAML) representation, and deserialized from there.
|
|
94
93
|
"""
|
|
94
|
+
if not attrs.has(_typ):
|
|
95
|
+
raise ValueError(f"Object {_typ} is not attrs-defined")
|
|
95
96
|
|
|
96
97
|
attr_map |= {_typ.__name__: _typ}
|
|
97
98
|
|
|
@@ -99,9 +100,6 @@ def yamelize_attrs(
|
|
|
99
100
|
_typ,
|
|
100
101
|
lambda _r, _d: _r.represent_mapping(
|
|
101
102
|
f"!{_d.__class__.__name__}",
|
|
102
|
-
# construct mapping, rather than calling attrs.asdict(),
|
|
103
|
-
# to use yaml representers defined in this package for
|
|
104
|
-
# "upstream" objects
|
|
105
103
|
{_a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init},
|
|
106
104
|
),
|
|
107
105
|
)
|
|
@@ -113,12 +111,13 @@ def yamelize_attrs(
|
|
|
113
111
|
|
|
114
112
|
@this_yaml.register_class
|
|
115
113
|
class Enameled(enum.Enum):
|
|
116
|
-
"""Add YAML representer, constructor for enum.Enum"""
|
|
114
|
+
"""Add YAML representer, constructor for enum.Enum."""
|
|
117
115
|
|
|
118
116
|
@classmethod
|
|
119
117
|
def to_yaml(
|
|
120
|
-
cls, _r: yaml.representer.RoundTripRepresenter, _d:
|
|
118
|
+
cls, _r: yaml.representer.RoundTripRepresenter, _d: enum.Enum
|
|
121
119
|
) -> yaml.ScalarNode:
|
|
120
|
+
"""Serialize enumerations by .name, not .value."""
|
|
122
121
|
return _r.represent_scalar(
|
|
123
122
|
f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
|
|
124
123
|
)
|
|
@@ -126,8 +125,10 @@ class Enameled(enum.Enum):
|
|
|
126
125
|
@classmethod
|
|
127
126
|
def from_yaml(
|
|
128
127
|
cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
|
|
129
|
-
) ->
|
|
130
|
-
|
|
128
|
+
) -> enum.EnumType:
|
|
129
|
+
"""Deserialize enumeration."""
|
|
130
|
+
retval: enum.EnumType = super().__getattribute__(cls, _n.value)
|
|
131
|
+
return retval
|
|
131
132
|
|
|
132
133
|
|
|
133
134
|
@this_yaml.register_class
|
|
@@ -181,10 +182,7 @@ class RECForm(str, Enameled):
|
|
|
181
182
|
@this_yaml.register_class
|
|
182
183
|
@enum.unique
|
|
183
184
|
class UPPAggrSelector(str, Enameled):
|
|
184
|
-
"""
|
|
185
|
-
Aggregator for GUPPI and diversion ratio estimates.
|
|
186
|
-
|
|
187
|
-
"""
|
|
185
|
+
"""Aggregator for GUPPI and diversion ratio estimates."""
|
|
188
186
|
|
|
189
187
|
AVG = "average"
|
|
190
188
|
CPA = "cross-product-share weighted average"
|
mergeron/core/__init__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Constants, types, objects and functions used within this sub-package."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
5
|
from collections.abc import Mapping
|
|
@@ -8,6 +10,7 @@ from typing import Any
|
|
|
8
10
|
import mpmath # type: ignore
|
|
9
11
|
import numpy as np
|
|
10
12
|
from attrs import cmp_using, field, frozen
|
|
13
|
+
from numpy.random import PCG64DXSM
|
|
11
14
|
|
|
12
15
|
from .. import ( # noqa: TID252
|
|
13
16
|
VERSION,
|
|
@@ -20,13 +23,15 @@ from .. import ( # noqa: TID252
|
|
|
20
23
|
|
|
21
24
|
__version__ = VERSION
|
|
22
25
|
|
|
26
|
+
DEFAULT_BITGENERATOR = PCG64DXSM
|
|
27
|
+
|
|
23
28
|
type MPFloat = mpmath.ctx_mp_python.mpf
|
|
24
|
-
type MPMatrix = mpmath.
|
|
29
|
+
type MPMatrix = mpmath.matrices.matrices._matrix
|
|
25
30
|
|
|
26
31
|
|
|
27
32
|
@frozen
|
|
28
33
|
class GuidelinesBoundary:
|
|
29
|
-
"""
|
|
34
|
+
"""Represents Guidelines boundary analytically."""
|
|
30
35
|
|
|
31
36
|
coordinates: ArrayDouble
|
|
32
37
|
"""Market-share pairs as Cartesian coordinates of points on the boundary."""
|
|
@@ -35,8 +40,19 @@ class GuidelinesBoundary:
|
|
|
35
40
|
"""Area under the boundary."""
|
|
36
41
|
|
|
37
42
|
|
|
43
|
+
@frozen
|
|
44
|
+
class GuidelinesBoundaryCallable:
|
|
45
|
+
"""A function to generate Guidelines boundary points, along with area and knot."""
|
|
46
|
+
|
|
47
|
+
boundary_function: Callable[[ArrayDouble], ArrayDouble]
|
|
48
|
+
area: float
|
|
49
|
+
s_naught: float = 0
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
@frozen
|
|
39
53
|
class INVTableData:
|
|
54
|
+
"""Represents individual table of FTC merger investigations data."""
|
|
55
|
+
|
|
40
56
|
industry_group: str
|
|
41
57
|
additional_evidence: str
|
|
42
58
|
data_array: ArrayBIGINT = field(eq=cmp_using(eq=np.array_equal))
|
|
@@ -45,7 +61,7 @@ class INVTableData:
|
|
|
45
61
|
type INVData = MappingProxyType[
|
|
46
62
|
str, MappingProxyType[str, MappingProxyType[str, INVTableData]]
|
|
47
63
|
]
|
|
48
|
-
type INVData_in =
|
|
64
|
+
type INVData_in = dict[str, dict[str, dict[str, INVTableData]]]
|
|
49
65
|
|
|
50
66
|
yamelize_attrs(INVTableData)
|
|
51
67
|
|
|
@@ -91,14 +107,14 @@ _, _ = (
|
|
|
91
107
|
|
|
92
108
|
def _dict_from_mapping(_p: Mapping[Any, Any], /) -> dict[Any, Any]:
|
|
93
109
|
retval: dict[Any, Any] = {}
|
|
94
|
-
for _k, _v in _p.items():
|
|
110
|
+
for _k, _v in _p.items():
|
|
95
111
|
retval |= {_k: _dict_from_mapping(_v)} if isinstance(_v, Mapping) else {_k: _v}
|
|
96
112
|
return retval
|
|
97
113
|
|
|
98
114
|
|
|
99
115
|
def _mappingproxy_from_mapping(_p: Mapping[Any, Any], /) -> MappingProxyType[Any, Any]:
|
|
100
116
|
retval: dict[Any, Any] = {}
|
|
101
|
-
for _k, _v in _p.items():
|
|
117
|
+
for _k, _v in _p.items():
|
|
102
118
|
retval |= (
|
|
103
119
|
{_k: _mappingproxy_from_mapping(_v)}
|
|
104
120
|
if isinstance(_v, Mapping)
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Functions to parse margin data compiled by
|
|
3
|
-
Prof. Aswath Damodaran, Stern School of Business, NYU.
|
|
1
|
+
"""Functions to parse margin data compiled by Prof. Aswath Damodaran, Stern School of Business, NYU.
|
|
4
2
|
|
|
5
3
|
Provides :func:`margin_data_resampler` for generating margin data
|
|
6
4
|
from an estimated Gaussian KDE from the source (margin) data.
|
|
@@ -10,9 +8,8 @@ Data are downloaded or reused from a local copy, on demand.
|
|
|
10
8
|
For terms of use of Prof. Damodaran's data, please see:
|
|
11
9
|
https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datahistory.html
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
Notes
|
|
14
12
|
-----
|
|
15
|
-
|
|
16
13
|
Prof. Damodaran notes that the data construction may not be
|
|
17
14
|
consistent from iteration to iteration. He also notes that,
|
|
18
15
|
"the best use for my data is in real time corporate financial analysis
|
|
@@ -36,171 +33,50 @@ price-cost margins fall in the interval :math:`[0, 1]`.
|
|
|
36
33
|
|
|
37
34
|
"""
|
|
38
35
|
|
|
39
|
-
import
|
|
36
|
+
import datetime
|
|
37
|
+
import os
|
|
38
|
+
import re
|
|
40
39
|
import zipfile
|
|
41
40
|
from pathlib import Path
|
|
42
41
|
from types import MappingProxyType
|
|
43
42
|
|
|
44
43
|
import numpy as np
|
|
45
44
|
import urllib3
|
|
46
|
-
from
|
|
47
|
-
from
|
|
45
|
+
from bs4 import BeautifulSoup
|
|
46
|
+
from joblib import Parallel, delayed # type: ignore
|
|
47
|
+
from numpy.random import Generator, SeedSequence
|
|
48
|
+
from python_calamine import CalamineWorkbook
|
|
48
49
|
from scipy import stats # type: ignore
|
|
49
|
-
from xlrd import open_workbook # type: ignore
|
|
50
50
|
|
|
51
51
|
from .. import NTHREADS, VERSION, ArrayDouble, this_yaml # noqa: TID252
|
|
52
52
|
from .. import WORK_DIR as PKG_WORK_DIR # noqa: TID252
|
|
53
|
-
from
|
|
54
|
-
from . import _mappingproxy_from_mapping
|
|
53
|
+
from . import DEFAULT_BITGENERATOR
|
|
55
54
|
|
|
56
55
|
__version__ = VERSION
|
|
57
56
|
|
|
58
57
|
WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
|
|
59
|
-
"""Redefined, in case the user defines WORK_DIR
|
|
58
|
+
"""Redefined, in case the user defines WORK_DIR between module imports."""
|
|
60
59
|
|
|
61
60
|
MGNDATA_ARCHIVE_PATH = WORK_DIR / "damodaran_margin_data_serialized.zip"
|
|
62
61
|
|
|
63
62
|
type DamodaranMarginData = MappingProxyType[str, MappingProxyType[str, float | int]]
|
|
64
63
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
|
|
80
|
-
workbook_path = data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
|
|
81
|
-
if data_archive_path.is_file() and not data_download_flag:
|
|
82
|
-
with zipfile.ZipFile(data_archive_path) as _yzip:
|
|
83
|
-
margin_data_dict = this_yaml.load(
|
|
84
|
-
_yzip.read(data_archive_path.with_suffix(".yaml").name)
|
|
85
|
-
)
|
|
86
|
-
return margin_data_dict
|
|
87
|
-
elif workbook_path.is_file():
|
|
88
|
-
workbook_path.unlink()
|
|
89
|
-
if data_archive_path.is_file():
|
|
90
|
-
data_archive_path.unlink()
|
|
91
|
-
|
|
92
|
-
margin_urlstr = (
|
|
93
|
-
f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
|
|
94
|
-
)
|
|
95
|
-
try:
|
|
96
|
-
chunk_size_ = 1024 * 1024
|
|
97
|
-
with (
|
|
98
|
-
u3pm.request(
|
|
99
|
-
"GET", margin_urlstr, preload_content=False
|
|
100
|
-
) as _urlopen_handle,
|
|
101
|
-
workbook_path.open("wb") as margin_file,
|
|
102
|
-
):
|
|
103
|
-
while True:
|
|
104
|
-
data_ = _urlopen_handle.read(chunk_size_)
|
|
105
|
-
if not data_:
|
|
106
|
-
break
|
|
107
|
-
margin_file.write(data_)
|
|
108
|
-
|
|
109
|
-
print(f"Downloaded {margin_urlstr} to {workbook_path}.")
|
|
110
|
-
|
|
111
|
-
except urllib3.exceptions.MaxRetryError as error_:
|
|
112
|
-
if isinstance(error_.__cause__, urllib3.exceptions.SSLError):
|
|
113
|
-
# Works fine with other sites secured with certificates
|
|
114
|
-
# from the Internet2 CA, such as,
|
|
115
|
-
# https://snap.stanford.edu/data/web-Stanford.txt.gz
|
|
116
|
-
print(
|
|
117
|
-
f"WARNING: Could not establish secure connection to, {margin_urlstr}."
|
|
118
|
-
"Using bundled copy."
|
|
119
|
-
)
|
|
120
|
-
if not workbook_path.is_file():
|
|
121
|
-
shutil.copy2(mdat.DAMODARAN_MARGIN_WORKBOOK, workbook_path)
|
|
122
|
-
else:
|
|
123
|
-
raise error_
|
|
124
|
-
|
|
125
|
-
xl_book = open_workbook(workbook_path, ragged_rows=True, on_demand=True)
|
|
126
|
-
xl_sheet = xl_book.sheet_by_name("Industry Averages")
|
|
127
|
-
|
|
128
|
-
margin_dict_in: dict[str, dict[str, float | int]] = {}
|
|
129
|
-
row_keys: list[str] = []
|
|
130
|
-
read_row_flag = False
|
|
131
|
-
for _ridx in range(xl_sheet.nrows):
|
|
132
|
-
xl_row = xl_sheet.row_values(_ridx)
|
|
133
|
-
if xl_row[0] == "Industry Name":
|
|
134
|
-
read_row_flag = True
|
|
135
|
-
row_keys = xl_row
|
|
136
|
-
continue
|
|
137
|
-
|
|
138
|
-
if not xl_row[0] or not read_row_flag:
|
|
139
|
-
continue
|
|
140
|
-
|
|
141
|
-
xl_row[1] = int(xl_row[1])
|
|
142
|
-
margin_dict_in[xl_row[0]] = dict(zip(row_keys[1:], xl_row[1:], strict=True))
|
|
143
|
-
|
|
144
|
-
margin_dict = _mappingproxy_from_mapping(margin_dict_in)
|
|
145
|
-
with (
|
|
146
|
-
zipfile.ZipFile(data_archive_path, "w") as _yzip,
|
|
147
|
-
_yzip.open(f"{data_archive_path.stem}.yaml", "w") as _yfh,
|
|
148
|
-
):
|
|
149
|
-
this_yaml.dump(margin_dict, _yfh)
|
|
150
|
-
|
|
151
|
-
return margin_dict
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def margin_data_builder(
|
|
155
|
-
_src_data_dict: DamodaranMarginData | None = None, /
|
|
156
|
-
) -> tuple[ArrayDouble, ArrayDouble]:
|
|
157
|
-
if _src_data_dict is None:
|
|
158
|
-
_src_data_dict = margin_data_getter()
|
|
159
|
-
|
|
160
|
-
margin_data_wts, margin_data_obs = (
|
|
161
|
-
_f.flatten()
|
|
162
|
-
for _f in np.hsplit(
|
|
163
|
-
np.array([
|
|
164
|
-
tuple(
|
|
165
|
-
_src_data_dict[_g][_h] for _h in ["Number of firms", "Gross Margin"]
|
|
166
|
-
)
|
|
167
|
-
for _g in _src_data_dict
|
|
168
|
-
if not _g.startswith("Total Market")
|
|
169
|
-
and _g
|
|
170
|
-
not in {
|
|
171
|
-
"Bank (Money Center)",
|
|
172
|
-
"Banks (Regional)",
|
|
173
|
-
"Brokerage & Investment Banking",
|
|
174
|
-
"Financial Svcs. (Non-bank & Insurance)",
|
|
175
|
-
"Insurance (General)",
|
|
176
|
-
"Insurance (Life)",
|
|
177
|
-
"Insurance (Prop/Cas.)",
|
|
178
|
-
"Investments & Asset Management",
|
|
179
|
-
"R.E.I.T.",
|
|
180
|
-
"Retail (REITs)",
|
|
181
|
-
"Reinsurance",
|
|
182
|
-
}
|
|
183
|
-
]),
|
|
184
|
-
2,
|
|
185
|
-
)
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
margin_wtd_avg = np.average(margin_data_obs, weights=margin_data_wts)
|
|
189
|
-
# https://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
|
|
190
|
-
margin_wtd_stderr = np.sqrt(
|
|
191
|
-
np.average((margin_data_obs - margin_wtd_avg) ** 2, weights=margin_data_wts)
|
|
192
|
-
* (len(margin_data_wts) / (len(margin_data_wts) - 1))
|
|
193
|
-
)
|
|
194
|
-
|
|
195
|
-
return np.stack([margin_data_obs, margin_data_wts], axis=1, dtype=float), np.round(
|
|
196
|
-
(
|
|
197
|
-
margin_wtd_avg,
|
|
198
|
-
margin_wtd_stderr,
|
|
199
|
-
margin_data_obs.min(),
|
|
200
|
-
margin_data_obs.max(),
|
|
201
|
-
),
|
|
202
|
-
8,
|
|
64
|
+
FINANCIAL_INDUSTRIES = {
|
|
65
|
+
_i.upper()
|
|
66
|
+
for _i in (
|
|
67
|
+
"Bank (Money Center)",
|
|
68
|
+
"Banks (Regional)",
|
|
69
|
+
"Brokerage & Investment Banking",
|
|
70
|
+
"Financial Svcs. (Non-bank & Insurance)",
|
|
71
|
+
"Insurance (General)",
|
|
72
|
+
"Insurance (Life)",
|
|
73
|
+
"Insurance (Prop/Cas.)",
|
|
74
|
+
"Investments & Asset Management",
|
|
75
|
+
"R.E.I.T.",
|
|
76
|
+
"Retail (REITs)",
|
|
77
|
+
"Reinsurance",
|
|
203
78
|
)
|
|
79
|
+
}
|
|
204
80
|
|
|
205
81
|
|
|
206
82
|
def margin_data_resampler(
|
|
@@ -211,8 +87,7 @@ def margin_data_resampler(
|
|
|
211
87
|
seed_sequence: SeedSequence | None = None,
|
|
212
88
|
nthreads: int = NTHREADS,
|
|
213
89
|
) -> ArrayDouble:
|
|
214
|
-
"""
|
|
215
|
-
Generate draws from the empirical distribution bassed on Prof. Damodaran's margin data.
|
|
90
|
+
"""Generate draws from the empirical distribution based on Prof. Damodaran's margin data.
|
|
216
91
|
|
|
217
92
|
The empirical distribution is estimated using a Gaussian KDE; the bandwidth
|
|
218
93
|
selected using Silverman's rule is narrowed to reflect that the margin data
|
|
@@ -222,7 +97,6 @@ def margin_data_resampler(
|
|
|
222
97
|
|
|
223
98
|
Parameters
|
|
224
99
|
----------
|
|
225
|
-
|
|
226
100
|
_dist_parms
|
|
227
101
|
Array of margins and firm counts extracted from Prof. Damodaran's margin data
|
|
228
102
|
|
|
@@ -235,12 +109,12 @@ def margin_data_resampler(
|
|
|
235
109
|
|
|
236
110
|
nthreads
|
|
237
111
|
Number of threads to use in generating margin data.
|
|
112
|
+
|
|
238
113
|
Returns
|
|
239
114
|
-------
|
|
240
115
|
Array of margin values
|
|
241
116
|
|
|
242
117
|
"""
|
|
243
|
-
|
|
244
118
|
_dist_parms = margin_data_builder()[0] if _dist_parms is None else _dist_parms
|
|
245
119
|
|
|
246
120
|
_seed = seed_sequence or SeedSequence(pool_size=8)
|
|
@@ -248,23 +122,204 @@ def margin_data_resampler(
|
|
|
248
122
|
_x, _w = _dist_parms[:, 0], _dist_parms[:, 1]
|
|
249
123
|
|
|
250
124
|
margin_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
|
|
125
|
+
# preserve multiplicity of modes:
|
|
251
126
|
margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)
|
|
252
127
|
|
|
253
128
|
if isinstance(sample_size, int):
|
|
254
|
-
return margin_kde.resample(
|
|
129
|
+
return margin_kde.resample(
|
|
130
|
+
sample_size, seed=Generator(DEFAULT_BITGENERATOR(_seed))
|
|
131
|
+
).T
|
|
255
132
|
|
|
256
133
|
elif isinstance(sample_size, tuple) and len(sample_size) == 2:
|
|
257
134
|
ret_array = np.empty(sample_size, float)
|
|
258
135
|
|
|
259
136
|
_ssz, _ncol = sample_size
|
|
260
|
-
|
|
261
|
-
|
|
137
|
+
_threads = min(nthreads, _ncol)
|
|
138
|
+
dat_list = Parallel(n_jobs=_threads, prefer="threads")(
|
|
139
|
+
delayed(margin_kde.resample)(
|
|
140
|
+
_ssz, seed=Generator(DEFAULT_BITGENERATOR(_col_seed))
|
|
141
|
+
)
|
|
262
142
|
for _col_seed in _seed.spawn(_ncol)
|
|
263
143
|
)
|
|
264
144
|
|
|
265
145
|
for _i in range(_ncol):
|
|
266
|
-
ret_array[:,
|
|
146
|
+
ret_array[:, _i] = dat_list[_i][0]
|
|
267
147
|
|
|
268
148
|
return ret_array
|
|
269
149
|
else:
|
|
270
150
|
raise ValueError(f"Invalid sample size: {sample_size!r}")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def margin_data_builder(
    _margin_data_dict: DamodaranMarginData | None = None,
) -> tuple[ArrayDouble, ArrayDouble]:
    """Derive average firm-counts and gross-margins by industry from source data.

    Parameters
    ----------
    _margin_data_dict
        Mapping whose values each map an industry name to that industry's
        record; records are read for the keys "GROSS MARGIN" and
        "NUMBER OF FIRMS". Obtained from :func:`margin_data_getter` if None.

    Returns
    -------
    A two-column array with one row per retained industry, holding the
    average gross margin and the average firm count, in that order; and a
    four-element array holding the firm-count-weighted mean of the
    industry-average gross margins, the dispersion estimate computed below,
    and the minimum and maximum of the industry-average gross margins.

    Raises
    ------
    ValueError
        If no industry has usable data.

    """
    if _margin_data_dict is None:
        _margin_data_dict = margin_data_getter()

    # Only the per-vintage records are needed; the outer keys are not used.
    vintages = tuple(_margin_data_dict.values())
    industry_names = sorted(set().union(*vintages))

    # Stand-in record for vintages in which an industry does not appear;
    # its zeros are screened out by the weights below.
    _missing = {"GROSS MARGIN": 0.0, "NUMBER OF FIRMS": 0.0}

    rows: list[tuple[float, float]] = []
    for _sk in industry_names:
        if _sk in FINANCIAL_INDUSTRIES or _sk.startswith("TOTAL"):
            continue

        gm, fc = zip(*(
            [_v.get(_sk, _missing).get(_f) for _f in _missing] for _v in vintages
        ))
        average_margin, firm_count = np.array(gm, float), np.array(fc, int)

        # Average only over vintages with a positive margin and firm count.
        usable = (average_margin > 0) * (firm_count > 0)
        if not usable.any():
            # np.average() raises ZeroDivisionError when all weights are zero;
            # an industry with no usable vintage contributes no row.
            continue

        rows.append((
            np.average(average_margin, weights=usable),
            np.average(firm_count, weights=usable),
        ))

    if not rows:
        raise ValueError("No usable margin data found in source data.")

    # Build the array once, instead of re-stacking it on every iteration.
    dist_parms = np.array(rows, dtype=float).reshape(-1, 2)

    obs_, wts_ = dist_parms[:, 0], dist_parms[:, 1]

    # With returned=True, the second value is the sum of the weights.
    avg_gm, num_firms = np.average(obs_, weights=wts_, returned=True)
    std_gm = np.sqrt(
        np.average((obs_ - avg_gm) ** 2, weights=wts_)
        * num_firms
        * len(obs_)
        / ((num_firms - len(obs_)) * (len(obs_) - 1))
    )

    return dist_parms, np.array([avg_gm, std_gm, obs_.min(), obs_.max()], float)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def margin_data_getter(
    *, data_archive_path: Path | None = None, data_download_flag: bool = False
) -> DamodaranMarginData:
    """Download and parse Prof. Damodaran's margin data.

    Parameters
    ----------
    data_archive_path
        Path of the zip archive holding the serialized (YAML) margin data;
        defaults to ``MGNDATA_ARCHIVE_PATH``.

    data_download_flag
        If True, ignore any existing archive, download the workbooks afresh
        and rebuild the archive.

    Returns
    -------
    Margin data, keyed by the first ten characters of the ISO-formatted
    date read from each workbook, then by upper-cased industry name.

    Raises
    ------
    ValueError
        If a worksheet does not have "Gross Margin" in the expected cell.

    """
    data_archive_path = (
        MGNDATA_ARCHIVE_PATH if data_archive_path is None else data_archive_path
    )
    if data_archive_path.is_file() and not data_download_flag:
        with zipfile.ZipFile(data_archive_path) as _yzp:
            cached_data: DamodaranMarginData = this_yaml.load(
                _yzp.read(data_archive_path.with_suffix(".yaml").name)
            )
        return cached_data

    # Get workbooks from source, unless local copies are available for reuse.
    # The workbooks live in their own directory, not in the zip archive, so
    # that directory is the one to search.
    workbook_dir = WORK_DIR / "damodaran_margin_data_archive"
    if data_download_flag or not any(workbook_dir.glob("margin*.xls")):
        margin_data_downloader()

    # Whitespace cleanup
    ws_pat = re.compile(r"\s+")

    # Parse workbooks and save margin data dictionary. Only the workbooks
    # are read, in sorted order, so that stray files in the directory are
    # ignored and the order of processing is reproducible.
    margin_data_dict: dict[str, dict[str, MappingProxyType[str, float | int]]] = {}
    for _p in sorted(workbook_dir.glob("margin*.xls")):
        xl_wbk = CalamineWorkbook.from_path(_p)
        # Archived workbooks with suffix 17, 18 or 19 are read from the first
        # sheet; all others, including the current "margin" workbook, from
        # the second.
        xl_wks = xl_wbk.get_sheet_by_index(
            0 if (_p.stem != "margin" and int(_p.stem[-2:]) in {17, 18, 19}) else 1
        ).to_python()
        if xl_wks[8][2] != "Gross Margin":
            raise ValueError("Worksheet does not match expected layout.")

        update = xl_wks[0][1].isoformat()[:10]
        margin_data_annual = margin_data_dict.setdefault(update, {})
        row_keys: list[str] = []
        read_row_flag = False
        for xl_row in xl_wks:
            row_key = _s.upper() if isinstance((_s := xl_row[0]), str) else ""

            if ws_pat.sub(" ", row_key) == "INDUSTRY NAME":
                # Header row: data rows follow.
                read_row_flag = True
                row_keys = [_c.upper() for _c in xl_row]
                continue

            if not read_row_flag or not row_key or row_key.startswith("TOTAL"):
                continue

            xl_row[1] = int(xl_row[1])
            margin_data_annual[row_key] = MappingProxyType(
                dict(zip(row_keys[1:], xl_row[1:], strict=True))
            )

    damodaran_margin_data = MappingProxyType(margin_data_dict)
    with (
        zipfile.ZipFile(data_archive_path, "w") as _yzp,
        _yzp.open(f"{data_archive_path.stem}.yaml", "w") as _yfh,
    ):
        this_yaml.dump(damodaran_margin_data, _yfh)

    return damodaran_margin_data
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def margin_data_downloader() -> None:
    """Download Prof. Damodaran's margin data workbooks.

    Fetches the current workbook, "margin.xls", and every archived workbook
    linked from the archive index page whose name matches "marginNN.xls",
    with NN greater than 16 and other than 98 or 99. Files are written to
    the "damodaran_margin_data_archive" directory under ``WORK_DIR``,
    replacing any existing copies.
    """
    # NOTE(review): `certifi` is not among the module imports visible in
    # this change; confirm that it is imported at module level.
    _u3pm = urllib3.PoolManager(ca_certs=certifi.where())
    _data_source_url = "https://pages.stern.nyu.edu/~adamodar/pc/datasets/"
    _archive_source_url = "https://pages.stern.nyu.edu/~adamodar/pc/archives/"

    dest_dir = WORK_DIR / "damodaran_margin_data_archive"
    # Opening a file for writing fails if its directory does not exist.
    dest_dir.mkdir(parents=True, exist_ok=True)

    # Get current-year margin data
    workbook_name = "margin.xls"
    workbook_path = dest_dir / workbook_name
    workbook_path.unlink(missing_ok=True)
    download_file(_u3pm, f"{_data_source_url}{workbook_name}", workbook_path)

    # Get archived margin data
    workbook_re = re.compile(r"margin(\d{2})\.xls")
    archive_html = _u3pm.request("GET", _archive_source_url).data.decode("utf-8")
    archive_tree = BeautifulSoup(archive_html, "lxml")
    for tag in archive_tree.find_all("a"):
        _w = tag.get("href", "")
        if (_r := workbook_re.fullmatch(_w)) is None:
            continue

        # presumably NN is a two-digit year, with 98 and 99 denoting
        # 1998 and 1999 -- TODO confirm against the archive listing
        suffix = int(_r[1])
        if suffix <= 16 or suffix in {98, 99}:
            continue

        _url, _path = f"{_archive_source_url}{_w}", dest_dir / _w
        _path.unlink(missing_ok=True)
        download_file(_u3pm, _url, _path)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def download_file(_u3pm: urllib3.PoolManager, _url: str, _path: Path) -> None:
    """Download a binary file from URL to filesystem path.

    The file's access and modification times are set from the response's
    "Last-Modified" header, when the server supplies one.

    Parameters
    ----------
    _u3pm
        Pool manager used to issue the request.

    _url
        URL of the file to download.

    _path
        Filesystem path to write the file to.

    """
    chunk_size_ = 1024 * 1024
    with (
        _u3pm.request("GET", _url, preload_content=False) as _uh,
        _path.open("wb") as _fh,
    ):
        while data_ := _uh.read(chunk_size_):
            _fh.write(data_)
        last_modified = _uh.headers.get("Last-Modified")

    # Set file times only after the file is closed, so that the final
    # flush cannot overwrite them.
    if last_modified is not None:
        # strptime() returns a naive datetime even when "%Z" matches, and
        # astimezone() would treat a naive datetime as local time; attach
        # UTC explicitly instead, since the header gives the time in GMT.
        _t = (
            datetime.datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z")
            .replace(tzinfo=datetime.UTC)
            .timestamp()
        )
        os.utime(_path, times=(_t, _t))

    print(f"Downloaded {_url} to {_path}.")
|