mergeron 2025.739290.3__py3-none-any.whl → 2025.739290.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +103 -48
- mergeron/core/__init__.py +105 -4
- mergeron/core/empirical_margin_distribution.py +100 -78
- mergeron/core/ftc_merger_investigations_data.py +309 -316
- mergeron/core/guidelines_boundaries.py +67 -138
- mergeron/core/guidelines_boundary_functions.py +202 -379
- mergeron/core/guidelines_boundary_functions_extra.py +264 -106
- mergeron/core/pseudorandom_numbers.py +73 -64
- mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- mergeron/data/ftc_invdata.zip +0 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +9 -7
- mergeron/gen/__init__.py +138 -161
- mergeron/gen/data_generation.py +181 -149
- mergeron/gen/data_generation_functions.py +220 -237
- mergeron/gen/enforcement_stats.py +78 -109
- mergeron/gen/upp_tests.py +119 -194
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/METADATA +2 -3
- mergeron-2025.739290.5.dist-info/RECORD +24 -0
- {mergeron-2025.739290.3.dist-info → mergeron-2025.739290.5.dist-info}/WHEEL +1 -1
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron-2025.739290.3.dist-info/RECORD +0 -23
mergeron/__init__.py
CHANGED
|
@@ -1,26 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
+
from collections.abc import Mapping
|
|
4
5
|
from multiprocessing import cpu_count
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import Literal
|
|
7
|
+
from typing import Any, Literal
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
|
-
from numpy.random import SeedSequence
|
|
10
10
|
from numpy.typing import NDArray
|
|
11
11
|
from ruamel import yaml
|
|
12
12
|
|
|
13
13
|
_PKG_NAME: str = Path(__file__).parent.stem
|
|
14
14
|
|
|
15
|
-
VERSION = "2025.739290.
|
|
15
|
+
VERSION = "2025.739290.5"
|
|
16
16
|
|
|
17
17
|
__version__ = VERSION
|
|
18
18
|
|
|
19
|
-
this_yaml = yaml.YAML(typ="safe", pure=True)
|
|
20
|
-
this_yaml.constructor.deep_construct = True
|
|
21
|
-
this_yaml.indent(mapping=2, sequence=4, offset=2)
|
|
22
|
-
|
|
23
|
-
|
|
24
19
|
DATA_DIR: Path = Path.home() / _PKG_NAME
|
|
25
20
|
"""
|
|
26
21
|
Defines a subdirectory named for this package in the user's home path.
|
|
@@ -37,18 +32,24 @@ EMPTY_ARRAYINT = np.array([], int)
|
|
|
37
32
|
|
|
38
33
|
NTHREADS = 2 * cpu_count()
|
|
39
34
|
|
|
35
|
+
PKG_ATTRS_MAP: dict[str, object] = {}
|
|
36
|
+
|
|
40
37
|
np.set_printoptions(precision=24, floatmode="fixed")
|
|
41
38
|
|
|
42
39
|
type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
|
|
43
40
|
|
|
44
41
|
type ArrayBoolean = NDArray[np.bool_]
|
|
45
|
-
type ArrayFloat = NDArray[np.
|
|
46
|
-
type ArrayINT = NDArray[np.
|
|
42
|
+
type ArrayFloat = NDArray[np.floating]
|
|
43
|
+
type ArrayINT = NDArray[np.unsignedinteger]
|
|
47
44
|
|
|
48
45
|
type ArrayDouble = NDArray[np.float64]
|
|
49
|
-
type ArrayBIGINT = NDArray[np.
|
|
46
|
+
type ArrayBIGINT = NDArray[np.uint64]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
this_yaml = yaml.YAML(typ="rt")
|
|
50
|
+
this_yaml.indent(mapping=2, sequence=4, offset=2)
|
|
50
51
|
|
|
51
|
-
|
|
52
|
+
# Add yaml representer, constructor for NoneType
|
|
52
53
|
(_, _) = (
|
|
53
54
|
this_yaml.representer.add_representer(
|
|
54
55
|
type(None), lambda _r, _d: _r.represent_scalar("!None", "none")
|
|
@@ -56,44 +57,116 @@ type ArrayBIGINT = NDArray[np.int64]
|
|
|
56
57
|
this_yaml.constructor.add_constructor("!None", lambda _c, _n, /: None),
|
|
57
58
|
)
|
|
58
59
|
|
|
59
|
-
|
|
60
|
+
|
|
61
|
+
# Add yaml representer, constructor for ndarray
|
|
60
62
|
(_, _) = (
|
|
61
63
|
this_yaml.representer.add_representer(
|
|
62
64
|
np.ndarray,
|
|
63
65
|
lambda _r, _d: _r.represent_sequence("!ndarray", (_d.tolist(), _d.dtype.str)),
|
|
64
66
|
),
|
|
65
67
|
this_yaml.constructor.add_constructor(
|
|
66
|
-
"!ndarray", lambda _c, _n, /: np.array(*_c.construct_sequence(_n))
|
|
68
|
+
"!ndarray", lambda _c, _n, /: np.array(*_c.construct_sequence(_n, deep=True))
|
|
67
69
|
),
|
|
68
70
|
)
|
|
69
71
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
72
|
+
|
|
73
|
+
def yaml_rt_mapper(
    _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
) -> Mapping[str, Any]:
    """Build a mapping from a YAML mapping node with the round-trip constructor.

    A fresh ``CommentedMap`` is handed to ``_c.construct_mapping`` as the
    target (``maptyp``), filled with ``deep=True``, and then returned.
    """
    constructed_map: Mapping[str, Any] = yaml.constructor.CommentedMap()
    _c.construct_mapping(_n, maptyp=constructed_map, deep=True)
    return constructed_map
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def yamelize_attrs(
    _typ: object, /, *, attr_map: Mapping[str, object] = PKG_ATTRS_MAP
) -> None:
    """Register a YAML representer and constructor for an attrs-defined class.

    The class is recorded in ``attr_map`` under its ``__name__`` and is
    written to / read from YAML with the tag ``!<class name>``. Only
    attributes whose ``init`` flag is true are written, so attributes
    declared with ``init=False`` are not serialized to YAML.
    """

    attr_map |= {_typ.__name__: _typ}
    yaml_tag = f"!{_typ.__name__}"

    def _represent(_r, _d):
        # Build the mapping by hand, rather than calling attrs.asdict(),
        # so that the yaml representers defined in this package are used
        # for "upstream" objects
        init_attrs = {
            _a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init
        }
        return _r.represent_mapping(f"!{_d.__class__.__name__}", init_attrs)

    def _construct(_c, _n):
        # The node's tag, less its leading "!", is the key into attr_map;
        # the node's mapping supplies the keyword arguments.
        target_cls = attr_map[_n.tag.lstrip("!")]
        return target_cls(**yaml_rt_mapper(_c, _n))

    _ = this_yaml.representer.add_representer(_typ, _represent)
    _ = this_yaml.constructor.add_constructor(yaml_tag, _construct)
|
|
84
110
|
|
|
85
111
|
|
|
112
|
+
@this_yaml.register_class
class Enameled(enum.Enum):
    """Enum base class supplying the YAML representer and constructor hooks.

    Members are written as scalars tagged ``!<class name>`` whose value is
    the member name, and are read back by fetching the attribute of that
    name from the class.
    """

    @classmethod
    def to_yaml(
        cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
    ) -> yaml.ScalarNode:
        """Represent enum member ``_d`` as a tagged scalar holding its name."""
        # NOTE(review): the class name is fetched through
        # super().__getattribute__ rather than plain attribute access;
        # the reason is not visible here.
        class_name = super().__getattribute__(cls, "__name__")
        member_name = f"{_d.name}"
        return _r.represent_scalar(f"!{class_name}", member_name)

    @classmethod
    def from_yaml(
        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
    ) -> object[enum.EnumType]:
        """Return the attribute of ``cls`` named by the scalar node's value."""
        member = super().__getattribute__(cls, _n.value)
        return member
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@this_yaml.register_class
|
|
86
132
|
@enum.unique
|
|
87
|
-
class RECForm(
|
|
133
|
+
class RECForm(str, Enameled):
|
|
88
134
|
"""For derivation of recapture ratio from market shares."""
|
|
89
135
|
|
|
90
136
|
INOUT = "inside-out"
|
|
137
|
+
R"""
|
|
138
|
+
Given, :math:`\overline{r}, s_i {\ } \forall {\ } i \in \set{1, 2, \ldots, m}`, with
|
|
139
|
+
:math:`s_{min} = \min(s_1, s_2)`,
|
|
140
|
+
|
|
141
|
+
.. math::
|
|
142
|
+
|
|
143
|
+
REC_i = \frac{(1 - s_i) \overline{r}}{(1 - s_{min}) - (s_i - s_{min}) \overline{r}}
|
|
144
|
+
|
|
145
|
+
"""
|
|
146
|
+
|
|
91
147
|
OUTIN = "outside-in"
|
|
148
|
+
R"""
|
|
149
|
+
Given, :math:`\pi_i {\ } \forall {\ } i \in N`,
|
|
150
|
+
|
|
151
|
+
.. math::
|
|
152
|
+
|
|
153
|
+
REC_i = \frac{\sum_{i \in M} \pi_i}{\sum_{j \in N} \pi_j}
|
|
154
|
+
|
|
155
|
+
"""
|
|
156
|
+
|
|
92
157
|
FIXED = "proportional"
|
|
158
|
+
R"""Given, :math:`\overline{r}`,
|
|
159
|
+
|
|
160
|
+
.. math::
|
|
161
|
+
|
|
162
|
+
REC_i = \overline{r} {\ } \forall {\ } i \in M
|
|
163
|
+
|
|
164
|
+
"""
|
|
93
165
|
|
|
94
166
|
|
|
167
|
+
@this_yaml.register_class
|
|
95
168
|
@enum.unique
|
|
96
|
-
class UPPAggrSelector(
|
|
169
|
+
class UPPAggrSelector(str, Enameled):
|
|
97
170
|
"""
|
|
98
171
|
Aggregator for GUPPI and diversion ratio estimates.
|
|
99
172
|
|
|
@@ -110,21 +183,3 @@ class UPPAggrSelector(enum.StrEnum):
|
|
|
110
183
|
OSA = "own-share weighted average"
|
|
111
184
|
OSD = "own-share weighted distance"
|
|
112
185
|
OSG = "own-share weighted geometric mean"
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
for _typ in (RECForm, UPPAggrSelector):
|
|
116
|
-
# NOTE: If additional enums are defined in this module,
|
|
117
|
-
# add them to the list above
|
|
118
|
-
|
|
119
|
-
_, _ = (
|
|
120
|
-
this_yaml.representer.add_representer(
|
|
121
|
-
_typ,
|
|
122
|
-
lambda _r, _d: _r.represent_scalar(f"!{_d.__class__.__name__}", _d.name),
|
|
123
|
-
),
|
|
124
|
-
this_yaml.constructor.add_constructor(
|
|
125
|
-
f"!{_typ.__name__}",
|
|
126
|
-
lambda _c, _n, /: getattr(
|
|
127
|
-
globals().get(_n.tag.lstrip("!")), _c.construct_scalar(_n)
|
|
128
|
-
),
|
|
129
|
-
),
|
|
130
|
-
)
|
mergeron/core/__init__.py
CHANGED
|
@@ -1,8 +1,109 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from decimal import Decimal
|
|
5
|
+
from types import MappingProxyType
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import mpmath # type: ignore
|
|
9
|
+
import numpy as np
|
|
10
|
+
from attrs import cmp_using, field, frozen
|
|
11
|
+
|
|
12
|
+
from .. import VERSION, ArrayBIGINT, this_yaml, yaml_rt_mapper # noqa: TID252
|
|
4
13
|
|
|
5
14
|
__version__ = VERSION
|
|
6
15
|
|
|
7
|
-
type MPFloat =
|
|
8
|
-
type MPMatrix =
|
|
16
|
+
type MPFloat = mpmath.ctx_mp_python.mpf
|
|
17
|
+
type MPMatrix = mpmath.matrix # type: ignore
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@frozen
class INVTableData:
    """Frozen attrs record pairing two string labels with a data array.

    NOTE(review): presumably one table of the FTC merger investigations
    data -- confirm against the code that builds these records.
    """

    # Industry-group label for the table
    industry_group: str
    # Additional-evidence label for the table
    additional_evidence: str
    # Equality on this field is decided by np.array_equal
    data_array: ArrayBIGINT = field(eq=cmp_using(eq=np.array_equal))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
type INVData = MappingProxyType[
|
|
28
|
+
str, MappingProxyType[str, MappingProxyType[str, INVTableData]]
|
|
29
|
+
]
|
|
30
|
+
type INVData_in = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
(_, _) = (
|
|
34
|
+
this_yaml.representer.add_representer(
|
|
35
|
+
Decimal, lambda _r, _d: _r.represent_scalar("!Decimal", f"{_d}")
|
|
36
|
+
),
|
|
37
|
+
this_yaml.constructor.add_constructor(
|
|
38
|
+
"!Decimal", lambda _c, _n, /: Decimal(_c.construct_scalar(_n))
|
|
39
|
+
),
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
(_, _) = (
|
|
44
|
+
this_yaml.representer.add_representer(
|
|
45
|
+
mpmath.mpf, lambda _r, _d: _r.represent_scalar("!MPFloat", f"{_d}")
|
|
46
|
+
),
|
|
47
|
+
this_yaml.constructor.add_constructor(
|
|
48
|
+
"!MPFloat", lambda _c, _n, /: mpmath.mpf(_c.construct_scalar(_n))
|
|
49
|
+
),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
(_, _) = (
|
|
53
|
+
this_yaml.representer.add_representer(
|
|
54
|
+
mpmath.matrix, lambda _r, _d: _r.represent_sequence("!MPMatrix", _d.tolist())
|
|
55
|
+
),
|
|
56
|
+
this_yaml.constructor.add_constructor(
|
|
57
|
+
"!MPMatrix",
|
|
58
|
+
lambda _c, _n, /: mpmath.matrix(_c.construct_sequence(_n, deep=True)),
|
|
59
|
+
),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _dict_from_mapping(_p: Mapping[Any, Any], /) -> dict[Any, Any]:
    """Return a plain ``dict`` copy of ``_p``, converting nested mappings too.

    Any value that is itself a ``Mapping`` is converted recursively; every
    other value is carried over unchanged (not copied). Key order follows
    the iteration order of ``_p``.
    """
    return {
        _k: _dict_from_mapping(_v) if isinstance(_v, Mapping) else _v
        for _k, _v in _p.items()
    }
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _mappingproxy_from_mapping(_p: Mapping[Any, Any], /) -> MappingProxyType[Any, Any]:
    """Return a read-only ``MappingProxyType`` view of a copy of ``_p``.

    Any value that is itself a ``Mapping`` is wrapped recursively, so nested
    levels are read-only as well; every other value is carried over
    unchanged. Key order follows the iteration order of ``_p``.
    """
    return MappingProxyType({
        _k: _mappingproxy_from_mapping(_v) if isinstance(_v, Mapping) else _v
        for _k, _v in _p.items()
    })
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
_, _ = (
|
|
82
|
+
this_yaml.representer.add_representer(
|
|
83
|
+
MappingProxyType,
|
|
84
|
+
lambda _r, _d: _r.represent_mapping("!mappingproxy", dict(_d.items())),
|
|
85
|
+
),
|
|
86
|
+
this_yaml.constructor.add_constructor(
|
|
87
|
+
"!mappingproxy", lambda _c, _n: MappingProxyType(yaml_rt_mapper(_c, _n))
|
|
88
|
+
),
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
for _typ in (INVTableData,):
    # Register a YAML representer/constructor pair, tagged ``!<class name>``,
    # for each attrs class listed in the tuple above.
    _, _ = (
        this_yaml.representer.add_representer(
            _typ,
            lambda _r, _d: _r.represent_mapping(
                f"!{_d.__class__.__name__}",
                # Write only the attributes accepted by __init__, because the
                # constructor below passes the mapping back as keyword
                # arguments.
                {_a.name: getattr(_d, _a.name) for _a in _d.__attrs_attrs__ if _a.init},
            ),
        ),
        this_yaml.constructor.add_constructor(
            f"!{_typ.__name__}",
            # The tag, less its leading "!", names the class in this module's
            # globals; the node's mapping supplies the keyword arguments.
            lambda _c, _n: globals()[_n.tag.lstrip("!")](**yaml_rt_mapper(_c, _n)),
        ),
    )
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Functions to parse margin data compiled by
|
|
3
3
|
Prof. Aswath Damodaran, Stern School of Business, NYU.
|
|
4
4
|
|
|
5
|
-
Provides :func:`
|
|
5
|
+
Provides :func:`margin_data_resampler` for generating margin data
|
|
6
6
|
from an estimated Gaussian KDE from the source (margin) data.
|
|
7
7
|
|
|
8
8
|
Data are downloaded or reused from a local copy, on demand.
|
|
@@ -37,123 +37,139 @@ price-cost margins fall in the interval :math:`[0, 1]`.
|
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
39
|
import shutil
|
|
40
|
+
import zipfile
|
|
40
41
|
from collections.abc import Mapping
|
|
41
42
|
from importlib import resources
|
|
42
43
|
from pathlib import Path
|
|
43
44
|
from types import MappingProxyType
|
|
44
45
|
|
|
45
|
-
import msgpack # type:ignore
|
|
46
46
|
import numpy as np
|
|
47
47
|
import urllib3
|
|
48
48
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
49
49
|
from scipy import stats # type: ignore
|
|
50
50
|
from xlrd import open_workbook # type: ignore
|
|
51
51
|
|
|
52
|
-
from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble # noqa: TID252
|
|
52
|
+
from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble, this_yaml # noqa: TID252
|
|
53
|
+
from . import _mappingproxy_from_mapping
|
|
53
54
|
|
|
54
55
|
__version__ = VERSION
|
|
55
56
|
|
|
56
|
-
MGNDATA_ARCHIVE_PATH = DATA_DIR / "
|
|
57
|
+
MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_serialized.zip"
|
|
58
|
+
|
|
57
59
|
|
|
58
60
|
u3pm = urllib3.PoolManager()
|
|
59
61
|
|
|
60
62
|
|
|
61
|
-
def
|
|
63
|
+
def margin_data_getter( # noqa: PLR0912
|
|
62
64
|
_table_name: str = "margin",
|
|
63
65
|
*,
|
|
64
66
|
data_archive_path: Path | None = None,
|
|
65
67
|
data_download_flag: bool = False,
|
|
66
|
-
) -> MappingProxyType[str,
|
|
68
|
+
) -> MappingProxyType[str, MappingProxyType[str, float | int]]:
|
|
67
69
|
if _table_name != "margin": # Not validated for other tables
|
|
68
70
|
raise ValueError(
|
|
69
71
|
"This code is designed for parsing Prof. Damodaran's margin tables."
|
|
70
72
|
)
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
74
|
+
data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
|
|
75
|
+
workbook_path = data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
|
|
76
|
+
if data_archive_path.is_file() and not data_download_flag:
|
|
77
|
+
# with data_archive_path_.open("r") as _yfh:
|
|
78
|
+
# margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
|
|
79
|
+
with (
|
|
80
|
+
zipfile.ZipFile(data_archive_path) as _yzip,
|
|
81
|
+
_yzip.open(f"{data_archive_path.stem}.yaml") as _yfh,
|
|
82
|
+
):
|
|
83
|
+
margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
|
|
84
|
+
return _mappingproxy_from_mapping(margin_data_dict)
|
|
85
|
+
elif workbook_path.is_file():
|
|
86
|
+
workbook_path.unlink()
|
|
87
|
+
if data_archive_path.is_file():
|
|
88
|
+
data_archive_path.unlink()
|
|
89
|
+
|
|
90
|
+
margin_urlstr = (
|
|
91
|
+
f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
|
|
92
|
+
)
|
|
83
93
|
try:
|
|
84
|
-
|
|
94
|
+
chunk_size_ = 1024 * 1024
|
|
85
95
|
with (
|
|
86
|
-
u3pm.request(
|
|
87
|
-
|
|
96
|
+
u3pm.request(
|
|
97
|
+
"GET", margin_urlstr, preload_content=False
|
|
98
|
+
) as _urlopen_handle,
|
|
99
|
+
workbook_path.open("wb") as margin_file,
|
|
88
100
|
):
|
|
89
101
|
while True:
|
|
90
|
-
|
|
91
|
-
if not
|
|
102
|
+
data_ = _urlopen_handle.read(chunk_size_)
|
|
103
|
+
if not data_:
|
|
92
104
|
break
|
|
93
|
-
|
|
105
|
+
margin_file.write(data_)
|
|
94
106
|
|
|
95
|
-
print(f"Downloaded {
|
|
107
|
+
print(f"Downloaded {margin_urlstr} to {workbook_path}.")
|
|
96
108
|
|
|
97
|
-
except urllib3.exceptions.MaxRetryError as
|
|
98
|
-
if isinstance(
|
|
109
|
+
except urllib3.exceptions.MaxRetryError as error_:
|
|
110
|
+
if isinstance(error_.__cause__, urllib3.exceptions.SSLError):
|
|
99
111
|
# Works fine with other sites secured with certificates
|
|
100
112
|
# from the Internet2 CA, such as,
|
|
101
113
|
# https://snap.stanford.edu/data/web-Stanford.txt.gz
|
|
102
114
|
print(
|
|
103
|
-
f"WARNING: Could not establish secure connection to, {
|
|
115
|
+
f"WARNING: Could not establish secure connection to, {margin_urlstr}."
|
|
104
116
|
"Using bundled copy."
|
|
105
117
|
)
|
|
106
|
-
if not
|
|
118
|
+
if not workbook_path.is_file():
|
|
107
119
|
with resources.as_file(
|
|
108
120
|
resources.files(f"{_PKG_NAME}.data").joinpath(
|
|
109
121
|
"empirical_margin_distribution.xls"
|
|
110
122
|
)
|
|
111
|
-
) as
|
|
112
|
-
shutil.copy2(
|
|
123
|
+
) as margin_data_archive_path:
|
|
124
|
+
shutil.copy2(margin_data_archive_path, workbook_path)
|
|
113
125
|
else:
|
|
114
|
-
raise
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
for _ridx in range(
|
|
123
|
-
|
|
124
|
-
if
|
|
125
|
-
|
|
126
|
-
|
|
126
|
+
raise error_
|
|
127
|
+
|
|
128
|
+
xl_book = open_workbook(workbook_path, ragged_rows=True, on_demand=True)
|
|
129
|
+
xl_sheet = xl_book.sheet_by_name("Industry Averages")
|
|
130
|
+
|
|
131
|
+
margin_dict: dict[str, dict[str, float | int]] = {}
|
|
132
|
+
row_keys: list[str] = []
|
|
133
|
+
read_row_flag = False
|
|
134
|
+
for _ridx in range(xl_sheet.nrows):
|
|
135
|
+
xl_row = xl_sheet.row_values(_ridx)
|
|
136
|
+
if xl_row[0] == "Industry Name":
|
|
137
|
+
read_row_flag = True
|
|
138
|
+
row_keys = xl_row
|
|
127
139
|
continue
|
|
128
140
|
|
|
129
|
-
if not
|
|
141
|
+
if not xl_row[0] or not read_row_flag:
|
|
130
142
|
continue
|
|
131
143
|
|
|
132
|
-
|
|
133
|
-
|
|
144
|
+
xl_row[1] = int(xl_row[1])
|
|
145
|
+
margin_dict[xl_row[0]] = dict(zip(row_keys[1:], xl_row[1:], strict=True))
|
|
134
146
|
|
|
135
|
-
|
|
147
|
+
with (
|
|
148
|
+
zipfile.ZipFile(data_archive_path, "w") as _yzip,
|
|
149
|
+
_yzip.open(f"{data_archive_path.stem}.yaml", "w") as _yfh,
|
|
150
|
+
):
|
|
151
|
+
this_yaml.dump(margin_dict, _yfh)
|
|
136
152
|
|
|
137
|
-
return
|
|
153
|
+
return _mappingproxy_from_mapping(margin_dict)
|
|
138
154
|
|
|
139
155
|
|
|
140
|
-
def
|
|
141
|
-
|
|
156
|
+
def margin_data_builder(
|
|
157
|
+
_src_data_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
|
|
142
158
|
) -> tuple[ArrayDouble, ArrayDouble, ArrayDouble]:
|
|
143
|
-
if
|
|
144
|
-
|
|
159
|
+
if _src_data_dict is None:
|
|
160
|
+
_src_data_dict = margin_data_getter()
|
|
145
161
|
|
|
146
|
-
|
|
162
|
+
margin_data_wts, margin_data_obs = (
|
|
147
163
|
_f.flatten()
|
|
148
164
|
for _f in np.hsplit(
|
|
149
165
|
np.array([
|
|
150
166
|
tuple(
|
|
151
|
-
|
|
167
|
+
_src_data_dict[_g][_h] for _h in ["Number of firms", "Gross Margin"]
|
|
152
168
|
)
|
|
153
|
-
for _g in
|
|
169
|
+
for _g in _src_data_dict
|
|
154
170
|
if not _g.startswith("Total Market")
|
|
155
171
|
and _g
|
|
156
|
-
not in
|
|
172
|
+
not in {
|
|
157
173
|
"Bank (Money Center)",
|
|
158
174
|
"Banks (Regional)",
|
|
159
175
|
"Brokerage & Investment Banking",
|
|
@@ -165,29 +181,35 @@ def mgn_data_builder(
|
|
|
165
181
|
"R.E.I.T.",
|
|
166
182
|
"Retail (REITs)",
|
|
167
183
|
"Reinsurance",
|
|
168
|
-
|
|
184
|
+
}
|
|
169
185
|
]),
|
|
170
186
|
2,
|
|
171
187
|
)
|
|
172
188
|
)
|
|
173
189
|
|
|
174
|
-
|
|
190
|
+
margin_wtd_avg = np.average(margin_data_obs, weights=margin_data_wts)
|
|
175
191
|
# https://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
|
|
176
|
-
|
|
177
|
-
np.average((
|
|
178
|
-
* (len(
|
|
192
|
+
margin_wtd_stderr = np.sqrt(
|
|
193
|
+
np.average((margin_data_obs - margin_wtd_avg) ** 2, weights=margin_data_wts)
|
|
194
|
+
* (len(margin_data_wts) / (len(margin_data_wts) - 1))
|
|
179
195
|
)
|
|
180
196
|
|
|
181
197
|
return (
|
|
182
|
-
|
|
183
|
-
|
|
198
|
+
margin_data_obs,
|
|
199
|
+
margin_data_wts,
|
|
184
200
|
np.round(
|
|
185
|
-
(
|
|
201
|
+
(
|
|
202
|
+
margin_wtd_avg,
|
|
203
|
+
margin_wtd_stderr,
|
|
204
|
+
margin_data_obs.min(),
|
|
205
|
+
margin_data_obs.max(),
|
|
206
|
+
),
|
|
207
|
+
8,
|
|
186
208
|
),
|
|
187
209
|
)
|
|
188
210
|
|
|
189
211
|
|
|
190
|
-
def
|
|
212
|
+
def margin_data_resampler(
|
|
191
213
|
_sample_size: int | tuple[int, ...] = (10**6, 2),
|
|
192
214
|
/,
|
|
193
215
|
*,
|
|
@@ -216,26 +238,26 @@ def mgn_data_resampler(
|
|
|
216
238
|
|
|
217
239
|
"""
|
|
218
240
|
|
|
219
|
-
|
|
241
|
+
seed_sequence_ = seed_sequence or SeedSequence(pool_size=8)
|
|
220
242
|
|
|
221
|
-
_x, _w, _ =
|
|
243
|
+
_x, _w, _ = margin_data_builder(margin_data_getter())
|
|
222
244
|
|
|
223
|
-
|
|
224
|
-
|
|
245
|
+
margin_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
|
|
246
|
+
margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)
|
|
225
247
|
|
|
226
248
|
if isinstance(_sample_size, int):
|
|
227
249
|
return np.array(
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
]
|
|
250
|
+
margin_kde.resample(
|
|
251
|
+
_sample_size, seed=Generator(PCG64DXSM(seed_sequence_))
|
|
252
|
+
)[0]
|
|
231
253
|
)
|
|
232
254
|
elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
for
|
|
236
|
-
|
|
237
|
-
|
|
255
|
+
ssz, num_cols = _sample_size
|
|
256
|
+
ret_array = np.empty(_sample_size, np.float64)
|
|
257
|
+
for idx, seed_seq in enumerate(seed_sequence_.spawn(num_cols)):
|
|
258
|
+
ret_array[:, idx] = margin_kde.resample(
|
|
259
|
+
ssz, seed=Generator(PCG64DXSM(seed_seq))
|
|
238
260
|
)[0]
|
|
239
|
-
return
|
|
261
|
+
return ret_array
|
|
240
262
|
else:
|
|
241
263
|
raise ValueError(f"Invalid sample size: {_sample_size!r}")
|