mergeron 2025.739290.2.tar.gz → 2025.739290.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (30)
  1. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/PKG-INFO +2 -3
  2. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/pyproject.toml +5 -4
  3. mergeron-2025.739290.4/src/mergeron/__init__.py +156 -0
  4. mergeron-2025.739290.4/src/mergeron/core/__init__.py +109 -0
  5. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/core/empirical_margin_distribution.py +100 -78
  6. mergeron-2025.739290.4/src/mergeron/core/ftc_merger_investigations_data.py +769 -0
  7. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/core/guidelines_boundaries.py +62 -121
  8. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/core/guidelines_boundary_functions.py +207 -384
  9. mergeron-2025.739290.4/src/mergeron/core/guidelines_boundary_functions_extra.py +533 -0
  10. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/core/pseudorandom_numbers.py +76 -67
  11. mergeron-2025.739290.4/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
  12. mergeron-2025.739290.4/src/mergeron/data/ftc_invdata.zip +0 -0
  13. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/demo/visualize_empirical_margin_distribution.py +9 -7
  14. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/gen/__init__.py +123 -161
  15. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/gen/data_generation.py +183 -149
  16. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/gen/data_generation_functions.py +220 -237
  17. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/gen/enforcement_stats.py +83 -115
  18. mergeron-2025.739290.4/src/mergeron/gen/upp_tests.py +318 -0
  19. mergeron-2025.739290.2/src/mergeron/__init__.py +0 -130
  20. mergeron-2025.739290.2/src/mergeron/core/__init__.py +0 -8
  21. mergeron-2025.739290.2/src/mergeron/core/ftc_merger_investigations_data.py +0 -776
  22. mergeron-2025.739290.2/src/mergeron/core/guidelines_boundary_functions_extra.py +0 -373
  23. mergeron-2025.739290.2/src/mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  24. mergeron-2025.739290.2/src/mergeron/gen/upp_tests.py +0 -393
  25. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/README.rst +0 -0
  26. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/data/__init__.py +0 -0
  27. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/data/damodaran_margin_data.xls +0 -0
  28. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/data/ftc_invdata.msgpack +0 -0
  29. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/demo/__init__.py +0 -0
  30. {mergeron-2025.739290.2 → mergeron-2025.739290.4}/src/mergeron/py.typed +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739290.2
+Version: 2025.739290.4
 Summary: Analyze merger enforcement policy using Python
 License: MIT
 Keywords: merger policy analysis,merger guidelines,merger screening,policy presumptions,concentration standards,upward pricing pressure,GUPPI
@@ -23,7 +23,7 @@ Requires-Dist: aenum (>=3.1.15,<4.0.0)
 Requires-Dist: attrs (>=23.2)
 Requires-Dist: bs4 (>=0.0.1)
 Requires-Dist: certifi (>=2023.11.17)
-Requires-Dist: google-re2 (>=1.1)
+Requires-Dist: h5py (>=3.13.0,<4.0.0)
 Requires-Dist: jinja2 (>=3.1)
 Requires-Dist: joblib (>=1.3)
 Requires-Dist: matplotlib (>=3.8)
@@ -33,7 +33,6 @@ Requires-Dist: msgpack-numpy (>=0.4)
 Requires-Dist: ruamel-yaml (>=0.18.10,<0.19.0)
 Requires-Dist: scipy (>=1.12)
 Requires-Dist: sympy (>=1.12)
-Requires-Dist: tables (>=3.10.1)
 Requires-Dist: types-beautifulsoup4 (>=4.11.2)
 Requires-Dist: urllib3 (>=2.2.2,<3.0.0)
 Requires-Dist: xlrd (>=2.0.1,<3.0.0)
@@ -13,7 +13,7 @@ keywords = [
   "upward pricing pressure",
   "GUPPI",
 ]
-version = "2025.739290.2"
+version = "2025.739290.4"
 
 # Classifiers list: https://pypi.org/classifiers/
 classifiers = [
@@ -41,7 +41,6 @@ build-backend = "poetry.core.masonry.api"
 aenum = "^3.1.15"
 attrs = ">=23.2"
 bs4 = ">=0.0.1"
-google-re2 = ">=1.1"
 jinja2 = ">=3.1"
 joblib = ">=1.3"
 matplotlib = ">=3.8"
@@ -51,13 +50,13 @@ msgpack-numpy = ">=0.4"
 python = "^3.12"
 scipy = ">=1.12"
 sympy = ">=1.12"
-tables = ">=3.10.1"
 xlsxwriter = ">=3.1"
 certifi = ">=2023.11.17"
 types-beautifulsoup4 = ">=4.11.2"
 xlrd = "^2.0.1" # Needed to read margin data
 urllib3 = "^2.2.2"
 ruamel-yaml = "^0.18.10"
+h5py = "^3.13.0"
 
 
 [tool.poetry.group.dev.dependencies]
@@ -131,7 +130,7 @@ select = [
   "S", # flake8-bandit
   "SIM", # flake8-simplify
   "TID", # flake8-tidy-imports
-  "TC", # flake8-type-checking
+  "TC", # flake8-type-checking
   "UP", # pyupgrade
   "RUF", # ruff-specific
 ]
@@ -148,6 +147,8 @@ ignore = [
   'B904',
   'B905',
   "PLR2004", # avoid magic values
+  # RUF
+  "RUF052",
   # flake8-type-checking
   "TC001", # move application import into a type-checking block
   "TC002", # move third-party import into a type-checking block
@@ -0,0 +1,156 @@
+from __future__ import annotations
+
+import enum
+from collections.abc import Mapping
+from multiprocessing import cpu_count
+from pathlib import Path
+from typing import Any, Literal
+
+import numpy as np
+from numpy.typing import NDArray
+from ruamel import yaml
+
+_PKG_NAME: str = Path(__file__).parent.stem
+
+VERSION = "2025.739290.4"
+
+__version__ = VERSION
+
+DATA_DIR: Path = Path.home() / _PKG_NAME
+"""
+Defines a subdirectory named for this package in the user's home path.
+
+If the subdirectory doesn't exist, it is created on package invocation.
+"""
+if not DATA_DIR.is_dir():
+    DATA_DIR.mkdir(parents=False)
+
+DEFAULT_REC_RATIO = 0.85
+
+EMPTY_ARRAYDOUBLE = np.array([], float)
+EMPTY_ARRAYINT = np.array([], int)
+
+NTHREADS = 2 * cpu_count()
+
+PKG_ENUMS_MAP: dict[str, object] = {}
+PKG_ATTRS_MAP: dict[str, object] = {}
+
+np.set_printoptions(precision=24, floatmode="fixed")
+
+type HMGPubYear = Literal[1982, 1984, 1992, 2010, 2023]
+
+type ArrayBoolean = NDArray[np.bool_]
+type ArrayFloat = NDArray[np.floating]
+type ArrayINT = NDArray[np.unsignedinteger]
+
+type ArrayDouble = NDArray[np.float64]
+type ArrayBIGINT = NDArray[np.uint64]
+
+
+this_yaml = yaml.YAML(typ="rt")
+this_yaml.indent(mapping=2, sequence=4, offset=2)
+
+# Add yaml representer, constructor for NoneType
+(_, _) = (
+    this_yaml.representer.add_representer(
+        type(None), lambda _r, _d: _r.represent_scalar("!None", "none")
+    ),
+    this_yaml.constructor.add_constructor("!None", lambda _c, _n, /: None),
+)
+
+
+# Add yaml representer, constructor for ndarray
+(_, _) = (
+    this_yaml.representer.add_representer(
+        np.ndarray,
+        lambda _r, _d: _r.represent_sequence("!ndarray", (_d.tolist(), _d.dtype.str)),
+    ),
+    this_yaml.constructor.add_constructor(
+        "!ndarray", lambda _c, _n, /: np.array(*_c.construct_sequence(_n, deep=True))
+    ),
+)
+
+
+@this_yaml.register_class
+class EnumYAMLized(enum.Enum):
+    @classmethod
+    def to_yaml(
+        cls, _r: yaml.representer.RoundTripRepresenter, _d: object[enum.EnumType]
+    ) -> yaml.ScalarNode:
+        return _r.represent_scalar(
+            f"!{super().__getattribute__(cls, '__name__')}", f"{_d.name}"
+        )
+
+    @classmethod
+    def from_yaml(
+        cls, _c: yaml.constructor.RoundTripConstructor, _n: yaml.ScalarNode
+    ) -> object[enum.EnumType]:
+        return super().__getattribute__(cls, _n.value)
+
+
+def yaml_rt_mapper(
+    _c: yaml.constructor.RoundTripConstructor, _n: yaml.MappingNode
+) -> Mapping[str, Any]:
+    data_: Mapping[str, Any] = yaml.constructor.CommentedMap()
+    _c.construct_mapping(_n, maptyp=data_, deep=True)
+    return data_
+
+
+def yamelize_attrs(
+    _typ: object,
+    excluded_attributes: set | None = None,
+    /,
+    *,
+    attr_map: Mapping[str, object] = PKG_ATTRS_MAP,
+) -> None:
+    attr_map |= {_typ.__name__: _typ}
+
+    _ = this_yaml.representer.add_representer(
+        _typ,
+        lambda _r, _d: _r.represent_mapping(
+            f"!{_d.__class__.__name__}",
+            # construct mapping, rather than calling attrs.asdict(),
+            # to use yaml representers defined in this package for
+            # "upstream" objects
+            {
+                _a.name: getattr(_d, _a.name)
+                for _a in _d.__attrs_attrs__
+                if excluded_attributes is None or _a.name not in excluded_attributes
+            },
+        ),
+    )
+    _ = this_yaml.constructor.add_constructor(
+        f"!{_typ.__name__}",
+        lambda _c, _n: attr_map[_n.tag.lstrip("!")](**yaml_rt_mapper(_c, _n)),
+    )
+
+
+@this_yaml.register_class
+@enum.unique
+class RECForm(str, EnumYAMLized):
+    """For derivation of recapture ratio from market shares."""
+
+    INOUT = "inside-out"
+    OUTIN = "outside-in"
+    FIXED = "proportional"
+
+
+@this_yaml.register_class
+@enum.unique
+class UPPAggrSelector(str, EnumYAMLized):
+    """
+    Aggregator for GUPPI and diversion ratio estimates.
+
+    """
+
+    AVG = "average"
+    CPA = "cross-product-share weighted average"
+    CPD = "cross-product-share weighted distance"
+    CPG = "cross-product-share weighted geometric mean"
+    DIS = "symmetrically-weighted distance"
+    GMN = "geometric mean"
+    MAX = "max"
+    MIN = "min"
+    OSA = "own-share weighted average"
+    OSD = "own-share weighted distance"
+    OSG = "own-share weighted geometric mean"
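
Editor's note: the new top-level __init__.py above wires custom representers and constructors into a shared ruamel.yaml round-trip instance (this_yaml), so ndarrays and the package enums survive serialization. A minimal round-trip sketch, assuming the package is importable as mergeron; the buffer and sample values are illustrative, not from the release:

    import io

    import numpy as np

    from mergeron import RECForm, this_yaml

    buf = io.StringIO()
    # The ndarray is emitted as a "!ndarray" sequence of (values, dtype string);
    # the enum member is emitted as a "!RECForm" scalar holding its member name.
    this_yaml.dump({"rec": RECForm.INOUT, "shares": np.array([0.2, 0.3])}, buf)

    restored = this_yaml.load(buf.getvalue())
    assert restored["rec"] is RECForm.INOUT
    assert np.array_equal(restored["shares"], np.array([0.2, 0.3]))
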
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from decimal import Decimal
+from types import MappingProxyType
+from typing import Any
+
+import mpmath  # type: ignore
+import numpy as np
+from attrs import cmp_using, field, frozen
+
+from .. import VERSION, ArrayBIGINT, this_yaml, yaml_rt_mapper  # noqa: TID252
+
+__version__ = VERSION
+
+type MPFloat = mpmath.ctx_mp_python.mpf
+type MPMatrix = mpmath.matrix  # type: ignore
+
+
+@frozen
+class INVTableData:
+    industry_group: str
+    additional_evidence: str
+    data_array: ArrayBIGINT = field(eq=cmp_using(eq=np.array_equal))
+
+
+type INVData = MappingProxyType[
+    str, MappingProxyType[str, MappingProxyType[str, INVTableData]]
+]
+type INVData_in_ = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]
+
+
+(_, _) = (
+    this_yaml.representer.add_representer(
+        Decimal, lambda _r, _d: _r.represent_scalar("!Decimal", f"{_d}")
+    ),
+    this_yaml.constructor.add_constructor(
+        "!Decimal", lambda _c, _n, /: Decimal(_c.construct_scalar(_n))
+    ),
+)
+
+
+(_, _) = (
+    this_yaml.representer.add_representer(
+        mpmath.mpf, lambda _r, _d: _r.represent_scalar("!MPFloat", f"{_d}")
+    ),
+    this_yaml.constructor.add_constructor(
+        "!MPFloat", lambda _c, _n, /: mpmath.mpf(_c.construct_scalar(_n))
+    ),
+)
+
+(_, _) = (
+    this_yaml.representer.add_representer(
+        mpmath.matrix, lambda _r, _d: _r.represent_sequence("!MPMatrix", _d.tolist())
+    ),
+    this_yaml.constructor.add_constructor(
+        "!MPMatrix",
+        lambda _c, _n, /: mpmath.matrix(_c.construct_sequence(_n, deep=True)),
+    ),
+)
+
+
+def _dict_from_mapping(_p: Mapping[Any, Any], /) -> dict[Any, Any]:
+    retval = {}
+    for _k, _v in _p.items():
+        retval |= {_k: _dict_from_mapping(_v)} if isinstance(_v, Mapping) else {_k: _v}
+    return retval
+
+
+def _mappingproxy_from_mapping(_p: Mapping[Any, Any], /) -> MappingProxyType[Any, Any]:
+    retval = {}
+    for _k, _v in _p.items():
+        retval |= (
+            {_k: _mappingproxy_from_mapping(_v)}
+            if isinstance(_v, Mapping)
+            else {_k: _v}
+        )
+    return MappingProxyType(retval)
+
+
+_, _ = (
+    this_yaml.representer.add_representer(
+        MappingProxyType,
+        lambda _r, _d: _r.represent_mapping("!mappingproxy", dict(_d.items())),
+    ),
+    this_yaml.constructor.add_constructor(
+        "!mappingproxy", lambda _c, _n: MappingProxyType(yaml_rt_mapper(_c, _n))
+    ),
+)
+
+
+for _typ in (INVTableData,):
+    _, _ = (
+        this_yaml.representer.add_representer(
+            _typ,
+            lambda _r, _d: _r.represent_mapping(
+                f"!{_d.__class__.__name__}",
+                {
+                    _a.name: getattr(_d, _a.name)
+                    for _a in _d.__attrs_attrs__
+                    if _a.name not in {"coordinates", "area"}
+                },
+            ),
+        ),
+        this_yaml.constructor.add_constructor(
+            f"!{_typ.__name__}",
+            lambda _c, _n: globals()[_n.tag.lstrip("!")](**yaml_rt_mapper(_c, _n)),
+        ),
+    )
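
Editor's note: core/__init__.py extends the same YAML machinery to Decimal, mpmath scalars and matrices, read-only mappings, and the attrs-based INVTableData record, and _mappingproxy_from_mapping deep-freezes nested dicts into MappingProxyType views. A small sketch of the intended behavior; key names and values here are illustrative:

    import io

    import mpmath

    from mergeron import this_yaml
    from mergeron.core import _mappingproxy_from_mapping

    # Nested plain dicts deep-freeze into read-only MappingProxyType views
    frozen = _mappingproxy_from_mapping({"tables": {"4.1": 42}})
    assert frozen["tables"]["4.1"] == 42

    # mpmath scalars round-trip through the "!MPFloat" tag
    buf = io.StringIO()
    this_yaml.dump({"threshold": mpmath.mpf("0.125")}, buf)
    restored = this_yaml.load(buf.getvalue())
    assert restored["threshold"] == mpmath.mpf("0.125")
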
@@ -2,7 +2,7 @@
 Functions to parse margin data compiled by
 Prof. Aswath Damodaran, Stern School of Business, NYU.
 
-Provides :func:`mgn_data_resampler` for generating margin data
+Provides :func:`margin_data_resampler` for generating margin data
 from an estimated Gaussian KDE from the source (margin) data.
 
 Data are downloaded or reused from a local copy, on demand.
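
Editor's note: the hunk below swaps the on-disk cache from a msgpack blob to a YAML document stored inside a zip archive and renames the public helpers (mgn_data_* → margin_data_*). A minimal sketch of the read/write pattern it adopts, under the paths shown in this diff (this_yaml encodes to UTF-8 when handed a binary stream; the helper names here are hypothetical):

    import zipfile

    from mergeron import DATA_DIR, this_yaml

    archive = DATA_DIR / "damodaran_margin_data_serialized.zip"

    def write_cache(data: dict) -> None:
        # One YAML member, named after the archive stem, inside the zip
        with zipfile.ZipFile(archive, "w") as zf, zf.open(f"{archive.stem}.yaml", "w") as fh:
            this_yaml.dump(data, fh)

    def read_cache() -> dict:
        with zipfile.ZipFile(archive) as zf, zf.open(f"{archive.stem}.yaml") as fh:
            return this_yaml.load(fh)
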
@@ -37,123 +37,139 @@ price-cost margins fall in the interval :math:`[0, 1]`.
 """
 
 import shutil
+import zipfile
 from collections.abc import Mapping
 from importlib import resources
 from pathlib import Path
 from types import MappingProxyType
 
-import msgpack  # type:ignore
 import numpy as np
 import urllib3
 from numpy.random import PCG64DXSM, Generator, SeedSequence
 from scipy import stats  # type: ignore
 from xlrd import open_workbook  # type: ignore
 
-from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble  # noqa: TID252
+from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayDouble, this_yaml  # noqa: TID252
+from . import _mappingproxy_from_mapping
 
 __version__ = VERSION
 
-MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_dict.msgpack"
+MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_serialized.zip"
+
 
 u3pm = urllib3.PoolManager()
 
 
-def mgn_data_getter(  # noqa: PLR0912
+def margin_data_getter(  # noqa: PLR0912
     _table_name: str = "margin",
     *,
     data_archive_path: Path | None = None,
     data_download_flag: bool = False,
-) -> MappingProxyType[str, Mapping[str, float | int]]:
+) -> MappingProxyType[str, MappingProxyType[str, float | int]]:
     if _table_name != "margin":  # Not validated for other tables
         raise ValueError(
             "This code is designed for parsing Prof. Damodaran's margin tables."
         )
 
-    _data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
-
-    _mgn_urlstr = f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
-    _mgn_path = _data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
-    if _data_archive_path.is_file() and not data_download_flag:
-        return MappingProxyType(msgpack.unpackb(_data_archive_path.read_bytes()))
-    elif _mgn_path.is_file():
-        _mgn_path.unlink()
-        if _data_archive_path.is_file():
-            _data_archive_path.unlink()
-
+    data_archive_path_ = data_archive_path or MGNDATA_ARCHIVE_PATH
+    workbook_path_ = data_archive_path_.parent / f"damodaran_{_table_name}_data.xls"
+    if data_archive_path_.is_file() and not data_download_flag:
+        # with data_archive_path_.open("r") as _yfh:
+        #     margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
+        with (
+            zipfile.ZipFile(data_archive_path_) as _yzip,
+            _yzip.open(f"{data_archive_path_.stem}.yaml") as _yfh,
+        ):
+            margin_data_dict: dict[str, dict[str, float | int]] = this_yaml.load(_yfh)
+        return _mappingproxy_from_mapping(margin_data_dict)
+    elif workbook_path_.is_file():
+        workbook_path_.unlink()
+        if data_archive_path_.is_file():
+            data_archive_path_.unlink()
+
+    margin_urlstr = (
+        f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
+    )
     try:
-        _chunk_size = 1024 * 1024
+        chunk_size_ = 1024 * 1024
         with (
-            u3pm.request("GET", _mgn_urlstr, preload_content=False) as _urlopen_handle,
-            _mgn_path.open("wb") as _mgn_file,
+            u3pm.request(
+                "GET", margin_urlstr, preload_content=False
+            ) as _urlopen_handle,
+            workbook_path_.open("wb") as margin_file,
         ):
             while True:
-                _data = _urlopen_handle.read(_chunk_size)
-                if not _data:
+                data_ = _urlopen_handle.read(chunk_size_)
+                if not data_:
                     break
-                _mgn_file.write(_data)
+                margin_file.write(data_)
 
-        print(f"Downloaded {_mgn_urlstr} to {_mgn_path}.")
+        print(f"Downloaded {margin_urlstr} to {workbook_path_}.")
 
-    except urllib3.exceptions.MaxRetryError as _err:
-        if isinstance(_err.__cause__, urllib3.exceptions.SSLError):
+    except urllib3.exceptions.MaxRetryError as error_:
+        if isinstance(error_.__cause__, urllib3.exceptions.SSLError):
             # Works fine with other sites secured with certificates
             # from the Internet2 CA, such as,
             # https://snap.stanford.edu/data/web-Stanford.txt.gz
             print(
-                f"WARNING: Could not establish secure connection to, {_mgn_urlstr}."
+                f"WARNING: Could not establish secure connection to, {margin_urlstr}."
                 "Using bundled copy."
             )
-            if not _mgn_path.is_file():
+            if not workbook_path_.is_file():
                 with resources.as_file(
                     resources.files(f"{_PKG_NAME}.data").joinpath(
                         "empirical_margin_distribution.xls"
                     )
-                ) as _mgn_data_archive_path:
-                    shutil.copy2(_mgn_data_archive_path, _mgn_path)
+                ) as margin_data_archive_path:
+                    shutil.copy2(margin_data_archive_path, workbook_path_)
         else:
-            raise _err
-
-    _xl_book = open_workbook(_mgn_path, ragged_rows=True, on_demand=True)
-    _xl_sheet = _xl_book.sheet_by_name("Industry Averages")
-
-    _mgn_dict: dict[str, dict[str, float]] = {}
-    _mgn_row_keys: list[str] = []
-    _read_row_flag = False
-    for _ridx in range(_xl_sheet.nrows):
-        _xl_row = _xl_sheet.row_values(_ridx)
-        if _xl_row[0] == "Industry Name":
-            _read_row_flag = True
-            _mgn_row_keys = _xl_row
+            raise error_
+
+    xl_book_ = open_workbook(workbook_path_, ragged_rows=True, on_demand=True)
+    xl_sheet_ = xl_book_.sheet_by_name("Industry Averages")
+
+    margin_dict: dict[str, dict[str, float | int]] = {}
+    row_keys_: list[str] = []
+    read_row_flag = False
+    for _ridx in range(xl_sheet_.nrows):
+        xl_row = xl_sheet_.row_values(_ridx)
+        if xl_row[0] == "Industry Name":
+            read_row_flag = True
+            row_keys_ = xl_row
             continue
 
-        if not _xl_row[0] or not _read_row_flag:
+        if not xl_row[0] or not read_row_flag:
             continue
 
-        _xl_row[1] = int(_xl_row[1])
-        _mgn_dict[_xl_row[0]] = dict(zip(_mgn_row_keys[1:], _xl_row[1:], strict=True))
+        xl_row[1] = int(xl_row[1])
+        margin_dict[xl_row[0]] = dict(zip(row_keys_[1:], xl_row[1:], strict=True))
 
-    _ = _data_archive_path.write_bytes(msgpack.packb(_mgn_dict))
+    with (
+        zipfile.ZipFile(data_archive_path_, "w") as _yzip,
+        _yzip.open(f"{data_archive_path_.stem}.yaml", "w") as _yfh,
+    ):
+        this_yaml.dump(margin_dict, _yfh)
 
-    return MappingProxyType(_mgn_dict)
+    return _mappingproxy_from_mapping(margin_dict)
 
 
-def mgn_data_builder(
-    _mgn_tbl_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
+def margin_data_builder(
+    _src_data_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
 ) -> tuple[ArrayDouble, ArrayDouble, ArrayDouble]:
-    if _mgn_tbl_dict is None:
-        _mgn_tbl_dict = mgn_data_getter()
+    if _src_data_dict is None:
+        _src_data_dict = margin_data_getter()
 
-    _mgn_data_wts, _mgn_data_obs = (
+    margin_data_wts, margin_data_obs = (
         _f.flatten()
         for _f in np.hsplit(
             np.array([
                 tuple(
-                    _mgn_tbl_dict[_g][_h] for _h in ["Number of firms", "Gross Margin"]
+                    _src_data_dict[_g][_h] for _h in ["Number of firms", "Gross Margin"]
                 )
-                for _g in _mgn_tbl_dict
+                for _g in _src_data_dict
                 if not _g.startswith("Total Market")
                 and _g
-                not in (
+                not in {
                     "Bank (Money Center)",
                     "Banks (Regional)",
                     "Brokerage & Investment Banking",
@@ -165,29 +181,35 @@ def mgn_data_builder(
                     "R.E.I.T.",
                     "Retail (REITs)",
                     "Reinsurance",
-                )
+                }
             ]),
             2,
         )
     )
 
-    _mgn_wtd_avg = np.average(_mgn_data_obs, weights=_mgn_data_wts)
+    margin_wtd_avg = np.average(margin_data_obs, weights=margin_data_wts)
     # https://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
-    _mgn_wtd_stderr = np.sqrt(
-        np.average((_mgn_data_obs - _mgn_wtd_avg) ** 2, weights=_mgn_data_wts)
-        * (len(_mgn_data_wts) / (len(_mgn_data_wts) - 1))
+    margin_wtd_stderr = np.sqrt(
+        np.average((margin_data_obs - margin_wtd_avg) ** 2, weights=margin_data_wts)
+        * (len(margin_data_wts) / (len(margin_data_wts) - 1))
     )
 
     return (
-        _mgn_data_obs,
-        _mgn_data_wts,
+        margin_data_obs,
+        margin_data_wts,
         np.round(
-            (_mgn_wtd_avg, _mgn_wtd_stderr, _mgn_data_obs.min(), _mgn_data_obs.max()), 8
+            (
+                margin_wtd_avg,
+                margin_wtd_stderr,
+                margin_data_obs.min(),
+                margin_data_obs.max(),
+            ),
+            8,
         ),
     )
 
 
-def mgn_data_resampler(
+def margin_data_resampler(
     _sample_size: int | tuple[int, ...] = (10**6, 2),
     /,
     *,
@@ -216,26 +238,26 @@ def mgn_data_resampler(
 
     """
 
-    _seed_sequence = seed_sequence or SeedSequence(pool_size=8)
+    seed_sequence_ = seed_sequence or SeedSequence(pool_size=8)
 
-    _x, _w, _ = mgn_data_builder(mgn_data_getter())
+    x_, w_, _ = margin_data_builder(margin_data_getter())
 
-    _mgn_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
-    _mgn_kde.set_bandwidth(bw_method=_mgn_kde.factor / 3.0)
+    margin_kde = stats.gaussian_kde(x_, weights=w_, bw_method="silverman")
+    margin_kde.set_bandwidth(bw_method=margin_kde.factor / 3.0)
 
     if isinstance(_sample_size, int):
         return np.array(
-            _mgn_kde.resample(_sample_size, seed=Generator(PCG64DXSM(_seed_sequence)))[
-                0
-            ]
+            margin_kde.resample(
+                _sample_size, seed=Generator(PCG64DXSM(seed_sequence_))
+            )[0]
         )
     elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
-        _ssz, _num_cols = _sample_size
-        _ret_array = np.empty(_sample_size, np.float64)
-        for _idx, _seed_seq in enumerate(_seed_sequence.spawn(_num_cols)):
-            _ret_array[:, _idx] = _mgn_kde.resample(
-                _ssz, seed=Generator(PCG64DXSM(_seed_seq))
+        ssz, num_cols = _sample_size
+        ret_array = np.empty(_sample_size, np.float64)
+        for idx, seed_seq in enumerate(seed_sequence_.spawn(num_cols)):
+            ret_array[:, idx] = margin_kde.resample(
+                ssz, seed=Generator(PCG64DXSM(seed_seq))
             )[0]
-        return _ret_array
+        return ret_array
     else:
         raise ValueError(f"Invalid sample size: {_sample_size!r}")
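
Editor's note: the renamed resampler draws from a weighted Gaussian KDE (Silverman bandwidth, tightened by a factor of 3) fitted to the industry gross-margin data. A usage sketch under the module path and signature shown in this diff; the seed value is illustrative:

    from numpy.random import SeedSequence

    from mergeron.core.empirical_margin_distribution import margin_data_resampler

    # Reproducible 10_000 x 2 sample of synthetic gross margins,
    # one independently seeded KDE draw per column
    margins = margin_data_resampler((10_000, 2), seed_sequence=SeedSequence(12345))
    print(margins.shape)  # (10000, 2)
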