mergeron 2024.738972.0__py3-none-any.whl → 2024.739079.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +28 -3
- mergeron/core/__init__.py +2 -67
- mergeron/core/damodaran_margin_data.py +66 -52
- mergeron/core/excel_helper.py +32 -37
- mergeron/core/ftc_merger_investigations_data.py +66 -35
- mergeron/core/guidelines_boundaries.py +256 -1042
- mergeron/core/guidelines_boundary_functions.py +981 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +53 -16
- mergeron/core/proportions_tests.py +2 -4
- mergeron/core/pseudorandom_numbers.py +6 -11
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/{jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 → data/jinja2_LaTeX_templates/setup_tikz_tables.tex} +45 -50
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +88 -0
- mergeron/ext/__init__.py +2 -4
- mergeron/ext/tol_colors.py +3 -3
- mergeron/gen/__init__.py +53 -55
- mergeron/gen/_data_generation_functions.py +28 -93
- mergeron/gen/data_generation.py +20 -24
- mergeron/gen/{investigations_stats.py → enforcement_stats.py} +59 -57
- mergeron/gen/market_sample.py +6 -10
- mergeron/gen/upp_tests.py +29 -26
- mergeron-2024.739079.9.dist-info/METADATA +109 -0
- mergeron-2024.739079.9.dist-info/RECORD +36 -0
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron-2024.738972.0.dist-info/METADATA +0 -108
- mergeron-2024.738972.0.dist-info/RECORD +0 -31
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
- /mergeron/{jinja_LaTex_templates → data/jinja2_LaTeX_templates}/clrrate_cis_summary_table_template.tex.jinja2 +0 -0
- /mergeron/{jinja_LaTex_templates → data/jinja2_LaTeX_templates}/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -0
- /mergeron/{jinja_LaTex_templates → data/jinja2_LaTeX_templates}/ftcinvdata_summary_table_template.tex.jinja2 +0 -0
- /mergeron/{jinja_LaTex_templates → data/jinja2_LaTeX_templates}/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -0
- /mergeron/{jinja_LaTex_templates → data/jinja2_LaTeX_templates}/mergeron.cls +0 -0
- /mergeron/{jinja_LaTex_templates → data/jinja2_LaTeX_templates}/mergeron_table_collection_template.tex.jinja2 +0 -0
- {mergeron-2024.738972.0.dist-info → mergeron-2024.739079.9.dist-info}/WHEEL +0 -0
mergeron/__init__.py
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from importlib.metadata import version
|
|
5
4
|
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pendulum # type: ignore
|
|
9
|
+
from icecream import argumentToString, ic, install # type: ignore
|
|
10
|
+
from numpy.typing import NDArray
|
|
6
11
|
|
|
7
12
|
_PKG_NAME: str = Path(__file__).parent.stem
|
|
8
13
|
|
|
9
|
-
|
|
14
|
+
VERSION = "2024.739079.9"
|
|
15
|
+
|
|
16
|
+
__version__ = VERSION
|
|
10
17
|
|
|
11
18
|
DATA_DIR: Path = Path.home() / _PKG_NAME
|
|
12
19
|
"""
|
|
@@ -14,11 +21,26 @@ Defines a subdirectory named for this package in the user's home path.
|
|
|
14
21
|
|
|
15
22
|
If the subdirectory doesn't exist, it is created on package invocation.
|
|
16
23
|
"""
|
|
17
|
-
|
|
18
24
|
if not DATA_DIR.is_dir():
|
|
19
25
|
DATA_DIR.mkdir(parents=False)
|
|
20
26
|
|
|
21
27
|
|
|
28
|
+
np.set_printoptions(precision=18)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _timestamper() -> str:
|
|
32
|
+
return f"{pendulum.now().strftime("%F %T.%f")} |> "
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@argumentToString.register(np.ndarray) # type: ignore
|
|
36
|
+
def _(_obj: NDArray[Any]) -> str:
|
|
37
|
+
return f"ndarray, shape={_obj.shape}, dtype={_obj.dtype}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
ic.configureOutput(prefix=_timestamper, includeContext=True)
|
|
41
|
+
install()
|
|
42
|
+
|
|
43
|
+
|
|
22
44
|
@enum.unique
|
|
23
45
|
class RECConstants(enum.StrEnum):
|
|
24
46
|
"""Recapture rate - derivation methods."""
|
|
@@ -38,8 +60,11 @@ class UPPAggrSelector(enum.StrEnum):
|
|
|
38
60
|
AVG = "average"
|
|
39
61
|
CPA = "cross-product-share weighted average"
|
|
40
62
|
CPD = "cross-product-share weighted distance"
|
|
63
|
+
CPG = "cross-product-share weighted geometric mean"
|
|
41
64
|
DIS = "symmetrically-weighted distance"
|
|
65
|
+
GMN = "geometric mean"
|
|
42
66
|
MAX = "max"
|
|
43
67
|
MIN = "min"
|
|
44
68
|
OSA = "own-share weighted average"
|
|
45
69
|
OSD = "own-share weighted distance"
|
|
70
|
+
OSG = "own-share weighted geometric mean"
|
mergeron/core/__init__.py
CHANGED
|
@@ -1,68 +1,3 @@
|
|
|
1
|
-
from
|
|
1
|
+
from .. import VERSION # noqa: TID252
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
from attrs import Attribute, field, frozen, validators
|
|
6
|
-
|
|
7
|
-
from .. import _PKG_NAME, RECConstants, UPPAggrSelector # noqa: TID252
|
|
8
|
-
|
|
9
|
-
__version__ = version(_PKG_NAME)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _delta_value_validator(
|
|
13
|
-
_instance: UPPBoundarySpec, _attribute: Attribute[float], _value: float, /
|
|
14
|
-
) -> None:
|
|
15
|
-
if not 0 <= _value <= 1:
|
|
16
|
-
raise ValueError(
|
|
17
|
-
"Margin-adjusted benchmark share ratio must lie between 0 and 1."
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _rec_spec_validator(
|
|
22
|
-
_instance: UPPBoundarySpec,
|
|
23
|
-
_attribute: Attribute[RECConstants],
|
|
24
|
-
_value: RECConstants,
|
|
25
|
-
/,
|
|
26
|
-
) -> None:
|
|
27
|
-
if _value == RECConstants.OUTIN:
|
|
28
|
-
raise ValueError(
|
|
29
|
-
f"Invalid recapture specification, {_value!r}. "
|
|
30
|
-
"You may consider specifying RECConstants.INOUT here, and "
|
|
31
|
-
"assigning the recapture rate for the merging-firm with "
|
|
32
|
-
'the smaller market-share to the attribue, "rec" of '
|
|
33
|
-
"the UPPBoundarySpec object you pass."
|
|
34
|
-
)
|
|
35
|
-
if _value is None and _instance.agg_method != UPPAggrSelector.MAX:
|
|
36
|
-
raise ValueError(
|
|
37
|
-
f"Specified aggregation method, {_instance.agg_method} requires a recapture specification."
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
@frozen
|
|
42
|
-
class UPPBoundarySpec:
|
|
43
|
-
share_ratio: float = field(
|
|
44
|
-
kw_only=False,
|
|
45
|
-
default=0.075,
|
|
46
|
-
validator=(validators.instance_of(float), _delta_value_validator),
|
|
47
|
-
)
|
|
48
|
-
rec: float = field(
|
|
49
|
-
kw_only=False, default=0.80, validator=validators.instance_of(float)
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
agg_method: UPPAggrSelector = field(
|
|
53
|
-
kw_only=True,
|
|
54
|
-
default=UPPAggrSelector.MAX,
|
|
55
|
-
validator=validators.instance_of(UPPAggrSelector),
|
|
56
|
-
)
|
|
57
|
-
recapture_form: RECConstants | None = field(
|
|
58
|
-
kw_only=True,
|
|
59
|
-
default=RECConstants.INOUT,
|
|
60
|
-
validator=(
|
|
61
|
-
validators.optional(validators.instance_of(RECConstants)), # type: ignore
|
|
62
|
-
_rec_spec_validator,
|
|
63
|
-
),
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
precision: int = field(
|
|
67
|
-
kw_only=False, default=5, validator=validators.instance_of(int)
|
|
68
|
-
)
|
|
3
|
+
__version__ = VERSION
|
|
@@ -7,7 +7,8 @@ Data are downloaded or reused from a local copy, on demand.
|
|
|
7
7
|
For terms of use of Prof. Damodaran's data, please see:
|
|
8
8
|
https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datahistory.html
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
NOTES
|
|
11
|
+
-----
|
|
11
12
|
|
|
12
13
|
Prof. Damodaran notes that the data construction may not be
|
|
13
14
|
consistent from iteration to iteration. He also notes that,
|
|
@@ -32,29 +33,30 @@ price-cost margins fall in the interval :math:`[0, 1]`.
|
|
|
32
33
|
|
|
33
34
|
"""
|
|
34
35
|
|
|
36
|
+
import shutil
|
|
35
37
|
from collections.abc import Mapping
|
|
36
|
-
from importlib
|
|
38
|
+
from importlib import resources
|
|
37
39
|
from pathlib import Path
|
|
38
40
|
from types import MappingProxyType
|
|
39
41
|
|
|
40
42
|
import msgpack # type:ignore
|
|
41
43
|
import numpy as np
|
|
42
|
-
import
|
|
44
|
+
import urllib3
|
|
43
45
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
44
46
|
from numpy.typing import NDArray
|
|
45
|
-
from requests_toolbelt.downloadutils import stream # type: ignore
|
|
46
47
|
from scipy import stats # type: ignore
|
|
47
48
|
from xlrd import open_workbook # type: ignore
|
|
48
49
|
|
|
49
|
-
from .. import _PKG_NAME, DATA_DIR # noqa: TID252
|
|
50
|
-
|
|
51
|
-
__version__ = version(_PKG_NAME)
|
|
50
|
+
from .. import _PKG_NAME, DATA_DIR, VERSION # noqa: TID252
|
|
52
51
|
|
|
52
|
+
__version__ = VERSION
|
|
53
53
|
|
|
54
54
|
MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_dict.msgpack"
|
|
55
55
|
|
|
56
|
+
u3pm = urllib3.PoolManager()
|
|
57
|
+
|
|
56
58
|
|
|
57
|
-
def
|
|
59
|
+
def mgn_data_getter(
|
|
58
60
|
_table_name: str = "margin",
|
|
59
61
|
*,
|
|
60
62
|
data_archive_path: Path | None = None,
|
|
@@ -68,32 +70,46 @@ def scrape_data_table(
|
|
|
68
70
|
_data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
|
|
69
71
|
|
|
70
72
|
_mgn_urlstr = f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
|
|
71
|
-
_mgn_path = _data_archive_path.parent
|
|
73
|
+
_mgn_path = _data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
|
|
72
74
|
if _data_archive_path.is_file() and not data_download_flag:
|
|
73
75
|
return MappingProxyType(msgpack.unpackb(_data_archive_path.read_bytes()))
|
|
74
76
|
elif _mgn_path.is_file():
|
|
75
77
|
_mgn_path.unlink()
|
|
76
|
-
_data_archive_path.
|
|
77
|
-
|
|
78
|
-
_REQ_TIMEOUT = (9.05, 27)
|
|
79
|
-
# NYU will eventually updates its server certificate, to one signed with
|
|
80
|
-
# "InCommon RSA Server CA 2.pem", the step below will be obsolete. In
|
|
81
|
-
# the interim, it is necessary to provide the certificate chain to the
|
|
82
|
-
# root CA, so that the obsolete CA certificate is validated.
|
|
83
|
-
_INCOMMON_2014_CERT_CHAIN_PATH = (
|
|
84
|
-
Path(__file__).parent / "InCommon RSA Server CA cert chain.pem"
|
|
85
|
-
)
|
|
86
|
-
try:
|
|
87
|
-
_urlopen_handle = requests.get(_mgn_urlstr, timeout=_REQ_TIMEOUT, stream=True)
|
|
88
|
-
except requests.exceptions.SSLError:
|
|
89
|
-
_urlopen_handle = requests.get(
|
|
90
|
-
_mgn_urlstr,
|
|
91
|
-
timeout=_REQ_TIMEOUT,
|
|
92
|
-
stream=True,
|
|
93
|
-
verify=str(_INCOMMON_2014_CERT_CHAIN_PATH),
|
|
94
|
-
)
|
|
78
|
+
if _data_archive_path.is_file():
|
|
79
|
+
_data_archive_path.unlink()
|
|
95
80
|
|
|
96
|
-
|
|
81
|
+
try:
|
|
82
|
+
_chunk_size = 1024 * 1024
|
|
83
|
+
with (
|
|
84
|
+
u3pm.request("GET", _mgn_urlstr, preload_content=False) as _urlopen_handle,
|
|
85
|
+
_mgn_path.open("wb") as _mgn_file,
|
|
86
|
+
):
|
|
87
|
+
while True:
|
|
88
|
+
_data = _urlopen_handle.read(_chunk_size)
|
|
89
|
+
if not _data:
|
|
90
|
+
break
|
|
91
|
+
_mgn_file.write(_data)
|
|
92
|
+
|
|
93
|
+
print(f"Downloaded {_mgn_urlstr} to {_mgn_path}.")
|
|
94
|
+
|
|
95
|
+
except urllib3.exceptions.MaxRetryError as _err:
|
|
96
|
+
if isinstance(_err.__cause__, urllib3.exceptions.SSLError):
|
|
97
|
+
# Works fine with other sites secured with certificates
|
|
98
|
+
# from the Internet2 CA, such as,
|
|
99
|
+
# https://snap.stanford.edu/data/web-Stanford.txt.gz
|
|
100
|
+
print(
|
|
101
|
+
f"WARNING: Could not establish secure connection to, {_mgn_urlstr}."
|
|
102
|
+
"Using bundled copy."
|
|
103
|
+
)
|
|
104
|
+
if not _mgn_path.is_file():
|
|
105
|
+
with resources.as_file(
|
|
106
|
+
resources.files(f"{_PKG_NAME}.data").joinpath(
|
|
107
|
+
"damodaran_margin_data.xls"
|
|
108
|
+
)
|
|
109
|
+
) as _mgn_data_archive_path:
|
|
110
|
+
shutil.copy2(_mgn_data_archive_path, _mgn_path)
|
|
111
|
+
else:
|
|
112
|
+
raise _err
|
|
97
113
|
|
|
98
114
|
_xl_book = open_workbook(_mgn_path, ragged_rows=True, on_demand=True)
|
|
99
115
|
_xl_sheet = _xl_book.sheet_by_name("Industry Averages")
|
|
@@ -123,7 +139,7 @@ def mgn_data_builder(
|
|
|
123
139
|
_mgn_tbl_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
|
|
124
140
|
) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
|
|
125
141
|
if _mgn_tbl_dict is None:
|
|
126
|
-
_mgn_tbl_dict =
|
|
142
|
+
_mgn_tbl_dict = mgn_data_getter()
|
|
127
143
|
|
|
128
144
|
_mgn_data_wts, _mgn_data_obs = (
|
|
129
145
|
_f.flatten()
|
|
@@ -169,17 +185,19 @@ def mgn_data_builder(
|
|
|
169
185
|
)
|
|
170
186
|
|
|
171
187
|
|
|
172
|
-
def
|
|
188
|
+
def mgn_data_resampler(
|
|
173
189
|
_sample_size: int | tuple[int, int] = (10**6, 2),
|
|
174
190
|
/,
|
|
175
191
|
*,
|
|
176
192
|
seed_sequence: SeedSequence | None = None,
|
|
177
193
|
) -> NDArray[np.float64]:
|
|
178
194
|
"""
|
|
179
|
-
Generate the
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
195
|
+
Generate draws from the empirical distribution based on Prof. Damodaran's margin data.
|
|
196
|
+
|
|
197
|
+
The empirical distribution is estimated using a Gaussian KDE; the bandwidth
|
|
198
|
+
selected using Silverman's rule is narrowed to reflect that the margin data
|
|
199
|
+
are multimodal. Margins for firms in finance, investment, insurance, reinsurance, and
|
|
200
|
+
REITs are excluded from the sample used to estimate the empirical distribution.
|
|
183
201
|
|
|
184
202
|
Parameters
|
|
185
203
|
----------
|
|
@@ -198,28 +216,24 @@ def resample_mgn_data(
|
|
|
198
216
|
|
|
199
217
|
_seed_sequence = seed_sequence or SeedSequence(pool_size=8)
|
|
200
218
|
|
|
201
|
-
_x, _w, _ = mgn_data_builder(
|
|
219
|
+
_x, _w, _ = mgn_data_builder(mgn_data_getter())
|
|
202
220
|
|
|
203
|
-
_mgn_kde = stats.gaussian_kde(_x, weights=_w)
|
|
221
|
+
_mgn_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
|
|
222
|
+
_mgn_kde.set_bandwidth(bw_method=_mgn_kde.factor / 3.0)
|
|
204
223
|
|
|
205
|
-
|
|
206
|
-
_mgn_kde: stats.gaussian_kde, _ssz: int, _seed_seq: SeedSequence
|
|
207
|
-
) -> NDArray[np.float64]:
|
|
208
|
-
_seed = Generator(PCG64DXSM(_seed_sequence))
|
|
209
|
-
|
|
210
|
-
# We enlarge the sample, then truncate to
|
|
211
|
-
# the range between [0.0, 1.0)
|
|
212
|
-
ssz_up = int(_ssz / (_mgn_kde.integrate_box_1d(0.0, 1.0) ** 2))
|
|
213
|
-
sample_1 = _mgn_kde.resample(ssz_up, seed=_seed)[0]
|
|
224
|
+
if isinstance(_sample_size, int):
|
|
214
225
|
return np.array(
|
|
215
|
-
|
|
226
|
+
_mgn_kde.resample(_sample_size, seed=Generator(PCG64DXSM(_seed_sequence)))[
|
|
227
|
+
0
|
|
228
|
+
]
|
|
216
229
|
)
|
|
217
|
-
|
|
218
|
-
if isinstance(_sample_size, int):
|
|
219
|
-
return _generate_draws(_mgn_kde, _sample_size, _seed_sequence)
|
|
220
|
-
else:
|
|
230
|
+
elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
|
|
221
231
|
_ssz, _num_cols = _sample_size
|
|
222
232
|
_ret_array = np.empty(_sample_size, np.float64)
|
|
223
233
|
for _idx, _seed_seq in enumerate(_seed_sequence.spawn(_num_cols)):
|
|
224
|
-
_ret_array[:, _idx] =
|
|
234
|
+
_ret_array[:, _idx] = _mgn_kde.resample(
|
|
235
|
+
_ssz, seed=Generator(PCG64DXSM(_seed_seq))
|
|
236
|
+
)[0]
|
|
225
237
|
return _ret_array
|
|
238
|
+
else:
|
|
239
|
+
raise ValueError(f"Invalid sample size: {_sample_size!r}")
|
mergeron/core/excel_helper.py
CHANGED
|
@@ -8,29 +8,21 @@ Includes a flexible system of defining cell formats.
|
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
-
import enum
|
|
12
11
|
from collections.abc import Mapping, Sequence
|
|
13
|
-
from
|
|
14
|
-
from types import MappingProxyType
|
|
15
|
-
from typing import Any
|
|
12
|
+
from typing import Any, ClassVar
|
|
16
13
|
|
|
17
14
|
import numpy as np
|
|
18
15
|
import numpy.typing as npt
|
|
19
16
|
import xlsxwriter # type: ignore
|
|
17
|
+
from aenum import Enum, unique # type: ignore
|
|
20
18
|
|
|
21
|
-
from .. import
|
|
19
|
+
from .. import VERSION # noqa: TID252
|
|
22
20
|
|
|
23
|
-
__version__ =
|
|
21
|
+
__version__ = VERSION
|
|
24
22
|
|
|
25
23
|
|
|
26
|
-
@
|
|
27
|
-
class
|
|
28
|
-
"""Unique mappings defining xlsxwirter Workbook formats"""
|
|
29
|
-
|
|
30
|
-
...
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class CFmt(CFmtParent):
|
|
24
|
+
@unique
|
|
25
|
+
class CFmt(Enum): # type: ignore
|
|
34
26
|
"""
|
|
35
27
|
Initialize cell formats for xlsxwriter.
|
|
36
28
|
|
|
@@ -42,31 +34,34 @@ class CFmt(CFmtParent):
|
|
|
42
34
|
See, https://xlsxwriter.readthedocs.io/format.html
|
|
43
35
|
"""
|
|
44
36
|
|
|
45
|
-
XL_DEFAULT =
|
|
46
|
-
XL_DEFAULT_2003 =
|
|
37
|
+
XL_DEFAULT: ClassVar = {"font_name": "Calibri", "font_size": 11}
|
|
38
|
+
XL_DEFAULT_2003: ClassVar = {"font_name": "Arial", "font_size": 10}
|
|
39
|
+
|
|
40
|
+
A_CTR: ClassVar = {"align": "center"}
|
|
41
|
+
A_CTR_ACROSS: ClassVar = {"align": "center_across"}
|
|
42
|
+
A_LEFT: ClassVar = {"align": "left"}
|
|
43
|
+
A_RIGHT: ClassVar = {"align": "right"}
|
|
47
44
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
A_RIGHT = MappingProxyType({"align": "right"})
|
|
45
|
+
BOLD: ClassVar = {"bold": True}
|
|
46
|
+
ITALIC: ClassVar = {"italic": True}
|
|
47
|
+
ULINE: ClassVar = {"underline": True}
|
|
52
48
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
49
|
+
TEXT_WRAP: ClassVar = {"text_wrap": True}
|
|
50
|
+
TEXT_ROTATE: ClassVar = {"rotation": 90}
|
|
51
|
+
IND_1: ClassVar = {"indent": 1}
|
|
56
52
|
|
|
57
|
-
|
|
58
|
-
|
|
53
|
+
DOLLAR_NUM: ClassVar = {"num_format": "[$$-409]#,##0.00"}
|
|
54
|
+
DT_NUM: ClassVar = {"num_format": "mm/dd/yyyy"}
|
|
55
|
+
QTY_NUM: ClassVar = {"num_format": "#,##0.0"}
|
|
56
|
+
PCT_NUM: ClassVar = {"num_format": "##0.000000%"}
|
|
57
|
+
AREA_NUM: ClassVar = {"num_format": "0.00000000"}
|
|
59
58
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
QTY_NUM = MappingProxyType({"num_format": "#,##0.0"})
|
|
63
|
-
PCT_NUM = MappingProxyType({"num_format": "##0.000000%"})
|
|
64
|
-
AREA_NUM = MappingProxyType({"num_format": "0.00000000"})
|
|
59
|
+
BAR_FILL: ClassVar = {"pattern": 1, "bg_color": "dfeadf"}
|
|
60
|
+
HDR_FILL: ClassVar = {"pattern": 1, "bg_color": "999999"}
|
|
65
61
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
HDR_BORDER = TOP_BORDER | BOT_BORDER
|
|
62
|
+
BOT_BORDER: ClassVar = {"bottom": 1, "bottom_color": "000000"}
|
|
63
|
+
TOP_BORDER: ClassVar = {"top": 1, "top_color": "000000"}
|
|
64
|
+
HDR_BORDER: ClassVar = TOP_BORDER | BOT_BORDER
|
|
70
65
|
|
|
71
66
|
|
|
72
67
|
def matrix_to_sheet(
|
|
@@ -208,7 +203,7 @@ def xl_fmt(
|
|
|
208
203
|
_xl_book: xlsxwriter.Workbook, _cell_fmt: Sequence[CFmt] | CFmt | None, /
|
|
209
204
|
) -> xlsxwriter.format.Format:
|
|
210
205
|
"""
|
|
211
|
-
Return :code:`xlsxwriter` `Format` object given a CFmt
|
|
206
|
+
Return :code:`xlsxwriter` `Format` object given a CFmt aenum, or tuple thereof.
|
|
212
207
|
|
|
213
208
|
Parameters
|
|
214
209
|
----------
|
|
@@ -216,14 +211,14 @@ def xl_fmt(
|
|
|
216
211
|
:code:`xlsxwriter.Workbook` object
|
|
217
212
|
|
|
218
213
|
_cell_fmt
|
|
219
|
-
:code:`CFmt`
|
|
214
|
+
:code:`CFmt` aenum object, or tuple thereof
|
|
220
215
|
|
|
221
216
|
Returns
|
|
222
217
|
-------
|
|
223
218
|
:code:`xlsxwriter` `Format` object
|
|
224
219
|
|
|
225
220
|
"""
|
|
226
|
-
_cell_fmt_dict: Mapping[str, Any] =
|
|
221
|
+
_cell_fmt_dict: Mapping[str, Any] = {}
|
|
227
222
|
if isinstance(_cell_fmt, tuple):
|
|
228
223
|
ensure_cell_format_spec_tuple(_cell_fmt)
|
|
229
224
|
for _cf in _cell_fmt:
|
|
@@ -4,13 +4,13 @@ as necessary
|
|
|
4
4
|
|
|
5
5
|
NOTES
|
|
6
6
|
-----
|
|
7
|
-
|
|
7
|
+
Reported row and column totals from source data are not stored.
|
|
8
8
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
import shutil
|
|
12
12
|
from collections.abc import Mapping, Sequence
|
|
13
|
-
from importlib
|
|
13
|
+
from importlib import resources
|
|
14
14
|
from operator import itemgetter
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from types import MappingProxyType
|
|
@@ -22,12 +22,13 @@ import numpy as np
|
|
|
22
22
|
import re2 as re # type: ignore
|
|
23
23
|
import requests
|
|
24
24
|
from bs4 import BeautifulSoup
|
|
25
|
+
from icecream import ic # type: ignore
|
|
25
26
|
from numpy.testing import assert_array_equal
|
|
26
27
|
from numpy.typing import NDArray
|
|
27
28
|
|
|
28
|
-
from .. import _PKG_NAME, DATA_DIR # noqa: TID252
|
|
29
|
+
from .. import _PKG_NAME, DATA_DIR, VERSION # noqa: TID252
|
|
29
30
|
|
|
30
|
-
__version__ =
|
|
31
|
+
__version__ = VERSION
|
|
31
32
|
|
|
32
33
|
m.patch()
|
|
33
34
|
|
|
@@ -36,11 +37,16 @@ if not FTCDATA_DIR.is_dir():
|
|
|
36
37
|
FTCDATA_DIR.mkdir(parents=True)
|
|
37
38
|
|
|
38
39
|
INVDATA_ARCHIVE_PATH = DATA_DIR / "ftc_invdata.msgpack"
|
|
39
|
-
if
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
if (
|
|
41
|
+
not INVDATA_ARCHIVE_PATH.is_file()
|
|
42
|
+
and (
|
|
43
|
+
_bundled_copy := resources.files(f"{_PKG_NAME}.data").joinpath(
|
|
44
|
+
INVDATA_ARCHIVE_PATH.name
|
|
45
|
+
)
|
|
46
|
+
).is_file()
|
|
47
|
+
):
|
|
48
|
+
with resources.as_file(_bundled_copy) as _bundled_copy_path:
|
|
49
|
+
shutil.copy2(_bundled_copy_path, INVDATA_ARCHIVE_PATH)
|
|
44
50
|
|
|
45
51
|
TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
|
|
46
52
|
TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
|
|
@@ -86,8 +92,8 @@ CNT_FCOUNT_DICT = {
|
|
|
86
92
|
|
|
87
93
|
|
|
88
94
|
class INVTableData(NamedTuple):
|
|
89
|
-
|
|
90
|
-
|
|
95
|
+
industry_group: str
|
|
96
|
+
additional_evidence: str
|
|
91
97
|
data_array: NDArray[np.int64]
|
|
92
98
|
|
|
93
99
|
|
|
@@ -181,7 +187,9 @@ def construct_data(
|
|
|
181
187
|
_aggr_tables_list = [
|
|
182
188
|
_t
|
|
183
189
|
for _t in _invdata["1996-2003"][_table_type]
|
|
184
|
-
if re.sub(
|
|
190
|
+
if re.sub(
|
|
191
|
+
r"\W", "", _invdata["1996-2003"][_table_type][_t].industry_group
|
|
192
|
+
)
|
|
185
193
|
not in _industry_exclusion_list
|
|
186
194
|
]
|
|
187
195
|
|
|
@@ -254,8 +262,8 @@ def _construct_new_period_data(
|
|
|
254
262
|
for _table_no in _invdata_cuml[_table_type]:
|
|
255
263
|
_invdata_cuml_sub_table = _invdata_cuml[_table_type][_table_no]
|
|
256
264
|
_invdata_ind_group, _invdata_evid_cond, _invdata_cuml_array = (
|
|
257
|
-
_invdata_cuml_sub_table.
|
|
258
|
-
_invdata_cuml_sub_table.
|
|
265
|
+
_invdata_cuml_sub_table.industry_group,
|
|
266
|
+
_invdata_cuml_sub_table.additional_evidence,
|
|
259
267
|
_invdata_cuml_sub_table.data_array,
|
|
260
268
|
)
|
|
261
269
|
|
|
@@ -337,7 +345,7 @@ def _construct_new_period_data(
|
|
|
337
345
|
# _invdata_array_bld_enfcls < 0, _invdata_array_bld_enfcls, 0
|
|
338
346
|
# )
|
|
339
347
|
# if np.einsum('ij->', invdata_array_bld_tbc):
|
|
340
|
-
#
|
|
348
|
+
# ic(
|
|
341
349
|
# f"{_data_period}, {_table_no}, {_invdata_ind_group}:",
|
|
342
350
|
# abs(np.einsum('ij->', invdata_array_bld_tbc))
|
|
343
351
|
# )
|
|
@@ -395,22 +403,23 @@ def _parse_invdata() -> INVData:
|
|
|
395
403
|
by range of HHI and ∆HHI.
|
|
396
404
|
|
|
397
405
|
"""
|
|
406
|
+
raise ValueError(
|
|
407
|
+
"This function is defined here as documentation.\n"
|
|
408
|
+
"NOTE: License for `pymupdf`, upon which this function depends,"
|
|
409
|
+
" may be incompatible with the MIT license,"
|
|
410
|
+
" under which this pacakge is distributed."
|
|
411
|
+
" Making this fumction operable requires the user to modify"
|
|
412
|
+
" the source code as well as to install an additional package"
|
|
413
|
+
" not distributed with this package or included in its dependencies."
|
|
414
|
+
)
|
|
398
415
|
import fitz # type: ignore
|
|
399
|
-
# user must install pymupdf to make this function operable
|
|
400
416
|
|
|
401
|
-
_invdata_docnames
|
|
402
|
-
"040831horizmergersdata96-03.pdf",
|
|
403
|
-
"p035603horizmergerinvestigationdata1996-2005.pdf",
|
|
404
|
-
"081201hsrmergerdata.pdf",
|
|
405
|
-
"130104horizontalmergerreport.pdf",
|
|
406
|
-
)
|
|
417
|
+
_invdata_docnames = _download_invdata(FTCDATA_DIR)
|
|
407
418
|
|
|
408
419
|
_invdata: dict[str, dict[str, dict[str, INVTableData]]] = {}
|
|
409
420
|
|
|
410
421
|
for _invdata_docname in _invdata_docnames:
|
|
411
422
|
_invdata_pdf_path = FTCDATA_DIR.joinpath(_invdata_docname)
|
|
412
|
-
if not _invdata_pdf_path.is_file():
|
|
413
|
-
_download_invdata(FTCDATA_DIR)
|
|
414
423
|
|
|
415
424
|
_invdata_fitz = fitz.open(_invdata_pdf_path)
|
|
416
425
|
_invdata_meta = _invdata_fitz.metadata
|
|
@@ -542,7 +551,7 @@ def _parse_table_blocks(
|
|
|
542
551
|
_invdata_evid_cond = "Unrestricted on additional evidence"
|
|
543
552
|
|
|
544
553
|
else:
|
|
545
|
-
#
|
|
554
|
+
# ic(_table_blocks)
|
|
546
555
|
_invdata_evid_cond = (
|
|
547
556
|
_table_blocks[1][-3].strip()
|
|
548
557
|
if _table_ser == 9
|
|
@@ -561,8 +570,8 @@ def _parse_table_blocks(
|
|
|
561
570
|
|
|
562
571
|
_table_array = process_table_func(_table_blocks)
|
|
563
572
|
if not isinstance(_table_array, np.ndarray) or _table_array.dtype != np.int64:
|
|
564
|
-
|
|
565
|
-
|
|
573
|
+
ic(_table_num)
|
|
574
|
+
ic(_table_blocks)
|
|
566
575
|
raise ValueError
|
|
567
576
|
|
|
568
577
|
_table_data = INVTableData(_invdata_ind_group, _invdata_evid_cond, _table_array)
|
|
@@ -610,7 +619,7 @@ def _process_table_blks_conc_type(
|
|
|
610
619
|
_col_totals = _row_array
|
|
611
620
|
else:
|
|
612
621
|
_invdata_array = (
|
|
613
|
-
np.
|
|
622
|
+
np.vstack((_invdata_array, _row_array))
|
|
614
623
|
if _invdata_array.shape
|
|
615
624
|
else _row_array
|
|
616
625
|
)
|
|
@@ -657,7 +666,7 @@ def _process_table_blks_cnt_type(
|
|
|
657
666
|
_col_totals = _row_list
|
|
658
667
|
else:
|
|
659
668
|
_invdata_array = (
|
|
660
|
-
np.
|
|
669
|
+
np.vstack((_invdata_array, _row_list))
|
|
661
670
|
if _invdata_array.shape
|
|
662
671
|
else _row_list
|
|
663
672
|
)
|
|
@@ -673,27 +682,43 @@ def _process_table_blks_cnt_type(
|
|
|
673
682
|
return _invdata_array[np.argsort(_invdata_array[:, 0])]
|
|
674
683
|
|
|
675
684
|
|
|
676
|
-
def _download_invdata(_dl_path: Path) ->
|
|
685
|
+
def _download_invdata(_dl_path: Path = FTCDATA_DIR) -> tuple[str, ...]:
|
|
686
|
+
if not _dl_path.is_dir():
|
|
687
|
+
_dl_path.mkdir(parents=True)
|
|
688
|
+
|
|
677
689
|
_invdata_homepage_urls = (
|
|
678
690
|
"https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
|
|
679
691
|
"https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
|
|
680
692
|
"https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
|
|
681
693
|
"https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
|
|
682
694
|
)
|
|
683
|
-
_invdata_docnames =
|
|
695
|
+
_invdata_docnames = (
|
|
696
|
+
"040831horizmergersdata96-03.pdf",
|
|
697
|
+
"p035603horizmergerinvestigationdata1996-2005.pdf",
|
|
698
|
+
"081201hsrmergerdata.pdf",
|
|
699
|
+
"130104horizontalmergerreport.pdf",
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
if all(
|
|
703
|
+
_dl_path.joinpath(_invdata_docname).is_file()
|
|
704
|
+
for _invdata_docname in _invdata_docnames
|
|
705
|
+
):
|
|
706
|
+
return _invdata_docnames
|
|
707
|
+
|
|
708
|
+
_invdata_docnames_dl: tuple[str, ...] = ()
|
|
684
709
|
for _invdata_homepage_url in _invdata_homepage_urls:
|
|
685
710
|
_invdata_soup = BeautifulSoup(
|
|
686
711
|
requests.get(_invdata_homepage_url, verify=True, timeout=60).text,
|
|
687
712
|
"html.parser",
|
|
688
713
|
)
|
|
689
714
|
_invdata_attrs = [
|
|
690
|
-
(_g.get("
|
|
715
|
+
(_g.get("title", ""), _g.get("href", ""))
|
|
691
716
|
for _g in _invdata_soup.find_all("a")
|
|
692
717
|
if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
|
|
693
718
|
]
|
|
694
719
|
for _invdata_attr in _invdata_attrs:
|
|
695
|
-
|
|
696
|
-
|
|
720
|
+
_invdata_docname, _invdata_link = _invdata_attr
|
|
721
|
+
_invdata_docnames_dl += (_invdata_docname,)
|
|
697
722
|
with _dl_path.joinpath(_invdata_docname).open("wb") as _invdata_fh:
|
|
698
723
|
_invdata_fh.write(
|
|
699
724
|
requests.get(
|
|
@@ -701,4 +726,10 @@ def _download_invdata(_dl_path: Path) -> list[Any]:
|
|
|
701
726
|
).content
|
|
702
727
|
)
|
|
703
728
|
|
|
704
|
-
return
|
|
729
|
+
return _invdata_docnames_dl
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
if __name__ == "__main__":
|
|
733
|
+
print(
|
|
734
|
+
"This module defines functions for downloading and preparing FTC merger investigations data for further analysis."
|
|
735
|
+
)
|