mergeron 2025.739439.15.tar.gz → 2025.739439.19.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mergeron might be problematic.
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/PKG-INFO +1 -2
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/pyproject.toml +9 -4
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/__init__.py +1 -1
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/__init__.py +61 -0
- mergeron-2025.739439.19/src/mergeron/core/_process_ftc_merger_investigations_data.py +379 -0
- mergeron-2025.739439.19/src/mergeron/core/ftc_merger_investigations_data.py +364 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/guidelines_boundary_functions.py +4 -1
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/enforcement_stats.py +4 -5
- mergeron-2025.739439.15/src/mergeron/core/ftc_merger_investigations_data.py +0 -764
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/README.rst +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/empirical_margin_distribution.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/guidelines_boundaries.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/pseudorandom_numbers.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/__init__.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/ftc_merger_investigations_data.zip +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/__init__.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/data_generation.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/data_generation_functions.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/upp_tests.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/perks/__init__.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/perks/guidelines_boundary_functions_extra.py +0 -0
- {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/py.typed +0 -0
--- mergeron-2025.739439.15/PKG-INFO
+++ mergeron-2025.739439.19/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739439.15
+Version: 2025.739439.19
 Summary: Python for analyzing merger enforcement policy
 License: MIT
 Keywords: merger enforcement policy,merger guidelines,merger screening,enforcement presumptions,concentration standards,diversion ratio,upward pricing pressure,GUPPI
@@ -11,7 +11,6 @@ Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: End Users/Desktop
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: Implementation :: CPython
--- mergeron-2025.739439.15/pyproject.toml
+++ mergeron-2025.739439.19/pyproject.toml
@@ -15,7 +15,7 @@ keywords = [
   "upward pricing pressure",
   "GUPPI",
 ]
-version = "2025.739439.15"
+version = "2025.739439.19"
 requires-python = ">=3.13,<4.0" # need math.fma
 
 # Classifiers list: https://pypi.org/classifiers/
@@ -24,7 +24,6 @@ classifiers = [
   "Environment :: Console",
   "Intended Audience :: End Users/Desktop",
   "Intended Audience :: Science/Research",
-  "License :: OSI Approved :: MIT License",
   "Operating System :: OS Independent",
   "Programming Language :: Python",
   "Programming Language :: Python :: Implementation :: CPython",
@@ -189,6 +188,8 @@ preview = true
 
 cache_fine_grained = true
 ignore_missing_imports = false
+warn_unreachable = false
+warn_redundant_casts = true
 strict = true
 local_partial_types = true
 allow_redefinition_new = true
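The two mypy flags added here adjust strictness: warn_redundant_casts reports typing.cast calls that do not change the inferred type. A minimal, hypothetical snippet (not from mergeron) of the kind of code the new setting would flag:

    # Hypothetical example illustrating warn_redundant_casts = true:
    # the expression is already typed `int`, so the cast changes nothing
    # and mypy reports: Redundant cast to "int"
    from typing import cast

    def double(n: int) -> int:
        return cast(int, n * 2)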
@@ -240,8 +241,12 @@ commands = [
     "poetry",
     "install",
     "--without",
-    "doc
+    "doc",
     "--no-root",
+  ],[
+    "ruff", "check", "./src",
+  ],[
+    "mypy", "./src",
   ],
   [
     "pytest",
@@ -249,5 +254,5 @@ commands = [
   ],
 ]
 
-description = "install pytest in a virtual environment and invoke
+description = "install ruff, mpypy, pytest in a virtual environment and invoke each, in turn"
 deps = ["poetry"]
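Net effect of the two hunks above: the tox-style commands list now runs ruff check ./src and mypy ./src before pytest. As a rough sketch (assuming fail-fast semantics, which the actual runner may or may not use), the sequence amounts to:

    # Rough equivalent of the patched command sequence; check=True stops
    # at the first tool that exits non-zero.
    import subprocess

    for cmd in (["ruff", "check", "./src"], ["mypy", "./src"], ["pytest"]):
        subprocess.run(cmd, check=True)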
--- mergeron-2025.739439.15/src/mergeron/core/__init__.py
+++ mergeron-2025.739439.19/src/mergeron/core/__init__.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import shutil
 from collections.abc import Mapping
 from decimal import Decimal
 from types import MappingProxyType
@@ -20,6 +21,8 @@ from .. import ( # noqa: TID252
     yamelize_attrs,
     yaml_rt_mapper,
 )
+from .. import WORK_DIR as PKG_WORK_DIR # noqa: TID252
+from .. import data as mdat # noqa: TID252
 
 __version__ = VERSION
 
@@ -40,6 +43,64 @@ class GuidelinesBoundary:
     """Area under the boundary."""
 
 
+WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+"""Redefined, in case the user defines WORK_DIR between module imports."""
+
+FID_WORK_DIR = WORK_DIR / "FTCData"
+if not FID_WORK_DIR.is_dir():
+    FID_WORK_DIR.mkdir(parents=True)
+
+INVDATA_ARCHIVE_PATH = WORK_DIR / mdat.FTC_MERGER_INVESTIGATIONS_DATA.name
+if not INVDATA_ARCHIVE_PATH.is_file():
+    shutil.copy2(mdat.FTC_MERGER_INVESTIGATIONS_DATA, INVDATA_ARCHIVE_PATH) # type: ignore
+
+TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
+CONC_TABLE_ALL = "Table 3.1"
+CNT_TABLE_ALL = "Table 4.1"
+
+TTL_KEY = 86825
+CONC_HHI_DICT = {
+    "0 - 1,799": 0,
+    "1,800 - 1,999": 1800,
+    "2,000 - 2,399": 2000,
+    "2,400 - 2,999": 2400,
+    "3,000 - 3,999": 3000,
+    "4,000 - 4,999": 4000,
+    "5,000 - 6,999": 5000,
+    "7,000 - 10,000": 7000,
+    "TOTAL": TTL_KEY,
+}
+CONC_DELTA_DICT = {
+    "0 - 100": 0,
+    "100 - 200": 100,
+    "200 - 300": 200,
+    "300 - 500": 300,
+    "500 - 800": 500,
+    "800 - 1,200": 800,
+    "1,200 - 2,500": 1200,
+    "2,500 - 5,000": 2500,
+    "TOTAL": TTL_KEY,
+}
+CNT_FCOUNT_DICT = {
+    "2 to 1": 2,
+    "3 to 2": 3,
+    "4 to 3": 4,
+    "5 to 4": 5,
+    "6 to 5": 6,
+    "7 to 6": 7,
+    "8 to 7": 8,
+    "9 to 8": 9,
+    "10 to 9": 10,
+    "10 +": 11,
+    "TOTAL": TTL_KEY,
+}
+
+
+def invert_map(_dict: Mapping[Any, Any]) -> Mapping[Any, Any]:
+    """Invert mapping, mapping values to keys of the original mapping."""
+    return {_v: _k for _k, _v in _dict.items()}
+
+
 @frozen
 class INVTableData:
     """Represents individual table of FTC merger investigations data."""
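The dictionaries added above translate the row and column labels printed in the FTC report tables into integer keys, with TTL_KEY serving as a sentinel for the TOTAL row or column, and invert_map recovers labels from keys. A self-contained sketch of the round trip, restating just the relevant definitions so it runs standalone:

    # Round trip between printed report labels and integer keys, using the
    # definitions added in this hunk (abridged).
    TTL_KEY = 86825
    CONC_HHI_DICT = {"0 - 1,799": 0, "1,800 - 1,999": 1800, "TOTAL": TTL_KEY}

    def invert_map(_dict):
        """Invert mapping, mapping values to keys of the original mapping."""
        return {_v: _k for _k, _v in _dict.items()}

    hhi_labels = invert_map(CONC_HHI_DICT)
    assert hhi_labels[1800] == "1,800 - 1,999"  # key -> printed row label
    assert hhi_labels[TTL_KEY] == "TOTAL"       # sentinel -> TOTAL row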
--- /dev/null
+++ mergeron-2025.739439.19/src/mergeron/core/_process_ftc_merger_investigations_data.py
@@ -0,0 +1,379 @@
+"""Download and parse FTC Merger Investigations Data.
+
+This module provided as documentation only. The package
+:code:`pymupdf` is a requirement of this module but is
+distributed under a license that may be incompatible with
+the MIT license under which this package is distributed.
+
+"""
+
+import re
+from collections.abc import Sequence
+from operator import itemgetter
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+# import pymupdf # type: ignore
+import urllib3
+from bs4 import BeautifulSoup
+from numpy.testing import assert_array_equal
+
+from .. import ArrayBIGINT # noqa: TID252
+from . import (
+    CNT_FCOUNT_DICT,
+    CONC_DELTA_DICT,
+    CONC_HHI_DICT,
+    CONC_TABLE_ALL,
+    FID_WORK_DIR,
+    TABLE_TYPES,
+    TTL_KEY,
+    INVData,
+    INVData_in,
+    INVTableData,
+    _mappingproxy_from_mapping,
+)
+
+TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
+
+
+def _parse_invdata() -> INVData:
+    """Parse FTC merger investigations data reports to structured data.
+
+    Returns
+    -------
+    Immutable dictionary of merger investigations data, keyed to
+    reporting period, and including all tables organized by
+    Firm Count (number of remaining competitors) and
+    by range of HHI and ∆HHI.
+    """
+    raise ValueError(
+        "This function is defined here as documentation.\n"
+        "NOTE: License for `pymupdf`, upon which this function depends,"
+        " may be incompatible with the MIT license,"
+        " under which this pacakge is distributed."
+        " Making this fumction operable requires the user to modify"
+        " the source code as well as to install an additional package"
+        " not distributed with this package or identified as a requirement."
+    )
+
+    invdata_docnames = _download_invdata(FID_WORK_DIR)
+
+    invdata: INVData_in = {}
+
+    for invdata_docname in invdata_docnames:
+        invdata_pdf_path = FID_WORK_DIR.joinpath(invdata_docname)
+
+        invdata_doc = pymupdf.open(invdata_pdf_path) # type: ignore # noqa: F821
+        invdata_meta = invdata_doc.metadata
+        if invdata_meta["title"] == " ":
+            invdata_meta["title"] = ", ".join((
+                "Horizontal Merger Investigation Data",
+                "Fiscal Years",
+                "1996-2005",
+            ))
+
+        data_period = "".join( # line-break here for readability
+            re.findall(r"(\d{4}) *(-) *(\d{4})", invdata_meta["title"])[0]
+        )
+
+        # Initialize containers for parsed data
+        invdata[data_period] = {k: {} for k in TABLE_TYPES}
+
+        for pdf_pg in invdata_doc.pages():
+            doc_pg_blocks = pdf_pg.get_text("blocks", sort=False)
+            # Across all published reports of FTC investigations data,
+            # sorting lines (PDF page blocks) by the lower coordinates
+            # and then the left coordinates is most effective for
+            # ordering table rows in top-to-bottom order; this doesn't
+            # work for the 1996-2005 data, however, so we resort later
+            doc_pg_blocks = sorted([
+                (f"{_f[3]:03.0f}{_f[0]:03.0f}{_f[1]:03.0f}{_f[2]:03.0f}", *_f)
+                for _f in doc_pg_blocks
+                if _f[-1] == 0
+            ])
+
+            data_blocks: list[tuple[str]] = [("",)]
+            # Pages layouts not the same in all reports
+            pg_hdr_strings = (
+                "FEDERAL TRADE COMMISSION",
+                "HORIZONTAL MERGER INVESTIGATION DATA: FISCAL YEARS 1996 - 2011",
+            )
+            if len(doc_pg_blocks) > 4:
+                tnum = None
+                for _pg_blk in doc_pg_blocks:
+                    if tnum := TABLE_NO_RE.fullmatch(_pg_blk[-3].strip()):
+                        data_blocks = [
+                            b_
+                            for b_ in doc_pg_blocks
+                            if not b_[-3].startswith(pg_hdr_strings)
+                            and (
+                                b_[-3].strip()
+                                not in {"Significant Competitors", "Post Merger HHI"}
+                            )
+                            and not re.fullmatch(r"\d+", b_[-3].strip())
+                        ]
+                        break
+                if not tnum:
+                    continue
+                del tnum
+            else:
+                continue
+
+            _parse_page_blocks(invdata, data_period, data_blocks)
+
+        invdata_doc.close()
+
+    return _mappingproxy_from_mapping(invdata)
+
+
+def _parse_page_blocks(
+    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+) -> None:
+    if _data_period != "1996-2011":
+        _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
+    else:
+        test_list = [
+            (g, f[-3].strip())
+            for g, f in enumerate(_doc_pg_blocks)
+            if TABLE_NO_RE.fullmatch(f[-3].strip())
+        ]
+        # In the 1996-2011 report, there are 2 tables per page
+        if len(test_list) == 1:
+            table_a_blocks = _doc_pg_blocks
+            table_b_blocks: Sequence[Sequence[Any]] = []
+        else:
+            table_a_blocks, table_b_blocks = (
+                _doc_pg_blocks[test_list[0][0] : test_list[1][0]],
+                _doc_pg_blocks[test_list[1][0] :],
+            )
+
+        for table_i_blocks in table_a_blocks, table_b_blocks:
+            if not table_i_blocks:
+                continue
+            _parse_table_blocks(_invdata, _data_period, table_i_blocks)
+
+
+def _parse_table_blocks(
+    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+) -> None:
+    invdata_evid_cond = "Unrestricted on additional evidence"
+    table_num, table_ser, table_type = _identify_table_type(
+        _table_blocks[0][-3].strip()
+    )
+
+    if _data_period == "1996-2011":
+        invdata_ind_group = (
+            _table_blocks[1][-3].split("\n")[1]
+            if table_num == "Table 4.8"
+            else _table_blocks[2][-3].split("\n", maxsplit=1)[0]
+        )
+
+        if table_ser > 4:
+            invdata_evid_cond = (
+                _table_blocks[2][-3].split("\n")[1]
+                if table_ser in {9, 10}
+                else _table_blocks[3][-3].strip()
+            )
+
+    elif _data_period == "1996-2005":
+        _table_blocks = sorted(_table_blocks, key=itemgetter(6))
+
+        invdata_ind_group = _table_blocks[3][-3].strip()
+        if table_ser > 4:
+            invdata_evid_cond = _table_blocks[5][-3].strip()
+
+    elif table_ser % 2 == 0:
+        invdata_ind_group = _table_blocks[1][-3].split("\n")[2]
+        if (evid_cond_teststr := _table_blocks[2][-3].strip()) == "Outcome":
+            invdata_evid_cond = "Unrestricted on additional evidence"
+        else:
+            invdata_evid_cond = evid_cond_teststr
+
+    elif _table_blocks[3][-3].startswith("FTC Horizontal Merger Investigations"):
+        invdata_ind_group = _table_blocks[3][-3].split("\n")[2]
+        invdata_evid_cond = "Unrestricted on additional evidence"
+
+    else:
+        # print(_table_blocks)
+        invdata_evid_cond = (
+            _table_blocks[1][-3].strip()
+            if table_ser == 9
+            else _table_blocks[3][-3].strip()
+        )
+        invdata_ind_group = _table_blocks[4][-3].split("\n")[2]
+
+    if invdata_ind_group == "Pharmaceutical Markets":
+        invdata_ind_group = "Pharmaceuticals Markets"
+
+    process_table_func = (
+        _process_table_blks_conc_type
+        if table_type == TABLE_TYPES[0]
+        else _process_table_blks_cnt_type
+    )
+
+    table_array = process_table_func(_table_blocks)
+    if not isinstance(table_array, np.ndarray) or table_array.dtype != int:
+        print(table_num)
+        print(_table_blocks)
+        raise ValueError
+
+    table_data = INVTableData(invdata_ind_group, invdata_evid_cond, table_array)
+    _invdata[_data_period][table_type] |= {table_num: table_data}
+
+
+def _identify_table_type(_tnstr: str = CONC_TABLE_ALL, /) -> tuple[str, int, str]:
+    tnum = _tnstr.split(" ")[1]
+    tsub = int(tnum.split(".")[0])
+    return _tnstr, tsub, TABLE_TYPES[(tsub + 1) % 2]
+
+
+def _process_table_blks_conc_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    conc_row_pat = re.compile(r"((?:0|\d,\d{3}) (?:- \d+,\d{3}|\+)|TOTAL)")
+
+    col_titles = tuple(CONC_DELTA_DICT.values())
+    col_totals: ArrayBIGINT = np.zeros(len(col_titles), int)
+    invdata_array: ArrayBIGINT = np.array(None)
+
+    for tbl_blk in _table_blocks:
+        if conc_row_pat.match(_blk_str := tbl_blk[-3]):
+            row_list: list[str] = _blk_str.strip().split("\n")
+            row_title: str = row_list.pop(0)
+            row_key: int = (
+                7000 if row_title.startswith("7,000") else CONC_HHI_DICT[row_title]
+            )
+            row_total = np.array(row_list.pop().replace(",", "").split("/"), int)
+            data_row_list: list[list[int]] = []
+            while row_list:
+                enfd_val, clsd_val = row_list.pop(0).split("/")
+                data_row_list += [
+                    [
+                        row_key,
+                        col_titles[len(data_row_list)],
+                        int(enfd_val),
+                        int(clsd_val),
+                        int(enfd_val) + int(clsd_val),
+                    ]
+                ]
+            data_row_array = np.array(data_row_list, int)
+            del data_row_list
+            # Check row totals
+            assert_array_equal(row_total, np.einsum("ij->j", data_row_array[:, 2:4]))
+
+            if row_key == TTL_KEY:
+                col_totals = data_row_array
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, data_row_array))
+                    if invdata_array.shape
+                    else data_row_array
+                )
+                del data_row_array
+        else:
+            continue
+
+    # Check column totals
+    for _col_tot in col_totals:
+        assert_array_equal(
+            _col_tot[2:],
+            np.einsum(
+                "ij->j", invdata_array[invdata_array[:, 1] == _col_tot[1]][:, 2:]
+            ),
+        )
+
+    return invdata_array[
+        np.argsort(np.einsum("ij,ij->i", [[100, 1]], invdata_array[:, :2]))
+    ]
+
+
+def _process_table_blks_cnt_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    cnt_row_pat = re.compile(r"(\d+ (?:to \d+|\+)|TOTAL)")
+
+    invdata_array: ArrayBIGINT = np.array(None)
+    col_totals: ArrayBIGINT = np.zeros(3, int) # "enforced", "closed", "total"
+
+    for _tbl_blk in _table_blocks:
+        if cnt_row_pat.match(_blk_str := _tbl_blk[-3]):
+            row_list_s = _blk_str.strip().replace(",", "").split("\n")
+            row_list = np.array([CNT_FCOUNT_DICT[row_list_s[0]], *row_list_s[1:]], int)
+            del row_list_s
+            if row_list[3] != row_list[1] + row_list[2]:
+                raise ValueError(
+                    "Total number of investigations does not equal #enforced plus #closed."
+                )
+            if row_list[0] == TTL_KEY:
+                col_totals = row_list
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, row_list))
+                    if invdata_array.shape
+                    else row_list
+                )
+        else:
+            continue
+
+    if not np.array_equal(
+        np.array(list(col_totals[1:]), int), np.einsum("ij->j", invdata_array[:, 1:])
+    ):
+        raise ValueError("Column totals don't compute.")
+
+    return invdata_array[np.argsort(invdata_array[:, 0])]
+
+
+def _download_invdata(_dl_path: Path = FID_WORK_DIR) -> tuple[str, ...]:
+    if not _dl_path.is_dir():
+        _dl_path.mkdir(parents=True)
+
+    invdata_homepage_urls = (
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
+    )
+    invdata_docnames = (
+        "040831horizmergersdata96-03.pdf",
+        "p035603horizmergerinvestigationdata1996-2005.pdf",
+        "081201hsrmergerdata.pdf",
+        "130104horizontalmergerreport.pdf",
+    )
+
+    if all(
+        _dl_path.joinpath(invdata_docname).is_file()
+        for invdata_docname in invdata_docnames
+    ):
+        return invdata_docnames
+
+    invdata_docnames_dl: tuple[str, ...] = ()
+    u3pm = urllib3.PoolManager()
+    chunk_size_ = 1024 * 1024
+    for invdata_homepage_url in invdata_homepage_urls:
+        with u3pm.request(
+            "GET", invdata_homepage_url, preload_content=False
+        ) as _u3handle:
+            invdata_soup = BeautifulSoup(_u3handle.data, "html.parser")
+        invdata_attrs = [
+            (_g.get("title", ""), _g.get("href", ""))
+            for _g in invdata_soup.find_all("a")
+            if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
+        ]
+        for invdata_attr in invdata_attrs:
+            invdata_docname, invdata_link = invdata_attr
+            invdata_docnames_dl += (invdata_docname,)
+            with (
+                u3pm.request(
+                    "GET", f"https://www.ftc.gov/{invdata_link}", preload_content=False
+                ) as _urlopen_handle,
+                _dl_path.joinpath(invdata_docname).open("wb") as invdata_fh,
+            ):
+                while True:
+                    data = _urlopen_handle.read(chunk_size_)
+                    if not data:
+                        break
+                    invdata_fh.write(data)
+
+    return invdata_docnames_dl
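Both table parsers in the new module cross-check the published TOTAL rows and columns with np.einsum("ij->j", ...), which sums a 2-D array over its first axis, i.e. produces column totals. A standalone illustration of that identity as it is used in the row and column checks:

    # np.einsum("ij->j", a) sums over rows (axis 0), yielding column totals;
    # the parsers compare these against the TOTAL row printed in each table.
    import numpy as np
    from numpy.testing import assert_array_equal

    table = np.array([[3, 1], [2, 4], [5, 0]])  # e.g. enforced/closed counts
    published_totals = np.array([10, 5])        # as read from the TOTAL row
    assert_array_equal(published_totals, np.einsum("ij->j", table))
    assert_array_equal(published_totals, table.sum(axis=0))  # same computation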