mergeron 2025.739439.14-py3-none-any.whl → 2025.739439.19-py3-none-any.whl
This diff shows the published contents of two package versions as released to a supported public registry; it is provided for informational purposes only.
- mergeron/__init__.py +1 -1
- mergeron/core/__init__.py +61 -0
- mergeron/core/_process_ftc_merger_investigations_data.py +379 -0
- mergeron/core/ftc_merger_investigations_data.py +16 -416
- mergeron/core/guidelines_boundary_functions.py +4 -1
- mergeron/gen/enforcement_stats.py +4 -5
- {mergeron-2025.739439.14.dist-info → mergeron-2025.739439.19.dist-info}/METADATA +1 -2
- {mergeron-2025.739439.14.dist-info → mergeron-2025.739439.19.dist-info}/RECORD +9 -8
- {mergeron-2025.739439.14.dist-info → mergeron-2025.739439.19.dist-info}/WHEEL +0 -0
mergeron/__init__.py
CHANGED
mergeron/core/__init__.py
CHANGED
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import shutil
 from collections.abc import Mapping
 from decimal import Decimal
 from types import MappingProxyType
@@ -20,6 +21,8 @@ from .. import (  # noqa: TID252
     yamelize_attrs,
     yaml_rt_mapper,
 )
+from .. import WORK_DIR as PKG_WORK_DIR  # noqa: TID252
+from .. import data as mdat  # noqa: TID252
 
 __version__ = VERSION
 
@@ -40,6 +43,64 @@ class GuidelinesBoundary:
     """Area under the boundary."""
 
 
+WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+"""Redefined, in case the user defines WORK_DIR between module imports."""
+
+FID_WORK_DIR = WORK_DIR / "FTCData"
+if not FID_WORK_DIR.is_dir():
+    FID_WORK_DIR.mkdir(parents=True)
+
+INVDATA_ARCHIVE_PATH = WORK_DIR / mdat.FTC_MERGER_INVESTIGATIONS_DATA.name
+if not INVDATA_ARCHIVE_PATH.is_file():
+    shutil.copy2(mdat.FTC_MERGER_INVESTIGATIONS_DATA, INVDATA_ARCHIVE_PATH)  # type: ignore
+
+TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
+CONC_TABLE_ALL = "Table 3.1"
+CNT_TABLE_ALL = "Table 4.1"
+
+TTL_KEY = 86825
+CONC_HHI_DICT = {
+    "0 - 1,799": 0,
+    "1,800 - 1,999": 1800,
+    "2,000 - 2,399": 2000,
+    "2,400 - 2,999": 2400,
+    "3,000 - 3,999": 3000,
+    "4,000 - 4,999": 4000,
+    "5,000 - 6,999": 5000,
+    "7,000 - 10,000": 7000,
+    "TOTAL": TTL_KEY,
+}
+CONC_DELTA_DICT = {
+    "0 - 100": 0,
+    "100 - 200": 100,
+    "200 - 300": 200,
+    "300 - 500": 300,
+    "500 - 800": 500,
+    "800 - 1,200": 800,
+    "1,200 - 2,500": 1200,
+    "2,500 - 5,000": 2500,
+    "TOTAL": TTL_KEY,
+}
+CNT_FCOUNT_DICT = {
+    "2 to 1": 2,
+    "3 to 2": 3,
+    "4 to 3": 4,
+    "5 to 4": 5,
+    "6 to 5": 6,
+    "7 to 6": 7,
+    "8 to 7": 8,
+    "9 to 8": 9,
+    "10 to 9": 10,
+    "10 +": 11,
+    "TOTAL": TTL_KEY,
+}
+
+
+def invert_map(_dict: Mapping[Any, Any]) -> Mapping[Any, Any]:
+    """Invert mapping, mapping values to keys of the original mapping."""
+    return {_v: _k for _k, _v in _dict.items()}
+
+
 @frozen
 class INVTableData:
     """Represents individual table of FTC merger investigations data."""
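The constants added above give the package a shared vocabulary for the FTC tables: TTL_KEY is a sentinel integer standing in for the "TOTAL" row or column, and the three dictionaries map the row and column labels printed in the FTC reports to integer keys. A minimal sketch of how they compose with the new invert_map helper (standalone here; in the package these names are exported from mergeron.core):

    from collections.abc import Mapping
    from typing import Any

    TTL_KEY = 86825  # sentinel key for the "TOTAL" row/column
    CONC_HHI_DICT = {"0 - 1,799": 0, "1,800 - 1,999": 1800, "TOTAL": TTL_KEY}

    def invert_map(_dict: Mapping[Any, Any]) -> Mapping[Any, Any]:
        """Invert mapping, mapping values to keys of the original mapping."""
        return {_v: _k for _k, _v in _dict.items()}

    # Recover the printed report label from an integer key:
    print(invert_map(CONC_HHI_DICT)[1800])     # "1,800 - 1,999"
    print(invert_map(CONC_HHI_DICT)[TTL_KEY])  # "TOTAL"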
mergeron/core/_process_ftc_merger_investigations_data.py
ADDED
@@ -0,0 +1,379 @@
+"""Download and parse FTC Merger Investigations Data.
+
+This module provided as documentation only. The package
+:code:`pymupdf` is a requirement of this module but is
+distributed under a license that may be incompatible with
+the MIT license under which this package is distributed.
+
+"""
+
+import re
+from collections.abc import Sequence
+from operator import itemgetter
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+# import pymupdf  # type: ignore
+import urllib3
+from bs4 import BeautifulSoup
+from numpy.testing import assert_array_equal
+
+from .. import ArrayBIGINT  # noqa: TID252
+from . import (
+    CNT_FCOUNT_DICT,
+    CONC_DELTA_DICT,
+    CONC_HHI_DICT,
+    CONC_TABLE_ALL,
+    FID_WORK_DIR,
+    TABLE_TYPES,
+    TTL_KEY,
+    INVData,
+    INVData_in,
+    INVTableData,
+    _mappingproxy_from_mapping,
+)
+
+TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
+
+
+def _parse_invdata() -> INVData:
+    """Parse FTC merger investigations data reports to structured data.
+
+    Returns
+    -------
+    Immutable dictionary of merger investigations data, keyed to
+    reporting period, and including all tables organized by
+    Firm Count (number of remaining competitors) and
+    by range of HHI and ∆HHI.
+    """
+    raise ValueError(
+        "This function is defined here as documentation.\n"
+        "NOTE: License for `pymupdf`, upon which this function depends,"
+        " may be incompatible with the MIT license,"
+        " under which this pacakge is distributed."
+        " Making this fumction operable requires the user to modify"
+        " the source code as well as to install an additional package"
+        " not distributed with this package or identified as a requirement."
+    )
+
+    invdata_docnames = _download_invdata(FID_WORK_DIR)
+
+    invdata: INVData_in = {}
+
+    for invdata_docname in invdata_docnames:
+        invdata_pdf_path = FID_WORK_DIR.joinpath(invdata_docname)
+
+        invdata_doc = pymupdf.open(invdata_pdf_path)  # type: ignore # noqa: F821
+        invdata_meta = invdata_doc.metadata
+        if invdata_meta["title"] == " ":
+            invdata_meta["title"] = ", ".join((
+                "Horizontal Merger Investigation Data",
+                "Fiscal Years",
+                "1996-2005",
+            ))
+
+        data_period = "".join(  # line-break here for readability
+            re.findall(r"(\d{4}) *(-) *(\d{4})", invdata_meta["title"])[0]
+        )
+
+        # Initialize containers for parsed data
+        invdata[data_period] = {k: {} for k in TABLE_TYPES}
+
+        for pdf_pg in invdata_doc.pages():
+            doc_pg_blocks = pdf_pg.get_text("blocks", sort=False)
+            # Across all published reports of FTC investigations data,
+            # sorting lines (PDF page blocks) by the lower coordinates
+            # and then the left coordinates is most effective for
+            # ordering table rows in top-to-bottom order; this doesn't
+            # work for the 1996-2005 data, however, so we resort later
+            doc_pg_blocks = sorted([
+                (f"{_f[3]:03.0f}{_f[0]:03.0f}{_f[1]:03.0f}{_f[2]:03.0f}", *_f)
+                for _f in doc_pg_blocks
+                if _f[-1] == 0
+            ])
+
+            data_blocks: list[tuple[str]] = [("",)]
+            # Pages layouts not the same in all reports
+            pg_hdr_strings = (
+                "FEDERAL TRADE COMMISSION",
+                "HORIZONTAL MERGER INVESTIGATION DATA: FISCAL YEARS 1996 - 2011",
+            )
+            if len(doc_pg_blocks) > 4:
+                tnum = None
+                for _pg_blk in doc_pg_blocks:
+                    if tnum := TABLE_NO_RE.fullmatch(_pg_blk[-3].strip()):
+                        data_blocks = [
+                            b_
+                            for b_ in doc_pg_blocks
+                            if not b_[-3].startswith(pg_hdr_strings)
+                            and (
+                                b_[-3].strip()
+                                not in {"Significant Competitors", "Post Merger HHI"}
+                            )
+                            and not re.fullmatch(r"\d+", b_[-3].strip())
+                        ]
+                        break
+                if not tnum:
+                    continue
+                del tnum
+            else:
+                continue
+
+            _parse_page_blocks(invdata, data_period, data_blocks)
+
+        invdata_doc.close()
+
+    return _mappingproxy_from_mapping(invdata)
+
+
+def _parse_page_blocks(
+    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+) -> None:
+    if _data_period != "1996-2011":
+        _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
+    else:
+        test_list = [
+            (g, f[-3].strip())
+            for g, f in enumerate(_doc_pg_blocks)
+            if TABLE_NO_RE.fullmatch(f[-3].strip())
+        ]
+        # In the 1996-2011 report, there are 2 tables per page
+        if len(test_list) == 1:
+            table_a_blocks = _doc_pg_blocks
+            table_b_blocks: Sequence[Sequence[Any]] = []
+        else:
+            table_a_blocks, table_b_blocks = (
+                _doc_pg_blocks[test_list[0][0] : test_list[1][0]],
+                _doc_pg_blocks[test_list[1][0] :],
+            )
+
+        for table_i_blocks in table_a_blocks, table_b_blocks:
+            if not table_i_blocks:
+                continue
+            _parse_table_blocks(_invdata, _data_period, table_i_blocks)
+
+
+def _parse_table_blocks(
+    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+) -> None:
+    invdata_evid_cond = "Unrestricted on additional evidence"
+    table_num, table_ser, table_type = _identify_table_type(
+        _table_blocks[0][-3].strip()
+    )
+
+    if _data_period == "1996-2011":
+        invdata_ind_group = (
+            _table_blocks[1][-3].split("\n")[1]
+            if table_num == "Table 4.8"
+            else _table_blocks[2][-3].split("\n", maxsplit=1)[0]
+        )
+
+        if table_ser > 4:
+            invdata_evid_cond = (
+                _table_blocks[2][-3].split("\n")[1]
+                if table_ser in {9, 10}
+                else _table_blocks[3][-3].strip()
+            )
+
+    elif _data_period == "1996-2005":
+        _table_blocks = sorted(_table_blocks, key=itemgetter(6))
+
+        invdata_ind_group = _table_blocks[3][-3].strip()
+        if table_ser > 4:
+            invdata_evid_cond = _table_blocks[5][-3].strip()
+
+    elif table_ser % 2 == 0:
+        invdata_ind_group = _table_blocks[1][-3].split("\n")[2]
+        if (evid_cond_teststr := _table_blocks[2][-3].strip()) == "Outcome":
+            invdata_evid_cond = "Unrestricted on additional evidence"
+        else:
+            invdata_evid_cond = evid_cond_teststr
+
+    elif _table_blocks[3][-3].startswith("FTC Horizontal Merger Investigations"):
+        invdata_ind_group = _table_blocks[3][-3].split("\n")[2]
+        invdata_evid_cond = "Unrestricted on additional evidence"
+
+    else:
+        # print(_table_blocks)
+        invdata_evid_cond = (
+            _table_blocks[1][-3].strip()
+            if table_ser == 9
+            else _table_blocks[3][-3].strip()
+        )
+        invdata_ind_group = _table_blocks[4][-3].split("\n")[2]
+
+    if invdata_ind_group == "Pharmaceutical Markets":
+        invdata_ind_group = "Pharmaceuticals Markets"
+
+    process_table_func = (
+        _process_table_blks_conc_type
+        if table_type == TABLE_TYPES[0]
+        else _process_table_blks_cnt_type
+    )
+
+    table_array = process_table_func(_table_blocks)
+    if not isinstance(table_array, np.ndarray) or table_array.dtype != int:
+        print(table_num)
+        print(_table_blocks)
+        raise ValueError
+
+    table_data = INVTableData(invdata_ind_group, invdata_evid_cond, table_array)
+    _invdata[_data_period][table_type] |= {table_num: table_data}
+
+
+def _identify_table_type(_tnstr: str = CONC_TABLE_ALL, /) -> tuple[str, int, str]:
+    tnum = _tnstr.split(" ")[1]
+    tsub = int(tnum.split(".")[0])
+    return _tnstr, tsub, TABLE_TYPES[(tsub + 1) % 2]
+
+
+def _process_table_blks_conc_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    conc_row_pat = re.compile(r"((?:0|\d,\d{3}) (?:- \d+,\d{3}|\+)|TOTAL)")
+
+    col_titles = tuple(CONC_DELTA_DICT.values())
+    col_totals: ArrayBIGINT = np.zeros(len(col_titles), int)
+    invdata_array: ArrayBIGINT = np.array(None)
+
+    for tbl_blk in _table_blocks:
+        if conc_row_pat.match(_blk_str := tbl_blk[-3]):
+            row_list: list[str] = _blk_str.strip().split("\n")
+            row_title: str = row_list.pop(0)
+            row_key: int = (
+                7000 if row_title.startswith("7,000") else CONC_HHI_DICT[row_title]
+            )
+            row_total = np.array(row_list.pop().replace(",", "").split("/"), int)
+            data_row_list: list[list[int]] = []
+            while row_list:
+                enfd_val, clsd_val = row_list.pop(0).split("/")
+                data_row_list += [
+                    [
+                        row_key,
+                        col_titles[len(data_row_list)],
+                        int(enfd_val),
+                        int(clsd_val),
+                        int(enfd_val) + int(clsd_val),
+                    ]
+                ]
+            data_row_array = np.array(data_row_list, int)
+            del data_row_list
+            # Check row totals
+            assert_array_equal(row_total, np.einsum("ij->j", data_row_array[:, 2:4]))
+
+            if row_key == TTL_KEY:
+                col_totals = data_row_array
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, data_row_array))
+                    if invdata_array.shape
+                    else data_row_array
+                )
+            del data_row_array
+        else:
+            continue
+
+    # Check column totals
+    for _col_tot in col_totals:
+        assert_array_equal(
+            _col_tot[2:],
+            np.einsum(
+                "ij->j", invdata_array[invdata_array[:, 1] == _col_tot[1]][:, 2:]
+            ),
+        )
+
+    return invdata_array[
+        np.argsort(np.einsum("ij,ij->i", [[100, 1]], invdata_array[:, :2]))
+    ]
+
+
+def _process_table_blks_cnt_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    cnt_row_pat = re.compile(r"(\d+ (?:to \d+|\+)|TOTAL)")
+
+    invdata_array: ArrayBIGINT = np.array(None)
+    col_totals: ArrayBIGINT = np.zeros(3, int)  # "enforced", "closed", "total"
+
+    for _tbl_blk in _table_blocks:
+        if cnt_row_pat.match(_blk_str := _tbl_blk[-3]):
+            row_list_s = _blk_str.strip().replace(",", "").split("\n")
+            row_list = np.array([CNT_FCOUNT_DICT[row_list_s[0]], *row_list_s[1:]], int)
+            del row_list_s
+            if row_list[3] != row_list[1] + row_list[2]:
+                raise ValueError(
+                    "Total number of investigations does not equal #enforced plus #closed."
+                )
+            if row_list[0] == TTL_KEY:
+                col_totals = row_list
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, row_list))
+                    if invdata_array.shape
+                    else row_list
+                )
+        else:
+            continue
+
+    if not np.array_equal(
+        np.array(list(col_totals[1:]), int), np.einsum("ij->j", invdata_array[:, 1:])
+    ):
+        raise ValueError("Column totals don't compute.")
+
+    return invdata_array[np.argsort(invdata_array[:, 0])]
+
+
+def _download_invdata(_dl_path: Path = FID_WORK_DIR) -> tuple[str, ...]:
+    if not _dl_path.is_dir():
+        _dl_path.mkdir(parents=True)
+
+    invdata_homepage_urls = (
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
+    )
+    invdata_docnames = (
+        "040831horizmergersdata96-03.pdf",
+        "p035603horizmergerinvestigationdata1996-2005.pdf",
+        "081201hsrmergerdata.pdf",
+        "130104horizontalmergerreport.pdf",
+    )
+
+    if all(
+        _dl_path.joinpath(invdata_docname).is_file()
+        for invdata_docname in invdata_docnames
+    ):
+        return invdata_docnames
+
+    invdata_docnames_dl: tuple[str, ...] = ()
+    u3pm = urllib3.PoolManager()
+    chunk_size_ = 1024 * 1024
+    for invdata_homepage_url in invdata_homepage_urls:
+        with u3pm.request(
+            "GET", invdata_homepage_url, preload_content=False
+        ) as _u3handle:
+            invdata_soup = BeautifulSoup(_u3handle.data, "html.parser")
+            invdata_attrs = [
+                (_g.get("title", ""), _g.get("href", ""))
+                for _g in invdata_soup.find_all("a")
+                if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
+            ]
+            for invdata_attr in invdata_attrs:
+                invdata_docname, invdata_link = invdata_attr
+                invdata_docnames_dl += (invdata_docname,)
+                with (
+                    u3pm.request(
+                        "GET", f"https://www.ftc.gov/{invdata_link}", preload_content=False
+                    ) as _urlopen_handle,
+                    _dl_path.joinpath(invdata_docname).open("wb") as invdata_fh,
+                ):
+                    while True:
+                        data = _urlopen_handle.read(chunk_size_)
+                        if not data:
+                            break
+                        invdata_fh.write(data)
+
+    return invdata_docnames_dl
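Two details of the parser above are easy to miss. First, _identify_table_type keys off the whole part of the table number: 3.x tables report by HHI and ∆HHI while 4.x tables report by firm count, so TABLE_TYPES[(tsub + 1) % 2] selects the right label from the parity alone. Second, page blocks are ordered by a zero-padded composite string key built from block coordinates. A small self-contained sketch (the sample block tuples are made up; pymupdf "blocks" are (x0, y0, x1, y1, text, block_no, block_type)):

    TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")

    def identify(tnstr: str) -> tuple[str, int, str]:
        tsub = int(tnstr.split(" ")[1].split(".")[0])
        return tnstr, tsub, TABLE_TYPES[(tsub + 1) % 2]

    print(identify("Table 3.1")[2])  # "ByHHIandDelta"
    print(identify("Table 4.1")[2])  # "ByFirmCount"

    # Zero-padded "y1 x0 y0 x1" keys sort lexicographically, ordering blocks
    # top-to-bottom on the page, then left-to-right:
    blocks = [
        (72.0, 95.0, 200.0, 110.0, "Table 3.1", 1, 0),                # lower block
        (72.0, 40.0, 300.0, 55.0, "FEDERAL TRADE COMMISSION", 0, 0),  # page header
    ]
    keyed = sorted((f"{b[3]:03.0f}{b[0]:03.0f}{b[1]:03.0f}{b[2]:03.0f}", *b) for b in blocks)
    print([k[5] for k in keyed])  # header first, then the table title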
mergeron/core/ftc_merger_investigations_data.py
CHANGED
@@ -1,5 +1,8 @@
-"""
-
+"""Methods to load and augmentFTC Merger Investigations Data.
+
+Details on downloading and processing the data are specified in
+the "private" module, :code:`_process_ftc_merger_investigations_data`.
+
 
 Notes
 -----
@@ -10,92 +13,31 @@ Reported row and column totals from source data are not stored.
 from __future__ import annotations
 
 import re
-import shutil
-from collections.abc import Mapping, Sequence
-from operator import itemgetter
+from collections.abc import Sequence
 from pathlib import Path
 from types import MappingProxyType
-from typing import Any
 from zipfile import ZIP_DEFLATED, ZipFile
 
 import numpy as np
-import urllib3
-from bs4 import BeautifulSoup
-from numpy.testing import assert_array_equal
 
-from .. import EMPTY_ARRAYINT, VERSION,
-from .. import WORK_DIR as PKG_WORK_DIR  # noqa: TID252
-from .. import data as mdat  # noqa: TID252
+from .. import EMPTY_ARRAYINT, VERSION, this_yaml  # noqa: TID252
 from . import (
+    CNT_TABLE_ALL,
+    CONC_TABLE_ALL,
+    INVDATA_ARCHIVE_PATH,
+    TABLE_TYPES,
     INVData,
     INVData_in,
     INVTableData,
     _dict_from_mapping,
     _mappingproxy_from_mapping,
 )
+from ._process_ftc_merger_investigations_data import _parse_invdata
 
 __version__ = VERSION
 
 # cspell: "includeRegExpList": ["strings", "comments", /( {3}['"]{3}).*?\1/g]
 
-WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
-"""Redefined, in case the user defines WORK_DIR betweeen module imports."""
-
-FID_WORK_DIR = WORK_DIR / "FTCData"
-if not FID_WORK_DIR.is_dir():
-    FID_WORK_DIR.mkdir(parents=True)
-
-INVDATA_ARCHIVE_PATH = WORK_DIR / mdat.FTC_MERGER_INVESTIGATIONS_DATA.name
-if not INVDATA_ARCHIVE_PATH.is_file():
-    shutil.copy2(mdat.FTC_MERGER_INVESTIGATIONS_DATA, INVDATA_ARCHIVE_PATH)  # type: ignore
-
-TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
-TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
-CONC_TABLE_ALL = "Table 3.1"
-CNT_TABLE_ALL = "Table 4.1"
-
-TTL_KEY = 86825
-CONC_HHI_DICT = {
-    "0 - 1,799": 0,
-    "1,800 - 1,999": 1800,
-    "2,000 - 2,399": 2000,
-    "2,400 - 2,999": 2400,
-    "3,000 - 3,999": 3000,
-    "4,000 - 4,999": 4000,
-    "5,000 - 6,999": 5000,
-    "7,000 - 10,000": 7000,
-    "TOTAL": TTL_KEY,
-}
-CONC_DELTA_DICT = {
-    "0 - 100": 0,
-    "100 - 200": 100,
-    "200 - 300": 200,
-    "300 - 500": 300,
-    "500 - 800": 500,
-    "800 - 1,200": 800,
-    "1,200 - 2,500": 1200,
-    "2,500 - 5,000": 2500,
-    "TOTAL": TTL_KEY,
-}
-CNT_FCOUNT_DICT = {
-    "2 to 1": 2,
-    "3 to 2": 3,
-    "4 to 3": 4,
-    "5 to 4": 5,
-    "6 to 5": 6,
-    "7 to 6": 7,
-    "8 to 7": 8,
-    "9 to 8": 9,
-    "10 to 9": 10,
-    "10 +": 11,
-    "TOTAL": TTL_KEY,
-}
-
-
-def invert_map(_dict: Mapping[Any, Any]) -> Mapping[Any, Any]:
-    """Invert mapping, mapping values to keys of the original mapping."""
-    return {_v: _k for _k, _v in _dict.items()}
-
 
 def construct_data(
     _archive_path: Path = INVDATA_ARCHIVE_PATH,
@@ -268,7 +210,7 @@ def _construct_new_period_data(
 
     invdata_cuml = _invdata[cuml_period]
 
-    base_period = "1996-{}".format(int(_data_period.split("-")[0]) - 1)
+    base_period = "1996-{}".format(int(_data_period.split("-", maxsplit=1)[0]) - 1)
     invdata_base = _invdata[base_period]
 
     if tuple(invdata_cuml.keys()) != TABLE_TYPES:
@@ -326,7 +268,9 @@ def _construct_new_period_data(
                 table_no
             ].data_array[:, -3:-1]
         ]
-        if pd_start == 1996 and pd_end < int(_data_period.split("-")[0]):
+        if pd_start == 1996 and pd_end < int(
+            _data_period.split("-", maxsplit=1)[0]
+        ):
             invdata_base_array_stack += [
                 _invdata[data_period_detail][table_type][
                     table_no
@@ -414,350 +358,6 @@ def invdata_build_aggregate_table(
     )
 
 
-def _parse_invdata() -> INVData:
-    """Parse FTC merger investigations data reports to structured data.
-
-    Returns
-    -------
-    Immutable dictionary of merger investigations data, keyed to
-    reporting period, and including all tables organized by
-    Firm Count (number of remaining competitors) and
-    by range of HHI and ∆HHI.
-
-    """
-    raise ValueError(
-        "This function is defined here as documentation.\n"
-        "NOTE: License for `pymupdf`, upon which this function depends,"
-        " may be incompatible with the MIT license,"
-        " under which this pacakge is distributed."
-        " Making this fumction operable requires the user to modify"
-        " the source code as well as to install an additional package"
-        " not distributed with this package or identified as a requirement."
-    )
-    import pymupdf  # type: ignore
-
-    invdata_docnames = _download_invdata(FID_WORK_DIR)
-
-    invdata: INVData_in = {}
-
-    for invdata_docname in invdata_docnames:
-        invdata_pdf_path = FID_WORK_DIR.joinpath(invdata_docname)
-
-        invdata_doc = pymupdf.open(invdata_pdf_path)
-        invdata_meta = invdata_doc.metadata
-        if invdata_meta["title"] == " ":
-            invdata_meta["title"] = ", ".join((
-                "Horizontal Merger Investigation Data",
-                "Fiscal Years",
-                "1996-2005",
-            ))
-
-        data_period = "".join(  # line-break here for readability
-            re.findall(r"(\d{4}) *(-) *(\d{4})", invdata_meta["title"])[0]
-        )
-
-        # Initialize containers for parsed data
-        invdata[data_period] = {k: {} for k in TABLE_TYPES}
-
-        for pdf_pg in invdata_doc.pages():
-            doc_pg_blocks = pdf_pg.get_text("blocks", sort=False)
-            # Across all published reports of FTC investigations data,
-            # sorting lines (PDF page blocks) by the lower coordinates
-            # and then the left coordinates is most effective for
-            # ordering table rows in top-to-bottom order; this doesn't
-            # work for the 1996-2005 data, however, so we resort later
-            doc_pg_blocks = sorted([
-                (f"{_f[3]:03.0f}{_f[0]:03.0f}{_f[1]:03.0f}{_f[2]:03.0f}", *_f)
-                for _f in doc_pg_blocks
-                if _f[-1] == 0
-            ])
-
-            data_blocks: list[tuple[str]] = [("",)]
-            # Pages layouts not the same in all reports
-            pg_hdr_strings = (
-                "FEDERAL TRADE COMMISSION",
-                "HORIZONTAL MERGER INVESTIGATION DATA: FISCAL YEARS 1996 - 2011",
-            )
-            if len(doc_pg_blocks) > 4:
-                tnum = None
-                for _pg_blk in doc_pg_blocks:
-                    if tnum := TABLE_NO_RE.fullmatch(_pg_blk[-3].strip()):
-                        data_blocks = [
-                            b_
-                            for b_ in doc_pg_blocks
-                            if not b_[-3].startswith(pg_hdr_strings)
-                            and (
-                                b_[-3].strip()
-                                not in {"Significant Competitors", "Post Merger HHI"}
-                            )
-                            and not re.fullmatch(r"\d+", b_[-3].strip())
-                        ]
-                        break
-                if not tnum:
-                    continue
-                del tnum
-            else:
-                continue
-
-            _parse_page_blocks(invdata, data_period, data_blocks)
-
-        invdata_doc.close()
-
-    return _mappingproxy_from_mapping(invdata)
-
-
-def _parse_page_blocks(
-    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
-) -> None:
-    if _data_period != "1996-2011":
-        _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
-    else:
-        test_list = [
-            (g, f[-3].strip())
-            for g, f in enumerate(_doc_pg_blocks)
-            if TABLE_NO_RE.fullmatch(f[-3].strip())
-        ]
-        # In the 1996-2011 report, there are 2 tables per page
-        if len(test_list) == 1:
-            table_a_blocks = _doc_pg_blocks
-            table_b_blocks: Sequence[Sequence[Any]] = []
-        else:
-            table_a_blocks, table_b_blocks = (
-                _doc_pg_blocks[test_list[0][0] : test_list[1][0]],
-                _doc_pg_blocks[test_list[1][0] :],
-            )
-
-        for table_i_blocks in table_a_blocks, table_b_blocks:
-            if not table_i_blocks:
-                continue
-            _parse_table_blocks(_invdata, _data_period, table_i_blocks)
-
-
-def _parse_table_blocks(
-    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
-) -> None:
-    invdata_evid_cond = "Unrestricted on additional evidence"
-    table_num, table_ser, table_type = _identify_table_type(
-        _table_blocks[0][-3].strip()
-    )
-
-    if _data_period == "1996-2011":
-        invdata_ind_group = (
-            _table_blocks[1][-3].split("\n")[1]
-            if table_num == "Table 4.8"
-            else _table_blocks[2][-3].split("\n")[0]
-        )
-
-        if table_ser > 4:
-            invdata_evid_cond = (
-                _table_blocks[2][-3].split("\n")[1]
-                if table_ser in {9, 10}
-                else _table_blocks[3][-3].strip()
-            )
-
-    elif _data_period == "1996-2005":
-        _table_blocks = sorted(_table_blocks, key=itemgetter(6))
-
-        invdata_ind_group = _table_blocks[3][-3].strip()
-        if table_ser > 4:
-            invdata_evid_cond = _table_blocks[5][-3].strip()
-
-    elif table_ser % 2 == 0:
-        invdata_ind_group = _table_blocks[1][-3].split("\n")[2]
-        if (evid_cond_teststr := _table_blocks[2][-3].strip()) == "Outcome":
-            invdata_evid_cond = "Unrestricted on additional evidence"
-        else:
-            invdata_evid_cond = evid_cond_teststr
-
-    elif _table_blocks[3][-3].startswith("FTC Horizontal Merger Investigations"):
-        invdata_ind_group = _table_blocks[3][-3].split("\n")[2]
-        invdata_evid_cond = "Unrestricted on additional evidence"
-
-    else:
-        # print(_table_blocks)
-        invdata_evid_cond = (
-            _table_blocks[1][-3].strip()
-            if table_ser == 9
-            else _table_blocks[3][-3].strip()
-        )
-        invdata_ind_group = _table_blocks[4][-3].split("\n")[2]
-
-    if invdata_ind_group == "Pharmaceutical Markets":
-        invdata_ind_group = "Pharmaceuticals Markets"
-
-    process_table_func = (
-        _process_table_blks_conc_type
-        if table_type == TABLE_TYPES[0]
-        else _process_table_blks_cnt_type
-    )
-
-    table_array = process_table_func(_table_blocks)
-    if not isinstance(table_array, np.ndarray) or table_array.dtype != int:
-        print(table_num)
-        print(_table_blocks)
-        raise ValueError
-
-    table_data = INVTableData(invdata_ind_group, invdata_evid_cond, table_array)
-    _invdata[_data_period][table_type] |= {table_num: table_data}
-
-
-def _identify_table_type(_tnstr: str = CONC_TABLE_ALL, /) -> tuple[str, int, str]:
-    tnum = _tnstr.split(" ")[1]
-    tsub = int(tnum.split(".")[0])
-    return _tnstr, tsub, TABLE_TYPES[(tsub + 1) % 2]
-
-
-def _process_table_blks_conc_type(
-    _table_blocks: Sequence[Sequence[str]], /
-) -> ArrayBIGINT:
-    conc_row_pat = re.compile(r"((?:0|\d,\d{3}) (?:- \d+,\d{3}|\+)|TOTAL)")
-
-    col_titles = tuple(CONC_DELTA_DICT.values())
-    col_totals: ArrayBIGINT = np.zeros(len(col_titles), int)
-    invdata_array: ArrayBIGINT = np.array(None)
-
-    for tbl_blk in _table_blocks:
-        if conc_row_pat.match(_blk_str := tbl_blk[-3]):
-            row_list: list[str] = _blk_str.strip().split("\n")
-            row_title: str = row_list.pop(0)
-            row_key: int = (
-                7000 if row_title.startswith("7,000") else CONC_HHI_DICT[row_title]
-            )
-            row_total = np.array(row_list.pop().replace(",", "").split("/"), int)
-            data_row_list: list[list[int]] = []
-            while row_list:
-                enfd_val, clsd_val = row_list.pop(0).split("/")
-                data_row_list += [
-                    [
-                        row_key,
-                        col_titles[len(data_row_list)],
-                        int(enfd_val),
-                        int(clsd_val),
-                        int(enfd_val) + int(clsd_val),
-                    ]
-                ]
-            data_row_array = np.array(data_row_list, int)
-            del data_row_list
-            # Check row totals
-            assert_array_equal(row_total, np.einsum("ij->j", data_row_array[:, 2:4]))
-
-            if row_key == TTL_KEY:
-                col_totals = data_row_array
-            else:
-                invdata_array = (
-                    np.vstack((invdata_array, data_row_array))
-                    if invdata_array.shape
-                    else data_row_array
-                )
-            del data_row_array
-        else:
-            continue
-
-    # Check column totals
-    for _col_tot in col_totals:
-        assert_array_equal(
-            _col_tot[2:],  # type: ignore
-            np.einsum(
-                "ij->j",
-                invdata_array[invdata_array[:, 1] == _col_tot[1]][:, 2:],  # type: ignore
-            ),
-        )
-
-    return invdata_array[
-        np.argsort(np.einsum("ij,ij->i", [[100, 1]], invdata_array[:, :2]))
-    ]
-
-
-def _process_table_blks_cnt_type(
-    _table_blocks: Sequence[Sequence[str]], /
-) -> ArrayBIGINT:
-    cnt_row_pat = re.compile(r"(\d+ (?:to \d+|\+)|TOTAL)")
-
-    invdata_array: ArrayBIGINT = np.array(None)
-    col_totals: ArrayBIGINT = np.zeros(3, int)  # "enforced", "closed", "total"
-
-    for _tbl_blk in _table_blocks:
-        if cnt_row_pat.match(_blk_str := _tbl_blk[-3]):
-            row_list_s = _blk_str.strip().replace(",", "").split("\n")
-            row_list = np.array([CNT_FCOUNT_DICT[row_list_s[0]], *row_list_s[1:]], int)
-            del row_list_s
-            if row_list[3] != row_list[1] + row_list[2]:
-                raise ValueError(
-                    "Total number of investigations does not equal #enforced plus #closed."
-                )
-            if row_list[0] == TTL_KEY:
-                col_totals = row_list
-            else:
-                invdata_array = (
-                    np.vstack((invdata_array, row_list))
-                    if invdata_array.shape
-                    else row_list
-                )
-        else:
-            continue
-
-    if not np.array_equal(
-        np.array(list(col_totals[1:]), int), np.einsum("ij->j", invdata_array[:, 1:])
-    ):
-        raise ValueError("Column totals don't compute.")
-
-    return invdata_array[np.argsort(invdata_array[:, 0])]
-
-
-def _download_invdata(_dl_path: Path = FID_WORK_DIR) -> tuple[str, ...]:
-    if not _dl_path.is_dir():
-        _dl_path.mkdir(parents=True)
-
-    invdata_homepage_urls = (
-        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
-        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
-        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
-        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
-    )
-    invdata_docnames = (
-        "040831horizmergersdata96-03.pdf",
-        "p035603horizmergerinvestigationdata1996-2005.pdf",
-        "081201hsrmergerdata.pdf",
-        "130104horizontalmergerreport.pdf",
-    )
-
-    if all(
-        _dl_path.joinpath(invdata_docname).is_file()
-        for invdata_docname in invdata_docnames
-    ):
-        return invdata_docnames
-
-    invdata_docnames_dl: tuple[str, ...] = ()
-    u3pm = urllib3.PoolManager()
-    chunk_size_ = 1024 * 1024
-    for invdata_homepage_url in invdata_homepage_urls:
-        with u3pm.request(
-            "GET", invdata_homepage_url, preload_content=False
-        ) as _u3handle:
-            invdata_soup = BeautifulSoup(_u3handle.data, "html.parser")
-            invdata_attrs = [
-                (_g.get("title", ""), _g.get("href", ""))
-                for _g in invdata_soup.find_all("a")
-                if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
-            ]
-            for invdata_attr in invdata_attrs:
-                invdata_docname, invdata_link = invdata_attr
-                invdata_docnames_dl += (invdata_docname,)
-                with (
-                    u3pm.request(
-                        "GET", f"https://www.ftc.gov/{invdata_link}", preload_content=False
-                    ) as _urlopen_handle,
-                    _dl_path.joinpath(invdata_docname).open("wb") as invdata_fh,
-                ):
-                    while True:
-                        data = _urlopen_handle.read(chunk_size_)
-                        if not data:
-                            break
-                        invdata_fh.write(data)
-
-    return invdata_docnames_dl
-
-
 if __name__ == "__main__":
     print(
         "This module defines functions for downloading and preparing FTC merger investigations data for further analysis."
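The maxsplit=1 changes above are defensive rather than behavioral: period keys look like "2006-2011", so splitting on the first hyphen is enough to recover the start year, from which the preceding cumulative base period is derived. A quick illustration:

    data_period = "2006-2011"
    base_period = "1996-{}".format(int(data_period.split("-", maxsplit=1)[0]) - 1)
    print(base_period)  # "1996-2005": the base differenced out of the cumulative "1996-2011" tables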
mergeron/core/guidelines_boundary_functions.py
CHANGED
@@ -728,7 +728,10 @@ def lerp[LerpT: (float, MPFloat, ArrayDouble)](
         case _:
             if not 0 <= _r <= 1:
                 raise ValueError("Specified interpolation weight must lie in [0, 1].")
-
+            if isinstance(_x1, np.ndarray) or isinstance(_x2, np.ndarray):
+                return (1 - _r) * _x1 + _r * _x2
+            else:
+                return fma(_x2, _r, fma(_x1, -_r, _x1))
 
 
 def round_cust(
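The new scalar branch evaluates the interpolation as fma(_x2, _r, fma(_x1, -_r, _x1)), which is algebraically _x1 * (1 - _r) + _x2 * _r but rounds only once per fused step and is exact at both endpoints; ndarray inputs take the plain formula, presumably because fma is scalar-only. A sketch of the same identity, assuming math.fma (Python 3.13+) stands in for the package's fma:

    from math import fma  # assumption: a stand-in for the fma the package uses

    def lerp_scalar(x1: float, x2: float, r: float) -> float:
        if not 0 <= r <= 1:
            raise ValueError("Specified interpolation weight must lie in [0, 1].")
        # fma(x1, -r, x1) == x1*(1 - r); the outer fma adds x2*r with one rounding
        return fma(x2, r, fma(x1, -r, x1))

    print(lerp_scalar(0.1, 0.3, 0.5))         # midpoint, 0.2
    print(lerp_scalar(0.1, 0.3, 1.0) == 0.3)  # True: exact at the right endpoint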
mergeron/gen/enforcement_stats.py
CHANGED
@@ -7,8 +7,7 @@ import numpy as np
 from scipy.interpolate import make_interp_spline  # type: ignore
 
 from .. import VERSION, ArrayBIGINT, Enameled, this_yaml  # noqa: TID252
-from ..core import INVData, INVTableData  # noqa: TID252
-from ..core import ftc_merger_investigations_data as fid  # noqa: TID252
+from ..core import TABLE_TYPES, TTL_KEY, INVData, INVTableData  # noqa: TID252
 from . import INVResolution
 
 __version__ = VERSION
@@ -117,7 +116,7 @@ ZONE_STRINGS = {
     0: R"Green Zone (Safeharbor)",
     1: R"Yellow Zone",
     2: R"Red Zone (SLC Presumption)",
-    fid.TTL_KEY: "TOTAL",
+    TTL_KEY: "TOTAL",
 }
 ZONE_DETAIL_STRINGS_HHI = {
     0: Rf"HHI < {HHI_POST_ZONE_KNOTS[1]} pts.",
@@ -226,7 +225,7 @@ def enf_cnts_obs_byfirmcount(
         f"Must be one of, {tuple(_data_array_dict.keys())!r}."
     )
 
-    data_array_dict_sub = _data_array_dict[_data_period][fid.TABLE_TYPES[1]]
+    data_array_dict_sub = _data_array_dict[_data_period][TABLE_TYPES[1]]
 
     table_no_ = table_no_lku(data_array_dict_sub, _table_ind_group, _table_evid_cond)
 
@@ -280,7 +279,7 @@ def enf_cnts_obs_byhhianddelta(
         f"Must be one of, {tuple(_data_array_dict.keys())!r}."
     )
 
-    data_array_dict_sub = _data_array_dict[_data_period][fid.TABLE_TYPES[0]]
+    data_array_dict_sub = _data_array_dict[_data_period][TABLE_TYPES[0]]
 
     table_no_ = table_no_lku(data_array_dict_sub, _table_ind_group, _table_evid_cond)
 
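The substance of these hunks is an import cleanup: constants formerly reached through the ftc_merger_investigations_data module (imported as fid) now come straight from mergeron.core, so lookups read TABLE_TYPES[1] instead of fid.TABLE_TYPES[1]. A sketch of the resulting access pattern (placeholder dictionary contents):

    TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")

    # Nested layout: period -> table type -> table number -> table data
    data_array_dict = {"1996-2011": {"ByHHIandDelta": {}, "ByFirmCount": {}}}

    by_firm_count = data_array_dict["1996-2011"][TABLE_TYPES[1]]  # firm-count tables
    by_hhi_delta = data_array_dict["1996-2011"][TABLE_TYPES[0]]   # HHI/∆HHI tables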
{mergeron-2025.739439.14.dist-info → mergeron-2025.739439.19.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739439.14
+Version: 2025.739439.19
 Summary: Python for analyzing merger enforcement policy
 License: MIT
 Keywords: merger enforcement policy,merger guidelines,merger screening,enforcement presumptions,concentration standards,diversion ratio,upward pricing pressure,GUPPI
@@ -11,7 +11,6 @@ Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: End Users/Desktop
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: Implementation :: CPython
{mergeron-2025.739439.14.dist-info → mergeron-2025.739439.19.dist-info}/RECORD
CHANGED
@@ -1,9 +1,10 @@
-mergeron/__init__.py,sha256=
-mergeron/core/__init__.py,sha256=
+mergeron/__init__.py,sha256=l6ZRld7F3uTKlND0dbcE6AsSTLSIn9Zaig86VEVibsk,6734
+mergeron/core/__init__.py,sha256=3QrvMtE0OPfdvhOHo_B2AopWMZHNGpCAjdx8wuYu1Wk,4584
+mergeron/core/_process_ftc_merger_investigations_data.py,sha256=Ros_Ew9mqBmptvhf3jonS2Et8_OoFhN3ODtO1Q2Xv-c,13657
 mergeron/core/empirical_margin_distribution.py,sha256=61U-KLB563BPWM5zWyWp82c4PhcsAG-IKI0WWYGjBKg,11740
-mergeron/core/ftc_merger_investigations_data.py,sha256=
+mergeron/core/ftc_merger_investigations_data.py,sha256=JjnsSlBBQX_-aiQuVEOInuMpsETbx6aXDUSTikCfvns,14552
 mergeron/core/guidelines_boundaries.py,sha256=Z8rZvhHrxXBgrLGFpb6yldc8h3lN9rGtGj4yu-fyVBA,15450
-mergeron/core/guidelines_boundary_functions.py,sha256=
+mergeron/core/guidelines_boundary_functions.py,sha256=zgKHOWZcPuI6hbTkHb7O5YxYW8rCfZJATHM_gmdVhjw,30841
 mergeron/core/pseudorandom_numbers.py,sha256=CFp-8eu0q2g-81LA0k2oCFltmp6Er7EkrAkoG19G7Os,10138
 mergeron/data/__init__.py,sha256=SAFkR23RBM0zwGam2TeWmw08oHAKmU2YF-Nygj73ies,1845
 mergeron/data/damodaran_margin_data_serialized.zip,sha256=Wc1v9buSrYTWWAravG8W9nPbgsU07zMtSAR2RvMQU5s,623482
@@ -11,11 +12,11 @@ mergeron/data/ftc_merger_investigations_data.zip,sha256=tiB2TLFyS9LMSFIv8DBA_oEE
 mergeron/gen/__init__.py,sha256=6xUhaG4kWj2Qx8hLjgjupFWcJ0ZzAKDY9TN7mAFrANI,23880
 mergeron/gen/data_generation.py,sha256=cZW3Dc6bNiBUPXjTDHZDwTc6x1sxXq2STCzfsvk6_tw,17638
 mergeron/gen/data_generation_functions.py,sha256=SWzZ3I7ulkGBcL2F5CCKw2IvCm_wEplvqBasnSjSyU0,26129
-mergeron/gen/enforcement_stats.py,sha256=
+mergeron/gen/enforcement_stats.py,sha256=2MNEMxBgeIennS8hsiN-33aVEA_qGHy5hIh9FSxl0no,14324
 mergeron/gen/upp_tests.py,sha256=v-tnhQ85j8zL-TTE52GC61GEZSRFfdCkjaitVQIz0FI,6464
 mergeron/perks/__init__.py,sha256=gGRIuRc7I6OuWLzwSiSZSIE0PEoxAy2DRFWg0VVLlbE,484
 mergeron/perks/guidelines_boundary_functions_extra.py,sha256=q-Cqk9t5oj4yiAsmZJcsfrH434oGvza4YVspFYpdV0g,22113
 mergeron/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
-mergeron-2025.739439.14.dist-info/METADATA,sha256=
-mergeron-2025.739439.14.dist-info/WHEEL,sha256=
-mergeron-2025.739439.14.dist-info/RECORD,,
+mergeron-2025.739439.19.dist-info/METADATA,sha256=iNDwME8nXVTJyiD63Ra2DtbxH6vShIhKAEiOKmo-uJk,4116
+mergeron-2025.739439.19.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+mergeron-2025.739439.19.dist-info/RECORD,,

{mergeron-2025.739439.14.dist-info → mergeron-2025.739439.19.dist-info}/WHEEL
File without changes