mergeron-2024.738963.0-py3-none-any.whl → mergeron-2025.739265.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +142 -93
- mergeron/core/guidelines_boundaries.py +289 -1077
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +257 -245
- mergeron/gen/data_generation.py +473 -221
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +159 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -259
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738963.0.dist-info/METADATA +0 -108
- mergeron-2024.738963.0.dist-info/RECORD +0 -30
- mergeron/{core → data}/ftc_invdata.msgpack +0 -0
mergeron/core/ftc_merger_investigations_data.py

@@ -4,30 +4,29 @@ as necessary
 
 
 NOTES
 -----
-
+Reported row and column totals from source data are not stored.
 
 """
 
 import shutil
 from collections.abc import Mapping, Sequence
-from importlib
+from importlib import resources
 from operator import itemgetter
 from pathlib import Path
 from types import MappingProxyType
-from typing import Any, NamedTuple
+from typing import Any, NamedTuple
 
 import msgpack  # type: ignore
 import msgpack_numpy as m  # type: ignore
 import numpy as np
 import re2 as re  # type: ignore
-import
+import urllib3
 from bs4 import BeautifulSoup
 from numpy.testing import assert_array_equal
-from numpy.typing import NDArray
 
-from .. import _PKG_NAME, DATA_DIR  # noqa: TID252
+from .. import _PKG_NAME, DATA_DIR, VERSION, ArrayBIGINT  # noqa: TID252
 
-__version__ =
+__version__ = VERSION
 
 m.patch()
 
@@ -36,11 +35,16 @@ if not FTCDATA_DIR.is_dir():
     FTCDATA_DIR.mkdir(parents=True)
 
 INVDATA_ARCHIVE_PATH = DATA_DIR / "ftc_invdata.msgpack"
-if
-
-
-
-
+if (
+    not INVDATA_ARCHIVE_PATH.is_file()
+    and (
+        _bundled_copy := resources.files(f"{_PKG_NAME}.data").joinpath(
+            INVDATA_ARCHIVE_PATH.name
+        )
+    ).is_file()
+):
+    with resources.as_file(_bundled_copy) as _bundled_copy_path:
+        shutil.copy2(_bundled_copy_path, INVDATA_ARCHIVE_PATH)
 
 TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
 TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
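The new guard seeds INVDATA_ARCHIVE_PATH from a copy of the archive bundled inside the package, matching the mergeron/{core → data}/ftc_invdata.msgpack move in the file list. A minimal sketch of the same importlib.resources technique, with hypothetical mypkg.data/payload.msgpack names:

import shutil
from importlib import resources
from pathlib import Path

target = Path.home() / ".cache" / "mypkg" / "payload.msgpack"  # hypothetical destination
target.parent.mkdir(parents=True, exist_ok=True)

# Traversable for the copy shipped with the installed package (may live inside a zip)
bundled = resources.files("mypkg.data").joinpath("payload.msgpack")
if not target.is_file() and bundled.is_file():
    # as_file() yields a real filesystem path, extracting from an archive if necessary
    with resources.as_file(bundled) as bundled_path:
        shutil.copy2(bundled_path, target)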
@@ -86,16 +90,17 @@ CNT_FCOUNT_DICT = {
 
 
 class INVTableData(NamedTuple):
-
-
-    data_array:
+    industry_group: str
+    additional_evidence: str
+    data_array: ArrayBIGINT
 
 
-INVData
+type INVData = Mapping[str, Mapping[str, Mapping[str, INVTableData]]]
+type _INVData_in = dict[str, dict[str, dict[str, INVTableData]]]
 
 
 def construct_data(
-    _archive_path: Path
+    _archive_path: Path = INVDATA_ARCHIVE_PATH,
     *,
     flag_backward_compatibility: bool = True,
     flag_pharma_for_exclusion: bool = True,
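`type INVData = ...` and `type _INVData_in = ...` use the PEP 695 alias statement, which requires Python 3.12+; the pairing keeps a read-only Mapping for callers and a plain mutable dict for internal construction. A self-contained sketch of that pairing, with an illustrative two-field row standing in for the real INVTableData:

from collections.abc import Mapping
from types import MappingProxyType
from typing import NamedTuple

class Row(NamedTuple):  # stand-in for INVTableData
    industry_group: str
    additional_evidence: str

type Frozen = Mapping[str, Row]  # what callers receive
type Builder = dict[str, Row]    # what the builder mutates

def build() -> Frozen:
    acc: Builder = {}
    acc["Table 3.1"] = Row("All Markets", "Unrestricted on additional evidence")
    return MappingProxyType(acc)  # read-only view over the same dict

print(build()["Table 3.1"].industry_group)  # All Markets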
@@ -130,11 +135,11 @@ def construct_data(
     A dictionary of merger investigations data keyed to reporting periods
 
     """
-
+
     if _archive_path.is_file() and not rebuild_data:
         _archived_data = msgpack.unpackb(_archive_path.read_bytes(), use_list=False)
 
-        _invdata:
+        _invdata: _INVData_in = {}
         for _period in _archived_data:
             _invdata[_period] = {}
             for _table_type in _archived_data[_period]:
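`use_list=False` makes msgpack decode sequences as tuples, and the `m.patch()` call from the import hunk lets NumPy arrays pass through packb/unpackb unchanged. A minimal round-trip sketch (not the package's actual archive schema):

import msgpack
import msgpack_numpy as m
import numpy as np

m.patch()  # register numpy-aware encoders/decoders with msgpack

payload = {"1996-2003": {"counts": np.arange(6, dtype=np.int64).reshape(2, 3)}}
blob = msgpack.packb(payload)
restored = msgpack.unpackb(blob, use_list=False)  # sequences decode as tuples
assert np.array_equal(restored["1996-2003"]["counts"], payload["1996-2003"]["counts"])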
@@ -145,7 +150,7 @@ def construct_data(
             )
         return MappingProxyType(_invdata)
 
-    _invdata = dict(_parse_invdata())  # Convert immutable to mutable
+    _invdata = dict(_parse_invdata())  # type: ignore # Convert immutable to mutable
 
     # Add some data periods (
     # only periods ending in 2011, others have few observations and
@@ -161,7 +166,7 @@ def construct_data(
 
     # Create data for industries with no evidence on entry
     for _data_period in _invdata:
-
+        _construct_no_evidence_data(_invdata, _data_period)
 
     # Create a list of exclusions to named industries in the base period,
     # for construction of aggregate enforcement statistics where feasible
@@ -181,7 +186,9 @@ def construct_data(
     _aggr_tables_list = [
         _t
         for _t in _invdata["1996-2003"][_table_type]
-        if re.sub(
+        if re.sub(
+            r"\W", "", _invdata["1996-2003"][_table_type][_t].industry_group
+        )
         not in _industry_exclusion_list
     ]
 
@@ -191,42 +198,54 @@ def construct_data(
         )
     }
 
-    _ = INVDATA_ARCHIVE_PATH.write_bytes(msgpack.packb(_invdata))
+    _ = INVDATA_ARCHIVE_PATH.write_bytes(msgpack.packb(_invdata))  # pyright: ignore
 
     return MappingProxyType(_invdata)
 
 
-def
+def _construct_no_evidence_data(_invdata: _INVData_in, _data_period: str, /) -> None:
     _invdata_ind_grp = "All Markets"
-
-
-    _invdata_sub_evid_cond_conc = _invdata[_data_period]["ByHHIandDelta"]
-    _invdata_sub_evid_cond_conc["Table 9.X"] = INVTableData(
-        _invdata_ind_grp,
-        _invdata_evid_cond,
-        np.column_stack((
-            _invdata_sub_evid_cond_conc["Table 3.1"].data_array[:, :2],
+    _table_nos_map = dict(
+        zip(
             (
-
-
-
+                "No Entry Evidence",
+                "No Evidence on Customer Complaints",
+                "No Evidence on Hot Documents",
             ),
-        )),
-    )
-
-    _invdata_sub_evid_cond_fcount = _invdata[_data_period]["ByFirmCount"]
-    _invdata_sub_evid_cond_fcount["Table 10.X"] = INVTableData(
-        _invdata_ind_grp,
-        _invdata_evid_cond,
-        np.column_stack((
-            _invdata_sub_evid_cond_fcount["Table 4.1"].data_array[:, :1],
             (
-
-
-
+                {"ByHHIandDelta": "Table 9.X", "ByFirmCount": "Table 10.X"},
+                {"ByHHIandDelta": "Table 7.X", "ByFirmCount": "Table 8.X"},
+                {"ByHHIandDelta": "Table 5.X", "ByFirmCount": "Table 6.X"},
             ),
-
+            strict=True,
+        )
     )
+    for _invdata_evid_cond in (
+        "No Entry Evidence",
+        "No Evidence on Customer Complaints",
+        "No Evidence on Hot Documents",
+    ):
+        for _stats_grp in ("ByHHIandDelta", "ByFirmCount"):
+            _invdata_sub_evid_cond_conc = _invdata[_data_period][_stats_grp]
+
+            _dtn = _table_nos_map[_invdata_evid_cond]["ByHHIandDelta"]
+            _stn0 = "Table 4.1" if _stats_grp == "ByFirmCount" else "Table 3.1"
+            _stn1, _stn2 = (_dtn.replace(".X", f".{_i}") for _i in ("1", "2"))
+
+            _invdata_sub_evid_cond_conc |= {
+                _dtn: INVTableData(
+                    _invdata_ind_grp,
+                    _invdata_evid_cond,
+                    np.column_stack((
+                        _invdata_sub_evid_cond_conc[_stn0].data_array[:, :2],
+                        (
+                            _invdata_sub_evid_cond_conc[_stn0].data_array[:, 2:]
+                            - _invdata_sub_evid_cond_conc[_stn1].data_array[:, 2:]
+                            - _invdata_sub_evid_cond_conc[_stn2].data_array[:, 2:]
+                        ),
+                    )),
+                )
+            }
 
 
 def _construct_new_period_data(
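The rewritten _construct_no_evidence_data derives each synthetic ".X" table by carrying over the leading range columns of the unrestricted table and subtracting the two evidence-restricted tables from its count columns. A small worked sketch with hypothetical 4-column arrays (two range-id columns, then enforced/closed counts):

import numpy as np

unrestricted = np.array([[0, 0, 10, 4], [0, 1, 7, 5]], dtype=np.int64)  # all investigations
evidence_a = np.array([[0, 0, 3, 1], [0, 1, 2, 2]], dtype=np.int64)     # evidence condition 1
evidence_b = np.array([[0, 0, 4, 2], [0, 1, 1, 1]], dtype=np.int64)     # evidence condition 2

no_evidence = np.column_stack((
    unrestricted[:, :2],  # range-id columns carried over unchanged
    unrestricted[:, 2:] - evidence_a[:, 2:] - evidence_b[:, 2:],  # residual counts
))
print(no_evidence)  # rows: [0 0 3 1], [0 1 4 2]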
@@ -254,8 +273,8 @@ def _construct_new_period_data(
     for _table_no in _invdata_cuml[_table_type]:
         _invdata_cuml_sub_table = _invdata_cuml[_table_type][_table_no]
         _invdata_ind_group, _invdata_evid_cond, _invdata_cuml_array = (
-            _invdata_cuml_sub_table.
-            _invdata_cuml_sub_table.
+            _invdata_cuml_sub_table.industry_group,
+            _invdata_cuml_sub_table.additional_evidence,
             _invdata_cuml_sub_table.data_array,
         )
 
@@ -279,7 +298,7 @@ def _construct_new_period_data(
     # Consistency here means that the number of investigations reported
     # in each period is no less than the number reported in
     # any prior period.Although the time periods for table 3.2 through 3.5
-    # are not the
+    # are not the same in the data for 1996-2005 and 1996-2007 as in
     # the data for the other periods, they are nonetheless shorter than
     # the period 1996-2011, and hence the counts reported for 1996-2011
     # cannot be less than those reported in these prior periods. Note that
@@ -331,7 +350,8 @@ def _construct_new_period_data(
         _invdata_cuml_array[:, -3:-1] - _invdata_base_array[:, -3:-1]  # type: ignore
     )
 
-    #
+    # # // spellchecker: disable
+    # To examine the number of corrected values per table, // spellchecker: disable
     # uncomment the statements below
     # _invdata_array_bld_tbc = where(
     #     _invdata_array_bld_enfcls < 0, _invdata_array_bld_enfcls, 0
@@ -341,6 +361,7 @@ def _construct_new_period_data(
     #     f"{_data_period}, {_table_no}, {_invdata_ind_group}:",
     #     abs(np.einsum('ij->', invdata_array_bld_tbc))
     # )
+    # # // spellchecker: disable
 
     # Enforce non-negativity
     _invdata_array_bld_enfcls = np.stack((
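These hunks difference cumulative counts across reporting periods and then clamp negative residuals, treating them as source-data inconsistencies. A sketch of that correction in isolation, with made-up counts:

import numpy as np

cumulative = np.array([12, 9, 21], dtype=np.int64)  # counts through the later period
base = np.array([10, 11, 21], dtype=np.int64)       # counts through the earlier period

residual = cumulative - base  # second entry is -2: inconsistent source tables
# Diagnostic in the spirit of the commented-out block above: size of the correction
print(abs(np.einsum("i->", np.where(residual < 0, residual, 0))))  # 2
residual = np.where(residual < 0, 0, residual)  # enforce non-negativity
print(residual)  # [2 0 0]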
@@ -395,22 +416,23 @@ def _parse_invdata() -> INVData:
     by range of HHI and ∆HHI.
 
     """
+    raise ValueError(
+        "This function is defined here as documentation.\n"
+        "NOTE: License for `pymupdf`, upon which this function depends,"
+        " may be incompatible with the MIT license,"
+        " under which this pacakge is distributed."
+        " Making this fumction operable requires the user to modify"
+        " the source code as well as to install an additional package"
+        " not distributed with this package or included in its dependencies."
+    )
     import fitz  # type: ignore
-    # user must install pymupdf to make this function operable
 
-    _invdata_docnames
-        "040831horizmergersdata96-03.pdf",
-        "p035603horizmergerinvestigationdata1996-2005.pdf",
-        "081201hsrmergerdata.pdf",
-        "130104horizontalmergerreport.pdf",
-    )
+    _invdata_docnames = _download_invdata(FTCDATA_DIR)
 
     _invdata: dict[str, dict[str, dict[str, INVTableData]]] = {}
 
     for _invdata_docname in _invdata_docnames:
         _invdata_pdf_path = FTCDATA_DIR.joinpath(_invdata_docname)
-        if not _invdata_pdf_path.is_file():
-            _download_invdata(FTCDATA_DIR)
 
         _invdata_fitz = fitz.open(_invdata_pdf_path)
         _invdata_meta = _invdata_fitz.metadata
@@ -475,7 +497,7 @@ def _parse_invdata() -> INVData:
 
 
 def _parse_page_blocks(
-    _invdata:
+    _invdata: _INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
 ) -> None:
     if _data_period != "1996-2011":
         _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
@@ -502,7 +524,7 @@ def _parse_page_blocks(
 
 
 def _parse_table_blocks(
-    _invdata:
+    _invdata: _INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
 ) -> None:
     _invdata_evid_cond = "Unrestricted on additional evidence"
     _table_num, _table_ser, _table_type = _identify_table_type(
@@ -577,12 +599,12 @@ def _identify_table_type(_tnstr: str = CONC_TABLE_ALL, /) -> tuple[str, int, str
 
 def _process_table_blks_conc_type(
     _table_blocks: Sequence[Sequence[str]], /
-) ->
+) -> ArrayBIGINT:
     _conc_row_pat = re.compile(r"((?:0|\d,\d{3}) (?:- \d+,\d{3}|\+)|TOTAL)")
 
     _col_titles_array = tuple(CONC_DELTA_DICT.values())
-    _col_totals:
-    _invdata_array:
+    _col_totals: ArrayBIGINT = np.zeros(len(_col_titles_array), np.int64)
+    _invdata_array: ArrayBIGINT = np.array(None)
 
     for _tbl_blk in _table_blocks:
         if _conc_row_pat.match(_blk_str := _tbl_blk[-3]):
@@ -610,7 +632,7 @@ def _process_table_blks_conc_type(
             _col_totals = _row_array
         else:
             _invdata_array = (
-                np.
+                np.vstack((_invdata_array, _row_array))
                 if _invdata_array.shape
                 else _row_array
             )
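Both block parsers grow their result with the same sentinel idiom: np.array(None) is a 0-d array whose empty .shape tuple is falsy, so the first parsed row replaces the sentinel and later rows are stacked beneath it. The idiom in isolation:

import numpy as np

acc = np.array(None)  # 0-d sentinel; acc.shape == () is falsy
for row in ([1, 2, 3], [4, 5, 6]):
    row_array = np.array(row, dtype=np.int64)
    acc = np.vstack((acc, row_array)) if acc.shape else row_array

print(acc.shape)  # (2, 3)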
@@ -634,13 +656,11 @@ def _process_table_blks_conc_type(
 
 def _process_table_blks_cnt_type(
     _table_blocks: Sequence[Sequence[str]], /
-) ->
+) -> ArrayBIGINT:
    _cnt_row_pat = re.compile(r"(\d+ (?:to \d+|\+)|TOTAL)")
 
-    _invdata_array:
-    _col_totals:
-        3, np.int64
-    )  # "enforced", "closed", "total"
+    _invdata_array: ArrayBIGINT = np.array(None)
+    _col_totals: ArrayBIGINT = np.zeros(3, np.int64)  # "enforced", "closed", "total"
 
     for _tbl_blk in _table_blocks:
         if _cnt_row_pat.match(_blk_str := _tbl_blk[-3]):
@@ -657,7 +677,7 @@ def _process_table_blks_cnt_type(
             _col_totals = _row_list
         else:
             _invdata_array = (
-                np.
+                np.vstack((_invdata_array, _row_list))
                 if _invdata_array.shape
                 else _row_list
             )
@@ -673,32 +693,61 @@ def _process_table_blks_cnt_type(
     return _invdata_array[np.argsort(_invdata_array[:, 0])]
 
 
-def _download_invdata(_dl_path: Path) ->
+def _download_invdata(_dl_path: Path = FTCDATA_DIR) -> tuple[str, ...]:
+    if not _dl_path.is_dir():
+        _dl_path.mkdir(parents=True)
+
     _invdata_homepage_urls = (
         "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
         "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
         "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
         "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
     )
-    _invdata_docnames =
+    _invdata_docnames = (
+        "040831horizmergersdata96-03.pdf",
+        "p035603horizmergerinvestigationdata1996-2005.pdf",
+        "081201hsrmergerdata.pdf",
+        "130104horizontalmergerreport.pdf",
+    )
+
+    if all(
+        _dl_path.joinpath(_invdata_docname).is_file()
+        for _invdata_docname in _invdata_docnames
+    ):
+        return _invdata_docnames
+
+    _invdata_docnames_dl: tuple[str, ...] = ()
+    _u3pm = urllib3.PoolManager()
+    _chunk_size = 1024 * 1024
     for _invdata_homepage_url in _invdata_homepage_urls:
-
-
-
-
-
-
-
-
-
+        with _u3pm.request(
+            "GET", _invdata_homepage_url, preload_content=False
+        ) as _u3handle:
+            _invdata_soup = BeautifulSoup(_u3handle.data, "html.parser")
+            _invdata_attrs = [
+                (_g.get("title", ""), _g.get("href", ""))
+                for _g in _invdata_soup.find_all("a")
+                if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
+            ]
         for _invdata_attr in _invdata_attrs:
-
-
-            with
-
-
-
-
-
+            _invdata_docname, _invdata_link = _invdata_attr
+            _invdata_docnames_dl += (_invdata_docname,)
+            with (
+                _u3pm.request(
+                    "GET", f"https://www.ftc.gov/{_invdata_link}", preload_content=False
+                ) as _urlopen_handle,
+                _dl_path.joinpath(_invdata_docname).open("wb") as _invdata_fh,
+            ):
+                while True:
+                    _data = _urlopen_handle.read(_chunk_size)
+                    if not _data:
+                        break
+                    _invdata_fh.write(_data)
+
+    return _invdata_docnames_dl
 
-
+
+if __name__ == "__main__":
+    print(
+        "This module defines functions for downloading and preparing FTC merger investigations data for further analysis."
+    )