mergeron 2025.739439.15__tar.gz → 2025.739439.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mergeron might be problematic.

Files changed (23)
  1. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/PKG-INFO +1 -2
  2. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/pyproject.toml +9 -4
  3. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/__init__.py +1 -1
  4. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/__init__.py +61 -0
  5. mergeron-2025.739439.19/src/mergeron/core/_process_ftc_merger_investigations_data.py +379 -0
  6. mergeron-2025.739439.19/src/mergeron/core/ftc_merger_investigations_data.py +364 -0
  7. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/guidelines_boundary_functions.py +4 -1
  8. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/enforcement_stats.py +4 -5
  9. mergeron-2025.739439.15/src/mergeron/core/ftc_merger_investigations_data.py +0 -764
  10. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/README.rst +0 -0
  11. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/empirical_margin_distribution.py +0 -0
  12. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/guidelines_boundaries.py +0 -0
  13. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/pseudorandom_numbers.py +0 -0
  14. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/__init__.py +0 -0
  15. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/damodaran_margin_data_serialized.zip +0 -0
  16. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/data/ftc_merger_investigations_data.zip +0 -0
  17. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/__init__.py +0 -0
  18. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/data_generation.py +0 -0
  19. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/data_generation_functions.py +0 -0
  20. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/gen/upp_tests.py +0 -0
  21. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/perks/__init__.py +0 -0
  22. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/perks/guidelines_boundary_functions_extra.py +0 -0
  23. {mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/py.typed +0 -0
{mergeron-2025.739439.15 → mergeron-2025.739439.19}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mergeron
-Version: 2025.739439.15
+Version: 2025.739439.19
 Summary: Python for analyzing merger enforcement policy
 License: MIT
 Keywords: merger enforcement policy,merger guidelines,merger screening,enforcement presumptions,concentration standards,diversion ratio,upward pricing pressure,GUPPI
@@ -11,7 +11,6 @@ Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: End Users/Desktop
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: Implementation :: CPython
{mergeron-2025.739439.15 → mergeron-2025.739439.19}/pyproject.toml

@@ -15,7 +15,7 @@ keywords = [
     "upward pricing pressure",
     "GUPPI",
 ]
-version = "2025.739439.15"
+version = "2025.739439.19"
 requires-python = ">=3.13,<4.0" # need math.fma

 # Classifiers list: https://pypi.org/classifiers/
@@ -24,7 +24,6 @@ classifiers = [
     "Environment :: Console",
     "Intended Audience :: End Users/Desktop",
     "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Programming Language :: Python",
     "Programming Language :: Python :: Implementation :: CPython",
@@ -189,6 +188,8 @@ preview = true

 cache_fine_grained = true
 ignore_missing_imports = false
+warn_unreachable = false
+warn_redundant_casts = true
 strict = true
 local_partial_types = true
 allow_redefinition_new = true
@@ -240,8 +241,12 @@ commands = [
     "poetry",
     "install",
     "--without",
-    "doc,lint",
+    "doc",
     "--no-root",
+],[
+    "ruff", "check", "./src",
+],[
+    "mypy", "./src",
 ],
 [
     "pytest",
@@ -249,5 +254,5 @@ commands = [
 ],
 ]

-description = "install pytest in a virtual environment and invoke it on the tests folder"
+description = "install ruff, mypy, pytest in a virtual environment and invoke each, in turn"
 deps = ["poetry"]
{mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/__init__.py

@@ -15,7 +15,7 @@ from ruamel import yaml

 _PKG_NAME: str = Path(__file__).parent.name

-VERSION = "2025.739439.15"
+VERSION = "2025.739439.19"

 __version__ = VERSION

{mergeron-2025.739439.15 → mergeron-2025.739439.19}/src/mergeron/core/__init__.py

@@ -2,6 +2,7 @@

 from __future__ import annotations

+import shutil
 from collections.abc import Mapping
 from decimal import Decimal
 from types import MappingProxyType
@@ -20,6 +21,8 @@ from .. import ( # noqa: TID252
     yamelize_attrs,
     yaml_rt_mapper,
 )
+from .. import WORK_DIR as PKG_WORK_DIR  # noqa: TID252
+from .. import data as mdat  # noqa: TID252

 __version__ = VERSION

@@ -40,6 +43,64 @@ class GuidelinesBoundary:
     """Area under the boundary."""


+WORK_DIR = globals().get("WORK_DIR", PKG_WORK_DIR)
+"""Redefined, in case the user defines WORK_DIR between module imports."""
+
+FID_WORK_DIR = WORK_DIR / "FTCData"
+if not FID_WORK_DIR.is_dir():
+    FID_WORK_DIR.mkdir(parents=True)
+
+INVDATA_ARCHIVE_PATH = WORK_DIR / mdat.FTC_MERGER_INVESTIGATIONS_DATA.name
+if not INVDATA_ARCHIVE_PATH.is_file():
+    shutil.copy2(mdat.FTC_MERGER_INVESTIGATIONS_DATA, INVDATA_ARCHIVE_PATH)  # type: ignore
+
+TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")
+CONC_TABLE_ALL = "Table 3.1"
+CNT_TABLE_ALL = "Table 4.1"
+
+TTL_KEY = 86825
+CONC_HHI_DICT = {
+    "0 - 1,799": 0,
+    "1,800 - 1,999": 1800,
+    "2,000 - 2,399": 2000,
+    "2,400 - 2,999": 2400,
+    "3,000 - 3,999": 3000,
+    "4,000 - 4,999": 4000,
+    "5,000 - 6,999": 5000,
+    "7,000 - 10,000": 7000,
+    "TOTAL": TTL_KEY,
+}
+CONC_DELTA_DICT = {
+    "0 - 100": 0,
+    "100 - 200": 100,
+    "200 - 300": 200,
+    "300 - 500": 300,
+    "500 - 800": 500,
+    "800 - 1,200": 800,
+    "1,200 - 2,500": 1200,
+    "2,500 - 5,000": 2500,
+    "TOTAL": TTL_KEY,
+}
+CNT_FCOUNT_DICT = {
+    "2 to 1": 2,
+    "3 to 2": 3,
+    "4 to 3": 4,
+    "5 to 4": 5,
+    "6 to 5": 6,
+    "7 to 6": 7,
+    "8 to 7": 8,
+    "9 to 8": 9,
+    "10 to 9": 10,
+    "10 +": 11,
+    "TOTAL": TTL_KEY,
+}
+
+
+def invert_map(_dict: Mapping[Any, Any]) -> Mapping[Any, Any]:
+    """Invert mapping, mapping values to keys of the original mapping."""
+    return {_v: _k for _k, _v in _dict.items()}
+
+
 @frozen
 class INVTableData:
     """Represents individual table of FTC merger investigations data."""
mergeron-2025.739439.19/src/mergeron/core/_process_ftc_merger_investigations_data.py

@@ -0,0 +1,379 @@
+"""Download and parse FTC Merger Investigations Data.
+
+This module is provided as documentation only. The package
+:code:`pymupdf` is a requirement of this module but is
+distributed under a license that may be incompatible with
+the MIT license under which this package is distributed.
+
+"""
+
+import re
+from collections.abc import Sequence
+from operator import itemgetter
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+# import pymupdf  # type: ignore
+import urllib3
+from bs4 import BeautifulSoup
+from numpy.testing import assert_array_equal
+
+from .. import ArrayBIGINT  # noqa: TID252
+from . import (
+    CNT_FCOUNT_DICT,
+    CONC_DELTA_DICT,
+    CONC_HHI_DICT,
+    CONC_TABLE_ALL,
+    FID_WORK_DIR,
+    TABLE_TYPES,
+    TTL_KEY,
+    INVData,
+    INVData_in,
+    INVTableData,
+    _mappingproxy_from_mapping,
+)
+
+TABLE_NO_RE = re.compile(r"Table \d+\.\d+")
+
+
+def _parse_invdata() -> INVData:
+    """Parse FTC merger investigations data reports to structured data.
+
+    Returns
+    -------
+    Immutable dictionary of merger investigations data, keyed to
+    reporting period, and including all tables organized by
+    Firm Count (number of remaining competitors) and
+    by range of HHI and ∆HHI.
+    """
+    raise ValueError(
+        "This function is defined here as documentation.\n"
+        "NOTE: License for `pymupdf`, upon which this function depends,"
+        " may be incompatible with the MIT license,"
+        " under which this package is distributed."
+        " Making this function operable requires the user to modify"
+        " the source code as well as to install an additional package"
+        " not distributed with this package or identified as a requirement."
+    )
+
+    invdata_docnames = _download_invdata(FID_WORK_DIR)
+
+    invdata: INVData_in = {}
+
+    for invdata_docname in invdata_docnames:
+        invdata_pdf_path = FID_WORK_DIR.joinpath(invdata_docname)
+
+        invdata_doc = pymupdf.open(invdata_pdf_path)  # type: ignore # noqa: F821
+        invdata_meta = invdata_doc.metadata
+        if invdata_meta["title"] == " ":
+            invdata_meta["title"] = ", ".join((
+                "Horizontal Merger Investigation Data",
+                "Fiscal Years",
+                "1996-2005",
+            ))
+
+        data_period = "".join(  # line-break here for readability
+            re.findall(r"(\d{4}) *(-) *(\d{4})", invdata_meta["title"])[0]
+        )
+
+        # Initialize containers for parsed data
+        invdata[data_period] = {k: {} for k in TABLE_TYPES}
+
+        for pdf_pg in invdata_doc.pages():
+            doc_pg_blocks = pdf_pg.get_text("blocks", sort=False)
+            # Across all published reports of FTC investigations data,
+            # sorting lines (PDF page blocks) by the lower coordinates
+            # and then the left coordinates is most effective for
+            # ordering table rows in top-to-bottom order; this doesn't
+            # work for the 1996-2005 data, however, so we re-sort later
+            doc_pg_blocks = sorted([
+                (f"{_f[3]:03.0f}{_f[0]:03.0f}{_f[1]:03.0f}{_f[2]:03.0f}", *_f)
+                for _f in doc_pg_blocks
+                if _f[-1] == 0
+            ])
+
+            data_blocks: list[tuple[str]] = [("",)]
+            # Page layouts are not the same in all reports
+            pg_hdr_strings = (
+                "FEDERAL TRADE COMMISSION",
+                "HORIZONTAL MERGER INVESTIGATION DATA: FISCAL YEARS 1996 - 2011",
+            )
+            if len(doc_pg_blocks) > 4:
+                tnum = None
+                for _pg_blk in doc_pg_blocks:
+                    if tnum := TABLE_NO_RE.fullmatch(_pg_blk[-3].strip()):
+                        data_blocks = [
+                            b_
+                            for b_ in doc_pg_blocks
+                            if not b_[-3].startswith(pg_hdr_strings)
+                            and (
+                                b_[-3].strip()
+                                not in {"Significant Competitors", "Post Merger HHI"}
+                            )
+                            and not re.fullmatch(r"\d+", b_[-3].strip())
+                        ]
+                        break
+                if not tnum:
+                    continue
+                del tnum
+            else:
+                continue
+
+            _parse_page_blocks(invdata, data_period, data_blocks)
+
+        invdata_doc.close()
+
+    return _mappingproxy_from_mapping(invdata)
+
+
+def _parse_page_blocks(
+    _invdata: INVData_in, _data_period: str, _doc_pg_blocks: Sequence[Sequence[Any]], /
+) -> None:
+    if _data_period != "1996-2011":
+        _parse_table_blocks(_invdata, _data_period, _doc_pg_blocks)
+    else:
+        test_list = [
+            (g, f[-3].strip())
+            for g, f in enumerate(_doc_pg_blocks)
+            if TABLE_NO_RE.fullmatch(f[-3].strip())
+        ]
+        # In the 1996-2011 report, there are 2 tables per page
+        if len(test_list) == 1:
+            table_a_blocks = _doc_pg_blocks
+            table_b_blocks: Sequence[Sequence[Any]] = []
+        else:
+            table_a_blocks, table_b_blocks = (
+                _doc_pg_blocks[test_list[0][0] : test_list[1][0]],
+                _doc_pg_blocks[test_list[1][0] :],
+            )
+
+        for table_i_blocks in table_a_blocks, table_b_blocks:
+            if not table_i_blocks:
+                continue
+            _parse_table_blocks(_invdata, _data_period, table_i_blocks)
+
+
+def _parse_table_blocks(
+    _invdata: INVData_in, _data_period: str, _table_blocks: Sequence[Sequence[str]], /
+) -> None:
+    invdata_evid_cond = "Unrestricted on additional evidence"
+    table_num, table_ser, table_type = _identify_table_type(
+        _table_blocks[0][-3].strip()
+    )
+
+    if _data_period == "1996-2011":
+        invdata_ind_group = (
+            _table_blocks[1][-3].split("\n")[1]
+            if table_num == "Table 4.8"
+            else _table_blocks[2][-3].split("\n", maxsplit=1)[0]
+        )
+
+        if table_ser > 4:
+            invdata_evid_cond = (
+                _table_blocks[2][-3].split("\n")[1]
+                if table_ser in {9, 10}
+                else _table_blocks[3][-3].strip()
+            )
+
+    elif _data_period == "1996-2005":
+        _table_blocks = sorted(_table_blocks, key=itemgetter(6))
+
+        invdata_ind_group = _table_blocks[3][-3].strip()
+        if table_ser > 4:
+            invdata_evid_cond = _table_blocks[5][-3].strip()
+
+    elif table_ser % 2 == 0:
+        invdata_ind_group = _table_blocks[1][-3].split("\n")[2]
+        if (evid_cond_teststr := _table_blocks[2][-3].strip()) == "Outcome":
+            invdata_evid_cond = "Unrestricted on additional evidence"
+        else:
+            invdata_evid_cond = evid_cond_teststr
+
+    elif _table_blocks[3][-3].startswith("FTC Horizontal Merger Investigations"):
+        invdata_ind_group = _table_blocks[3][-3].split("\n")[2]
+        invdata_evid_cond = "Unrestricted on additional evidence"
+
+    else:
+        # print(_table_blocks)
+        invdata_evid_cond = (
+            _table_blocks[1][-3].strip()
+            if table_ser == 9
+            else _table_blocks[3][-3].strip()
+        )
+        invdata_ind_group = _table_blocks[4][-3].split("\n")[2]
+
+    if invdata_ind_group == "Pharmaceutical Markets":
+        invdata_ind_group = "Pharmaceuticals Markets"
+
+    process_table_func = (
+        _process_table_blks_conc_type
+        if table_type == TABLE_TYPES[0]
+        else _process_table_blks_cnt_type
+    )
+
+    table_array = process_table_func(_table_blocks)
+    if not isinstance(table_array, np.ndarray) or table_array.dtype != int:
+        print(table_num)
+        print(_table_blocks)
+        raise ValueError
+
+    table_data = INVTableData(invdata_ind_group, invdata_evid_cond, table_array)
+    _invdata[_data_period][table_type] |= {table_num: table_data}
+
+
+def _identify_table_type(_tnstr: str = CONC_TABLE_ALL, /) -> tuple[str, int, str]:
+    tnum = _tnstr.split(" ")[1]
+    tsub = int(tnum.split(".")[0])
+    return _tnstr, tsub, TABLE_TYPES[(tsub + 1) % 2]
+
+
+def _process_table_blks_conc_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    conc_row_pat = re.compile(r"((?:0|\d,\d{3}) (?:- \d+,\d{3}|\+)|TOTAL)")
+
+    col_titles = tuple(CONC_DELTA_DICT.values())
+    col_totals: ArrayBIGINT = np.zeros(len(col_titles), int)
+    invdata_array: ArrayBIGINT = np.array(None)
+
+    for tbl_blk in _table_blocks:
+        if conc_row_pat.match(_blk_str := tbl_blk[-3]):
+            row_list: list[str] = _blk_str.strip().split("\n")
+            row_title: str = row_list.pop(0)
+            row_key: int = (
+                7000 if row_title.startswith("7,000") else CONC_HHI_DICT[row_title]
+            )
+            row_total = np.array(row_list.pop().replace(",", "").split("/"), int)
+            data_row_list: list[list[int]] = []
+            while row_list:
+                enfd_val, clsd_val = row_list.pop(0).split("/")
+                data_row_list += [
+                    [
+                        row_key,
+                        col_titles[len(data_row_list)],
+                        int(enfd_val),
+                        int(clsd_val),
+                        int(enfd_val) + int(clsd_val),
+                    ]
+                ]
+            data_row_array = np.array(data_row_list, int)
+            del data_row_list
+            # Check row totals
+            assert_array_equal(row_total, np.einsum("ij->j", data_row_array[:, 2:4]))
+
+            if row_key == TTL_KEY:
+                col_totals = data_row_array
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, data_row_array))
+                    if invdata_array.shape
+                    else data_row_array
+                )
+            del data_row_array
+        else:
+            continue
+
+    # Check column totals
+    for _col_tot in col_totals:
+        assert_array_equal(
+            _col_tot[2:],
+            np.einsum(
+                "ij->j", invdata_array[invdata_array[:, 1] == _col_tot[1]][:, 2:]
+            ),
+        )
+
+    return invdata_array[
+        np.argsort(np.einsum("ij,ij->i", [[100, 1]], invdata_array[:, :2]))
+    ]
+
+
+def _process_table_blks_cnt_type(
+    _table_blocks: Sequence[Sequence[str]], /
+) -> ArrayBIGINT:
+    cnt_row_pat = re.compile(r"(\d+ (?:to \d+|\+)|TOTAL)")
+
+    invdata_array: ArrayBIGINT = np.array(None)
+    col_totals: ArrayBIGINT = np.zeros(3, int)  # "enforced", "closed", "total"
+
+    for _tbl_blk in _table_blocks:
+        if cnt_row_pat.match(_blk_str := _tbl_blk[-3]):
+            row_list_s = _blk_str.strip().replace(",", "").split("\n")
+            row_list = np.array([CNT_FCOUNT_DICT[row_list_s[0]], *row_list_s[1:]], int)
+            del row_list_s
+            if row_list[3] != row_list[1] + row_list[2]:
+                raise ValueError(
+                    "Total number of investigations does not equal #enforced plus #closed."
+                )
+            if row_list[0] == TTL_KEY:
+                col_totals = row_list
+            else:
+                invdata_array = (
+                    np.vstack((invdata_array, row_list))
+                    if invdata_array.shape
+                    else row_list
+                )
+        else:
+            continue
+
+    if not np.array_equal(
+        np.array(list(col_totals[1:]), int), np.einsum("ij->j", invdata_array[:, 1:])
+    ):
+        raise ValueError("Column totals don't compute.")
+
+    return invdata_array[np.argsort(invdata_array[:, 0])]
+
+
+def _download_invdata(_dl_path: Path = FID_WORK_DIR) -> tuple[str, ...]:
+    if not _dl_path.is_dir():
+        _dl_path.mkdir(parents=True)
+
+    invdata_homepage_urls = (
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2005-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2007-0",
+        "https://www.ftc.gov/reports/horizontal-merger-investigation-data-fiscal-years-1996-2011",
+    )
+    invdata_docnames = (
+        "040831horizmergersdata96-03.pdf",
+        "p035603horizmergerinvestigationdata1996-2005.pdf",
+        "081201hsrmergerdata.pdf",
+        "130104horizontalmergerreport.pdf",
+    )
+
+    if all(
+        _dl_path.joinpath(invdata_docname).is_file()
+        for invdata_docname in invdata_docnames
+    ):
+        return invdata_docnames
+
+    invdata_docnames_dl: tuple[str, ...] = ()
+    u3pm = urllib3.PoolManager()
+    chunk_size_ = 1024 * 1024
+    for invdata_homepage_url in invdata_homepage_urls:
+        with u3pm.request(
+            "GET", invdata_homepage_url, preload_content=False
+        ) as _u3handle:
+            invdata_soup = BeautifulSoup(_u3handle.data, "html.parser")
+            invdata_attrs = [
+                (_g.get("title", ""), _g.get("href", ""))
+                for _g in invdata_soup.find_all("a")
+                if _g.get("title", "") and _g.get("href", "").endswith(".pdf")
+            ]
+        for invdata_attr in invdata_attrs:
+            invdata_docname, invdata_link = invdata_attr
+            invdata_docnames_dl += (invdata_docname,)
+            with (
+                u3pm.request(
+                    "GET", f"https://www.ftc.gov/{invdata_link}", preload_content=False
+                ) as _urlopen_handle,
+                _dl_path.joinpath(invdata_docname).open("wb") as invdata_fh,
+            ):
+                while True:
+                    data = _urlopen_handle.read(chunk_size_)
+                    if not data:
+                        break
+                    invdata_fh.write(data)
+
+    return invdata_docnames_dl
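Two details of the parser above are easy to miss: _identify_table_type selects the table type from the parity of the table series (odd series such as Table 3.1 are HHI/∆HHI tables, even series such as Table 4.1 are firm-count tables), and the row and column checks use np.einsum("ij->j", ...) as a column sum. A short self-contained sketch with made-up counts (illustrative only, not FTC data):

import numpy as np
from numpy.testing import assert_array_equal

TABLE_TYPES = ("ByHHIandDelta", "ByFirmCount")

# Series parity picks the table type: 3 -> (3 + 1) % 2 == 0 -> "ByHHIandDelta";
# 4 -> (4 + 1) % 2 == 1 -> "ByFirmCount".
for tnstr, expected in ("Table 3.1", "ByHHIandDelta"), ("Table 4.1", "ByFirmCount"):
    tsub = int(tnstr.split(" ")[1].split(".")[0])
    assert TABLE_TYPES[(tsub + 1) % 2] == expected

# "ij->j" sums over rows, giving per-column totals: hypothetical
# (enforced, closed) counts for three bins checked against a TOTAL row.
data_rows = np.array([[5, 2], [3, 1], [4, 4]])
published_totals = np.array([12, 7])
assert_array_equal(published_totals, np.einsum("ij->j", data_rows))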