mergeron 2024.739125.2__py3-none-any.whl → 2024.739127.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +1 -1
- mergeron/gen/data_generation.py +21 -21
- mergeron/gen/enforcement_stats.py +3 -553
- mergeron/gen/upp_tests.py +16 -17
- {mergeron-2024.739125.2.dist-info → mergeron-2024.739127.0.dist-info}/METADATA +1 -1
- mergeron-2024.739127.0.dist-info/RECORD +24 -0
- mergeron/data/jinja2_LaTeX_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_summarypaired_table_template_tabularray.tex.jinja2 +0 -81
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_summarypaired_table_template_tikz.tex.jinja2 +0 -142
- mergeron/data/jinja2_LaTeX_templates/mergeron.cls +0 -155
- mergeron/data/jinja2_LaTeX_templates/mergeron_table_collection_template.tex.jinja2 +0 -93
- mergeron/data/jinja2_LaTeX_templates/setup_tikz_tables.sty +0 -129
- mergeron-2024.739125.2.dist-info/RECORD +0 -32
- {mergeron-2024.739125.2.dist-info → mergeron-2024.739127.0.dist-info}/WHEEL +0 -0
|
@@ -4,28 +4,12 @@ Methods to format and print summary statistics on merger enforcement patterns.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import enum
|
|
7
|
-
import shutil
|
|
8
|
-
import subprocess
|
|
9
7
|
from collections.abc import Mapping
|
|
10
|
-
from importlib import resources
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from types import SimpleNamespace
|
|
13
|
-
from typing import Literal
|
|
14
8
|
|
|
15
9
|
import numpy as np
|
|
16
|
-
import re2 as re # type: ignore
|
|
17
|
-
from jinja2 import Environment, FileSystemLoader, Template, select_autoescape
|
|
18
10
|
from scipy.interpolate import interp1d # type: ignore
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
from .. import ( # noqa: TID252
|
|
22
|
-
_PKG_NAME,
|
|
23
|
-
DATA_DIR,
|
|
24
|
-
VERSION,
|
|
25
|
-
ArrayBIGINT,
|
|
26
|
-
ArrayDouble,
|
|
27
|
-
ArrayINT,
|
|
28
|
-
)
|
|
11
|
+
|
|
12
|
+
from .. import VERSION, ArrayBIGINT # noqa: TID252
|
|
29
13
|
from ..core import ftc_merger_investigations_data as fid # noqa: TID252
|
|
30
14
|
from . import INVResolution
|
|
31
15
|
|
|
@@ -81,92 +65,6 @@ class SortSelector(enum.StrEnum):
|
|
|
81
65
|
REV = "reversed"
|
|
82
66
|
|
|
83
67
|
|
|
84
|
-
cnt_format_str = R"{: >5,.0f}"
|
|
85
|
-
pct_format_str = R"{: >6.1f}\%"
|
|
86
|
-
ci_format_str = R"{0: >5.1f} [{2: >4.1f},{3: >5.1f}] \%"
|
|
87
|
-
|
|
88
|
-
moe_tmpl = Template(R"""
|
|
89
|
-
{% if (rv[2] - rv[0]) | abs == (rv[3] - rv[0]) | abs %}
|
|
90
|
-
{{- "[\pm {:.1f}]".format(rv[3] - rv[0]) -}}
|
|
91
|
-
{% else %}
|
|
92
|
-
{{- "[{:.1f}/+{:.1f}]".format(rv[2] - rv[0], rv[3] - rv[0]) -}}
|
|
93
|
-
{% endif %}
|
|
94
|
-
""")
|
|
95
|
-
|
|
96
|
-
# Define the LaTeX jinja environment
|
|
97
|
-
_template_resource = resources.files(f"{_PKG_NAME}.data.jinja2_LaTeX_templates")
|
|
98
|
-
_template_folder = DATA_DIR.joinpath(_template_resource.name)
|
|
99
|
-
with resources.as_file(
|
|
100
|
-
resources.files(f"{_PKG_NAME}.data.jinja2_LaTeX_templates")
|
|
101
|
-
) as _tmpl_src:
|
|
102
|
-
if not _template_folder.is_dir():
|
|
103
|
-
shutil.copytree(_tmpl_src, _template_folder)
|
|
104
|
-
|
|
105
|
-
# Place files related to rendering LaTeX in output data directory
|
|
106
|
-
if not (_out_path := DATA_DIR.joinpath(f"{_PKG_NAME}.cls")).is_file():
|
|
107
|
-
shutil.move(_template_folder / _out_path.name, _out_path)
|
|
108
|
-
|
|
109
|
-
# Write to LaTeX table settings file
|
|
110
|
-
if not (_DOTTEX := DATA_DIR / Rf"{_PKG_NAME}_TikZTableSettings.sty").is_file():
|
|
111
|
-
shutil.move(_template_folder / "setup_tikz_tables.sty", _DOTTEX)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def create_jinja_env(_tmpl_folder: Path = _template_folder, /) -> Environment:
|
|
115
|
-
"""Create jinja2 environment
|
|
116
|
-
|
|
117
|
-
Loader is the FileSystemLoader initialized with the given path as
|
|
118
|
-
template folder
|
|
119
|
-
|
|
120
|
-
Parameters
|
|
121
|
-
----------
|
|
122
|
-
_tmpl_folder : Path
|
|
123
|
-
Path to template folder
|
|
124
|
-
|
|
125
|
-
Returns
|
|
126
|
-
-------
|
|
127
|
-
Environment
|
|
128
|
-
jinja2 environment
|
|
129
|
-
"""
|
|
130
|
-
return Environment(
|
|
131
|
-
block_start_string=R"((*",
|
|
132
|
-
block_end_string="*))",
|
|
133
|
-
variable_start_string=R"\JINVAR{",
|
|
134
|
-
variable_end_string="}",
|
|
135
|
-
comment_start_string=R"((#", # r'#{',
|
|
136
|
-
comment_end_string=R"#))", # '}',
|
|
137
|
-
line_statement_prefix="##",
|
|
138
|
-
line_comment_prefix="%#",
|
|
139
|
-
trim_blocks=True,
|
|
140
|
-
lstrip_blocks=True,
|
|
141
|
-
autoescape=select_autoescape(disabled_extensions=("tex.jinja2",)),
|
|
142
|
-
loader=FileSystemLoader(_tmpl_folder),
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
LTX_ARRAY_LINEEND = "\\\\\n"
|
|
147
|
-
LaTeX_hrdcoldesc_format_str = "{}\n{}\n{}".format(
|
|
148
|
-
"".join((
|
|
149
|
-
R"\matrix[hcol, above=0pt of {}, nodes = {{",
|
|
150
|
-
R"text width={}, text depth=10pt, inner sep=3pt, minimum height=25pt,",
|
|
151
|
-
R"}},] ",
|
|
152
|
-
R"({}) ",
|
|
153
|
-
R"{{",
|
|
154
|
-
)),
|
|
155
|
-
R"\node[align = {},] {{ {} }}; \\",
|
|
156
|
-
R"}};",
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
class StatsContainer(SimpleNamespace):
|
|
161
|
-
"""A container for passing content to jinja2 templates
|
|
162
|
-
|
|
163
|
-
Other attributes added later, to fully populate selected jinja2 templates
|
|
164
|
-
"""
|
|
165
|
-
|
|
166
|
-
invdata_hdrstr: str
|
|
167
|
-
invdata_datstr: str
|
|
168
|
-
|
|
169
|
-
|
|
170
68
|
# Parameters and functions to interpolate selected HHI and ΔHHI values
|
|
171
69
|
# recorded in fractions to ranges of values in points on the HHI scale
|
|
172
70
|
HHI_DELTA_KNOTS = np.array(
|
|
@@ -215,7 +113,6 @@ ZONE_DETAIL_STRINGS_HHI = {
|
|
|
215
113
|
1: R"HHI ∈ [{}, {}) pts. and ".format(*HHI_POST_ZONE_KNOTS[1:3]),
|
|
216
114
|
2: Rf"HHI ⩾ {HHI_POST_ZONE_KNOTS[2]} pts. and ",
|
|
217
115
|
}
|
|
218
|
-
|
|
219
116
|
ZONE_DETAIL_STRINGS_DELTA = {
|
|
220
117
|
0: "",
|
|
221
118
|
1: Rf"ΔHHI < {HHI_DELTA_KNOTS[1]} pts.",
|
|
@@ -224,100 +121,6 @@ ZONE_DETAIL_STRINGS_DELTA = {
|
|
|
224
121
|
4: Rf"ΔHHI ⩾ {HHI_DELTA_KNOTS[2]} pts.",
|
|
225
122
|
}
|
|
226
123
|
|
|
227
|
-
ZONE_STRINGS_LATEX_TIKZ = {
|
|
228
|
-
0: R"\node[align = left, fill=BrightGreen] {Green Zone (Safeharbor)};",
|
|
229
|
-
1: R"\node[align = left, fill=HiCoYellow] {Yellow Zone};",
|
|
230
|
-
2: R"\node[align = left, fill=VibrRed] {Red Zone (SLC Presumption)};",
|
|
231
|
-
fid.TTL_KEY: R"\node[align = left, fill=OBSHDRFill] {TOTAL};",
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
ZONE_STRINGS_LATEX_TBLR = {
|
|
235
|
-
0: R"\SetCell{l, bg=BrightGreen} {Green Zone (Safeharbor)}",
|
|
236
|
-
1: R"\SetCell{l, bg=HiCoYellow} {Yellow Zone}",
|
|
237
|
-
2: R"\SetCell{l, bg=VibrRed} {Red Zone (SLC Presumption)}",
|
|
238
|
-
fid.TTL_KEY: R"\SetCell{l, bg=OBSHDRFill} {TOTAL}",
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
ZONE_DETAIL_STRINGS_HHI_LATEX = {
|
|
242
|
-
0: Rf"HHI_{{post}} < \text{{{HHI_POST_ZONE_KNOTS[1]} pts.}}",
|
|
243
|
-
1: R"HHI_{{post}} \in \text{{[{}, {}) pts. and }} ".format(
|
|
244
|
-
*HHI_POST_ZONE_KNOTS[1:3]
|
|
245
|
-
),
|
|
246
|
-
2: Rf"HHI_{{post}} \geqslant \text{{{HHI_POST_ZONE_KNOTS[2]} pts. and }} ",
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
ZONE_DETAIL_STRINGS_DELTA_LATEX = {
|
|
250
|
-
0: "",
|
|
251
|
-
1: Rf"\Delta HHI < \text{{{HHI_DELTA_KNOTS[1]} pts.}}",
|
|
252
|
-
2: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[1]} pts.}}",
|
|
253
|
-
3: R"\Delta HHI \in \text{{[{}, {}) pts.}}".format(*HHI_DELTA_KNOTS[1:3]),
|
|
254
|
-
4: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[2]} pts.}}",
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
def enf_stats_obs(
|
|
259
|
-
_data_array_dict: fid.INVData,
|
|
260
|
-
_data_period: str = "1996-2003",
|
|
261
|
-
_table_ind_group: IndustryGroup = IndustryGroup.ALL,
|
|
262
|
-
_table_evid_cond: OtherEvidence = OtherEvidence.UR,
|
|
263
|
-
_stats_group: StatsGrpSelector = StatsGrpSelector.FC,
|
|
264
|
-
_enf_spec: INVResolution = INVResolution.CLRN,
|
|
265
|
-
/,
|
|
266
|
-
*,
|
|
267
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.RPT,
|
|
268
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
269
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
|
|
270
|
-
print_to_screen: bool = True,
|
|
271
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
272
|
-
if _data_period not in _data_array_dict:
|
|
273
|
-
raise ValueError(
|
|
274
|
-
f"Value of _data_period, {f'"{_data_period}"'} is invalid.",
|
|
275
|
-
f"Must be in, {list(_data_array_dict.keys())!r}",
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
match _stats_group:
|
|
279
|
-
case StatsGrpSelector.ZN:
|
|
280
|
-
_enf_stats_table_func = enf_stats_table_byzone
|
|
281
|
-
case StatsGrpSelector.FC:
|
|
282
|
-
_enf_stats_table_func = enf_stats_table_onedim
|
|
283
|
-
case StatsGrpSelector.DL:
|
|
284
|
-
_enf_stats_table_func = enf_stats_table_onedim
|
|
285
|
-
case _:
|
|
286
|
-
raise ValueError(
|
|
287
|
-
'Statistics formatted, "{_stats_group}" not available here.'
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
_enf_stats_cnts = enf_cnts_obs_by_group(
|
|
291
|
-
_data_array_dict,
|
|
292
|
-
_data_period,
|
|
293
|
-
_table_ind_group,
|
|
294
|
-
_table_evid_cond,
|
|
295
|
-
_stats_group,
|
|
296
|
-
_enf_spec,
|
|
297
|
-
)
|
|
298
|
-
|
|
299
|
-
_enf_stats_hdr_list, _enf_stats_dat_list = _enf_stats_table_func(
|
|
300
|
-
_enf_stats_cnts,
|
|
301
|
-
None,
|
|
302
|
-
return_type_sel=return_type_sel,
|
|
303
|
-
sort_order=sort_order,
|
|
304
|
-
print_format=print_format,
|
|
305
|
-
)
|
|
306
|
-
|
|
307
|
-
if print_to_screen:
|
|
308
|
-
print(
|
|
309
|
-
f"{_enf_spec.capitalize()} stats ({return_type_sel})",
|
|
310
|
-
f"for Period: {_data_period}",
|
|
311
|
-
"\u2014",
|
|
312
|
-
f"{_table_ind_group};",
|
|
313
|
-
_table_evid_cond,
|
|
314
|
-
)
|
|
315
|
-
stats_print_rows(
|
|
316
|
-
_enf_stats_hdr_list, _enf_stats_dat_list, print_format=print_format
|
|
317
|
-
)
|
|
318
|
-
|
|
319
|
-
return _enf_stats_hdr_list, _enf_stats_dat_list
|
|
320
|
-
|
|
321
124
|
|
|
322
125
|
def enf_cnts_obs_by_group(
|
|
323
126
|
_invdata_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
|
|
@@ -546,360 +349,7 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
|
|
|
546
349
|
return _cnts_byconczone[1:]
|
|
547
350
|
|
|
548
351
|
|
|
549
|
-
def enf_stats_table_onedim(
|
|
550
|
-
_inparr: ArrayDouble | ArrayBIGINT | ArrayDouble | ArrayBIGINT,
|
|
551
|
-
_totals_row: int | None = None,
|
|
552
|
-
/,
|
|
553
|
-
*,
|
|
554
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
|
|
555
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
556
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
|
|
557
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
558
|
-
_ndim_in: int = 1
|
|
559
|
-
_dim_hdr_dict = {_v: _k for _k, _v in fid.CNT_FCOUNT_DICT.items()} | {
|
|
560
|
-
_v: (
|
|
561
|
-
"[2500, 5000]"
|
|
562
|
-
if _k == "2,500 +"
|
|
563
|
-
else f"[{_k.replace(",", "").replace(" - ", ", ")})"
|
|
564
|
-
)
|
|
565
|
-
for _k, _v in fid.CONC_DELTA_DICT.items()
|
|
566
|
-
if _k != "TOTAL"
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
if _totals_row:
|
|
570
|
-
_in_totals_row = _inparr[_totals_row, :]
|
|
571
|
-
_inparr_mask = np.ones(len(_inparr), dtype=bool)
|
|
572
|
-
_inparr_mask[_in_totals_row] = False
|
|
573
|
-
_inparr = _inparr[_inparr_mask]
|
|
574
|
-
else:
|
|
575
|
-
_in_totals_row = np.concatenate((
|
|
576
|
-
[fid.TTL_KEY],
|
|
577
|
-
np.einsum("ij->j", _inparr[:, _ndim_in:]),
|
|
578
|
-
))
|
|
579
|
-
|
|
580
|
-
if sort_order == SortSelector.REV:
|
|
581
|
-
_inparr = _inparr[::-1]
|
|
582
|
-
|
|
583
|
-
_inparr = np.vstack((_inparr, _in_totals_row))
|
|
584
|
-
|
|
585
|
-
_stats_hdr_list, _stats_dat_list = [], []
|
|
586
|
-
for _stats_row in _inparr:
|
|
587
|
-
_stats_hdr_str = _dim_hdr_dict[_stats_row[0]]
|
|
588
|
-
_stats_hdr_list += [
|
|
589
|
-
f"{{{_stats_hdr_str}}}"
|
|
590
|
-
if print_format in ("LaTeX", "LaTeX+TikZ")
|
|
591
|
-
else _stats_hdr_str
|
|
592
|
-
]
|
|
593
|
-
|
|
594
|
-
_stats_cnt = _stats_row[_ndim_in:]
|
|
595
|
-
_stats_tot = np.concatenate((
|
|
596
|
-
[_inparr[-1][_ndim_in]],
|
|
597
|
-
_stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
|
|
598
|
-
))
|
|
599
|
-
_stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)
|
|
600
|
-
|
|
601
|
-
return _stats_hdr_list, _stats_dat_list
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
def enf_stats_table_byzone(
|
|
605
|
-
_inparr: ArrayDouble | ArrayBIGINT | ArrayDouble | ArrayBIGINT,
|
|
606
|
-
_totals_row: int | None = None,
|
|
607
|
-
/,
|
|
608
|
-
*,
|
|
609
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
|
|
610
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
611
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
|
|
612
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
613
|
-
_ndim_in: int = ZONE_VALS.shape[1]
|
|
614
|
-
|
|
615
|
-
_zone_str_dict = (
|
|
616
|
-
ZONE_STRINGS_LATEX_TIKZ
|
|
617
|
-
if print_format == "LaTeX+TikZ"
|
|
618
|
-
else (ZONE_STRINGS_LATEX_TBLR if print_format == "LaTeX" else ZONE_STRINGS)
|
|
619
|
-
)
|
|
620
|
-
_zone_str_keys = list(_zone_str_dict)
|
|
621
|
-
|
|
622
|
-
if sort_order == SortSelector.REV:
|
|
623
|
-
_inparr = _inparr[::-1]
|
|
624
|
-
_zone_str_keys = _zone_str_keys[:-1][::-1] + [_zone_str_keys[-1]]
|
|
625
|
-
|
|
626
|
-
if _totals_row is None:
|
|
627
|
-
_inparr = np.vstack((
|
|
628
|
-
_inparr,
|
|
629
|
-
np.concatenate((
|
|
630
|
-
[fid.TTL_KEY, -1, -1],
|
|
631
|
-
np.einsum("ij->j", _inparr[:, _ndim_in:]),
|
|
632
|
-
)),
|
|
633
|
-
))
|
|
634
|
-
|
|
635
|
-
_stats_hdr_list, _stats_dat_list = ([], [])
|
|
636
|
-
for _conc_zone in _zone_str_keys:
|
|
637
|
-
_stats_byzone_it = _inparr[_inparr[:, 0] == _conc_zone]
|
|
638
|
-
_stats_hdr_list += [_zone_str_dict[_conc_zone]]
|
|
639
|
-
|
|
640
|
-
_stats_cnt = np.einsum("ij->j", _stats_byzone_it[:, _ndim_in:])
|
|
641
|
-
_stats_tot = np.concatenate((
|
|
642
|
-
[_inparr[-1][_ndim_in]],
|
|
643
|
-
_stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
|
|
644
|
-
))
|
|
645
|
-
_stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)
|
|
646
|
-
|
|
647
|
-
if _conc_zone in (2, fid.TTL_KEY):
|
|
648
|
-
continue
|
|
649
|
-
|
|
650
|
-
for _stats_byzone_detail in _stats_byzone_it:
|
|
651
|
-
# Only two sets of subtotals detail, so
|
|
652
|
-
# a conditional expression will do here
|
|
653
|
-
if print_format in ("LaTeX", "LaTeX+TikZ"):
|
|
654
|
-
_stats_text_color = "HiCoYellow" if _conc_zone == 1 else "BrightGreen"
|
|
655
|
-
_stats_hdr_list += [
|
|
656
|
-
R"{} {{\null\hfill \({}{}\) }}{}".format(
|
|
657
|
-
rf"\node[text = {_stats_text_color}, fill = white, align = right]"
|
|
658
|
-
if print_format == "LaTeX+TikZ"
|
|
659
|
-
else rf"\SetCell{{r, fg={_stats_text_color}, bg=white}}",
|
|
660
|
-
ZONE_DETAIL_STRINGS_HHI_LATEX[_stats_byzone_detail[1]],
|
|
661
|
-
(
|
|
662
|
-
""
|
|
663
|
-
if _stats_byzone_detail[2] == 0
|
|
664
|
-
else Rf"{ZONE_DETAIL_STRINGS_DELTA_LATEX[_stats_byzone_detail[2]]}"
|
|
665
|
-
),
|
|
666
|
-
";" if print_format == "LaTeX+TikZ" else "",
|
|
667
|
-
)
|
|
668
|
-
]
|
|
669
|
-
else:
|
|
670
|
-
_stats_hdr_list += [
|
|
671
|
-
R"{}{};".format(
|
|
672
|
-
ZONE_DETAIL_STRINGS_HHI[_stats_byzone_detail[1]],
|
|
673
|
-
(
|
|
674
|
-
""
|
|
675
|
-
if _stats_byzone_detail[2] == 0
|
|
676
|
-
else Rf"{ZONE_DETAIL_STRINGS_DELTA[_stats_byzone_detail[2]]}"
|
|
677
|
-
),
|
|
678
|
-
)
|
|
679
|
-
]
|
|
680
|
-
|
|
681
|
-
_stats_cnt = _stats_byzone_detail[_ndim_in:]
|
|
682
|
-
_stats_tot = np.concatenate((
|
|
683
|
-
[_inparr[-1][_ndim_in]],
|
|
684
|
-
_stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
|
|
685
|
-
))
|
|
686
|
-
_stats_dat_list += _stats_formatted_row(
|
|
687
|
-
_stats_cnt, _stats_tot, return_type_sel
|
|
688
|
-
)
|
|
689
|
-
|
|
690
|
-
return _stats_hdr_list, _stats_dat_list
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
def _stats_formatted_row(
|
|
694
|
-
_stats_row_cnt: ArrayBIGINT,
|
|
695
|
-
_stats_row_tot: ArrayBIGINT,
|
|
696
|
-
_return_type_sel: StatsReturnSelector,
|
|
697
|
-
/,
|
|
698
|
-
) -> list[list[str]]:
|
|
699
|
-
_stats_row_pct = _stats_row_cnt / _stats_row_tot
|
|
700
|
-
|
|
701
|
-
match _return_type_sel:
|
|
702
|
-
case StatsReturnSelector.RIN:
|
|
703
|
-
_stats_row_ci = np.array([
|
|
704
|
-
propn_ci(*g, method="Wilson")
|
|
705
|
-
for g in zip(_stats_row_cnt[1:], _stats_row_tot[1:], strict=True)
|
|
706
|
-
])
|
|
707
|
-
return [
|
|
708
|
-
[
|
|
709
|
-
pct_format_str.format(100 * _stats_row_pct[0]),
|
|
710
|
-
*[
|
|
711
|
-
ci_format_str.format(*100 * np.array(f)).replace(
|
|
712
|
-
R" nan [ nan, nan] \%", "---"
|
|
713
|
-
)
|
|
714
|
-
for f in _stats_row_ci
|
|
715
|
-
],
|
|
716
|
-
]
|
|
717
|
-
]
|
|
718
|
-
case StatsReturnSelector.RPT:
|
|
719
|
-
return [
|
|
720
|
-
[
|
|
721
|
-
pct_format_str.format(f).replace(R"nan\%", "---")
|
|
722
|
-
for f in 100 * _stats_row_pct
|
|
723
|
-
]
|
|
724
|
-
]
|
|
725
|
-
case _:
|
|
726
|
-
return [
|
|
727
|
-
[
|
|
728
|
-
cnt_format_str.format(f).replace(R"nan", "---")
|
|
729
|
-
for f in _stats_row_cnt
|
|
730
|
-
]
|
|
731
|
-
]
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
def stats_print_rows(
|
|
735
|
-
_enf_stats_hdr_list: list[str],
|
|
736
|
-
_enf_stats_dat_list: list[list[str]],
|
|
737
|
-
/,
|
|
738
|
-
*,
|
|
739
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "text",
|
|
740
|
-
) -> None:
|
|
741
|
-
for _idx, _hdr in enumerate(_enf_stats_hdr_list):
|
|
742
|
-
if print_format in ("LaTeX", "LaTeX+TikZ"):
|
|
743
|
-
_hdr_str = (
|
|
744
|
-
_hdr
|
|
745
|
-
if _hdr == "TOTAL"
|
|
746
|
-
else re.fullmatch(r".*\{.*\}\{(.*)\};?", _hdr)[1].strip()
|
|
747
|
-
)
|
|
748
|
-
print(
|
|
749
|
-
_hdr_str,
|
|
750
|
-
" & ",
|
|
751
|
-
" & ".join(_enf_stats_dat_list[_idx]),
|
|
752
|
-
LTX_ARRAY_LINEEND,
|
|
753
|
-
end="",
|
|
754
|
-
)
|
|
755
|
-
else:
|
|
756
|
-
print(_hdr, " | ", " | ".join(_enf_stats_dat_list[_idx]))
|
|
757
|
-
|
|
758
|
-
print()
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
def propn_ci(
|
|
762
|
-
_npos: ArrayINT | int = 4,
|
|
763
|
-
_nobs: ArrayINT | int = 10,
|
|
764
|
-
/,
|
|
765
|
-
*,
|
|
766
|
-
alpha: float = 0.05,
|
|
767
|
-
method: Literal[
|
|
768
|
-
"Agresti-Coull", "Clopper-Pearson", "Exact", "Wilson", "Score"
|
|
769
|
-
] = "Wilson",
|
|
770
|
-
) -> tuple[
|
|
771
|
-
ArrayDouble | float, ArrayDouble | float, ArrayDouble | float, ArrayDouble | float
|
|
772
|
-
]:
|
|
773
|
-
"""Returns point estimates and confidence interval for a proportion
|
|
774
|
-
|
|
775
|
-
Methods "Clopper-Pearson" and "Exact" are synoymous [3]_. Similarly,
|
|
776
|
-
"Wilson" and "Score" are synonyms here.
|
|
777
|
-
|
|
778
|
-
Parameters
|
|
779
|
-
----------
|
|
780
|
-
_npos
|
|
781
|
-
Number of positives
|
|
782
|
-
|
|
783
|
-
_nobs
|
|
784
|
-
Number of observed values
|
|
785
|
-
|
|
786
|
-
alpha
|
|
787
|
-
Significance level
|
|
788
|
-
|
|
789
|
-
method
|
|
790
|
-
Method to use for estimating confidence interval
|
|
791
|
-
|
|
792
|
-
Returns
|
|
793
|
-
-------
|
|
794
|
-
Raw and estimated proportions, and bounds of the confidence interval
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
References
|
|
798
|
-
----------
|
|
799
|
-
|
|
800
|
-
.. [3] Alan Agresti & Brent A. Coull (1998) Approximate is Better
|
|
801
|
-
than “Exact” for Interval Estimation of Binomial Proportions,
|
|
802
|
-
The American Statistician, 52:2, 119-126,
|
|
803
|
-
https://doi.org/10.1080/00031305.1998.10480550
|
|
804
|
-
|
|
805
|
-
"""
|
|
806
|
-
|
|
807
|
-
for _f in _npos, _nobs:
|
|
808
|
-
if not isinstance(_f, int | np.integer):
|
|
809
|
-
raise ValueError(
|
|
810
|
-
f"Count, {_f!r} must have type that is a subtype of np.integer."
|
|
811
|
-
)
|
|
812
|
-
|
|
813
|
-
if not _nobs:
|
|
814
|
-
return (np.nan, np.nan, np.nan, np.nan)
|
|
815
|
-
|
|
816
|
-
_raw_phat: ArrayDouble | float = _npos / _nobs
|
|
817
|
-
_est_phat: ArrayDouble | float
|
|
818
|
-
_est_ci_l: ArrayDouble | float
|
|
819
|
-
_est_ci_u: ArrayDouble | float
|
|
820
|
-
|
|
821
|
-
match method:
|
|
822
|
-
case "Clopper-Pearson" | "Exact":
|
|
823
|
-
_est_ci_l, _est_ci_u = (
|
|
824
|
-
beta.ppf(*_f)
|
|
825
|
-
for _f in (
|
|
826
|
-
(alpha / 2, _npos, _nobs - _npos + 1),
|
|
827
|
-
(1 - alpha / 2, _npos + 1, _nobs - _npos),
|
|
828
|
-
)
|
|
829
|
-
)
|
|
830
|
-
_est_phat = 1 / 2 * (_est_ci_l + _est_ci_u)
|
|
831
|
-
|
|
832
|
-
case "Agresti-Coull":
|
|
833
|
-
_zsc = norm.ppf(1 - alpha / 2)
|
|
834
|
-
_zscsq = _zsc * _zsc
|
|
835
|
-
_adjmt = 4 if alpha == 0.05 else _zscsq
|
|
836
|
-
_est_phat = (_npos + _adjmt / 2) / (_nobs + _adjmt)
|
|
837
|
-
_est_ci_l, _est_ci_u = (
|
|
838
|
-
_est_phat + _g
|
|
839
|
-
for _g in [
|
|
840
|
-
_f * _zsc * np.sqrt(_est_phat * (1 - _est_phat) / (_nobs + _adjmt))
|
|
841
|
-
for _f in (-1, 1)
|
|
842
|
-
]
|
|
843
|
-
)
|
|
844
|
-
|
|
845
|
-
case "Wilson" | "Score":
|
|
846
|
-
_zsc = norm.ppf(1 - alpha / 2)
|
|
847
|
-
_zscsq = _zsc * _zsc
|
|
848
|
-
_est_phat = (_npos + _zscsq / 2) / (_nobs + _zscsq)
|
|
849
|
-
_est_ci_l, _est_ci_u = (
|
|
850
|
-
_est_phat
|
|
851
|
-
+ _f
|
|
852
|
-
* _zsc
|
|
853
|
-
* np.sqrt(_nobs * _raw_phat * (1 - _raw_phat) + _zscsq / 4)
|
|
854
|
-
/ (_nobs + _zscsq)
|
|
855
|
-
for _f in (-1, 1)
|
|
856
|
-
)
|
|
857
|
-
|
|
858
|
-
case _:
|
|
859
|
-
raise ValueError(f"Method, {f'"{method}"'} not yet implemented.")
|
|
860
|
-
|
|
861
|
-
return _raw_phat, _est_phat, _est_ci_l, _est_ci_u
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
def render_table_pdf(
|
|
865
|
-
_table_collection_design: Template,
|
|
866
|
-
_table_collection_content: StatsContainer,
|
|
867
|
-
_output_tex_path: Path,
|
|
868
|
-
/,
|
|
869
|
-
) -> None:
|
|
870
|
-
"""Render table collection to PDF
|
|
871
|
-
|
|
872
|
-
Parameters
|
|
873
|
-
----------
|
|
874
|
-
_table_collection_design
|
|
875
|
-
A jinja2 template for generating the LaTeX file to render
|
|
876
|
-
|
|
877
|
-
_table_collection_content
|
|
878
|
-
Content for jinja2 template
|
|
879
|
-
|
|
880
|
-
_output_tex_path
|
|
881
|
-
Path to LaTeX output file to render to PDF
|
|
882
|
-
"""
|
|
883
|
-
|
|
884
|
-
with _output_tex_path.open("w", encoding="utf8") as _output_tex_file:
|
|
885
|
-
_output_tex_file.write(
|
|
886
|
-
_table_collection_design.render(tmpl_data=_table_collection_content)
|
|
887
|
-
)
|
|
888
|
-
print("\n", file=_output_tex_file)
|
|
889
|
-
|
|
890
|
-
_run_rc = subprocess.run( # noqa: S603
|
|
891
|
-
f"latexmk -f -quiet -synctex=0 -interaction=nonstopmode -file-line-error -pdflua {_output_tex_path}".split(),
|
|
892
|
-
check=True,
|
|
893
|
-
cwd=DATA_DIR,
|
|
894
|
-
)
|
|
895
|
-
if _run_rc:
|
|
896
|
-
subprocess.run("latexmk -quiet -c".split(), check=True, cwd=DATA_DIR) # noqa: S603
|
|
897
|
-
del _run_rc
|
|
898
|
-
|
|
899
|
-
print(f"Tables rendered to path, {_output_tex_path.with_suffix(".pdf")}")
|
|
900
|
-
|
|
901
|
-
|
|
902
352
|
if __name__ == "__main__":
|
|
903
353
|
print(
|
|
904
|
-
"This module provides methods to
|
|
354
|
+
"This module provides methods to aggregate statistics on merger enforcement patterns for reporting."
|
|
905
355
|
)
|
mergeron/gen/upp_tests.py
CHANGED
|
@@ -53,7 +53,7 @@ class INVRESCntsArgs(TypedDict, total=False):
|
|
|
53
53
|
saved_array_name_suffix: str
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
def enf_cnts(
|
|
56
|
+
def compute_upp_test_counts(
|
|
57
57
|
_market_data_sample: MarketDataSample,
|
|
58
58
|
_upp_test_parms: gbl.HMGThresholds,
|
|
59
59
|
_upp_test_regime: UPPTestRegime,
|
|
@@ -84,7 +84,7 @@ def enf_cnts(
|
|
|
84
84
|
"""
|
|
85
85
|
|
|
86
86
|
_enf_cnts_sim_array = -1 * np.ones((6, 2), np.int64)
|
|
87
|
-
_upp_test_arrays = gen_upp_test_arrays(
|
|
87
|
+
_upp_test_arrays = compute_upp_test_arrays(
|
|
88
88
|
_market_data_sample, _upp_test_parms, _upp_test_regime
|
|
89
89
|
)
|
|
90
90
|
|
|
@@ -191,7 +191,7 @@ def enf_cnts(
|
|
|
191
191
|
)
|
|
192
192
|
|
|
193
193
|
|
|
194
|
-
def gen_upp_test_arrays(
|
|
194
|
+
def compute_upp_test_arrays(
|
|
195
195
|
_market_data: MarketDataSample,
|
|
196
196
|
_upp_test_parms: gbl.HMGThresholds,
|
|
197
197
|
_sim_test_regime: UPPTestRegime,
|
|
@@ -213,8 +213,7 @@ def gen_upp_test_arrays(
|
|
|
213
213
|
|
|
214
214
|
"""
|
|
215
215
|
_g_bar, _divr_bar, _cmcr_bar, _ipr_bar = (
|
|
216
|
-
getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr")
|
|
217
|
-
)
|
|
216
|
+
getattr(_upp_test_parms, _f) for _f in ("guppi", "divr", "cmcr", "ipr"))
|
|
218
217
|
|
|
219
218
|
_guppi_array, _ipr_array, _cmcr_array = (
|
|
220
219
|
np.empty_like(_market_data.price_array) for _ in range(3)
|
|
@@ -236,13 +235,13 @@ def gen_upp_test_arrays(
|
|
|
236
235
|
|
|
237
236
|
np.divide(_ipr_array, 1 - _market_data.pcm_array, out=_cmcr_array)
|
|
238
237
|
|
|
239
|
-
(_divr_test_vector,) = _compute_test_value_seq(
|
|
238
|
+
(_divr_test_vector,) = _compute_test_array_seq(
|
|
240
239
|
(_market_data.divr_array,),
|
|
241
240
|
_market_data.frmshr_array,
|
|
242
241
|
_sim_test_regime.divr_aggregator,
|
|
243
242
|
)
|
|
244
243
|
|
|
245
|
-
(_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_value_seq(
|
|
244
|
+
(_guppi_test_vector, _cmcr_test_vector, _ipr_test_vector) = _compute_test_array_seq(
|
|
246
245
|
(_guppi_array, _cmcr_array, _ipr_array),
|
|
247
246
|
_market_data.frmshr_array,
|
|
248
247
|
_sim_test_regime.guppi_aggregator,
|
|
@@ -267,7 +266,7 @@ def gen_upp_test_arrays(
|
|
|
267
266
|
return _upp_test_arrays
|
|
268
267
|
|
|
269
268
|
|
|
270
|
-
def _compute_test_value_seq(
|
|
269
|
+
def _compute_test_array_seq(
|
|
271
270
|
_test_measure_seq: tuple[ArrayDouble, ...],
|
|
272
271
|
_wt_array: ArrayDouble,
|
|
273
272
|
_aggregator: UPPAggrSelector,
|
|
@@ -286,45 +285,45 @@ def _compute_test_value_seq(
|
|
|
286
285
|
|
|
287
286
|
match _aggregator:
|
|
288
287
|
case UPPAggrSelector.AVG:
|
|
289
|
-
|
|
288
|
+
_test_array_seq = (
|
|
290
289
|
1 / 2 * np.einsum("ij->i", _g)[:, None] for _g in _test_measure_seq
|
|
291
290
|
)
|
|
292
291
|
case UPPAggrSelector.CPA:
|
|
293
|
-
|
|
292
|
+
_test_array_seq = (
|
|
294
293
|
np.einsum("ij,ij->i", _wt_array[:, ::-1], _g)[:, None]
|
|
295
294
|
for _g in _test_measure_seq
|
|
296
295
|
)
|
|
297
296
|
case UPPAggrSelector.CPD:
|
|
298
|
-
|
|
297
|
+
_test_array_seq = (
|
|
299
298
|
np.sqrt(np.einsum("ij,ij,ij->i", _wt_array[:, ::-1], _g, _g))[:, None]
|
|
300
299
|
for _g in _test_measure_seq
|
|
301
300
|
)
|
|
302
301
|
case UPPAggrSelector.DIS:
|
|
303
|
-
|
|
302
|
+
_test_array_seq = (
|
|
304
303
|
np.sqrt(1 / 2 * np.einsum("ij,ij->i", _g, _g))[:, None]
|
|
305
304
|
for _g in _test_measure_seq
|
|
306
305
|
)
|
|
307
306
|
case UPPAggrSelector.MAX:
|
|
308
|
-
|
|
307
|
+
_test_array_seq = (
|
|
309
308
|
_g.max(axis=1, keepdims=True) for _g in _test_measure_seq
|
|
310
309
|
)
|
|
311
310
|
case UPPAggrSelector.MIN:
|
|
312
|
-
|
|
311
|
+
_test_array_seq = (
|
|
313
312
|
_g.min(axis=1, keepdims=True) for _g in _test_measure_seq
|
|
314
313
|
)
|
|
315
314
|
case UPPAggrSelector.OSA:
|
|
316
|
-
|
|
315
|
+
_test_array_seq = (
|
|
317
316
|
np.einsum("ij,ij->i", _wt_array, _g)[:, None]
|
|
318
317
|
for _g in _test_measure_seq
|
|
319
318
|
)
|
|
320
319
|
case UPPAggrSelector.OSD:
|
|
321
|
-
|
|
320
|
+
_test_array_seq = (
|
|
322
321
|
np.sqrt(np.einsum("ij,ij,ij->i", _wt_array, _g, _g))[:, None]
|
|
323
322
|
for _g in _test_measure_seq
|
|
324
323
|
)
|
|
325
324
|
case _:
|
|
326
325
|
raise ValueError("GUPPI/diversion ratio aggregation method is invalid.")
|
|
327
|
-
return tuple(
|
|
326
|
+
return tuple(_test_array_seq)
|
|
328
327
|
|
|
329
328
|
|
|
330
329
|
def initialize_hd5(
|