mergeron 2024.739125.3__py3-none-any.whl → 2024.739127.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +8 -9
- mergeron/core/ftc_merger_investigations_data.py +2 -2
- mergeron/core/guidelines_boundaries.py +2 -2
- mergeron/demo/visualize_empirical_margin_distribution.py +2 -3
- mergeron/gen/data_generation.py +21 -21
- mergeron/gen/enforcement_stats.py +3 -538
- mergeron/gen/upp_tests.py +19 -18
- {mergeron-2024.739125.3.dist-info → mergeron-2024.739127.1.dist-info}/METADATA +6 -6
- mergeron-2024.739127.1.dist-info/RECORD +24 -0
- mergeron/data/jinja2_LaTeX_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_summarypaired_table_template_tabularray.tex.jinja2 +0 -81
- mergeron/data/jinja2_LaTeX_templates/ftcinvdata_summarypaired_table_template_tikz.tex.jinja2 +0 -142
- mergeron/data/jinja2_LaTeX_templates/mergeron.cls +0 -155
- mergeron/data/jinja2_LaTeX_templates/mergeron_table_collection_template.tex.jinja2 +0 -93
- mergeron/data/jinja2_LaTeX_templates/setup_tikz_tables.sty +0 -129
- mergeron-2024.739125.3.dist-info/RECORD +0 -32
- {mergeron-2024.739125.3.dist-info → mergeron-2024.739127.1.dist-info}/WHEEL +0 -0
|
@@ -4,28 +4,12 @@ Methods to format and print summary statistics on merger enforcement patterns.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import enum
|
|
7
|
-
import shutil
|
|
8
|
-
import subprocess
|
|
9
7
|
from collections.abc import Mapping
|
|
10
|
-
from importlib import resources
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from types import SimpleNamespace
|
|
13
|
-
from typing import Literal
|
|
14
8
|
|
|
15
9
|
import numpy as np
|
|
16
|
-
import re2 as re # type: ignore
|
|
17
|
-
from jinja2 import Environment, FileSystemLoader, Template, select_autoescape
|
|
18
10
|
from scipy.interpolate import interp1d # type: ignore
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
from .. import ( # noqa: TID252
|
|
22
|
-
_PKG_NAME,
|
|
23
|
-
DATA_DIR,
|
|
24
|
-
VERSION,
|
|
25
|
-
ArrayBIGINT,
|
|
26
|
-
ArrayDouble,
|
|
27
|
-
ArrayINT,
|
|
28
|
-
)
|
|
11
|
+
|
|
12
|
+
from .. import VERSION, ArrayBIGINT # noqa: TID252
|
|
29
13
|
from ..core import ftc_merger_investigations_data as fid # noqa: TID252
|
|
30
14
|
from . import INVResolution
|
|
31
15
|
|
|
@@ -81,81 +65,6 @@ class SortSelector(enum.StrEnum):
|
|
|
81
65
|
REV = "reversed"
|
|
82
66
|
|
|
83
67
|
|
|
84
|
-
cnt_format_str = R"{: >5,.0f}"
|
|
85
|
-
pct_format_str = R"{: >6.1f}\%"
|
|
86
|
-
ci_format_str = R"{0: >5.1f} [{2: >4.1f},{3: >5.1f}] \%"
|
|
87
|
-
|
|
88
|
-
moe_tmpl = Template(R"""
|
|
89
|
-
{% if (rv[2] - rv[0]) | abs == (rv[3] - rv[0]) | abs %}
|
|
90
|
-
{{- "[\pm {:.1f}]".format(rv[3] - rv[0]) -}}
|
|
91
|
-
{% else %}
|
|
92
|
-
{{- "[{:.1f}/+{:.1f}]".format(rv[2] - rv[0], rv[3] - rv[0]) -}}
|
|
93
|
-
{% endif %}
|
|
94
|
-
""")
|
|
95
|
-
|
|
96
|
-
# Define the LaTeX jinja environment
|
|
97
|
-
_template_resource = resources.files(f"{_PKG_NAME}.data.jinja2_LaTeX_templates")
|
|
98
|
-
_template_folder = DATA_DIR.joinpath(_template_resource.name)
|
|
99
|
-
with resources.as_file(
|
|
100
|
-
resources.files(f"{_PKG_NAME}.data.jinja2_LaTeX_templates")
|
|
101
|
-
) as _tmpl_src:
|
|
102
|
-
if not _template_folder.is_dir():
|
|
103
|
-
shutil.copytree(_tmpl_src, _template_folder)
|
|
104
|
-
|
|
105
|
-
# Place files related to rendering LaTeX in output data directory
|
|
106
|
-
if not (_out_path := DATA_DIR.joinpath(f"{_PKG_NAME}.cls")).is_file():
|
|
107
|
-
shutil.move(_template_folder / _out_path.name, _out_path)
|
|
108
|
-
|
|
109
|
-
# Write to LaTeX table settings file
|
|
110
|
-
if not (_DOTTEX := DATA_DIR / Rf"{_PKG_NAME}_TikZTableSettings.sty").is_file():
|
|
111
|
-
shutil.move(_template_folder / "setup_tikz_tables.sty", _DOTTEX)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def create_jinja_env(_tmpl_folder: Path = _template_folder, /) -> Environment:
|
|
115
|
-
"""Create jinja2 environment
|
|
116
|
-
|
|
117
|
-
Loader is the FileSystemLoader initialized with the given path as
|
|
118
|
-
template folder
|
|
119
|
-
|
|
120
|
-
Parameters
|
|
121
|
-
----------
|
|
122
|
-
_tmpl_folder : Path
|
|
123
|
-
Path to template folder
|
|
124
|
-
|
|
125
|
-
Returns
|
|
126
|
-
-------
|
|
127
|
-
Environment
|
|
128
|
-
jinja2 environment
|
|
129
|
-
"""
|
|
130
|
-
return Environment(
|
|
131
|
-
block_start_string=R"((*",
|
|
132
|
-
block_end_string="*))",
|
|
133
|
-
variable_start_string=R"\JINVAR{",
|
|
134
|
-
variable_end_string="}",
|
|
135
|
-
comment_start_string=R"((#", # r'#{',
|
|
136
|
-
comment_end_string=R"#))", # '}',
|
|
137
|
-
line_statement_prefix="##",
|
|
138
|
-
line_comment_prefix="%#",
|
|
139
|
-
trim_blocks=True,
|
|
140
|
-
lstrip_blocks=True,
|
|
141
|
-
autoescape=select_autoescape(disabled_extensions=("tex.jinja2",)),
|
|
142
|
-
loader=FileSystemLoader(_tmpl_folder),
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
LTX_ARRAY_LINEEND = "\\\\\n"
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
class StatsContainer(SimpleNamespace):
|
|
150
|
-
"""A container for passing content to jinja2 templates
|
|
151
|
-
|
|
152
|
-
Other attributes added later, to fully populate selected jinja2 templates
|
|
153
|
-
"""
|
|
154
|
-
|
|
155
|
-
invdata_hdrstr: str
|
|
156
|
-
invdata_datstr: str
|
|
157
|
-
|
|
158
|
-
|
|
159
68
|
# Parameters and functions to interpolate selected HHI and ΔHHI values
|
|
160
69
|
# recorded in fractions to ranges of values in points on the HHI scale
|
|
161
70
|
HHI_DELTA_KNOTS = np.array(
|
|
@@ -204,7 +113,6 @@ ZONE_DETAIL_STRINGS_HHI = {
|
|
|
204
113
|
1: R"HHI ∈ [{}, {}) pts. and ".format(*HHI_POST_ZONE_KNOTS[1:3]),
|
|
205
114
|
2: Rf"HHI ⩾ {HHI_POST_ZONE_KNOTS[2]} pts. and ",
|
|
206
115
|
}
|
|
207
|
-
|
|
208
116
|
ZONE_DETAIL_STRINGS_DELTA = {
|
|
209
117
|
0: "",
|
|
210
118
|
1: Rf"ΔHHI < {HHI_DELTA_KNOTS[1]} pts.",
|
|
@@ -213,100 +121,6 @@ ZONE_DETAIL_STRINGS_DELTA = {
|
|
|
213
121
|
4: Rf"ΔHHI ⩾ {HHI_DELTA_KNOTS[2]} pts.",
|
|
214
122
|
}
|
|
215
123
|
|
|
216
|
-
ZONE_STRINGS_LATEX_TIKZ = {
|
|
217
|
-
0: R"\node[align=left, fill=BrightGreen] {Green Zone (Safeharbor)};",
|
|
218
|
-
1: R"\node[align=left, fill=HiCoYellow] {Yellow Zone};",
|
|
219
|
-
2: R"\node[align=left, fill=VibrRed] {Red Zone (SLC Presumption)};",
|
|
220
|
-
fid.TTL_KEY: R"\node[align=left, fill=OBSHDRFill] {TOTAL};",
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
ZONE_STRINGS_LATEX_TBLR = {
|
|
224
|
-
0: R"\SetCell{l, bg=BrightGreen} {Green Zone (Safeharbor)}",
|
|
225
|
-
1: R"\SetCell{l, bg=HiCoYellow} {Yellow Zone}",
|
|
226
|
-
2: R"\SetCell{l, bg=VibrRed} {Red Zone (SLC Presumption)}",
|
|
227
|
-
fid.TTL_KEY: R"\SetCell{l, bg=OBSHDRFill} {TOTAL}",
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
ZONE_DETAIL_STRINGS_HHI_LATEX = {
|
|
231
|
-
0: Rf"HHI_{{post}} < \text{{{HHI_POST_ZONE_KNOTS[1]} pts.}}",
|
|
232
|
-
1: R"HHI_{{post}} \in \text{{[{}, {}) pts. and }} ".format(
|
|
233
|
-
*HHI_POST_ZONE_KNOTS[1:3]
|
|
234
|
-
),
|
|
235
|
-
2: Rf"HHI_{{post}} \geqslant \text{{{HHI_POST_ZONE_KNOTS[2]} pts. and }} ",
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
ZONE_DETAIL_STRINGS_DELTA_LATEX = {
|
|
239
|
-
0: "",
|
|
240
|
-
1: Rf"\Delta HHI < \text{{{HHI_DELTA_KNOTS[1]} pts.}}",
|
|
241
|
-
2: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[1]} pts.}}",
|
|
242
|
-
3: R"\Delta HHI \in \text{{[{}, {}) pts.}}".format(*HHI_DELTA_KNOTS[1:3]),
|
|
243
|
-
4: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[2]} pts.}}",
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
def enf_stats_obs(
|
|
248
|
-
_data_array_dict: fid.INVData,
|
|
249
|
-
_data_period: str = "1996-2003",
|
|
250
|
-
_table_ind_group: IndustryGroup = IndustryGroup.ALL,
|
|
251
|
-
_table_evid_cond: OtherEvidence = OtherEvidence.UR,
|
|
252
|
-
_stats_group: StatsGrpSelector = StatsGrpSelector.FC,
|
|
253
|
-
_enf_spec: INVResolution = INVResolution.CLRN,
|
|
254
|
-
/,
|
|
255
|
-
*,
|
|
256
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.RPT,
|
|
257
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
258
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
|
|
259
|
-
print_to_screen: bool = True,
|
|
260
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
261
|
-
if _data_period not in _data_array_dict:
|
|
262
|
-
raise ValueError(
|
|
263
|
-
f"Value of _data_period, {f'"{_data_period}"'} is invalid.",
|
|
264
|
-
f"Must be in, {list(_data_array_dict.keys())!r}",
|
|
265
|
-
)
|
|
266
|
-
|
|
267
|
-
match _stats_group:
|
|
268
|
-
case StatsGrpSelector.ZN:
|
|
269
|
-
_enf_stats_table_func = enf_stats_table_byzone
|
|
270
|
-
case StatsGrpSelector.FC:
|
|
271
|
-
_enf_stats_table_func = enf_stats_table_onedim
|
|
272
|
-
case StatsGrpSelector.DL:
|
|
273
|
-
_enf_stats_table_func = enf_stats_table_onedim
|
|
274
|
-
case _:
|
|
275
|
-
raise ValueError(
|
|
276
|
-
'Statistics formatted, "{_stats_group}" not available here.'
|
|
277
|
-
)
|
|
278
|
-
|
|
279
|
-
_enf_stats_cnts = enf_cnts_obs_by_group(
|
|
280
|
-
_data_array_dict,
|
|
281
|
-
_data_period,
|
|
282
|
-
_table_ind_group,
|
|
283
|
-
_table_evid_cond,
|
|
284
|
-
_stats_group,
|
|
285
|
-
_enf_spec,
|
|
286
|
-
)
|
|
287
|
-
|
|
288
|
-
_enf_stats_hdr_list, _enf_stats_dat_list = _enf_stats_table_func(
|
|
289
|
-
_enf_stats_cnts,
|
|
290
|
-
None,
|
|
291
|
-
return_type_sel=return_type_sel,
|
|
292
|
-
sort_order=sort_order,
|
|
293
|
-
print_format=print_format,
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
if print_to_screen:
|
|
297
|
-
print(
|
|
298
|
-
f"{_enf_spec.capitalize()} stats ({return_type_sel})",
|
|
299
|
-
f"for Period: {_data_period}",
|
|
300
|
-
"\u2014",
|
|
301
|
-
f"{_table_ind_group};",
|
|
302
|
-
_table_evid_cond,
|
|
303
|
-
)
|
|
304
|
-
stats_print_rows(
|
|
305
|
-
_enf_stats_hdr_list, _enf_stats_dat_list, print_format=print_format
|
|
306
|
-
)
|
|
307
|
-
|
|
308
|
-
return _enf_stats_hdr_list, _enf_stats_dat_list
|
|
309
|
-
|
|
310
124
|
|
|
311
125
|
def enf_cnts_obs_by_group(
|
|
312
126
|
_invdata_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
|
|
@@ -535,356 +349,7 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
|
|
|
535
349
|
return _cnts_byconczone[1:]
|
|
536
350
|
|
|
537
351
|
|
|
538
|
-
def enf_stats_table_onedim(
|
|
539
|
-
_inparr: ArrayDouble | ArrayBIGINT | ArrayDouble | ArrayBIGINT,
|
|
540
|
-
_totals_row: int | None = None,
|
|
541
|
-
/,
|
|
542
|
-
*,
|
|
543
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
|
|
544
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
545
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
|
|
546
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
547
|
-
_ndim_in: int = 1
|
|
548
|
-
_dim_hdr_dict = {_v: _k for _k, _v in fid.CNT_FCOUNT_DICT.items()} | {
|
|
549
|
-
_v: (
|
|
550
|
-
"[2500, 5000]"
|
|
551
|
-
if _k == "2,500 +"
|
|
552
|
-
else f"[{_k.replace(",", "").replace(" - ", ", ")})"
|
|
553
|
-
)
|
|
554
|
-
for _k, _v in fid.CONC_DELTA_DICT.items()
|
|
555
|
-
if _k != "TOTAL"
|
|
556
|
-
}
|
|
557
|
-
|
|
558
|
-
if _totals_row:
|
|
559
|
-
_in_totals_row = _inparr[_totals_row, :]
|
|
560
|
-
_inparr_mask = np.ones(len(_inparr), dtype=bool)
|
|
561
|
-
_inparr_mask[_in_totals_row] = False
|
|
562
|
-
_inparr = _inparr[_inparr_mask]
|
|
563
|
-
else:
|
|
564
|
-
_in_totals_row = np.concatenate((
|
|
565
|
-
[fid.TTL_KEY],
|
|
566
|
-
np.einsum("ij->j", _inparr[:, _ndim_in:]),
|
|
567
|
-
))
|
|
568
|
-
|
|
569
|
-
if sort_order == SortSelector.REV:
|
|
570
|
-
_inparr = _inparr[::-1]
|
|
571
|
-
|
|
572
|
-
_inparr = np.vstack((_inparr, _in_totals_row))
|
|
573
|
-
|
|
574
|
-
_stats_hdr_list, _stats_dat_list = [], []
|
|
575
|
-
for _stats_row in _inparr:
|
|
576
|
-
_stats_hdr_str = _dim_hdr_dict[_stats_row[0]]
|
|
577
|
-
_stats_hdr_list += [
|
|
578
|
-
f"{{{_stats_hdr_str}}}"
|
|
579
|
-
if print_format in ("LaTeX", "LaTeX+TikZ")
|
|
580
|
-
else _stats_hdr_str
|
|
581
|
-
]
|
|
582
|
-
|
|
583
|
-
_stats_cnt = _stats_row[_ndim_in:]
|
|
584
|
-
_stats_tot = np.concatenate((
|
|
585
|
-
[_inparr[-1][_ndim_in]],
|
|
586
|
-
_stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
|
|
587
|
-
))
|
|
588
|
-
_stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)
|
|
589
|
-
|
|
590
|
-
return _stats_hdr_list, _stats_dat_list
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
def enf_stats_table_byzone(
|
|
594
|
-
_inparr: ArrayDouble | ArrayBIGINT | ArrayDouble | ArrayBIGINT,
|
|
595
|
-
_totals_row: int | None = None,
|
|
596
|
-
/,
|
|
597
|
-
*,
|
|
598
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
|
|
599
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
600
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
|
|
601
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
602
|
-
_ndim_in: int = ZONE_VALS.shape[1]
|
|
603
|
-
|
|
604
|
-
_zone_str_dict = (
|
|
605
|
-
ZONE_STRINGS_LATEX_TIKZ
|
|
606
|
-
if print_format == "LaTeX+TikZ"
|
|
607
|
-
else (ZONE_STRINGS_LATEX_TBLR if print_format == "LaTeX" else ZONE_STRINGS)
|
|
608
|
-
)
|
|
609
|
-
_zone_str_keys = list(_zone_str_dict)
|
|
610
|
-
|
|
611
|
-
if sort_order == SortSelector.REV:
|
|
612
|
-
_inparr = _inparr[::-1]
|
|
613
|
-
_zone_str_keys = _zone_str_keys[:-1][::-1] + [_zone_str_keys[-1]]
|
|
614
|
-
|
|
615
|
-
if _totals_row is None:
|
|
616
|
-
_inparr = np.vstack((
|
|
617
|
-
_inparr,
|
|
618
|
-
np.concatenate((
|
|
619
|
-
[fid.TTL_KEY, -1, -1],
|
|
620
|
-
np.einsum("ij->j", _inparr[:, _ndim_in:]),
|
|
621
|
-
)),
|
|
622
|
-
))
|
|
623
|
-
|
|
624
|
-
_stats_hdr_list, _stats_dat_list = ([], [])
|
|
625
|
-
for _conc_zone in _zone_str_keys:
|
|
626
|
-
_stats_byzone_it = _inparr[_inparr[:, 0] == _conc_zone]
|
|
627
|
-
_stats_hdr_list += [_zone_str_dict[_conc_zone]]
|
|
628
|
-
|
|
629
|
-
_stats_cnt = np.einsum("ij->j", _stats_byzone_it[:, _ndim_in:])
|
|
630
|
-
_stats_tot = np.concatenate((
|
|
631
|
-
[_inparr[-1][_ndim_in]],
|
|
632
|
-
_stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
|
|
633
|
-
))
|
|
634
|
-
_stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)
|
|
635
|
-
|
|
636
|
-
if _conc_zone in (2, fid.TTL_KEY):
|
|
637
|
-
continue
|
|
638
|
-
|
|
639
|
-
for _stats_byzone_detail in _stats_byzone_it:
|
|
640
|
-
# Only two sets of subtotals detail, so
|
|
641
|
-
# a conditional expression will do here
|
|
642
|
-
if print_format in ("LaTeX", "LaTeX+TikZ"):
|
|
643
|
-
_stats_text_color = "HiCoYellow" if _conc_zone == 1 else "BrightGreen"
|
|
644
|
-
_stats_hdr_list += [
|
|
645
|
-
R"{} {{\null\hfill \({}{}\) }}{}".format(
|
|
646
|
-
rf"\node[text={_stats_text_color}, fill=white, align=right]"
|
|
647
|
-
if print_format == "LaTeX+TikZ"
|
|
648
|
-
else rf"\SetCell{{r, fg={_stats_text_color}, bg=white}}",
|
|
649
|
-
ZONE_DETAIL_STRINGS_HHI_LATEX[_stats_byzone_detail[1]],
|
|
650
|
-
(
|
|
651
|
-
""
|
|
652
|
-
if _stats_byzone_detail[2] == 0
|
|
653
|
-
else Rf"{ZONE_DETAIL_STRINGS_DELTA_LATEX[_stats_byzone_detail[2]]}"
|
|
654
|
-
),
|
|
655
|
-
";" if print_format == "LaTeX+TikZ" else "",
|
|
656
|
-
)
|
|
657
|
-
]
|
|
658
|
-
else:
|
|
659
|
-
_stats_hdr_list += [
|
|
660
|
-
R"{}{};".format(
|
|
661
|
-
ZONE_DETAIL_STRINGS_HHI[_stats_byzone_detail[1]],
|
|
662
|
-
(
|
|
663
|
-
""
|
|
664
|
-
if _stats_byzone_detail[2] == 0
|
|
665
|
-
else Rf"{ZONE_DETAIL_STRINGS_DELTA[_stats_byzone_detail[2]]}"
|
|
666
|
-
),
|
|
667
|
-
)
|
|
668
|
-
]
|
|
669
|
-
|
|
670
|
-
_stats_cnt = _stats_byzone_detail[_ndim_in:]
|
|
671
|
-
_stats_tot = np.concatenate((
|
|
672
|
-
[_inparr[-1][_ndim_in]],
|
|
673
|
-
_stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
|
|
674
|
-
))
|
|
675
|
-
_stats_dat_list += _stats_formatted_row(
|
|
676
|
-
_stats_cnt, _stats_tot, return_type_sel
|
|
677
|
-
)
|
|
678
|
-
|
|
679
|
-
return _stats_hdr_list, _stats_dat_list
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
def _stats_formatted_row(
|
|
683
|
-
_stats_row_cnt: ArrayBIGINT,
|
|
684
|
-
_stats_row_tot: ArrayBIGINT,
|
|
685
|
-
_return_type_sel: StatsReturnSelector,
|
|
686
|
-
/,
|
|
687
|
-
) -> list[list[str]]:
|
|
688
|
-
_stats_row_pct = _stats_row_cnt / _stats_row_tot
|
|
689
|
-
|
|
690
|
-
match _return_type_sel:
|
|
691
|
-
case StatsReturnSelector.RIN:
|
|
692
|
-
_stats_row_ci = np.array([
|
|
693
|
-
propn_ci(*g, method="Wilson")
|
|
694
|
-
for g in zip(_stats_row_cnt[1:], _stats_row_tot[1:], strict=True)
|
|
695
|
-
])
|
|
696
|
-
return [
|
|
697
|
-
[
|
|
698
|
-
pct_format_str.format(100 * _stats_row_pct[0]),
|
|
699
|
-
*[
|
|
700
|
-
ci_format_str.format(*100 * np.array(f)).replace(
|
|
701
|
-
R" nan [ nan, nan] \%", "---"
|
|
702
|
-
)
|
|
703
|
-
for f in _stats_row_ci
|
|
704
|
-
],
|
|
705
|
-
]
|
|
706
|
-
]
|
|
707
|
-
case StatsReturnSelector.RPT:
|
|
708
|
-
return [
|
|
709
|
-
[
|
|
710
|
-
pct_format_str.format(f).replace(R"nan\%", "---")
|
|
711
|
-
for f in 100 * _stats_row_pct
|
|
712
|
-
]
|
|
713
|
-
]
|
|
714
|
-
case _:
|
|
715
|
-
return [
|
|
716
|
-
[
|
|
717
|
-
cnt_format_str.format(f).replace(R"nan", "---")
|
|
718
|
-
for f in _stats_row_cnt
|
|
719
|
-
]
|
|
720
|
-
]
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
def stats_print_rows(
|
|
724
|
-
_enf_stats_hdr_list: list[str],
|
|
725
|
-
_enf_stats_dat_list: list[list[str]],
|
|
726
|
-
/,
|
|
727
|
-
*,
|
|
728
|
-
print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "text",
|
|
729
|
-
) -> None:
|
|
730
|
-
for _idx, _hdr in enumerate(_enf_stats_hdr_list):
|
|
731
|
-
if print_format in ("LaTeX", "LaTeX+TikZ"):
|
|
732
|
-
_hdr_str = re.search(r" \{(.*?)\};?\z", _hdr)[-1].strip()
|
|
733
|
-
print(
|
|
734
|
-
_hdr_str,
|
|
735
|
-
" & ",
|
|
736
|
-
" & ".join(_enf_stats_dat_list[_idx]),
|
|
737
|
-
LTX_ARRAY_LINEEND,
|
|
738
|
-
end="",
|
|
739
|
-
)
|
|
740
|
-
else:
|
|
741
|
-
print(_hdr, " | ", " | ".join(_enf_stats_dat_list[_idx]))
|
|
742
|
-
|
|
743
|
-
print()
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
def propn_ci(
|
|
747
|
-
_npos: ArrayINT | int = 4,
|
|
748
|
-
_nobs: ArrayINT | int = 10,
|
|
749
|
-
/,
|
|
750
|
-
*,
|
|
751
|
-
alpha: float = 0.05,
|
|
752
|
-
method: Literal[
|
|
753
|
-
"Agresti-Coull", "Clopper-Pearson", "Exact", "Wilson", "Score"
|
|
754
|
-
] = "Wilson",
|
|
755
|
-
) -> tuple[
|
|
756
|
-
ArrayDouble | float, ArrayDouble | float, ArrayDouble | float, ArrayDouble | float
|
|
757
|
-
]:
|
|
758
|
-
"""Returns point estimates and confidence interval for a proportion
|
|
759
|
-
|
|
760
|
-
Methods "Clopper-Pearson" and "Exact" are synoymous [3]_. Similarly,
|
|
761
|
-
"Wilson" and "Score" are synonyms here.
|
|
762
|
-
|
|
763
|
-
Parameters
|
|
764
|
-
----------
|
|
765
|
-
_npos
|
|
766
|
-
Number of positives
|
|
767
|
-
|
|
768
|
-
_nobs
|
|
769
|
-
Number of observed values
|
|
770
|
-
|
|
771
|
-
alpha
|
|
772
|
-
Significance level
|
|
773
|
-
|
|
774
|
-
method
|
|
775
|
-
Method to use for estimating confidence interval
|
|
776
|
-
|
|
777
|
-
Returns
|
|
778
|
-
-------
|
|
779
|
-
Raw and estimated proportions, and bounds of the confidence interval
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
References
|
|
783
|
-
----------
|
|
784
|
-
|
|
785
|
-
.. [3] Alan Agresti & Brent A. Coull (1998) Approximate is Better
|
|
786
|
-
than “Exact” for Interval Estimation of Binomial Proportions,
|
|
787
|
-
The American Statistician, 52:2, 119-126,
|
|
788
|
-
https://doi.org/10.1080/00031305.1998.10480550
|
|
789
|
-
|
|
790
|
-
"""
|
|
791
|
-
|
|
792
|
-
for _f in _npos, _nobs:
|
|
793
|
-
if not isinstance(_f, int | np.integer):
|
|
794
|
-
raise ValueError(
|
|
795
|
-
f"Count, {_f!r} must have type that is a subtype of np.integer."
|
|
796
|
-
)
|
|
797
|
-
|
|
798
|
-
if not _nobs:
|
|
799
|
-
return (np.nan, np.nan, np.nan, np.nan)
|
|
800
|
-
|
|
801
|
-
_raw_phat: ArrayDouble | float = _npos / _nobs
|
|
802
|
-
_est_phat: ArrayDouble | float
|
|
803
|
-
_est_ci_l: ArrayDouble | float
|
|
804
|
-
_est_ci_u: ArrayDouble | float
|
|
805
|
-
|
|
806
|
-
match method:
|
|
807
|
-
case "Clopper-Pearson" | "Exact":
|
|
808
|
-
_est_ci_l, _est_ci_u = (
|
|
809
|
-
beta.ppf(*_f)
|
|
810
|
-
for _f in (
|
|
811
|
-
(alpha / 2, _npos, _nobs - _npos + 1),
|
|
812
|
-
(1 - alpha / 2, _npos + 1, _nobs - _npos),
|
|
813
|
-
)
|
|
814
|
-
)
|
|
815
|
-
_est_phat = 1 / 2 * (_est_ci_l + _est_ci_u)
|
|
816
|
-
|
|
817
|
-
case "Agresti-Coull":
|
|
818
|
-
_zsc = norm.ppf(1 - alpha / 2)
|
|
819
|
-
_zscsq = _zsc * _zsc
|
|
820
|
-
_adjmt = 4 if alpha == 0.05 else _zscsq
|
|
821
|
-
_est_phat = (_npos + _adjmt / 2) / (_nobs + _adjmt)
|
|
822
|
-
_est_ci_l, _est_ci_u = (
|
|
823
|
-
_est_phat + _g
|
|
824
|
-
for _g in [
|
|
825
|
-
_f * _zsc * np.sqrt(_est_phat * (1 - _est_phat) / (_nobs + _adjmt))
|
|
826
|
-
for _f in (-1, 1)
|
|
827
|
-
]
|
|
828
|
-
)
|
|
829
|
-
|
|
830
|
-
case "Wilson" | "Score":
|
|
831
|
-
_zsc = norm.ppf(1 - alpha / 2)
|
|
832
|
-
_zscsq = _zsc * _zsc
|
|
833
|
-
_est_phat = (_npos + _zscsq / 2) / (_nobs + _zscsq)
|
|
834
|
-
_est_ci_l, _est_ci_u = (
|
|
835
|
-
_est_phat
|
|
836
|
-
+ _f
|
|
837
|
-
* _zsc
|
|
838
|
-
* np.sqrt(_nobs * _raw_phat * (1 - _raw_phat) + _zscsq / 4)
|
|
839
|
-
/ (_nobs + _zscsq)
|
|
840
|
-
for _f in (-1, 1)
|
|
841
|
-
)
|
|
842
|
-
|
|
843
|
-
case _:
|
|
844
|
-
raise ValueError(f"Method, {f'"{method}"'} not yet implemented.")
|
|
845
|
-
|
|
846
|
-
return _raw_phat, _est_phat, _est_ci_l, _est_ci_u
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
def render_table_pdf(
|
|
850
|
-
_table_collection_design: Template,
|
|
851
|
-
_table_collection_content: StatsContainer,
|
|
852
|
-
_output_tex_path: Path,
|
|
853
|
-
/,
|
|
854
|
-
) -> None:
|
|
855
|
-
"""Render table collection to PDF
|
|
856
|
-
|
|
857
|
-
Parameters
|
|
858
|
-
----------
|
|
859
|
-
_table_collection_design
|
|
860
|
-
A jinja2 template for generating the LaTeX file to render
|
|
861
|
-
|
|
862
|
-
_table_collection_content
|
|
863
|
-
Content for jinja2 template
|
|
864
|
-
|
|
865
|
-
_output_tex_path
|
|
866
|
-
Path to LaTeX output file to render to PDF
|
|
867
|
-
"""
|
|
868
|
-
|
|
869
|
-
with _output_tex_path.open("w", encoding="utf8") as _output_tex_file:
|
|
870
|
-
_output_tex_file.write(
|
|
871
|
-
_table_collection_design.render(tmpl_data=_table_collection_content)
|
|
872
|
-
)
|
|
873
|
-
print("\n", file=_output_tex_file)
|
|
874
|
-
|
|
875
|
-
_run_rc = subprocess.run( # noqa: S603
|
|
876
|
-
f"latexmk -f -quiet -synctex=0 -interaction=nonstopmode -file-line-error -pdflua {_output_tex_path}".split(),
|
|
877
|
-
check=True,
|
|
878
|
-
cwd=DATA_DIR,
|
|
879
|
-
)
|
|
880
|
-
if _run_rc:
|
|
881
|
-
subprocess.run("latexmk -quiet -c".split(), check=True, cwd=DATA_DIR) # noqa: S603
|
|
882
|
-
del _run_rc
|
|
883
|
-
|
|
884
|
-
print(f"Tables rendered to path, {_output_tex_path.with_suffix(".pdf")}")
|
|
885
|
-
|
|
886
|
-
|
|
887
352
|
if __name__ == "__main__":
|
|
888
353
|
print(
|
|
889
|
-
"This module provides methods to
|
|
354
|
+
"This module provides methods to aggregate statistics on merger enforcement patterns for reporting."
|
|
890
355
|
)
|