mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +142 -93
- mergeron/core/guidelines_boundaries.py +289 -1077
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +257 -245
- mergeron/gen/data_generation.py +473 -221
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +159 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -259
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738963.0.dist-info/METADATA +0 -108
- mergeron-2024.738963.0.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
|
@@ -1,709 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Methods to format and print summary data on merger enforcement patterns.
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import enum
|
|
7
|
-
import shutil
|
|
8
|
-
import subprocess
|
|
9
|
-
from collections.abc import Mapping, Sequence
|
|
10
|
-
from importlib.metadata import version
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from types import SimpleNamespace
|
|
13
|
-
|
|
14
|
-
import numpy as np
|
|
15
|
-
import re2 as re # type: ignore
|
|
16
|
-
from jinja2 import Environment, FileSystemLoader, Template, select_autoescape
|
|
17
|
-
from numpy.typing import NDArray
|
|
18
|
-
from scipy.interpolate import interp1d # type: ignore
|
|
19
|
-
|
|
20
|
-
from .. import _PKG_NAME, DATA_DIR # noqa: TID252
|
|
21
|
-
from ..core import ftc_merger_investigations_data as fid # noqa: TID252
|
|
22
|
-
from ..core.proportions_tests import propn_ci # noqa: TID252
|
|
23
|
-
from . import TF, TI, INVResolution
|
|
24
|
-
|
|
25
|
-
__version__ = version(_PKG_NAME)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@enum.unique
|
|
29
|
-
class INDGRPConstants(enum.StrEnum):
|
|
30
|
-
ALL = "All Markets"
|
|
31
|
-
GRO = "Grocery Markets"
|
|
32
|
-
OIL = "Oil Markets"
|
|
33
|
-
CHM = "Chemical Markets"
|
|
34
|
-
PHM = "Pharmaceuticals Markets"
|
|
35
|
-
HOS = "Hospital Markets"
|
|
36
|
-
EDS = "Electronically-Controlled Devices and Systems Markets"
|
|
37
|
-
BRD = "Branded Consumer Goods Markets"
|
|
38
|
-
OTH = '"Other" Markets'
|
|
39
|
-
IIC = "Industries in Common"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
@enum.unique
|
|
43
|
-
class EVIDENConstants(enum.StrEnum):
|
|
44
|
-
HD = "Hot Documents Identified"
|
|
45
|
-
CC = "Strong Customer Complaints"
|
|
46
|
-
NE = "No Entry Evidence"
|
|
47
|
-
ED = "Entry Difficult"
|
|
48
|
-
EE = "Entry Easy"
|
|
49
|
-
UR = "Unrestricted on additional evidence"
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
@enum.unique
|
|
53
|
-
class StatsGrpSelector(enum.StrEnum):
|
|
54
|
-
FC = "ByFirmCount"
|
|
55
|
-
HD = "ByHHIandDelta"
|
|
56
|
-
DL = "ByDelta"
|
|
57
|
-
ZN = "ByConcZone"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@enum.unique
|
|
61
|
-
class StatsReturnSelector(enum.StrEnum):
|
|
62
|
-
CNT = "count"
|
|
63
|
-
RPT = "rate, point"
|
|
64
|
-
RIN = "rate, interval"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
@enum.unique
|
|
68
|
-
class SortSelector(enum.StrEnum):
|
|
69
|
-
UCH = "unchanged"
|
|
70
|
-
REV = "reversed"
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
# Cell formats for LaTeX table output; the percent sign is escaped for LaTeX.
# Counts: right-aligned, width 5, thousands separator, no decimals
cnt_format_str = "{: >5,.0f}"
# Rates: right-aligned, width 6, one decimal, followed by "\%"
pct_format_str = "{: >6.1f}\\%"
# Rates with interval: uses positional items 0 (point), 2 and 3 (bounds)
ci_format_str = "{0: >5.1f} [{2: >4.1f},{3: >5.1f}] \\%"
|
|
76
|
-
|
|
77
|
-
# Template rendering a margin of error from a sequence ``rv``: when the
# distances of rv[2] and rv[3] from rv[0] are equal in absolute value, a
# single "[\pm x]" is emitted; otherwise both signed distances are shown.
moe_tmpl = Template(
    "\n"
    R"{% if (rv[2] - rv[0]) | abs == (rv[3] - rv[0]) | abs %}"
    "\n"
    R"""{{- "[\pm {:.1f}]".format(rv[3] - rv[0]) -}}"""
    "\n"
    R"{% else %}"
    "\n"
    R"""{{- "[{:.1f}/+{:.1f}]".format(rv[2] - rv[0], rv[3] - rv[0]) -}}"""
    "\n"
    R"{% endif %}"
    "\n"
)
|
|
84
|
-
|
|
85
|
-
# LaTeX array row terminator: two backslashes followed by a newline
LTX_ARRAY_LINEEND = "\\\\\n"

# Three-line format string for a TikZ header-column matrix. Placeholders, in
# order: anchor node, text width, matrix name, node alignment, node text.
latex_hrdcoldesc_format_str = "\n".join((
    (
        R"\matrix[hcol, above=0pt of {}, nodes = {{"
        R"text width={}, text depth=10pt, inner sep=3pt, minimum height=25pt,"
        R"}},] "
        R"({}) "
        R"{{"
    ),
    R"\node[align = {},] {{ {} }}; \\",
    R"}};",
))
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
class StatsContainer(SimpleNamespace):
    """Namespace object handed to jinja2 templates as their data.

    Only two attributes are declared here; callers attach whatever further
    attributes a given template needs before rendering.
    """

    # Header and data strings for investigations-data tables
    invdata_hdrstr: str
    invdata_datstr: str
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
# jinja2 environment with delimiters that do not collide with LaTeX syntax
# http://eosrei.net/articles/2015/11/latex-templates-python-and-jinja2-generate-pdfs
latex_jinja_env = Environment(
    loader=FileSystemLoader(Path(__file__).parents[1] / "jinja_LaTex_templates"),
    autoescape=select_autoescape(disabled_extensions=("tex.jinja2",)),
    block_start_string="((*",
    block_end_string="*))",
    variable_start_string="\\JINVAR{",
    variable_end_string="}",
    comment_start_string="((#",  # r'#{',
    comment_end_string="#))",  # '}',
    line_statement_prefix="##",
    line_comment_prefix="%#",
    trim_blocks=True,
    lstrip_blocks=True,
)

# Copy the LaTeX class file into the output data directory, unless one is
# already there
_out_path = DATA_DIR.joinpath(f"{_PKG_NAME}.cls")
if not _out_path.is_file():
    shutil.copyfile(
        Path(__file__).parents[1] / "jinja_LaTex_templates" / "mergeron.cls",
        _out_path,
    )


# Render the TikZ table settings into the output data directory, unless a
# settings file is already there
_DOTTEX = DATA_DIR / f"{_PKG_NAME}_TikZTableSettings.tex"
if not _DOTTEX.is_file():
    with _DOTTEX.open("w", encoding="UTF-8") as _table_helper_dottex:
        _table_helper_dottex.write(
            latex_jinja_env.get_template("setup_tikz_tables.tex.jinja2").render(
                tmpl_data=StatsContainer()
            )
        )
        # Two trailing newlines, as print("\n", file=...) would emit
        _table_helper_dottex.write("\n\n")
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
# Parameters and functions to interpolate selected HHI and ΔHHI values
# recorded in fractions to ranges of values in points on the HHI scale
HHI_DELTA_KNOTS = np.array(
    [0, 100, 200, 300, 500, 800, 1200, 2500, 5001], dtype=np.int64
)
HHI_POST_ZONE_KNOTS = np.array([0, 1800, 2400, 10001], dtype=np.int64)

# Step functions: given a value as a fraction (points / 10,000), each returns
# the largest knot, in points, not exceeding that value
hhi_delta_ranger, hhi_zone_post_ranger = (
    interp1d(_f / 1e4, _f, kind="previous", assume_sorted=True)
    for _f in (HHI_DELTA_KNOTS, HHI_POST_ZONE_KNOTS)
)

# Maps (post-merger HHI knot) -> (ΔHHI knot) -> zone triple. The first item
# of a triple is matched against the keys of ZONE_STRINGS; the second and
# third items are used as keys into ZONE_DETAIL_STRINGS_HHI and
# ZONE_DETAIL_STRINGS_DELTA, respectively.
HMG_PRESUMPTION_ZONE_DICT = {
    HHI_POST_ZONE_KNOTS[0]: {
        HHI_DELTA_KNOTS[0]: (0, 0, 0),
        HHI_DELTA_KNOTS[1]: (0, 0, 0),
        HHI_DELTA_KNOTS[2]: (0, 0, 0),
    },
    HHI_POST_ZONE_KNOTS[1]: {
        HHI_DELTA_KNOTS[0]: (0, 1, 1),
        HHI_DELTA_KNOTS[1]: (1, 1, 2),
        HHI_DELTA_KNOTS[2]: (1, 1, 2),
    },
    HHI_POST_ZONE_KNOTS[2]: {
        HHI_DELTA_KNOTS[0]: (0, 2, 1),
        HHI_DELTA_KNOTS[1]: (1, 2, 3),
        HHI_DELTA_KNOTS[2]: (2, 2, 4),
    },
}

# Distinct zone triples, one per row, in lexicographic order.
# np.vstack replaces np.row_stack, a deprecated alias of the same function.
ZONE_VALS = np.unique(
    np.vstack([
        tuple(_zone_map.values()) for _zone_map in HMG_PRESUMPTION_ZONE_DICT.values()
    ]),
    axis=0,
)
|
|
181
|
-
|
|
182
|
-
# TikZ node text for each concentration zone, plus the totals row
ZONE_STRINGS = {
    _zone_key: Rf"\node[align = left, fill={_fill_color}] {{{_zone_label}}};"
    for _zone_key, _fill_color, _zone_label in (
        (0, "BrightGreen", "Green Zone (Safeharbor)"),
        (1, "HiCoYellow", "Yellow Zone"),
        (2, "VibrRed", "Red Zone (SLC Presumption)"),
        (fid.TTL_KEY, "OBSHDRFill", "TOTAL"),
    )
}

# LaTeX math fragments describing the post-merger HHI range of a zone detail
ZONE_DETAIL_STRINGS_HHI = {
    0: R"HHI_{{post}} < \text{{{} pts.}}".format(HHI_POST_ZONE_KNOTS[1]),
    1: (
        Rf"HHI_{{post}} \in \text{{[{HHI_POST_ZONE_KNOTS[1]}, "
        Rf"{HHI_POST_ZONE_KNOTS[2]}) pts. and }} "
    ),
    2: R"HHI_{{post}} \geqslant \text{{{} pts. and }} ".format(
        HHI_POST_ZONE_KNOTS[2]
    ),
}

# LaTeX math fragments describing the ΔHHI range of a zone detail
ZONE_DETAIL_STRINGS_DELTA = {
    0: "",
    1: R"\Delta HHI < \text{{{} pts.}}".format(HHI_DELTA_KNOTS[1]),
    2: R"\Delta HHI \geqslant \text{{{} pts.}}".format(HHI_DELTA_KNOTS[1]),
    3: Rf"\Delta HHI \in \text{{[{HHI_DELTA_KNOTS[1]}, {HHI_DELTA_KNOTS[2]}) pts.}}",
    4: R"\Delta HHI \geqslant \text{{{} pts.}}".format(HHI_DELTA_KNOTS[2]),
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def invres_stats_output(
|
|
207
|
-
_data_array_dict: fid.INVData,
|
|
208
|
-
_data_period: str = "1996-2003",
|
|
209
|
-
_table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
|
|
210
|
-
_table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
|
|
211
|
-
_stats_group: StatsGrpSelector = StatsGrpSelector.FC,
|
|
212
|
-
_invres_spec: INVResolution = INVResolution.CLRN,
|
|
213
|
-
/,
|
|
214
|
-
*,
|
|
215
|
-
return_type_sel: StatsReturnSelector = StatsReturnSelector.RPT,
|
|
216
|
-
sort_order: SortSelector = SortSelector.UCH,
|
|
217
|
-
print_to_screen: bool = True,
|
|
218
|
-
) -> tuple[list[str], list[list[str]]]:
|
|
219
|
-
if _data_period not in _data_array_dict:
|
|
220
|
-
raise ValueError(
|
|
221
|
-
f"Value of _data_period, {f'"{_data_period}"'} is invalid.",
|
|
222
|
-
f"Must be in, {list(_data_array_dict.keys())!r}",
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
match _stats_group:
|
|
226
|
-
case StatsGrpSelector.ZN:
|
|
227
|
-
_latex_tbl_invres_stats_func = latex_tbl_invres_stats_byzone
|
|
228
|
-
case StatsGrpSelector.FC:
|
|
229
|
-
_latex_tbl_invres_stats_func = latex_tbl_invres_stats_1dim
|
|
230
|
-
case StatsGrpSelector.DL:
|
|
231
|
-
_latex_tbl_invres_stats_func = latex_tbl_invres_stats_1dim
|
|
232
|
-
case _:
|
|
233
|
-
raise ValueError(
|
|
234
|
-
'Statistics formatted, "{_stats_group}" not available here.'
|
|
235
|
-
)
|
|
236
|
-
|
|
237
|
-
_invres_stats_cnts = invres_stats_cnts_by_group(
|
|
238
|
-
_data_array_dict,
|
|
239
|
-
_data_period,
|
|
240
|
-
_table_ind_group,
|
|
241
|
-
_table_evid_cond,
|
|
242
|
-
_stats_group,
|
|
243
|
-
_invres_spec,
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
_invres_stats_hdr_list, _invres_stats_dat_list = _latex_tbl_invres_stats_func(
|
|
247
|
-
_invres_stats_cnts, None, return_type_sel=return_type_sel, sort_order=sort_order
|
|
248
|
-
)
|
|
249
|
-
|
|
250
|
-
if print_to_screen:
|
|
251
|
-
print(
|
|
252
|
-
f"{_invres_spec.capitalize()} stats ({return_type_sel})",
|
|
253
|
-
f"for Period: {_data_period}",
|
|
254
|
-
"\u2014",
|
|
255
|
-
f"{_table_ind_group};",
|
|
256
|
-
_table_evid_cond,
|
|
257
|
-
)
|
|
258
|
-
stats_print_rows(_invres_stats_hdr_list, _invres_stats_dat_list)
|
|
259
|
-
|
|
260
|
-
return _invres_stats_hdr_list, _invres_stats_dat_list
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
def invres_stats_cnts_by_group(
|
|
264
|
-
_invdata_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
|
|
265
|
-
_study_period: str,
|
|
266
|
-
_table_ind_grp: INDGRPConstants,
|
|
267
|
-
_table_evid_cond: EVIDENConstants,
|
|
268
|
-
_stats_group: StatsGrpSelector,
|
|
269
|
-
_invres_spec: INVResolution,
|
|
270
|
-
/,
|
|
271
|
-
) -> NDArray[np.int64]:
|
|
272
|
-
if _stats_group == StatsGrpSelector.HD:
|
|
273
|
-
raise ValueError(
|
|
274
|
-
f"Clearance/enforcement statistics, '{_stats_group}' not valied here."
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
match _stats_group:
|
|
278
|
-
case StatsGrpSelector.FC:
|
|
279
|
-
_cnts_func = invres_cnts_byfirmcount
|
|
280
|
-
_cnts_listing_func = invres_cnts_listing_byfirmcount
|
|
281
|
-
case StatsGrpSelector.DL:
|
|
282
|
-
_cnts_func = invres_cnts_bydelta
|
|
283
|
-
_cnts_listing_func = invres_cnts_listing_byhhianddelta
|
|
284
|
-
case StatsGrpSelector.ZN:
|
|
285
|
-
_cnts_func = invres_cnts_byconczone
|
|
286
|
-
_cnts_listing_func = invres_cnts_listing_byhhianddelta
|
|
287
|
-
|
|
288
|
-
return _cnts_func(
|
|
289
|
-
_cnts_listing_func(
|
|
290
|
-
_invdata_array_dict,
|
|
291
|
-
_study_period,
|
|
292
|
-
_table_ind_grp,
|
|
293
|
-
_table_evid_cond,
|
|
294
|
-
_invres_spec,
|
|
295
|
-
)
|
|
296
|
-
)
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
def invres_cnts_listing_byfirmcount(
|
|
300
|
-
_data_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
|
|
301
|
-
_data_period: str = "1996-2003",
|
|
302
|
-
_table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
|
|
303
|
-
_table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
|
|
304
|
-
_invres_spec: INVResolution = INVResolution.CLRN,
|
|
305
|
-
/,
|
|
306
|
-
) -> NDArray[np.int64]:
|
|
307
|
-
if _data_period not in _data_array_dict:
|
|
308
|
-
raise ValueError(
|
|
309
|
-
f"Invalid value of data period, {f'"{_data_period}"'}."
|
|
310
|
-
f"Must be one of, {tuple(_data_array_dict.keys())!r}."
|
|
311
|
-
)
|
|
312
|
-
|
|
313
|
-
_data_array_dict_sub = _data_array_dict[_data_period][fid.TABLE_TYPES[1]]
|
|
314
|
-
|
|
315
|
-
_table_no = table_no_lku(_data_array_dict_sub, _table_ind_group, _table_evid_cond)
|
|
316
|
-
|
|
317
|
-
_cnts_array = _data_array_dict_sub[_table_no].data_array
|
|
318
|
-
|
|
319
|
-
_ndim_in = 1
|
|
320
|
-
_stats_kept_indxs = []
|
|
321
|
-
match _invres_spec:
|
|
322
|
-
case INVResolution.CLRN:
|
|
323
|
-
_stats_kept_indxs = [-1, -2]
|
|
324
|
-
case INVResolution.ENFT:
|
|
325
|
-
_stats_kept_indxs = [-1, -3]
|
|
326
|
-
case INVResolution.BOTH:
|
|
327
|
-
_stats_kept_indxs = [-1, -3, -2]
|
|
328
|
-
|
|
329
|
-
return np.column_stack([
|
|
330
|
-
_cnts_array[:, :_ndim_in],
|
|
331
|
-
_cnts_array[:, _stats_kept_indxs],
|
|
332
|
-
])
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
def invres_cnts_listing_byhhianddelta(
|
|
336
|
-
_data_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
|
|
337
|
-
_data_period: str = "1996-2003",
|
|
338
|
-
_table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
|
|
339
|
-
_table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
|
|
340
|
-
_invres_spec: INVResolution = INVResolution.CLRN,
|
|
341
|
-
/,
|
|
342
|
-
) -> NDArray[np.int64]:
|
|
343
|
-
if _data_period not in _data_array_dict:
|
|
344
|
-
raise ValueError(
|
|
345
|
-
f"Invalid value of data period, {f'"{_data_period}"'}."
|
|
346
|
-
f"Must be one of, {tuple(_data_array_dict.keys())!r}."
|
|
347
|
-
)
|
|
348
|
-
|
|
349
|
-
_data_array_dict_sub = _data_array_dict[_data_period][fid.TABLE_TYPES[0]]
|
|
350
|
-
|
|
351
|
-
_table_no = table_no_lku(_data_array_dict_sub, _table_ind_group, _table_evid_cond)
|
|
352
|
-
|
|
353
|
-
_cnts_array = _data_array_dict_sub[_table_no].data_array
|
|
354
|
-
|
|
355
|
-
_ndim_in = 2
|
|
356
|
-
_stats_kept_indxs = []
|
|
357
|
-
match _invres_spec:
|
|
358
|
-
case INVResolution.CLRN:
|
|
359
|
-
_stats_kept_indxs = [-1, -2]
|
|
360
|
-
case INVResolution.ENFT:
|
|
361
|
-
_stats_kept_indxs = [-1, -3]
|
|
362
|
-
case INVResolution.BOTH:
|
|
363
|
-
_stats_kept_indxs = [-1, -3, -2]
|
|
364
|
-
|
|
365
|
-
return np.column_stack([
|
|
366
|
-
_cnts_array[:, :_ndim_in],
|
|
367
|
-
_cnts_array[:, _stats_kept_indxs],
|
|
368
|
-
])
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
def table_no_lku(
    _data_array_dict_sub: Mapping[str, fid.INVTableData],
    _table_ind_group: INDGRPConstants = INDGRPConstants.ALL,
    _table_evid_cond: EVIDENConstants = EVIDENConstants.UR,
    /,
) -> str:
    """Look up the key of the table with the given industry group and evidence.

    Parameters
    ----------
    _data_array_dict_sub
        Tables of one table type, keyed by table number.
    _table_ind_group
        Industry group to match against each table's ``ind_grp``.
    _table_evid_cond
        Evidence condition to match against each table's ``evid_cond``.

    Returns
    -------
    str
        Key of the first table matching both conditions.

    Raises
    ------
    ValueError
        If no table has the given industry group, or none with that group
        also has the given evidence condition.

    """
    _igl = [_tbl.ind_grp for _tbl in _data_array_dict_sub.values()]
    if _table_ind_group not in _igl:
        raise ValueError(
            f'Invalid value for industry group, "{_table_ind_group}". '
            f"Must be one of {_igl!r}"
        )

    _tno = next(
        (
            _t
            for _t, _tbl in _data_array_dict_sub.items()
            if _tbl.ind_grp == _table_ind_group
            and _tbl.evid_cond == _table_evid_cond
        ),
        None,
    )
    if _tno is None:
        # Raise a descriptive error rather than leak a bare StopIteration
        raise ValueError(
            f'No table found for industry group, "{_table_ind_group}" '
            f'with evidence condition, "{_table_evid_cond}".'
        )

    return _tno
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
def invres_cnts_byfirmcount(
    _cnts_array: NDArray[np.integer[TI]], /
) -> NDArray[np.int64]:
    """Sum counts over rows sharing the same value in the first column.

    Parameters
    ----------
    _cnts_array
        Two-dimensional array whose first column is the grouping key and
        whose remaining columns are counts.

    Returns
    -------
    NDArray[np.int64]
        One row per distinct key, in ascending key order: the key followed by
        the column sums of the counts for that key.

    """
    _ndim_in = 1
    _group_keys = _cnts_array[:, 0]
    # np.vstack replaces np.row_stack, a deprecated alias of the same function
    return np.vstack([
        np.concatenate([
            (_key,),
            np.einsum("ij->j", _cnts_array[_group_keys == _key][:, _ndim_in:]),
        ])
        for _key in np.unique(_group_keys)
    ])
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
def invres_cnts_bydelta(_cnts_array: NDArray[np.integer[TI]], /) -> NDArray[np.int64]:
    """Sum counts over rows sharing the same ΔHHI knot in the second column.

    Parameters
    ----------
    _cnts_array
        Two-dimensional array whose first two columns are keys and whose
        remaining columns are counts.

    Returns
    -------
    NDArray[np.int64]
        One row per ΔHHI knot except the last: the knot followed by the
        column sums of the counts for that knot (zeros if none match).

    """
    _ndim_in = 2
    _delta_keys = _cnts_array[:, 1]
    # np.vstack replaces np.row_stack, a deprecated alias of the same function
    return np.vstack([
        np.concatenate([
            (_knot,),
            np.einsum("ij->j", _cnts_array[_delta_keys == _knot][:, _ndim_in:]),
        ])
        for _knot in HHI_DELTA_KNOTS[:-1]
    ])
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
def invres_cnts_byconczone(
    _cnts_array: NDArray[np.integer[TI]], /
) -> NDArray[np.int64]:
    """Aggregate counts by concentration-zone triple.

    Parameters
    ----------
    _cnts_array
        Two-dimensional array whose first two columns are post-merger HHI
        and ΔHHI, in points, and whose remaining columns are counts.

    Returns
    -------
    NDArray[np.int64]
        One row per row of ``ZONE_VALS``: the zone triple followed by the
        column sums of the counts falling in that zone.

    """
    # Prepare to tag clearance stats by presumption zone
    _hhi_zone_post_ranged = hhi_zone_post_ranger(_cnts_array[:, 0] / 1e4)
    _hhi_delta_ranged = hhi_delta_ranger(_cnts_array[:, 1] / 1e4)

    # Step 1: Tag and agg. from HHI-post and Delta to zone triple
    # NOTE: Although you could just map and not (partially) aggregate in this step,
    # the mapped array is a copy, and is larger without partial aggregation, so
    # aggregation reduces the footprint of this step in memory. Although this point
    # is more relevant for generated than observed data, using the same coding pattern
    # in both cases does make life easier
    _ndim_in = 2
    _nkeys = 3
    _cnts_only = _cnts_array[:, _ndim_in:]

    _rows_byhhipostanddelta = []
    for _hhi_zone_post_lim in HHI_POST_ZONE_KNOTS[:-1]:
        _level_test = _hhi_zone_post_ranged == _hhi_zone_post_lim

        for _hhi_zone_delta_lim in HHI_DELTA_KNOTS[:3]:
            # The third knot stands for every ΔHHI range above the second knot
            _delta_test = (
                (_hhi_delta_ranged > HHI_DELTA_KNOTS[1])
                if _hhi_zone_delta_lim == HHI_DELTA_KNOTS[2]
                else (_hhi_delta_ranged == _hhi_zone_delta_lim)
            )

            _zone_val = HMG_PRESUMPTION_ZONE_DICT[_hhi_zone_post_lim][
                _hhi_zone_delta_lim
            ]

            _conc_test = _level_test & _delta_test

            _rows_byhhipostanddelta.append(
                np.array(
                    (*_zone_val, *np.einsum("ij->j", _cnts_only[_conc_test])),
                    dtype=np.int64,
                )
            )
    # Stack once, rather than growing an array from a sentinel row
    _cnts_byhhipostanddelta = np.vstack(_rows_byhhipostanddelta)

    # Step 2: Aggregate the partial sums over rows sharing a zone triple
    _rows_byconczone = []
    for _zone_val in ZONE_VALS:
        # All key columns must equal the zone triple
        _hhi_zone_test = (_cnts_byhhipostanddelta[:, :_nkeys] == _zone_val).all(
            axis=1
        )

        _rows_byconczone.append(
            np.concatenate(
                (
                    _zone_val,
                    np.einsum(
                        "ij->j", _cnts_byhhipostanddelta[_hhi_zone_test][:, _nkeys:]
                    ),
                ),
                dtype=np.int64,
            )
        )

    return np.vstack(_rows_byconczone)
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
def latex_tbl_invres_stats_1dim(
    _inparr: NDArray[np.floating[TF] | np.integer[TI]],
    _totals_row: int | None = None,
    /,
    *,
    return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
    sort_order: SortSelector = SortSelector.UCH,
) -> tuple[list[str], list[list[str]]]:
    """Format statistics grouped along one dimension as table rows.

    Parameters
    ----------
    _inparr
        Array whose first column is the group key and whose remaining
        columns are counts.
    _totals_row
        Index of the totals row within ``_inparr``; if None, totals are
        computed as the column sums of the counts.
    return_type_sel
        Whether to report counts, point rates, or rates with intervals.
    sort_order
        Whether to keep or reverse the order of the non-total rows.

    Returns
    -------
    tuple[list[str], list[list[str]]]
        Row headers, and the formatted cells of each row; the totals row
        is last.

    """
    _ndim_in: int = 1
    # Map group keys to LaTeX row headers, for firm counts and ΔHHI ranges
    _dim_hdr_dict = {
        _v: (_k if _k == "TOTAL" else f"{{{_k}}}")
        for _k, _v in fid.CNT_FCOUNT_DICT.items()
    } | {
        _v: (
            "{[2500, 5000]}"
            if _k == "2,500 +"
            else "{[" + _k.replace(",", "").replace(" - ", ", ") + ")}"
        )
        for _k, _v in fid.CONC_DELTA_DICT.items()
        if _k != "TOTAL"
    }

    if _totals_row is not None:
        # Compare with None so that a totals row at index 0 is honored, and
        # drop the row by its index (not by the values it holds)
        _in_totals_row = _inparr[_totals_row, :]
        _inparr = np.delete(_inparr, _totals_row, axis=0)
    else:
        _in_totals_row = np.concatenate((
            [fid.TTL_KEY],
            np.einsum("ij->j", _inparr[:, _ndim_in:]),
        ))

    if sort_order == SortSelector.REV:
        _inparr = _inparr[::-1]

    # np.vstack replaces np.row_stack, a deprecated alias of the same function
    _inparr = np.vstack((_inparr, _in_totals_row))

    # First count of the totals row is the denominator for each row's first cell
    _grand_total = _inparr[-1][_ndim_in]

    _stats_hdr_list, _stats_dat_list = [], []
    for _stats_row in _inparr:
        _stats_hdr_list += [_dim_hdr_dict[_stats_row[0]]]

        _stats_cnt = _stats_row[_ndim_in:]
        # Remaining cells are rates relative to the row's own first count
        _stats_tot = np.concatenate((
            [_grand_total],
            _stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
        ))
        _stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)

    return _stats_hdr_list, _stats_dat_list
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
def latex_tbl_invres_stats_byzone(
    _inparr: NDArray[np.floating[TF] | np.integer[TI]],
    _totals_row: int | None = None,
    /,
    *,
    return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
    sort_order: SortSelector = SortSelector.UCH,
) -> tuple[list[str], list[list[str]]]:
    """Format statistics grouped by concentration zone as table rows.

    Each zone gets a subtotal row; zones other than the last zone and the
    totals also get one detail row per zone triple.

    Parameters
    ----------
    _inparr
        Array whose leading columns are the zone triple and whose remaining
        columns are counts.
    _totals_row
        If None, a totals row is computed and appended; otherwise the last
        row of ``_inparr`` is taken to hold the totals.
    return_type_sel
        Whether to report counts, point rates, or rates with intervals.
    sort_order
        Whether to keep or reverse the order of the zones.

    Returns
    -------
    tuple[list[str], list[list[str]]]
        Row headers, and the formatted cells of each row.

    """
    _ndim_in: int = ZONE_VALS.shape[1]

    _zone_str_keys = list(ZONE_STRINGS)
    if sort_order == SortSelector.REV:
        _inparr = _inparr[::-1]
        # Reverse the zones, but keep the totals key last
        _zone_str_keys = _zone_str_keys[:-1][::-1] + [_zone_str_keys[-1]]

    if _totals_row is None:
        # Totals key, padded with -1 in the remaining key columns
        # np.vstack replaces np.row_stack, a deprecated alias
        _inparr = np.vstack((
            _inparr,
            np.concatenate((
                [fid.TTL_KEY, *([-1] * (_ndim_in - 1))],
                np.einsum("ij->j", _inparr[:, _ndim_in:]),
            )),
        ))

    # First count of the totals row is the denominator for each row's first cell
    _grand_total = _inparr[-1][_ndim_in]

    _stats_hdr_list, _stats_dat_list = ([], [])
    for _conc_zone in _zone_str_keys:
        _stats_byzone_it = _inparr[_inparr[:, 0] == _conc_zone]
        _stats_hdr_list += [ZONE_STRINGS[_conc_zone]]

        _stats_cnt = np.einsum("ij->j", _stats_byzone_it[:, _ndim_in:])
        _stats_tot = np.concatenate((
            [_grand_total],
            _stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
        ))
        _stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)

        # No detail rows under zone 2 or the totals
        if _conc_zone in (2, fid.TTL_KEY):
            continue

        # Only two sets of subtotals detail, so
        # a conditional expression will do here
        _stats_text_color = "HiCoYellow" if _conc_zone == 1 else "BrightGreen"
        _node_spec = rf"\node[text = {_stats_text_color}, fill = white, align = right]"

        for _stats_byzone_detail in _stats_byzone_it:
            _stats_hdr_list += [
                R"{} {{ \({}{}\) }};".format(
                    _node_spec,
                    ZONE_DETAIL_STRINGS_HHI[_stats_byzone_detail[1]],
                    (
                        ""
                        if _stats_byzone_detail[2] == 0
                        else Rf"{ZONE_DETAIL_STRINGS_DELTA[_stats_byzone_detail[2]]}"
                    ),
                )
            ]

            _stats_cnt = _stats_byzone_detail[_ndim_in:]
            _stats_tot = np.concatenate((
                [_grand_total],
                _stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
            ))
            _stats_dat_list += _stats_formatted_row(
                _stats_cnt, _stats_tot, return_type_sel
            )

    return _stats_hdr_list, _stats_dat_list
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
def _stats_formatted_row(
|
|
614
|
-
_stats_row_cnt: NDArray[np.integer[TI]],
|
|
615
|
-
_stats_row_tot: NDArray[np.integer[TI]],
|
|
616
|
-
_return_type_sel: StatsReturnSelector,
|
|
617
|
-
/,
|
|
618
|
-
) -> list[list[str]]:
|
|
619
|
-
_stats_row_pct = _stats_row_cnt / _stats_row_tot
|
|
620
|
-
|
|
621
|
-
match _return_type_sel:
|
|
622
|
-
case StatsReturnSelector.RIN:
|
|
623
|
-
_stats_row_ci = np.array([
|
|
624
|
-
propn_ci(*g, method="Wilson")
|
|
625
|
-
for g in zip(_stats_row_cnt[1:], _stats_row_tot[1:], strict=True)
|
|
626
|
-
])
|
|
627
|
-
return [
|
|
628
|
-
[
|
|
629
|
-
pct_format_str.format(100 * _stats_row_pct[0]),
|
|
630
|
-
*[
|
|
631
|
-
ci_format_str.format(*100 * np.array(f)).replace(
|
|
632
|
-
R" nan [ nan, nan] \%", "---"
|
|
633
|
-
)
|
|
634
|
-
for f in _stats_row_ci
|
|
635
|
-
],
|
|
636
|
-
]
|
|
637
|
-
]
|
|
638
|
-
case StatsReturnSelector.RPT:
|
|
639
|
-
return [
|
|
640
|
-
[
|
|
641
|
-
pct_format_str.format(f).replace(R"nan\%", "---")
|
|
642
|
-
for f in 100 * _stats_row_pct
|
|
643
|
-
]
|
|
644
|
-
]
|
|
645
|
-
case _:
|
|
646
|
-
return [
|
|
647
|
-
[
|
|
648
|
-
cnt_format_str.format(f).replace(R"nan", "---")
|
|
649
|
-
for f in _stats_row_cnt
|
|
650
|
-
]
|
|
651
|
-
]
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
def stats_print_rows(
    _invres_stats_hdr_list: list[str], _invres_stats_dat_list: list[list[str]]
) -> None:
    """Print formatted statistics as rows of a LaTeX array.

    Parameters
    ----------
    _invres_stats_hdr_list
        Row headers; each is either "TOTAL" or text whose label is enclosed
        in braces.
    _invres_stats_dat_list
        Formatted cells for each row, aligned with the headers.

    """
    # Captures the text between the first "{" and the last "}"
    _braced_text = re.compile(r".*?\{(.*)\};?")

    for _idx, _hdr in enumerate(_invres_stats_hdr_list):
        _hdr_match = None if _hdr == "TOTAL" else _braced_text.fullmatch(_hdr)
        # Fall back to the header as given when it has no braced label
        _hdr_str = _hdr if _hdr_match is None else _hdr_match[1].strip()
        print(
            _hdr_str,
            "&",
            " & ".join(_invres_stats_dat_list[_idx]),
            LTX_ARRAY_LINEEND,
            end="",
        )
    print()
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
def render_table_pdf(
    _table_dottex_pathlist: Sequence[str], _table_coll_path: str, /
) -> None:
    """Write a LaTeX document collecting the given tables and render it to PDF.

    The document is written to, and ``latexmk`` is run in, the package data
    directory; auxiliary files are cleaned up after a successful run.

    Parameters
    ----------
    _table_dottex_pathlist
        Paths of the table files, passed to the collection template.
    _table_coll_path
        File name of the collection document, relative to the data directory.

    Raises
    ------
    subprocess.CalledProcessError
        If ``latexmk`` exits with a nonzero status.

    """
    _table_collection_design = latex_jinja_env.get_template(
        "mergeron_table_collection_template.tex.jinja2"
    )
    _table_collection_content = StatsContainer()

    _table_collection_content.path_list = _table_dottex_pathlist

    _table_coll_file_path = Path(DATA_DIR / _table_coll_path)
    with _table_coll_file_path.open("w", encoding="utf8") as _table_coll_file:
        _table_coll_file.write(
            _table_collection_design.render(tmpl_data=_table_collection_content)
        )
        print("\n", file=_table_coll_file)

    # Arguments are passed as a list, so that a path containing whitespace
    # remains a single argument
    subprocess.run(  # noqa: S603
        [
            "latexmk",
            "-f",
            "-quiet",
            "-synctex=0",
            "-interaction=nonstopmode",
            "-file-line-error",
            "-pdflua",
            str(_table_coll_path),
        ],
        check=True,
        cwd=DATA_DIR,
    )
    # With check=True, a failed run raises above, so cleanup always follows
    subprocess.run(  # noqa: S603
        ["latexmk", "-quiet", "-c"],
        check=True,
        cwd=DATA_DIR,
    )

    print(f"Tables rendered to path, {_table_coll_file_path.with_suffix('.pdf')}")
|