PyPI - mergeron - Versions diffs - 2024.739125.3__py3-none-any.whl → 2024.739127.1__py3-none-any.whl - Mend - Supply Chain Defender

mergeron-2024.739125.3-py3-none-any.whl → mergeron-2024.739127.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mergeron might be problematic. Click here for more details.

Files changed (19)

mergeron/gen/enforcement_stats.py CHANGED

@@ -4,28 +4,12 @@ Methods to format and print summary statistics on merger enforcement patterns.
 """
 import enum
-import shutil
-import subprocess
 from collections.abc import Mapping
-from importlib import resources
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Literal
 import numpy as np
-import re2 as re  # type: ignore
-from jinja2 import Environment, FileSystemLoader, Template, select_autoescape
 from scipy.interpolate import interp1d  # type: ignore
-from scipy.stats import beta, norm  # type: ignore
-from .. import (  # noqa: TID252
-    _PKG_NAME,
-    DATA_DIR,
-    VERSION,
-    ArrayBIGINT,
-    ArrayDouble,
-    ArrayINT,
-)
+from .. import VERSION, ArrayBIGINT  # noqa: TID252
 from ..core import ftc_merger_investigations_data as fid  # noqa: TID252
 from . import INVResolution
@@ -81,81 +65,6 @@ class SortSelector(enum.StrEnum):
     REV = "reversed"
-cnt_format_str = R"{: >5,.0f}"
-pct_format_str = R"{: >6.1f}\%"
-ci_format_str = R"{0: >5.1f} [{2: >4.1f},{3: >5.1f}] \%"
-moe_tmpl = Template(R"""
-    {% if (rv[2] - rv[0]) | abs == (rv[3] - rv[0]) | abs %}
-         {{- "[\pm {:.1f}]".format(rv[3] - rv[0]) -}}
-    {% else %}
-         {{- "[{:.1f}/+{:.1f}]".format(rv[2] - rv[0], rv[3] - rv[0]) -}}
-    {% endif %}
-    """)
-# Define the LaTeX jinja environment
-_template_resource = resources.files(f"{_PKG_NAME}.data.jinja2_LaTeX_templates")
-_template_folder = DATA_DIR.joinpath(_template_resource.name)
-with resources.as_file(
-    resources.files(f"{_PKG_NAME}.data.jinja2_LaTeX_templates")
-) as _tmpl_src:
-    if not _template_folder.is_dir():
-        shutil.copytree(_tmpl_src, _template_folder)
-# Place files related to rendering LaTeX in output data directory
-if not (_out_path := DATA_DIR.joinpath(f"{_PKG_NAME}.cls")).is_file():
-    shutil.move(_template_folder / _out_path.name, _out_path)
-# Write to LaTeX table settings file
-if not (_DOTTEX := DATA_DIR / Rf"{_PKG_NAME}_TikZTableSettings.sty").is_file():
-    shutil.move(_template_folder / "setup_tikz_tables.sty", _DOTTEX)
-def create_jinja_env(_tmpl_folder: Path = _template_folder, /) -> Environment:
-    """Create jinja2 environment
-    Loader is the FileSystemLoader initialized with the given path as
-    template folder
-    Parameters
-    ----------
-    _tmpl_folder : Path
-        Path to template folder
-    Returns
-    -------
-    Environment
-        jinja2 environment
-    """
-    return Environment(
-        block_start_string=R"((*",
-        block_end_string="*))",
-        variable_start_string=R"\JINVAR{",
-        variable_end_string="}",
-        comment_start_string=R"((#",  # r'#{',
-        comment_end_string=R"#))",  # '}',
-        line_statement_prefix="##",
-        line_comment_prefix="%#",
-        trim_blocks=True,
-        lstrip_blocks=True,
-        autoescape=select_autoescape(disabled_extensions=("tex.jinja2",)),
-        loader=FileSystemLoader(_tmpl_folder),
-    )
-LTX_ARRAY_LINEEND = "\\\\\n"
-class StatsContainer(SimpleNamespace):
-    """A container for passing content to jinja2 templates
-    Other attributes added later, to fully populate selected jinja2 templates
-    """
-    invdata_hdrstr: str
-    invdata_datstr: str
 # Parameters and functions to interpolate selected HHI and ΔHHI values
 #   recorded in fractions to ranges of values in points on the HHI scale
 HHI_DELTA_KNOTS = np.array(
@@ -204,7 +113,6 @@ ZONE_DETAIL_STRINGS_HHI = {
     1: R"HHI ∈ [{}, {}) pts. and ".format(*HHI_POST_ZONE_KNOTS[1:3]),
     2: Rf"HHI ⩾ {HHI_POST_ZONE_KNOTS[2]} pts. and ",
 }
 ZONE_DETAIL_STRINGS_DELTA = {
     0: "",
     1: Rf"ΔHHI < {HHI_DELTA_KNOTS[1]} pts.",
@@ -213,100 +121,6 @@ ZONE_DETAIL_STRINGS_DELTA = {
     4: Rf"ΔHHI ⩾ {HHI_DELTA_KNOTS[2]} pts.",
 }
-ZONE_STRINGS_LATEX_TIKZ = {
-    0: R"\node[align=left, fill=BrightGreen] {Green Zone (Safeharbor)};",
-    1: R"\node[align=left, fill=HiCoYellow] {Yellow Zone};",
-    2: R"\node[align=left, fill=VibrRed] {Red Zone (SLC Presumption)};",
-    fid.TTL_KEY: R"\node[align=left, fill=OBSHDRFill] {TOTAL};",
-}
-ZONE_STRINGS_LATEX_TBLR = {
-    0: R"\SetCell{l, bg=BrightGreen} {Green Zone (Safeharbor)}",
-    1: R"\SetCell{l, bg=HiCoYellow} {Yellow Zone}",
-    2: R"\SetCell{l, bg=VibrRed} {Red Zone (SLC Presumption)}",
-    fid.TTL_KEY: R"\SetCell{l, bg=OBSHDRFill} {TOTAL}",
-}
-ZONE_DETAIL_STRINGS_HHI_LATEX = {
-    0: Rf"HHI_{{post}} < \text{{{HHI_POST_ZONE_KNOTS[1]} pts.}}",
-    1: R"HHI_{{post}} \in \text{{[{}, {}) pts. and }} ".format(
-        *HHI_POST_ZONE_KNOTS[1:3]
-    ),
-    2: Rf"HHI_{{post}} \geqslant \text{{{HHI_POST_ZONE_KNOTS[2]} pts. and }} ",
-}
-ZONE_DETAIL_STRINGS_DELTA_LATEX = {
-    0: "",
-    1: Rf"\Delta HHI < \text{{{HHI_DELTA_KNOTS[1]} pts.}}",
-    2: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[1]} pts.}}",
-    3: R"\Delta HHI \in \text{{[{}, {}) pts.}}".format(*HHI_DELTA_KNOTS[1:3]),
-    4: Rf"\Delta HHI \geqslant \text{{{HHI_DELTA_KNOTS[2]} pts.}}",
-}
-def enf_stats_obs(
-    _data_array_dict: fid.INVData,
-    _data_period: str = "1996-2003",
-    _table_ind_group: IndustryGroup = IndustryGroup.ALL,
-    _table_evid_cond: OtherEvidence = OtherEvidence.UR,
-    _stats_group: StatsGrpSelector = StatsGrpSelector.FC,
-    _enf_spec: INVResolution = INVResolution.CLRN,
-    /,
-    *,
-    return_type_sel: StatsReturnSelector = StatsReturnSelector.RPT,
-    sort_order: SortSelector = SortSelector.UCH,
-    print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
-    print_to_screen: bool = True,
-) -> tuple[list[str], list[list[str]]]:
-    if _data_period not in _data_array_dict:
-        raise ValueError(
-            f"Value of _data_period, {f'"{_data_period}"'} is invalid.",
-            f"Must be in, {list(_data_array_dict.keys())!r}",
-        )
-    match _stats_group:
-        case StatsGrpSelector.ZN:
-            _enf_stats_table_func = enf_stats_table_byzone
-        case StatsGrpSelector.FC:
-            _enf_stats_table_func = enf_stats_table_onedim
-        case StatsGrpSelector.DL:
-            _enf_stats_table_func = enf_stats_table_onedim
-        case _:
-            raise ValueError(
-                'Statistics formatted, "{_stats_group}" not available here.'
-            )
-    _enf_stats_cnts = enf_cnts_obs_by_group(
-        _data_array_dict,
-        _data_period,
-        _table_ind_group,
-        _table_evid_cond,
-        _stats_group,
-        _enf_spec,
-    )
-    _enf_stats_hdr_list, _enf_stats_dat_list = _enf_stats_table_func(
-        _enf_stats_cnts,
-        None,
-        return_type_sel=return_type_sel,
-        sort_order=sort_order,
-        print_format=print_format,
-    )
-    if print_to_screen:
-        print(
-            f"{_enf_spec.capitalize()} stats ({return_type_sel})",
-            f"for Period: {_data_period}",
-            "\u2014",
-            f"{_table_ind_group};",
-            _table_evid_cond,
-        )
-        stats_print_rows(
-            _enf_stats_hdr_list, _enf_stats_dat_list, print_format=print_format
-        )
-    return _enf_stats_hdr_list, _enf_stats_dat_list
 def enf_cnts_obs_by_group(
     _invdata_array_dict: Mapping[str, Mapping[str, Mapping[str, fid.INVTableData]]],
@@ -535,356 +349,7 @@ def enf_cnts_byconczone(_cnts_array: ArrayBIGINT, /) -> ArrayBIGINT:
     return _cnts_byconczone[1:]
-def enf_stats_table_onedim(
-    _inparr: ArrayDouble | ArrayBIGINT | ArrayDouble | ArrayBIGINT,
-    _totals_row: int | None = None,
-    /,
-    *,
-    return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
-    sort_order: SortSelector = SortSelector.UCH,
-    print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
-) -> tuple[list[str], list[list[str]]]:
-    _ndim_in: int = 1
-    _dim_hdr_dict = {_v: _k for _k, _v in fid.CNT_FCOUNT_DICT.items()} | {
-        _v: (
-            "[2500, 5000]"
-            if _k == "2,500 +"
-            else f"[{_k.replace(",", "").replace(" - ", ", ")})"
-        )
-        for _k, _v in fid.CONC_DELTA_DICT.items()
-        if _k != "TOTAL"
-    }
-    if _totals_row:
-        _in_totals_row = _inparr[_totals_row, :]
-        _inparr_mask = np.ones(len(_inparr), dtype=bool)
-        _inparr_mask[_in_totals_row] = False
-        _inparr = _inparr[_inparr_mask]
-    else:
-        _in_totals_row = np.concatenate((
-            [fid.TTL_KEY],
-            np.einsum("ij->j", _inparr[:, _ndim_in:]),
-        ))
-    if sort_order == SortSelector.REV:
-        _inparr = _inparr[::-1]
-    _inparr = np.vstack((_inparr, _in_totals_row))
-    _stats_hdr_list, _stats_dat_list = [], []
-    for _stats_row in _inparr:
-        _stats_hdr_str = _dim_hdr_dict[_stats_row[0]]
-        _stats_hdr_list += [
-            f"{{{_stats_hdr_str}}}"
-            if print_format in ("LaTeX", "LaTeX+TikZ")
-            else _stats_hdr_str
-        ]
-        _stats_cnt = _stats_row[_ndim_in:]
-        _stats_tot = np.concatenate((
-            [_inparr[-1][_ndim_in]],
-            _stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
-        ))
-        _stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)
-    return _stats_hdr_list, _stats_dat_list
-def enf_stats_table_byzone(
-    _inparr: ArrayDouble | ArrayBIGINT | ArrayDouble | ArrayBIGINT,
-    _totals_row: int | None = None,
-    /,
-    *,
-    return_type_sel: StatsReturnSelector = StatsReturnSelector.CNT,
-    sort_order: SortSelector = SortSelector.UCH,
-    print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "LaTeX",
-) -> tuple[list[str], list[list[str]]]:
-    _ndim_in: int = ZONE_VALS.shape[1]
-    _zone_str_dict = (
-        ZONE_STRINGS_LATEX_TIKZ
-        if print_format == "LaTeX+TikZ"
-        else (ZONE_STRINGS_LATEX_TBLR if print_format == "LaTeX" else ZONE_STRINGS)
-    )
-    _zone_str_keys = list(_zone_str_dict)
-    if sort_order == SortSelector.REV:
-        _inparr = _inparr[::-1]
-        _zone_str_keys = _zone_str_keys[:-1][::-1] + [_zone_str_keys[-1]]
-    if _totals_row is None:
-        _inparr = np.vstack((
-            _inparr,
-            np.concatenate((
-                [fid.TTL_KEY, -1, -1],
-                np.einsum("ij->j", _inparr[:, _ndim_in:]),
-            )),
-        ))
-    _stats_hdr_list, _stats_dat_list = ([], [])
-    for _conc_zone in _zone_str_keys:
-        _stats_byzone_it = _inparr[_inparr[:, 0] == _conc_zone]
-        _stats_hdr_list += [_zone_str_dict[_conc_zone]]
-        _stats_cnt = np.einsum("ij->j", _stats_byzone_it[:, _ndim_in:])
-        _stats_tot = np.concatenate((
-            [_inparr[-1][_ndim_in]],
-            _stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
-        ))
-        _stats_dat_list += _stats_formatted_row(_stats_cnt, _stats_tot, return_type_sel)
-        if _conc_zone in (2, fid.TTL_KEY):
-            continue
-        for _stats_byzone_detail in _stats_byzone_it:
-            # Only two sets of subtotals detail, so
-            # a conditional expression will do here
-            if print_format in ("LaTeX", "LaTeX+TikZ"):
-                _stats_text_color = "HiCoYellow" if _conc_zone == 1 else "BrightGreen"
-                _stats_hdr_list += [
-                    R"{} {{\null\hfill \({}{}\) }}{}".format(
-                        rf"\node[text={_stats_text_color}, fill=white, align=right]"
-                        if print_format == "LaTeX+TikZ"
-                        else rf"\SetCell{{r, fg={_stats_text_color}, bg=white}}",
-                        ZONE_DETAIL_STRINGS_HHI_LATEX[_stats_byzone_detail[1]],
-                        (
-                            ""
-                            if _stats_byzone_detail[2] == 0
-                            else Rf"{ZONE_DETAIL_STRINGS_DELTA_LATEX[_stats_byzone_detail[2]]}"
-                        ),
-                        ";" if print_format == "LaTeX+TikZ" else "",
-                    )
-                ]
-            else:
-                _stats_hdr_list += [
-                    R"{}{};".format(
-                        ZONE_DETAIL_STRINGS_HHI[_stats_byzone_detail[1]],
-                        (
-                            ""
-                            if _stats_byzone_detail[2] == 0
-                            else Rf"{ZONE_DETAIL_STRINGS_DELTA[_stats_byzone_detail[2]]}"
-                        ),
-                    )
-                ]
-            _stats_cnt = _stats_byzone_detail[_ndim_in:]
-            _stats_tot = np.concatenate((
-                [_inparr[-1][_ndim_in]],
-                _stats_cnt[0] * np.ones_like(_stats_cnt[1:]),
-            ))
-            _stats_dat_list += _stats_formatted_row(
-                _stats_cnt, _stats_tot, return_type_sel
-            )
-    return _stats_hdr_list, _stats_dat_list
-def _stats_formatted_row(
-    _stats_row_cnt: ArrayBIGINT,
-    _stats_row_tot: ArrayBIGINT,
-    _return_type_sel: StatsReturnSelector,
-    /,
-) -> list[list[str]]:
-    _stats_row_pct = _stats_row_cnt / _stats_row_tot
-    match _return_type_sel:
-        case StatsReturnSelector.RIN:
-            _stats_row_ci = np.array([
-                propn_ci(*g, method="Wilson")
-                for g in zip(_stats_row_cnt[1:], _stats_row_tot[1:], strict=True)
-            ])
-            return [
-                [
-                    pct_format_str.format(100 * _stats_row_pct[0]),
-                    *[
-                        ci_format_str.format(*100 * np.array(f)).replace(
-                            R"  nan [ nan,  nan] \%", "---"
-                        )
-                        for f in _stats_row_ci
-                    ],
-                ]
-            ]
-        case StatsReturnSelector.RPT:
-            return [
-                [
-                    pct_format_str.format(f).replace(R"nan\%", "---")
-                    for f in 100 * _stats_row_pct
-                ]
-            ]
-        case _:
-            return [
-                [
-                    cnt_format_str.format(f).replace(R"nan", "---")
-                    for f in _stats_row_cnt
-                ]
-            ]
-def stats_print_rows(
-    _enf_stats_hdr_list: list[str],
-    _enf_stats_dat_list: list[list[str]],
-    /,
-    *,
-    print_format: Literal["text", "LaTeX", "LaTeX+TikZ"] = "text",
-) -> None:
-    for _idx, _hdr in enumerate(_enf_stats_hdr_list):
-        if print_format in ("LaTeX", "LaTeX+TikZ"):
-            _hdr_str = re.search(r" \{(.*?)\};?\z", _hdr)[-1].strip()
-            print(
-                _hdr_str,
-                " & ",
-                " & ".join(_enf_stats_dat_list[_idx]),
-                LTX_ARRAY_LINEEND,
-                end="",
-            )
-        else:
-            print(_hdr, " | ", " | ".join(_enf_stats_dat_list[_idx]))
-    print()
-def propn_ci(
-    _npos: ArrayINT | int = 4,
-    _nobs: ArrayINT | int = 10,
-    /,
-    *,
-    alpha: float = 0.05,
-    method: Literal[
-        "Agresti-Coull", "Clopper-Pearson", "Exact", "Wilson", "Score"
-    ] = "Wilson",
-) -> tuple[
-    ArrayDouble | float, ArrayDouble | float, ArrayDouble | float, ArrayDouble | float
-]:
-    """Returns point estimates and confidence interval for a proportion
-    Methods "Clopper-Pearson" and "Exact" are synoymous [3]_.  Similarly,
-    "Wilson" and "Score" are synonyms here.
-    Parameters
-    ----------
-    _npos
-        Number of positives
-    _nobs
-        Number of observed values
-    alpha
-        Significance level
-    method
-        Method to use for estimating confidence interval
-    Returns
-    -------
-        Raw and estimated proportions, and bounds of the confidence interval
-    References
-    ----------
-    .. [3] Alan Agresti & Brent A. Coull (1998) Approximate is Better
-       than “Exact” for Interval Estimation of Binomial Proportions,
-       The American Statistician, 52:2, 119-126,
-       https://doi.org/10.1080/00031305.1998.10480550
-    """
-    for _f in _npos, _nobs:
-        if not isinstance(_f, int | np.integer):
-            raise ValueError(
-                f"Count, {_f!r} must have type that is a subtype of np.integer."
-            )
-    if not _nobs:
-        return (np.nan, np.nan, np.nan, np.nan)
-    _raw_phat: ArrayDouble | float = _npos / _nobs
-    _est_phat: ArrayDouble | float
-    _est_ci_l: ArrayDouble | float
-    _est_ci_u: ArrayDouble | float
-    match method:
-        case "Clopper-Pearson" | "Exact":
-            _est_ci_l, _est_ci_u = (
-                beta.ppf(*_f)
-                for _f in (
-                    (alpha / 2, _npos, _nobs - _npos + 1),
-                    (1 - alpha / 2, _npos + 1, _nobs - _npos),
-                )
-            )
-            _est_phat = 1 / 2 * (_est_ci_l + _est_ci_u)
-        case "Agresti-Coull":
-            _zsc = norm.ppf(1 - alpha / 2)
-            _zscsq = _zsc * _zsc
-            _adjmt = 4 if alpha == 0.05 else _zscsq
-            _est_phat = (_npos + _adjmt / 2) / (_nobs + _adjmt)
-            _est_ci_l, _est_ci_u = (
-                _est_phat + _g
-                for _g in [
-                    _f * _zsc * np.sqrt(_est_phat * (1 - _est_phat) / (_nobs + _adjmt))
-                    for _f in (-1, 1)
-                ]
-            )
-        case "Wilson" | "Score":
-            _zsc = norm.ppf(1 - alpha / 2)
-            _zscsq = _zsc * _zsc
-            _est_phat = (_npos + _zscsq / 2) / (_nobs + _zscsq)
-            _est_ci_l, _est_ci_u = (
-                _est_phat
-                + _f
-                * _zsc
-                * np.sqrt(_nobs * _raw_phat * (1 - _raw_phat) + _zscsq / 4)
-                / (_nobs + _zscsq)
-                for _f in (-1, 1)
-            )
-        case _:
-            raise ValueError(f"Method, {f'"{method}"'} not yet implemented.")
-    return _raw_phat, _est_phat, _est_ci_l, _est_ci_u
-def render_table_pdf(
-    _table_collection_design: Template,
-    _table_collection_content: StatsContainer,
-    _output_tex_path: Path,
-    /,
-) -> None:
-    """Render table collection to PDF
-    Parameters
-    ----------
-    _table_collection_design
-        A jinja2 template for generating the LaTeX file to render
-    _table_collection_content
-        Content for jinja2 template
-    _output_tex_path
-        Path to LaTeX output file to render to PDF
-    """
-    with _output_tex_path.open("w", encoding="utf8") as _output_tex_file:
-        _output_tex_file.write(
-            _table_collection_design.render(tmpl_data=_table_collection_content)
-        )
-        print("\n", file=_output_tex_file)
-    _run_rc = subprocess.run(  # noqa: S603
-        f"latexmk -f -quiet -synctex=0 -interaction=nonstopmode -file-line-error -pdflua {_output_tex_path}".split(),
-        check=True,
-        cwd=DATA_DIR,
-    )
-    if _run_rc:
-        subprocess.run("latexmk -quiet -c".split(), check=True, cwd=DATA_DIR)  # noqa: S603
-    del _run_rc
-    print(f"Tables rendered to path, {_output_tex_path.with_suffix(".pdf")}")
 if __name__ == "__main__":
     print(
-        "This module provides methods to format and print summary statistics on merger enforcement patterns.."
+        "This module provides methods to aggregate statistics on merger enforcement patterns for reporting."
     )