pyseqtarget-0.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. pySEQTarget/SEQopts.py +197 -0
  2. pySEQTarget/SEQoutput.py +163 -0
  3. pySEQTarget/SEQuential.py +375 -0
  4. pySEQTarget/__init__.py +5 -0
  5. pySEQTarget/analysis/__init__.py +8 -0
  6. pySEQTarget/analysis/_hazard.py +211 -0
  7. pySEQTarget/analysis/_outcome_fit.py +75 -0
  8. pySEQTarget/analysis/_risk_estimates.py +136 -0
  9. pySEQTarget/analysis/_subgroup_fit.py +30 -0
  10. pySEQTarget/analysis/_survival_pred.py +372 -0
  11. pySEQTarget/data/__init__.py +19 -0
  12. pySEQTarget/error/__init__.py +2 -0
  13. pySEQTarget/error/_datachecker.py +38 -0
  14. pySEQTarget/error/_param_checker.py +50 -0
  15. pySEQTarget/expansion/__init__.py +5 -0
  16. pySEQTarget/expansion/_binder.py +98 -0
  17. pySEQTarget/expansion/_diagnostics.py +53 -0
  18. pySEQTarget/expansion/_dynamic.py +73 -0
  19. pySEQTarget/expansion/_mapper.py +44 -0
  20. pySEQTarget/expansion/_selection.py +31 -0
  21. pySEQTarget/helpers/__init__.py +8 -0
  22. pySEQTarget/helpers/_bootstrap.py +111 -0
  23. pySEQTarget/helpers/_col_string.py +6 -0
  24. pySEQTarget/helpers/_format_time.py +6 -0
  25. pySEQTarget/helpers/_output_files.py +167 -0
  26. pySEQTarget/helpers/_pad.py +7 -0
  27. pySEQTarget/helpers/_predict_model.py +9 -0
  28. pySEQTarget/helpers/_prepare_data.py +19 -0
  29. pySEQTarget/initialization/__init__.py +5 -0
  30. pySEQTarget/initialization/_censoring.py +53 -0
  31. pySEQTarget/initialization/_denominator.py +39 -0
  32. pySEQTarget/initialization/_numerator.py +37 -0
  33. pySEQTarget/initialization/_outcome.py +56 -0
  34. pySEQTarget/plot/__init__.py +1 -0
  35. pySEQTarget/plot/_survival_plot.py +104 -0
  36. pySEQTarget/weighting/__init__.py +8 -0
  37. pySEQTarget/weighting/_weight_bind.py +86 -0
  38. pySEQTarget/weighting/_weight_data.py +47 -0
  39. pySEQTarget/weighting/_weight_fit.py +99 -0
  40. pySEQTarget/weighting/_weight_pred.py +192 -0
  41. pySEQTarget/weighting/_weight_stats.py +23 -0
  42. pyseqtarget-0.10.0.dist-info/METADATA +98 -0
  43. pyseqtarget-0.10.0.dist-info/RECORD +46 -0
  44. pyseqtarget-0.10.0.dist-info/WHEEL +5 -0
  45. pyseqtarget-0.10.0.dist-info/licenses/LICENSE +21 -0
  46. pyseqtarget-0.10.0.dist-info/top_level.txt +1 -0
pySEQTarget/expansion/_binder.py
@@ -0,0 +1,98 @@
+import polars as pl
+
+from ._mapper import _mapper
+
+
+def _binder(self, kept_cols):
+    """
+    Internal function to bind data to the map created by __mapper
+    """
+    excluded = {
+        "dose",
+        f"dose{self.indicator_squared}",
+        "followup",
+        f"followup{self.indicator_squared}",
+        "tx_lag",
+        "trial",
+        f"trial{self.indicator_squared}",
+        self.time_col,
+        f"{self.time_col}{self.indicator_squared}",
+    }
+
+    cols = kept_cols.union({self.eligible_col, self.outcome_col, self.treatment_col})
+    cols = {col for col in cols if col is not None}
+
+    regular = {
+        col
+        for col in cols
+        if not (self.indicator_baseline in col or self.indicator_squared in col)
+        and col not in excluded
+    }
+
+    baseline = {
+        col for col in cols if self.indicator_baseline in col and col not in excluded
+    }
+    bas_kept = {col.replace(self.indicator_baseline, "") for col in baseline}
+
+    squared = {
+        col for col in cols if self.indicator_squared in col and col not in excluded
+    }
+    sq_kept = {col.replace(self.indicator_squared, "") for col in squared}
+
+    kept = list(regular.union(bas_kept).union(sq_kept))
+
+    if self.selection_first_trial:
+        DT = (
+            self.data.sort([self.id_col, self.time_col])
+            .with_columns(
+                [
+                    pl.col(self.time_col).alias("period"),
+                    pl.col(self.time_col).alias("followup"),
+                    pl.lit(0).alias("trial"),
+                ]
+            )
+            .drop(self.time_col)
+        )
+    else:
+        DT = _mapper(
+            self.data, self.id_col, self.time_col, self.followup_min, self.followup_max
+        )
+    DT = DT.join(
+        self.data.select([self.id_col, self.time_col] + kept),
+        left_on=[self.id_col, "period"],
+        right_on=[self.id_col, self.time_col],
+        how="left",
+    )
+    DT = DT.sort([self.id_col, "trial", "followup"]).with_columns(
+        [
+            (pl.col("trial") ** 2).alias(f"trial{self.indicator_squared}"),
+            (pl.col("followup") ** 2).alias(f"followup{self.indicator_squared}"),
+        ]
+    )
+
+    if squared:
+        squares = []
+        for sq in squared:
+            col = sq.replace(self.indicator_squared, "")
+            squares.append((pl.col(col) ** 2).alias(f"{col}{self.indicator_squared}"))
+        DT = DT.with_columns(squares)
+
+    baseline_cols = {bas.replace(self.indicator_baseline, "") for bas in baseline}
+    needed = {self.eligible_col, self.treatment_col}
+    baseline_cols.update({c for c in needed})
+
+    bas = [
+        pl.col(c)
+        .first()
+        .over([self.id_col, "trial"])
+        .alias(f"{c}{self.indicator_baseline}")
+        for c in baseline_cols
+    ]
+
+    DT = (
+        DT.with_columns(bas)
+        .filter(pl.col(f"{self.eligible_col}{self.indicator_baseline}") == 1)
+        .drop([f"{self.eligible_col}{self.indicator_baseline}", self.eligible_col])
+    )
+
+    return DT
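
Note: the baseline step above broadcasts the first value per (id, trial) to every
follow-up row under the suffixed name. A minimal sketch with hypothetical data,
assuming indicator_baseline is "_bas":

    import polars as pl

    DT = pl.DataFrame(
        {"id": ["A"] * 3, "trial": [0] * 3, "followup": [0, 1, 2], "age": [50, 51, 52]}
    )
    print(DT.with_columns(pl.col("age").first().over(["id", "trial"]).alias("age_bas")))
    # age_bas is 50 on all three rows of the trial
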
pySEQTarget/expansion/_diagnostics.py
@@ -0,0 +1,53 @@
+import polars as pl
+
+
+def _diagnostics(self):
+    unique_out = _outcome_diag(self, unique=True)
+    nonunique_out = _outcome_diag(self, unique=False)
+    out = {"unique_outcomes": unique_out, "nonunique_outcomes": nonunique_out}
+
+    if self.method == "censoring":
+        unique_switch = _switch_diag(self, unique=True)
+        nonunique_switch = _switch_diag(self, unique=False)
+        out.update(
+            {"unique_switches": unique_switch, "nonunique_switches": nonunique_switch}
+        )
+
+    self.diagnostics = out
+
+
+def _outcome_diag(self, unique):
+    if unique:
+        data = (
+            self.DT.select([self.id_col, self.treatment_col, self.outcome_col])
+            .group_by(self.id_col)
+            .last()
+        )
+    else:
+        data = self.DT
+    out = data.group_by([self.treatment_col, self.outcome_col]).len()
+
+    return out
+
+
+def _switch_diag(self, unique):
+    if not self.excused:
+        data = self.DT.with_columns(pl.lit(False).alias("isExcused"))
+    else:
+        data = self.DT
+
+    if unique:
+        data = (
+            data.select([self.id_col, self.treatment_col, "switch", "isExcused"])
+            .with_columns(
+                pl.when((pl.col("switch") == 0) & (pl.col("isExcused")))
+                .then(1)
+                .otherwise(pl.col("switch"))
+                .alias("switch")
+            )
+            .group_by(self.id_col)
+            .last()
+        )
+
+    out = data.group_by([self.treatment_col, "isExcused", "switch"]).len()
+    return out
pySEQTarget/expansion/_dynamic.py
@@ -0,0 +1,73 @@
+import polars as pl
+
+
+def _dynamic(self):
+    """
+    Handles special cases for the data from the __mapper -> __binder pipeline
+    """
+    if self.method == "dose-response":
+        DT = self.DT.with_columns(
+            pl.col(self.treatment_col)
+            .cum_sum()
+            .over([self.id_col, "trial"])
+            .alias("dose")
+        ).with_columns([(pl.col("dose") ** 2).alias(f"dose{self.indicator_squared}")])
+        self.DT = DT
+
+    elif self.method == "censoring":
+        DT = self.DT.sort([self.id_col, "trial", "followup"]).with_columns(
+            pl.col(self.treatment_col)
+            .shift(1)
+            .over([self.id_col, "trial"])
+            .alias("tx_lag")
+        )
+
+        switch = (
+            pl.when(pl.col("followup") == 0)
+            .then(pl.lit(False))
+            .otherwise(pl.col("tx_lag") != pl.col(self.treatment_col))
+        )
+        is_excused = pl.lit(False)
+        if self.excused:
+            conditions = []
+            for i, val in enumerate(self.treatment_level):
+                colname = self.excused_colnames[i]
+                if colname is not None:
+                    conditions.append(
+                        (pl.col(colname) == 1) & (pl.col(self.treatment_col) == val)
+                    )
+
+            if conditions:
+                excused = pl.any_horizontal(conditions)
+                is_excused = switch & excused
+
+        DT = DT.with_columns(
+            [switch.alias("switch"), is_excused.alias("isExcused")]
+        ).sort([self.id_col, "trial", "followup"])
+
+        if self.excused:
+            DT = (
+                DT.with_columns(
+                    pl.col("isExcused")
+                    .cast(pl.Int8)
+                    .cum_sum()
+                    .over([self.id_col, "trial"])
+                    .alias("_excused_tmp")
+                )
+                .with_columns(
+                    pl.when(pl.col("_excused_tmp") > 0)
+                    .then(pl.lit(False))
+                    .otherwise(pl.col("switch"))
+                    .alias("switch")
+                )
+                .drop("_excused_tmp")
+            )
+
+        DT = DT.filter(
+            (pl.col("switch").cum_max().shift(1, fill_value=False)).over(
+                [self.id_col, "trial"]
+            )
+            == 0
+        ).with_columns(pl.col("switch").cast(pl.Int8).alias("switch"))
+
+        self.DT = DT.drop(["tx_lag"])
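
Note: a minimal sketch of the censoring rule above for one (id, trial), on
hypothetical data. A switch at followup 2 keeps rows up to and including the
switch row and drops everything after it:

    import polars as pl

    DT = pl.DataFrame({"followup": [0, 1, 2, 3], "tx": [1, 1, 0, 0]})
    DT = DT.with_columns(pl.col("tx").shift(1).alias("tx_lag")).with_columns(
        pl.when(pl.col("followup") == 0)
        .then(pl.lit(False))
        .otherwise(pl.col("tx_lag") != pl.col("tx"))
        .alias("switch")
    )
    print(DT.filter(pl.col("switch").cast(pl.Int8).cum_max().shift(1, fill_value=0) == 0))
    # keeps followup 0, 1, 2; followup 3 is dropped
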
pySEQTarget/expansion/_mapper.py
@@ -0,0 +1,44 @@
+import math
+
+import polars as pl
+
+
+def _mapper(data, id_col, time_col, min_followup=-math.inf, max_followup=math.inf):
+    """
+    Internal function to create the expanded map to bind data to.
+    """
+
+    DT = (
+        data.select([pl.col(id_col), pl.col(time_col)])
+        .with_columns([pl.col(id_col).cum_count().over(id_col).sub(1).alias("trial")])
+        .with_columns(
+            [
+                pl.struct(
+                    [
+                        pl.col(time_col),
+                        pl.col(time_col).max().over(id_col).alias("max_time"),
+                    ]
+                )
+                .map_elements(
+                    lambda x: list(range(x[time_col], x["max_time"] + 1)),
+                    return_dtype=pl.List(pl.Int64),
+                )
+                .alias("period")
+            ]
+        )
+        .explode("period")
+        .drop(pl.col(time_col))
+        .with_columns(
+            [
+                pl.col(id_col)
+                .cum_count()
+                .over([id_col, "trial"])
+                .sub(1)
+                .alias("followup")
+            ]
+        )
+        .filter(
+            (pl.col("followup") >= min_followup) & (pl.col("followup") <= max_followup)
+        )
+    )
+    return DT
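
Note: the expansion is easiest to see on a toy input. A minimal sketch of the
semantics with hypothetical data (times here are consecutive from 0, so each
trial's index coincides with its start time): every observed time opens a trial
that is followed to the subject's last observed time.

    import polars as pl

    data = pl.DataFrame({"id": ["A", "A", "A"], "time": [0, 1, 2]})
    max_time = data["time"].max()
    rows = [
        {"id": "A", "trial": t, "period": p, "followup": p - t}
        for t in data["time"].to_list()
        for p in range(t, max_time + 1)
    ]
    print(pl.DataFrame(rows))
    # trial 0 -> periods 0,1,2; trial 1 -> periods 1,2; trial 2 -> period 2
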
pySEQTarget/expansion/_selection.py
@@ -0,0 +1,31 @@
+import polars as pl
+
+
+def _random_selection(self):
+    """
+    Handles the case where random selection is applied for data from
+    the __mapper -> __binder -> optionally __dynamic pipeline
+    """
+    UIDs = (
+        self.DT.select(
+            [self.id_col, "trial", f"{self.treatment_col}{self.indicator_baseline}"]
+        )
+        .with_columns((pl.col(self.id_col) + "_" + pl.col("trial").cast(pl.Utf8)).alias("trialID"))
+        .filter(pl.col(f"{self.treatment_col}{self.indicator_baseline}") == 0)
+        .unique("trialID")
+        .get_column("trialID")  # the trialID key column, not the frame's first column
+        .to_list()
+    )
+
+    NIDs = len(UIDs)
+    sample = self._rng.choice(
+        UIDs, size=int(self.selection_sample * NIDs), replace=False
+    )
+
+    self.DT = (
+        self.DT.with_columns(
+            (pl.col(self.id_col) + "_" + pl.col("trial").cast(pl.Utf8)).alias("trialID")
+        )
+        .filter(pl.col("trialID").is_in(sample))
+        .drop("trialID")
+    )
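
Note: a minimal sketch of the keying scheme above on hypothetical data, assuming
an rng like numpy's RandomState: each (id, trial) pair becomes a string key that
can be sampled without replacement.

    import numpy as np
    import polars as pl

    rng = np.random.RandomState(1)
    DT = pl.DataFrame({"id": ["A", "A", "B", "B"], "trial": [0, 1, 0, 1]})
    keys = (
        DT.with_columns(
            (pl.col("id") + "_" + pl.col("trial").cast(pl.Utf8)).alias("trialID")
        )
        .get_column("trialID")
        .unique()
        .to_list()
    )
    sample = rng.choice(keys, size=int(0.5 * len(keys)), replace=False)
    print(sorted(sample.tolist()))  # two of the four trial keys
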
pySEQTarget/helpers/__init__.py
@@ -0,0 +1,8 @@
+from ._bootstrap import bootstrap_loop as bootstrap_loop
+from ._col_string import _col_string as _col_string
+from ._format_time import _format_time as _format_time
+from ._output_files import _build_md as _build_md
+from ._output_files import _build_pdf as _build_pdf
+from ._pad import _pad as _pad
+from ._predict_model import _predict_model as _predict_model
+from ._prepare_data import _prepare_data as _prepare_data
pySEQTarget/helpers/_bootstrap.py
@@ -0,0 +1,111 @@
+import copy
+import time
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from functools import wraps
+
+import numpy as np
+import polars as pl
+from tqdm import tqdm
+
+from ._format_time import _format_time
+
+
+def _prepare_boot_data(self, data, boot_id):
+    id_counts = self._boot_samples[boot_id]
+
+    counts = pl.DataFrame(
+        {self.id_col: list(id_counts.keys()), "count": list(id_counts.values())}
+    )
+
+    bootstrapped = data.join(counts, on=self.id_col, how="inner")
+    bootstrapped = (
+        bootstrapped.with_columns(pl.int_ranges(0, pl.col("count")).alias("replicate"))
+        .explode("replicate")
+        .with_columns(
+            (
+                pl.col(self.id_col).cast(pl.Utf8)
+                + "_"
+                + pl.col("replicate").cast(pl.Utf8)
+            ).alias(self.id_col)
+        )
+        .drop("count", "replicate")
+    )
+
+    return bootstrapped
+
+
+def _bootstrap_worker(obj, method_name, original_DT, i, seed, args, kwargs):
+    obj = copy.deepcopy(obj)
+    obj._rng = (
+        np.random.RandomState(seed + i) if seed is not None else np.random.RandomState()
+    )
+    obj.DT = _prepare_boot_data(obj, original_DT, i)
+
+    # Disable bootstrapping to prevent recursion
+    obj.bootstrap_nboot = 0
+
+    method = getattr(obj, method_name)
+    result = method(*args, **kwargs)
+    obj._rng = None
+    return result
+
+
+def bootstrap_loop(method):
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+        if not hasattr(self, "outcome_model"):
+            self.outcome_model = []
+        start = time.perf_counter()
+
+        results = []
+        full = method(self, *args, **kwargs)
+        results.append(full)
+
+        if getattr(self, "bootstrap_nboot") > 0 and getattr(
+            self, "_boot_samples", None
+        ):
+            original_DT = self.DT
+            nboot = self.bootstrap_nboot
+            ncores = self.ncores
+            seed = getattr(self, "seed", None)
+            method_name = method.__name__
+
+            if getattr(self, "parallel", False):
+                original_rng = getattr(self, "_rng", None)
+                self._rng = None
+
+                with ProcessPoolExecutor(max_workers=ncores) as executor:
+                    futures = [
+                        executor.submit(
+                            _bootstrap_worker,
+                            self,
+                            method_name,
+                            original_DT,
+                            i,
+                            seed,
+                            args,
+                            kwargs,
+                        )
+                        for i in range(nboot)
+                    ]
+                    for j in tqdm(
+                        as_completed(futures), total=nboot, desc="Bootstrapping..."
+                    ):
+                        results.append(j.result())
+
+                self._rng = original_rng
+            else:
+                for i in tqdm(range(nboot), desc="Bootstrapping..."):
+                    self.DT = _prepare_boot_data(self, original_DT, i)
+                    boot_fit = method(self, *args, **kwargs)
+                    results.append(boot_fit)

+                self.DT = original_DT
+
+        end = time.perf_counter()
+        self._model_time = _format_time(start, end)
+
+        self.outcome_model = results
+        return results
+
+    return wrapper
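
Note: a minimal sketch of driving the decorator, with a hypothetical Estimator
class whose attribute names mirror what wrapper and _prepare_boot_data read.
results[0] is the full-data fit, followed by one entry per bootstrap replicate.

    from collections import Counter

    import numpy as np
    import polars as pl

    from pySEQTarget.helpers import bootstrap_loop

    class Estimator:
        def __init__(self, data, nboot, seed=0):
            self.id_col = "id"
            self.DT = data
            self.bootstrap_nboot = nboot
            self.ncores = 1
            self.parallel = False
            self.seed = seed
            ids = data["id"].unique().to_list()
            rng = np.random.RandomState(seed)
            # one {id: resample count} mapping per replicate
            self._boot_samples = [
                Counter(rng.choice(ids, size=len(ids)).tolist()) for _ in range(nboot)
            ]

        @bootstrap_loop
        def fit(self):
            return self.DT.height  # stand-in for a real model fit

    data = pl.DataFrame({"id": ["A", "B", "C"], "y": [1, 0, 1]})
    print(Estimator(data, nboot=2).fit())  # [3, 3, 3]: each replicate has len(ids) rows
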
pySEQTarget/helpers/_col_string.py
@@ -0,0 +1,6 @@
+def _col_string(expressions):
+    cols = set()
+    for expression in expressions:
+        if expression is not None:
+            cols.update(expression.replace("+", " ").replace("*", " ").split())
+    return cols
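
Note: for example, model-formula strings are split on "+" and "*", and None
entries are skipped:

    print(_col_string(["age + sex*site", None, "bmi"]))
    # {'age', 'sex', 'site', 'bmi'} (a set, so order is arbitrary)
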
pySEQTarget/helpers/_format_time.py
@@ -0,0 +1,6 @@
+def _format_time(start, end):
+    elapsed = end - start
+    days, rem = divmod(elapsed, 86400)
+    hours, rem = divmod(rem, 3600)
+    minutes, seconds = divmod(rem, 60)
+    return f"{int(days)}-{int(hours):02d}:{int(minutes):02d}:{seconds:05.2f}"
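
Note: for example, 90061.5 elapsed seconds render as days-hours:minutes:seconds:

    print(_format_time(0, 90061.5))  # 1-01:01:01.50
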
pySEQTarget/helpers/_output_files.py
@@ -0,0 +1,167 @@
+import datetime
+
+
+def _build_md(self, img_path: str = None) -> str:
+    """
+    Builds markdown content for SEQuential analysis results.
+
+    :param self: SEQoutput instance
+    :param img_path: Path to saved KM graph image (if any)
+    :return: Markdown string
+    """
+
+    lines = []
+
+    lines.append(f"# SEQuential Analysis: {datetime.date.today()}: {self.method}")
+    lines.append("")
+
+    if self.options.weighted:
+        lines.append("## Weighting")
+        lines.append("")
+
+        lines.append("### Numerator Model")
+        lines.append("")
+        lines.append("```")
+        lines.append(str(self.numerator_models[0].summary()))
+        lines.append("```")
+        lines.append("")
+
+        lines.append("### Denominator Model")
+        lines.append("")
+        lines.append("```")
+        lines.append(str(self.denominator_models[0].summary()))
+        lines.append("```")
+        lines.append("")
+
+        if self.options.compevent_colname is not None and self.compevent_models:
+            lines.append("### Competing Event Model")
+            lines.append("")
+            lines.append("```")
+            lines.append(str(self.compevent_models[0].summary()))
+            lines.append("```")
+            lines.append("")
+
+        lines.append("### Weighting Statistics")
+        lines.append("")
+        lines.append(self.weight_statistics.to_pandas().to_markdown(index=False))
+        lines.append("")
+
+    lines.append("## Outcome")
+    lines.append("")
+
+    lines.append("### Outcome Model")
+    lines.append("")
+    lines.append("```")
+    lines.append(str(self.outcome_models[0].summary()))
+    lines.append("```")
+    lines.append("")
+
+    if self.options.hazard_estimate and self.hazard is not None:
+        lines.append("### Hazard")
+        lines.append("")
+        lines.append(self.hazard.to_pandas().to_markdown(index=False))
+        lines.append("")
+
+    if self.options.km_curves:
+        lines.append("### Survival")
+        lines.append("")
+
+        if self.risk_difference is not None:
+            lines.append("#### Risk Differences")
+            lines.append("")
+            lines.append(self.risk_difference.to_pandas().to_markdown(index=False))
+            lines.append("")
+
+        if self.risk_ratio is not None:
+            lines.append("#### Risk Ratios")
+            lines.append("")
+            lines.append(self.risk_ratio.to_pandas().to_markdown(index=False))
+            lines.append("")
+
+        if self.km_graph is not None and img_path is not None:
+            lines.append("#### Survival Curves")
+            lines.append("")
+            lines.append(f"![Kaplan-Meier Survival Curves]({img_path})")
+            lines.append("")
+
+    if self.diagnostic_tables:
+        lines.append("## Diagnostic Tables")
+        lines.append("")
+        for name, table in self.diagnostic_tables.items():
+            lines.append(f"### {name.replace('_', ' ').title()}")
+            lines.append("")
+            lines.append(table.to_pandas().to_markdown(index=False))
+            lines.append("")
+
+    return "\n".join(lines)
+
+
+def _build_pdf(md_content: str, filename: str, img_path: str = None) -> None:
+    """
+    Converts markdown content to PDF.
+
+    :param md_content: Markdown string
+    :param filename: Output PDF path
+    :param img_path: Absolute path to image file (if any)
+    """
+    try:
+        import markdown
+        from weasyprint import CSS, HTML
+    except ImportError:
+        raise ImportError(
+            "PDF generation requires 'markdown' and 'weasyprint'. "
+            "Install with: pip install markdown weasyprint"
+        )
+
+    html_content = markdown.markdown(md_content, extensions=["tables", "fenced_code"])
+
+    if img_path:
+        img_name = img_path.split("/")[-1]
+        html_content = html_content.replace(
+            f'src="{img_name}"', f'src="file://{img_path}"'
+        )
+
+    css = CSS(
+        string="""
+        body {
+            font-family: Arial, sans-serif;
+            font-size: 11pt;
+            line-height: 1.4;
+            margin: 2cm;
+        }
+        h1 { color: #2c3e50; border-bottom: 2px solid #2c3e50; padding-bottom: 0.3em; }
+        h2 { color: #34495e; border-bottom: 1px solid #bdc3c7; padding-bottom: 0.2em; }
+        h3 { color: #7f8c8d; }
+        table {
+            border-collapse: collapse;
+            width: 100%;
+            margin: 1em 0;
+        }
+        th, td {
+            border: 1px solid #bdc3c7;
+            padding: 8px;
+            text-align: left;
+        }
+        th { background-color: #ecf0f1; }
+        tr:nth-child(even) { background-color: #f9f9f9; }
+        pre {
+            background-color: #f4f4f4;
+            padding: 1em;
+            border-radius: 4px;
+            overflow-x: auto;
+            font-size: 9pt;
+        }
+        code { font-family: 'Courier New', monospace; }
+        img { max-width: 100%; height: auto; }
+        """
+    )
+
+    full_html = f"""
+    <!DOCTYPE html>
+    <html>
+    <head><meta charset="utf-8"></head>
+    <body>{html_content}</body>
+    </html>
+    """
+
+    HTML(string=full_html).write_pdf(filename, stylesheets=[css])
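
Note: a minimal usage sketch with hypothetical paths, assuming a fitted
SEQoutput-like instance res:

    md = _build_md(res, img_path="km_curves.png")
    with open("report.md", "w") as f:
        f.write(md)
    _build_pdf(md, "report.pdf", img_path="/abs/path/to/km_curves.png")
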
pySEQTarget/helpers/_pad.py
@@ -0,0 +1,7 @@
+def _pad(a, b):
+    len_a, len_b = len(a), len(b)
+    if len_a < len_b:
+        a = a + [None] * (len_b - len_a)
+    elif len_b < len_a:
+        b = b + [None] * (len_a - len_b)
+    return a, b
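
Note: for example, the shorter list is right-padded with None:

    print(_pad([1, 2, 3], ["a"]))  # ([1, 2, 3], ['a', None, None])
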
pySEQTarget/helpers/_predict_model.py
@@ -0,0 +1,9 @@
+import numpy as np
+
+
+def _predict_model(self, model, newdata):
+    newdata = newdata.to_pandas()
+    for col in self.fixed_cols:
+        if col in newdata.columns:
+            newdata[col] = newdata[col].astype("category")
+    return np.array(model.predict(newdata))
pySEQTarget/helpers/_prepare_data.py
@@ -0,0 +1,19 @@
+import polars as pl
+
+
+def _prepare_data(self, DT):
+    binaries = [
+        self.eligible_col,
+        self.outcome_col,
+        self.cense_colname,
+    ]  # self.excused_colnames + self.weight_eligible_colnames
+    binary_colnames = [col for col in binaries if col is not None and col in DT.columns]
+
+    DT = DT.with_columns(
+        [
+            *[pl.col(col).cast(pl.Categorical) for col in self.fixed_cols],
+            *[pl.col(col).cast(pl.Int8) for col in binary_colnames],
+            pl.col(self.id_col).cast(pl.Utf8),
+        ]
+    )
+    return DT
pySEQTarget/initialization/__init__.py
@@ -0,0 +1,5 @@
+from ._censoring import _cense_denominator as _cense_denominator
+from ._censoring import _cense_numerator as _cense_numerator
+from ._denominator import _denominator as _denominator
+from ._numerator import _numerator as _numerator
+from ._outcome import _outcome as _outcome
+ from ._outcome import _outcome as _outcome