csrlite 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,292 @@
1
+ # pyre-strict
2
+ """
3
+ Inclusion/Exclusion (IE) Table Analysis Functions
4
+
5
+ This module provides a pipeline for IE summary analysis:
6
+ - ie_ard: Generate Analysis Results Data (ARD)
7
+ - ie_df: Transform ARD to display format
8
+ - ie_rtf: Generate formatted RTF output
9
+ - study_plan_to_ie_summary: Batch generation from StudyPlan
10
+ """
11
+
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ import polars as pl
16
+
17
+ from ..common.parse import StudyPlanParser
18
+ from ..common.plan import StudyPlan
19
+ from ..common.rtf import create_rtf_table_n_pct
20
+ from ..common.utils import apply_common_filters
21
+
22
+
23
def study_plan_to_ie_summary(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Generate IE Summary Table outputs for all analyses defined in StudyPlan.

    Args:
        study_plan: Study plan providing datasets, analysis plans and the
            output directory.

    Returns:
        Paths of the RTF files that were generated (one per matching plan).
    """
    # Meta data
    analysis_type = "ie_summary"
    output_dir = study_plan.output_dir
    title = "Summary of Protocol Deviations (Inclusion/Exclusion)"

    # Defaults
    criteria_df_name = "adie"

    # Ensure output directory exists
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Initialize parser
    parser = StudyPlanParser(study_plan)

    # Get expanded plan (manual expansion to avoid AttributeError)
    plans = study_plan.study_data.get("plans", [])
    all_specs = []
    for plan_data in plans:
        expanded = study_plan.expander.expand_plan(plan_data)
        for p in expanded:
            all_specs.append(study_plan.expander.create_analysis_spec(p))

    plan_df = pl.DataFrame(all_specs)

    # Keep only the plans for this analysis type (none if column is absent).
    if "analysis" in plan_df.columns:
        ie_plans = plan_df.filter(pl.col("analysis") == analysis_type)
    else:
        ie_plans = pl.DataFrame()

    generated_files = []

    # Iterate over analyses
    for analysis in ie_plans.iter_rows(named=True):
        # Note: IE analysis needs both ADSL (for population/group) and
        # ADIE (for criteria).
        pop_name = analysis.get("population", "enrolled")
        group_kw = analysis.get("group")  # Can be None

        try:
            if group_kw:
                # Load filtered population (ADSL) with group
                adsl, group_col = parser.get_population_data(pop_name, group_kw)
                group_col = group_col.upper()
                grp_suffix = group_col
            else:
                # Load filtered population (ADSL) without group.
                # Manual load + filter since get_population_data requires group.
                (adsl_raw,) = parser.get_datasets("adsl")
                pop_filter = parser.get_population_filter(pop_name)

                adsl, _ = apply_common_filters(
                    population=adsl_raw,
                    observation=None,
                    population_filter=pop_filter,
                    observation_filter=None,
                )

                group_col = None
                grp_suffix = "total"

        except ValueError as e:
            print(f"Error loading population: {e}")
            continue

        # Load ADIE
        try:
            (adie,) = parser.get_datasets(criteria_df_name)
        except ValueError as e:
            print(f"Error loading datasets: {e}")
            continue

        # Output filename.
        # BUG FIX: filename was computed but never used — the path was built
        # with a literal placeholder, so every analysis wrote to the same file.
        filename = f"{analysis_type}_{pop_name}_{grp_suffix}.rtf".lower()
        output_path = f"{output_dir}/{filename}"

        # Generate ARD
        ard = ie_ard(adsl=adsl, adie=adie, group_col=group_col)

        # Generate DF
        df = ie_df(ard)

        # Generate RTF
        ie_rtf(df, output_path, title=title)

        generated_files.append(output_path)

    return generated_files
117
+
118
+
119
def ie_ard(adsl: pl.DataFrame, adie: pl.DataFrame, group_col: str | None = None) -> pl.DataFrame:
    """
    Build the Analysis Results Data (ARD) for the IE summary table.

    Rows produced, in order:
      - Total Screening Failures
      - Exclusion Criteria Met (header) followed by one detail row per PARAM
      - Inclusion Criteria Not Met (header) followed by one detail row per PARAM

    Args:
        adsl: Subject-level data; must contain USUBJID and, when group_col is
            given, that grouping column.
        adie: Criteria records (USUBJID / PARAMCAT / PARAM).
        group_col: Optional grouping column; when None a single dummy
            "Total" group is used.

    Returns:
        DataFrame with label / indent / is_header columns plus count_<g> and
        pct_<g> columns for every group value. Percentages are relative to the
        number of unique screening-failure subjects in each group.
    """
    # Use a dummy "Total" group when no grouping is requested.
    grp: str = group_col if group_col else "Total"

    # Attach group info to the criteria records via an inner join on subject.
    keep_cols = ["USUBJID"] + ([group_col] if group_col else [])
    merged: pl.DataFrame = adie.join(adsl.select(keep_cols), on="USUBJID", how="inner")

    if not group_col:
        # Materialize the dummy group column.
        merged = merged.with_columns(pl.lit("Total").alias("Total"))

    rows: list[dict[str, Any]] = []

    # Distinct group values from ADSL (sorted, nulls dropped).
    grp_values: list[str]
    if group_col:
        raw_values: list[str | None] = sorted(adsl.select(group_col).unique().to_series().to_list())
        grp_values = [v for v in raw_values if v is not None]
    else:
        grp_values = ["Total"]

    # Denominator per group: unique subjects present in ADIE
    # (a subject may match several criteria, hence n_unique).
    denominators: dict[str, int] = {
        rec[grp]: rec["count"]
        for rec in merged.group_by(grp)
        .agg(pl.col("USUBJID").n_unique().alias("count"))
        .iter_rows(named=True)
    }

    def emit(
        label: str, filter_expr: pl.Expr | None = None, is_header: bool = False, indent: int = 0
    ) -> None:
        # Append one ARD record: label plus n and pct for every group.
        record: dict[str, Any] = {"label": label, "indent": indent, "is_header": is_header}

        for value in grp_values:
            subset = merged.filter(pl.col(grp) == value)
            if filter_expr is not None:
                subset = subset.filter(filter_expr)

            n = subset.select("USUBJID").n_unique()
            denom = denominators.get(value, 0)

            record[f"count_{value}"] = n
            record[f"pct_{value}"] = (n / denom * 100) if denom > 0 else 0.0

        rows.append(record)

    # 1. Total Screening Failures
    emit("Total Screening Failures")

    # 2. Exclusion Criteria Met + per-criterion detail rows
    excl = pl.col("PARAMCAT") == "EXCLUSION CRITERIA MET"
    emit("Exclusion Criteria Met", excl, is_header=True, indent=1)
    for param in merged.filter(excl).select("PARAM").unique().sort("PARAM").to_series().to_list():
        emit(param, excl & (pl.col("PARAM") == param), indent=2)

    # 3. Inclusion Criteria Not Met + per-criterion detail rows
    incl = pl.col("PARAMCAT") == "INCLUSION CRITERIA NOT MET"
    emit("Inclusion Criteria Not Met", incl, is_header=True, indent=1)
    for param in merged.filter(incl).select("PARAM").unique().sort("PARAM").to_series().to_list():
        emit(param, incl & (pl.col("PARAM") == param), indent=2)

    return pl.DataFrame(rows)
219
+
220
+
221
def ie_df(ard: pl.DataFrame) -> pl.DataFrame:
    """
    Transform the ARD into a display-ready DataFrame.

    Produces a "Criteria" label column (indented with spaces per indent
    level) plus one "n (pct)" string column per group.
    """
    # Recover group names from the count_* columns.
    groups = [c.replace("count_", "") for c in ard.columns if c.startswith("count_")]

    # Label column: prefix the label with spaces according to its indent level.
    exprs = [
        (pl.lit(" ").repeat_by(pl.col("indent")).list.join("") + pl.col("label")).alias("Criteria")
    ]

    for name in groups:
        # Polars expressions have no f-string formatting, so build the
        # "n (pct)" string by casting and concatenating pieces.
        n_expr = pl.col(f"count_{name}").cast(pl.Utf8)
        pct_expr = pl.col(f"pct_{name}").map_elements(lambda x: f"{x:.1f}", return_dtype=pl.Utf8)
        exprs.append((n_expr + " (" + pct_expr + ")").alias(name))

    return ard.select(exprs)
261
+
262
+
263
def ie_rtf(df: pl.DataFrame, output_path: str, title: str = "") -> None:
    """Write the display DataFrame to an RTF table at output_path."""
    total_cols = len(df.columns)

    # First header row: the actual column names.
    header_top = list(df.columns)
    # Second header row: blank under the label column, "n (%)" under groups.
    header_bottom = [""] + ["n (%)"] * (total_cols - 1)
    # Label column takes the combined width of all group columns.
    widths = [float(total_cols - 1)] + [1.0] * (total_cols - 1)

    doc = create_rtf_table_n_pct(
        df=df,
        col_header_1=header_top,
        col_header_2=header_bottom,
        col_widths=widths,
        title=title,
        footnote=None,
        source=None,
    )

    doc.write_rtf(output_path)
@@ -0,0 +1,209 @@
1
+ # pyre-strict
2
+ """
3
+ Medical History (MH) Listing Analysis Functions
4
+ """
5
+
6
+ from pathlib import Path
7
+
8
+ import polars as pl
9
+
10
+ from ..common.parse import StudyPlanParser
11
+ from ..common.plan import StudyPlan
12
+ from ..common.rtf import create_rtf_listing
13
+ from ..common.utils import apply_common_filters
14
+
15
+
16
def mh_listing(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None = "SAFFL = 'Y'",
    observation_filter: str | None = "MHOCCUR = 'Y'",
    id: tuple[str, str] = ("USUBJID", "Subject ID"),
    title: list[str] | None = None,
    footnote: list[str] | None = None,
    source: list[str] | None = None,
    output_file: str = "mh_listing.rtf",
    population_columns: list[tuple[str, str]] | None = None,
    observation_columns: list[tuple[str, str]] | None = None,
    sort_columns: list[str] | None = None,
) -> str:
    """
    Generate a Medical History listing RTF and return its output path.

    Pipeline: build the display DataFrame (mh_listing_df) then render it
    to RTF (mh_listing_rtf).
    """
    effective_title = ["Listing of Medical History"] if title is None else title

    # Build the display DataFrame.
    listing = mh_listing_df(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
        id_col=id[0],
        pop_cols=population_columns,
        obs_cols=observation_columns,
        sort_cols=sort_columns,
    )

    # Render to RTF.
    mh_listing_rtf(
        df=listing,
        output_path=output_file,
        title=effective_title,
        footnote=footnote,
        source=source,
    )

    return output_file
52
+
53
+
54
def mh_listing_df(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    id_col: str,
    pop_cols: list[tuple[str, str]] | None,
    obs_cols: list[tuple[str, str]] | None,
    sort_cols: list[str] | None,
) -> pl.DataFrame:
    """
    Build the display DataFrame for the MH listing.

    Filters population/observation, joins them on id_col, optionally sorts,
    and selects id + population + observation columns (in that order).

    Raises:
        ValueError: If the filtered observation data is missing.
    """
    # Default display columns when not supplied by the caller.
    pop_cols = [("TRT01A", "Treatment"), ("AGE", "Age"), ("SEX", "Sex")] if pop_cols is None else pop_cols
    obs_cols = (
        [
            ("MHSEQ", "Seq"),
            ("MHBODSYS", "System Organ Class"),
            ("MHDECOD", "Preferred Term"),
            ("MHSTDTC", "Start Date"),
            ("MHENRTPT", "Status"),
        ]
        if obs_cols is None
        else obs_cols
    )

    # Apply the shared population/observation filters.
    adsl, obs_df = apply_common_filters(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
    )

    if obs_df is None:
        raise ValueError("Observation data is missing")

    # Columns to pull from ADSL; make sure the join key is included.
    selected = [name for name, _ in pop_cols]
    if id_col not in selected:
        selected = [id_col] + selected

    merged = obs_df.join(adsl.select(selected), on=id_col, how="inner")

    # Sort by any requested columns that actually exist after the join.
    if sort_cols:
        usable = [c for c in sort_cols if c in merged.columns]
        if usable:
            merged = merged.sort(usable)

    # Final display order: id, then population columns, then observation
    # columns; silently drop any that are absent from the joined frame.
    ordered = [id_col] + [n for n, _ in pop_cols if n != id_col] + [n for n, _ in obs_cols]
    return merged.select([c for c in ordered if c in merged.columns])
117
+
118
+
119
def mh_listing_rtf(
    df: pl.DataFrame,
    output_path: str,
    title: list[str] | str,
    footnote: list[str] | None,
    source: list[str] | None,
) -> None:
    """Render the listing DataFrame to an RTF file; no-op for empty data."""
    # Nothing to render.
    if df.is_empty():
        return

    # Column headers are the DataFrame's own column names (mh_listing_df
    # does not emit display labels), with uniform column widths.
    headers = list(df.columns)

    doc = create_rtf_listing(
        df=df,
        col_header=headers,
        col_widths=[1.0] * len(headers),
        title=title,
        footnote=footnote,
        source=source,
    )

    doc.write_rtf(output_path)
153
+
154
+
155
def study_plan_to_mh_listing(study_plan: StudyPlan) -> list[str]:
    """
    Batch generate MH listings for every matching plan in the StudyPlan.

    Args:
        study_plan: Study plan providing datasets, analysis plans and the
            output directory.

    Returns:
        Paths of the RTF files that were generated (one per matching plan).
    """
    analysis_type = "mh_listing"
    output_dir = study_plan.output_dir

    parser = StudyPlanParser(study_plan)

    # Manual plan expansion (mirrors study_plan_to_ie_summary).
    plans = study_plan.study_data.get("plans", [])
    all_specs = []
    for plan_data in plans:
        expanded = study_plan.expander.expand_plan(plan_data)
        for p in expanded:
            all_specs.append(study_plan.expander.create_analysis_spec(p))

    plan_df = pl.DataFrame(all_specs)

    # Keep only the plans for this analysis type (none if column is absent).
    if "analysis" in plan_df.columns:
        mh_plans = plan_df.filter(pl.col("analysis") == analysis_type)
    else:
        mh_plans = pl.DataFrame()

    generated_files = []

    for analysis in mh_plans.iter_rows(named=True):
        pop_name = analysis.get("population", "enrolled")

        try:
            # Load population; the group argument is required by the API but
            # unused here, hence the dummy "trt01a".
            adsl, _ = parser.get_population_data(pop_name, "trt01a")

            (admh,) = parser.get_datasets("admh")

            # BUG FIX: filename was computed but never used — the path was
            # built with a literal placeholder, so every plan wrote to the
            # same bogus file.
            filename = f"{analysis_type}_{pop_name}.rtf".lower()
            output_path = f"{output_dir}/{filename}"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            mh_listing(
                population=adsl,
                observation=admh,
                population_filter=None,
                observation_filter=None,  # show all records
                output_file=output_path,
                title=["Listing of Medical History", f"({pop_name} Population)"],
                source=["Source: ADSL, ADMH"],
            )

            generated_files.append(output_path)

        except Exception as e:
            # Best-effort batch: report and continue with remaining plans.
            print(f"Error generating MH listing: {e}")
            continue

    return generated_files