csrlite-0.2.1-py3-none-any.whl → csrlite-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,209 @@
+ # pyre-strict
+ """
+ Medical History (MH) Listing Analysis Functions
+ """
+
+ from pathlib import Path
+
+ import polars as pl
+
+ from ..common.parse import StudyPlanParser
+ from ..common.plan import StudyPlan
+ from ..common.rtf import create_rtf_listing
+ from ..common.utils import apply_common_filters
+
+
+ def mh_listing(
+     population: pl.DataFrame,
+     observation: pl.DataFrame,
+     population_filter: str | None = "SAFFL = 'Y'",
+     observation_filter: str | None = "MHOCCUR = 'Y'",
+     id: tuple[str, str] = ("USUBJID", "Subject ID"),
+     title: list[str] | None = None,
+     footnote: list[str] | None = None,
+     source: list[str] | None = None,
+     output_file: str = "mh_listing.rtf",
+     population_columns: list[tuple[str, str]] | None = None,
+     observation_columns: list[tuple[str, str]] | None = None,
+     sort_columns: list[str] | None = None,
+ ) -> str:
+     """
+     Generate Medical History Listing.
+     """
+     if title is None:
+         title = ["Listing of Medical History"]
+
+     # Generate DF
+     df = mh_listing_df(
+         population=population,
+         observation=observation,
+         population_filter=population_filter,
+         observation_filter=observation_filter,
+         id_col=id[0],
+         pop_cols=population_columns,
+         obs_cols=observation_columns,
+         sort_cols=sort_columns,
+     )
+
+     # Generate RTF
+     mh_listing_rtf(df=df, output_path=output_file, title=title, footnote=footnote, source=source)
+
+     return output_file
+
+
+ def mh_listing_df(
+     population: pl.DataFrame,
+     observation: pl.DataFrame,
+     population_filter: str | None,
+     observation_filter: str | None,
+     id_col: str,
+     pop_cols: list[tuple[str, str]] | None,
+     obs_cols: list[tuple[str, str]] | None,
+     sort_cols: list[str] | None,
+ ) -> pl.DataFrame:
+     # Defaults
+     if pop_cols is None:
+         # Default columns from ADSL
+         pop_cols = [("TRT01A", "Treatment"), ("AGE", "Age"), ("SEX", "Sex")]
+
+     if obs_cols is None:
+         # Default columns from ADMH
+         obs_cols = [
+             ("MHSEQ", "Seq"),
+             ("MHBODSYS", "System Organ Class"),
+             ("MHDECOD", "Preferred Term"),
+             ("MHSTDTC", "Start Date"),
+             ("MHENRTPT", "Status"),
+         ]
+
+     # Apply filters
+     adsl, adq = apply_common_filters(
+         population=population,
+         observation=observation,
+         population_filter=population_filter,
+         observation_filter=observation_filter,
+     )
+
+     if adq is None:
+         raise ValueError("Observation data is missing")
+
+     # Join
+     # Select the requested columns from ADSL
+     pop_col_names = [c[0] for c in pop_cols]
+     # Ensure the ID column is included
+     if id_col not in pop_col_names:
+         pop_col_names = [id_col] + pop_col_names
+
+     adsl_sub = adsl.select(pop_col_names)
+
+     joined = adq.join(adsl_sub, on=id_col, how="inner")
+
+     # Sort
+     if sort_cols:
+         # Only sort by columns that exist in the joined data
+         valid_sorts = [c for c in sort_cols if c in joined.columns]
+         if valid_sorts:
+             joined = joined.sort(valid_sorts)
+
+     # Select display columns (id + pop + obs)
+     display_cols = [id_col] + [c[0] for c in pop_cols if c[0] != id_col] + [c[0] for c in obs_cols]
+     final_df = joined.select([c for c in display_cols if c in joined.columns])
+
+     # Columns keep their raw names here; create_rtf_listing takes a col_header
+     # list, so a label mapping could be applied at the RTF stage if needed.
+     # (mh_listing_rtf currently uses the raw column names as headers.)
+
+     return final_df
+
+
+ def mh_listing_rtf(
+     df: pl.DataFrame,
+     output_path: str,
+     title: list[str] | str,
+     footnote: list[str] | None,
+     source: list[str] | None,
+ ) -> None:
+     if df.is_empty():
+         return
+
+     # mh_listing_df does not return display headers, so the raw column names
+     # are used as headers here for simplicity; a predefined label mapping
+     # could be passed instead. Column order is assumed to be preserved from
+     # mh_listing_df.
+
+     headers = list(df.columns)
+
+     # Approximate weighted widths considered: ID 1, TRT 1.5, AGE 0.5, SEX 0.5,
+     # SEQ 0.5, SOC 2, PT 2, DATE 1, STATUS 1 (about 10 units in total).
+     # A weighted layout would read better, but uniform widths are used here
+     # for simplicity.
+     n_cols = len(headers)
+     col_widths = [1.0] * n_cols
+
+     rtf_doc = create_rtf_listing(
+         df=df,
+         col_header=headers,
+         col_widths=col_widths,
+         title=title,
+         footnote=footnote,
+         source=source,
+     )
+
+     rtf_doc.write_rtf(output_path)
+
+
+ def study_plan_to_mh_listing(study_plan: StudyPlan) -> list[str]:
+     """
+     Batch generate MH listings.
+     """
+     analysis_type = "mh_listing"
+     output_dir = study_plan.output_dir
+
+     parser = StudyPlanParser(study_plan)
+
+     plans = study_plan.study_data.get("plans", [])
+     all_specs = []
+     for plan_data in plans:
+         expanded = study_plan.expander.expand_plan(plan_data)
+         for p in expanded:
+             all_specs.append(study_plan.expander.create_analysis_spec(p))
+
+     plan_df = pl.DataFrame(all_specs)
+
+     if "analysis" in plan_df.columns:
+         mh_plans = plan_df.filter(pl.col("analysis") == analysis_type)
+     else:
+         mh_plans = pl.DataFrame()
+
+     generated_files = []
+
+     for analysis in mh_plans.iter_rows(named=True):
+         pop_name = analysis.get("population", "enrolled")
+
+         try:
+             # Load Population
+             adsl, _ = parser.get_population_data(pop_name, "trt01a")  # placeholder group; not used for listings
+
+             (admh,) = parser.get_datasets("admh")
+
+             filename = f"{analysis_type}_{pop_name}.rtf".lower()
+             output_path = f"{output_dir}/{filename}"
+             Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+             mh_listing(
+                 population=adsl,
+                 observation=admh,
+                 population_filter=None,
+                 observation_filter=None,  # no filter: list all medical history records
+                 output_file=output_path,
+                 title=["Listing of Medical History", f"({pop_name} Population)"],
+                 source=["Source: ADSL, ADMH"],
+             )
+
+             generated_files.append(output_path)
+
+         except Exception as e:
+             print(f"Error generating MH listing: {e}")
+             continue
+
+     return generated_files
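
Taken together, mh_listing filters ADSL and ADMH, joins them on the subject ID, and writes an RTF listing via create_rtf_listing. A minimal usage sketch on hypothetical data follows; the import path is an assumption, since the diff does not show where the module lives inside the package.

    import polars as pl

    from csrlite.mh_listing import mh_listing  # hypothetical import path

    # Hypothetical ADSL / ADMH extracts (CDISC ADaM-style column names)
    adsl = pl.DataFrame({
        "USUBJID": ["01-001", "01-002"],
        "TRT01A": ["Drug A", "Placebo"],
        "AGE": [54, 61],
        "SEX": ["F", "M"],
        "SAFFL": ["Y", "Y"],
    })
    admh = pl.DataFrame({
        "USUBJID": ["01-001", "01-001", "01-002"],
        "MHSEQ": [1, 2, 1],
        "MHBODSYS": ["Cardiac disorders", "Nervous system disorders", "Vascular disorders"],
        "MHDECOD": ["Atrial fibrillation", "Migraine", "Hypertension"],
        "MHSTDTC": ["2020-01-15", "2019-06-02", "2018-11-30"],
        "MHENRTPT": ["ONGOING", "ONGOING", "ONGOING"],
        "MHOCCUR": ["Y", "Y", "Y"],
    })

    # Writes mh_listing.rtf, sorted by subject and sequence number
    mh_listing(
        population=adsl,
        observation=admh,
        sort_columns=["USUBJID", "MHSEQ"],
    )
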
@@ -0,0 +1,333 @@
+ # pyre-strict
+ """
+ Medical History (MH) Summary Analysis Functions
+ """
+
+ from pathlib import Path
+ from typing import Any
+
+ import polars as pl
+
+ from ..common.parse import StudyPlanParser
+ from ..common.plan import StudyPlan
+ from ..common.rtf import create_rtf_table_n_pct
+ from ..common.utils import apply_common_filters
+
+
+ def mh_summary(
+     population: pl.DataFrame,
+     observation: pl.DataFrame,
+     population_filter: str | None = "SAFFL = 'Y'",
+     observation_filter: str | None = "MHOCCUR = 'Y'",
+     id: tuple[str, str] = ("USUBJID", "Subject ID"),
+     group: tuple[str, str] = ("TRT01A", "Treatment"),
+     variables: list[tuple[str, str]] | None = None,
+     title: list[str] | None = None,
+     footnote: list[str] | None = None,
+     source: list[str] | None = None,
+     output_file: str = "mh_summary.rtf",
+ ) -> str:
+     """
+     Generate Medical History Summary Table.
+     """
+     if title is None:
+         title = ["Summary of Medical History by Body System and Preferred Term"]
+
+     if variables is None:
+         # Default hierarchy: Body System -> Preferred Term
+         variables = [("MHBODSYS", "System Organ Class"), ("MHDECOD", "Preferred Term")]
+
+     # Generate ARD
+     ard = mh_summary_ard(
+         population=population,
+         observation=observation,
+         population_filter=population_filter,
+         observation_filter=observation_filter,
+         group_col=group[0],
+         id_col=id[0],
+         variables=variables,
+     )
+
+     # Transform to Display DF
+     df = mh_summary_df(ard)
+
+     # Generate RTF
+     mh_summary_rtf(df=df, output_path=output_file, title=title, footnote=footnote, source=source)
+
+     return output_file
+
+
+ def mh_summary_ard(
+     population: pl.DataFrame,
+     observation: pl.DataFrame,
+     population_filter: str | None,
+     observation_filter: str | None,
+     group_col: str,
+     id_col: str,
+     variables: list[tuple[str, str]],
+ ) -> pl.DataFrame:
+     """
+     Generate ARD for MH Summary.
+     Hierarchy is Body System -> Preferred Term.
+     """
+
+     # Apply filters
+     adsl, adq = apply_common_filters(
+         population=population,
+         observation=observation,
+         population_filter=population_filter,
+         observation_filter=observation_filter,
+     )
+
+     if adq is None:
+         # Should not happen, since an observation DataFrame was provided
+         raise ValueError("Observation data is missing")
+
+     # The summary nests MHDECOD (Preferred Term) under MHBODSYS (Body System).
+     # Structure:
+     #   Any Medical History (all records)
+     #     Body System 1
+     #       Term A
+     #       Term B
+
+     # count_subject_with_observation handles a flat list of conditions, so the
+     # nested structure is built manually here rather than by reusing it.
+
+     # A standard two-level nesting (MHBODSYS -> MHDECOD) is assumed;
+     # `variables` is expected to follow this pattern.
+
+     # In the standard usage, variables=[("MHBODSYS", "SOC"), ("MHDECOD", "PT")];
+     # the hierarchy columns below are referenced directly by name.
+
+     # Build a list of specs: (filter expression, label, indent level, is_header)
+
+     specs: list[dict[str, Any]] = []
+
+     # 1. Overall "Any Medical History"
+     specs.append(
+         {"filter": pl.lit(True), "label": "Any Medical History", "indent": 0, "is_header": False}
+     )
+
+     # Get distinct Body Systems
+     bodsys_list: list[str | None] = (
+         adq.select("MHBODSYS").unique().sort("MHBODSYS").to_series().to_list()
+     )
+
+     for sys in bodsys_list:
+         if sys is None:
+             continue
+
+         # Add Body System row
+         specs.append(
+             {
+                 "filter": pl.col("MHBODSYS") == sys,
+                 "label": sys,
+                 "indent": 1,
+                 "is_header": False,  # body-system rows also carry counts
+             }
+         )
+
+         # Get distinct Terms within this Body System
+         terms: list[str | None] = (
+             adq.filter(pl.col("MHBODSYS") == sys)
+             .select("MHDECOD")
+             .unique()
+             .sort("MHDECOD")
+             .to_series()
+             .to_list()
+         )
+
+         for term in terms:
+             if term is None:
+                 continue
+             specs.append(
+                 {
+                     "filter": (pl.col("MHBODSYS") == sys) & (pl.col("MHDECOD") == term),
+                     "label": term,
+                     "indent": 2,
+                     "is_header": False,
+                 }
+             )
+
+     # Now calculate counts for each spec
+     results: list[dict[str, Any]] = []
+
+     # Get total population counts by group
+     pop_counts = adsl.group_by(group_col).count().sort(group_col)
+     groups: list[Any] = pop_counts.select(group_col).to_series().to_list()
+     # Pre-calculate the denominator (total N) per group
+     pop_totals: dict[Any, int] = {
+         row[group_col]: row["count"] for row in pop_counts.iter_rows(named=True)
+     }
+
+     # Helper to calculate one result row
+     def calc_row(
+         spec: dict[str, Any], obs_data: pl.DataFrame, pop_data: pl.DataFrame
+     ) -> dict[str, Any]:
+         row_res = {"label": spec["label"], "indent": spec["indent"], "is_header": spec["is_header"]}
+
+         # Counting follows the same pattern as count_subject_with_observation:
+         # filter the observations, join to the population, then count
+         # distinct subjects per group.
+
+         # 1. Filter the observation data on the spec's criteria
+         filtered_obs = obs_data.filter(spec["filter"])
+
+         # 2. Inner join with the (already filtered) population to attach the
+         #    group column and keep only subjects in the analysis population
+
+         subset = filtered_obs.join(pop_data.select([id_col, group_col]), on=id_col, how="inner")
+
+         # 3. Count distinct subjects per group
+         counts = subset.select(id_col, group_col).unique().group_by(group_col).count()
+         counts_map = {row[group_col]: row["count"] for row in counts.iter_rows(named=True)}
+
+         for g in groups:
+             n = counts_map.get(g, 0)
+             denom = pop_totals.get(g, 0)
+             pct = (n / denom * 100.0) if denom > 0 else 0.0
+             row_res[f"count_{g}"] = n
+             row_res[f"pct_{g}"] = pct
+
+         return row_res
+
+     for spec in specs:
+         results.append(calc_row(spec, adq, adsl))
+
+     return pl.DataFrame(results)
+
+
+ def mh_summary_df(ard: pl.DataFrame) -> pl.DataFrame:
+     """
+     Transform ARD to Display DataFrame.
+     """
+     if ard.is_empty():
+         return pl.DataFrame()
+
+     # Identify group columns
+     cols = ard.columns
+     group_cols = [c for c in cols if c.startswith("count_")]
+     groups = [c.replace("count_", "") for c in group_cols]
+
+     select_exprs = [
+         (pl.lit(" ").repeat_by(pl.col("indent")).list.join("") + pl.col("label")).alias(
+             "Medical History"
+         )
+     ]
+
+     for g in groups:
+         col_n = pl.col(f"count_{g}")
+         col_pct = pl.col(f"pct_{g}")
+
+         fmt = (
+             col_n.cast(pl.Utf8)
+             + " ("
+             + col_pct.map_elements(lambda x: f"{x:.1f}", return_dtype=pl.Utf8)
+             + ")"
+         ).alias(g)
+
+         select_exprs.append(fmt)
+
+     return ard.select(select_exprs)
+
+
+ def mh_summary_rtf(
+     df: pl.DataFrame,
+     output_path: str,
+     title: list[str] | str,
+     footnote: list[str] | None,
+     source: list[str] | None,
+ ) -> None:
+     """
+     Generate RTF document.
+     """
+     if df.is_empty():
+         # Nothing to render for an empty table
+         return
+
+     n_cols = len(df.columns)
+     col_width_first = 2.5
+     remaining_width = 7.0  # approximate usable page width
+     col_width_others = remaining_width / max(n_cols - 1, 1)
+     col_widths = [col_width_first] + [col_width_others] * (n_cols - 1)
+
+     col_header_1 = list(df.columns)
+     col_header_2 = [""] + ["n (%)"] * (n_cols - 1)
+
+     rtf_doc = create_rtf_table_n_pct(
+         df=df,
+         col_header_1=col_header_1,
+         col_header_2=col_header_2,
+         col_widths=col_widths,
+         title=title,
+         footnote=footnote,
+         source=source,
+     )
+
+     rtf_doc.write_rtf(output_path)
+
+
+ def study_plan_to_mh_summary(study_plan: StudyPlan) -> list[str]:
+     """
+     Batch generate MH summaries from study plan.
+     """
+     analysis_type = "mh_summary"
+     output_dir = study_plan.output_dir
+
+     # Initialize parser
+     parser = StudyPlanParser(study_plan)
+
+     # Expand the study plan into individual analysis specs
+     plans = study_plan.study_data.get("plans", [])
+     all_specs = []
+     for plan_data in plans:
+         expanded = study_plan.expander.expand_plan(plan_data)
+         for p in expanded:
+             all_specs.append(study_plan.expander.create_analysis_spec(p))
+
+     plan_df = pl.DataFrame(all_specs)
+
+     if "analysis" in plan_df.columns:
+         mh_plans = plan_df.filter(pl.col("analysis") == analysis_type)
+     else:
+         mh_plans = pl.DataFrame()
+
+     generated_files = []
+
+     for analysis in mh_plans.iter_rows(named=True):
+         pop_name = analysis.get("population", "enrolled")
+         group_kw = analysis.get("group", "trt01a")  # group keyword from the plan
+
+         try:
+             # Load population data and resolve the group column
+             adsl, group_col = parser.get_population_data(pop_name, group_kw)
+
+             # Load MH data
+             # Note: assumes 'admh' is the dataset name in the study plan
+             (admh,) = parser.get_datasets("admh")
+
+             filename = f"{analysis_type}_{pop_name}_{group_kw}.rtf".lower()
+             output_path = f"{output_dir}/{filename}"
+             Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+             mh_summary(
+                 population=adsl,
+                 observation=admh,
+                 population_filter=None,  # already filtered by the parser
+                 observation_filter="MHOCCUR = 'Y'",
+                 group=(group_col, group_col),  # use the resolved column name
+                 output_file=output_path,
+                 title=[
+                     "Summary of Medical History by System Organ Class and Preferred Term",
+                     f"({pop_name} Population)",
+                 ],
+                 source=["Source: ADSL, ADMH"],
+             )
+
+             generated_files.append(output_path)
+
+         except Exception as e:
+             print(f"Error generating MH summary: {e}")
+             continue
+
+     return generated_files
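
The summary path works the same way end to end: mh_summary_ard counts unique subjects per group, mh_summary_df formats the counts as n (%), and mh_summary_rtf writes the table. A minimal usage sketch on hypothetical data, again with an assumed import path; with the default filters, subjects are restricted to SAFFL = 'Y' and records to MHOCCUR = 'Y', and the output has one "Any Medical History" row followed by body-system rows with nested preferred-term rows.

    import polars as pl

    from csrlite.mh_summary import mh_summary  # hypothetical import path

    # Hypothetical ADSL / ADMH extracts
    adsl = pl.DataFrame({
        "USUBJID": ["01-001", "01-002", "01-003"],
        "TRT01A": ["Drug A", "Drug A", "Placebo"],
        "SAFFL": ["Y", "Y", "Y"],
    })
    admh = pl.DataFrame({
        "USUBJID": ["01-001", "01-003"],
        "MHBODSYS": ["Cardiac disorders", "Vascular disorders"],
        "MHDECOD": ["Atrial fibrillation", "Hypertension"],
        "MHOCCUR": ["Y", "Y"],
    })

    # Writes mh_summary.rtf; cells read like "1 (50.0)" per treatment group
    mh_summary(population=adsl, observation=admh, output_file="mh_summary.rtf")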