csrlite 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/common/plan.py CHANGED
@@ -6,17 +6,19 @@ using YAML plans with template inheritance and keyword resolution.
6
6
  """
7
7
 
8
8
  import itertools
9
- from dataclasses import dataclass, field, fields
9
+ import logging
10
10
  from pathlib import Path
11
11
  from typing import Any, Dict, List, Optional, cast
12
12
 
13
13
  import polars as pl
14
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
14
15
 
15
16
  from .yaml_loader import YamlInheritanceLoader
16
17
 
18
+ logger: logging.Logger = logging.getLogger(__name__)
17
19
 
18
- @dataclass
19
- class Keyword:
20
+
21
+ class Keyword(BaseModel):
20
22
  """Base keyword definition."""
21
23
 
22
24
  name: str
@@ -24,54 +26,60 @@ class Keyword:
24
26
  description: Optional[str] = None
25
27
 
26
28
 
27
- @dataclass
28
29
class Population(Keyword):
    """Analysis population keyword.

    Carries a row-selection expression used to subset the source dataset
    to the analysis population.
    """

    # SQL-style filter expression (applied via polars.sql_expr elsewhere);
    # empty string means "no filtering".
    filter: str = ""
32
33
 
33
34
 
34
- @dataclass
35
35
class Observation(Keyword):
    """Observation/timepoint keyword.

    Identifies which observation records (e.g. a time window) belong to
    an analysis.
    """

    # SQL-style filter expression (applied via polars.sql_expr elsewhere);
    # empty string means "no filtering".
    filter: str = ""
39
39
 
40
40
 
41
- @dataclass
42
41
class Parameter(Keyword):
    """Parameter definition with filter.

    The ``terms`` field supports dynamic title generation:

    - ``terms["before"]``: "serious" -> "Serious Adverse Events"
    - ``terms["after"]``: "resulting in death" -> "Adverse Events Resulting in Death"
    """

    # SQL-style filter expression selecting this parameter's records;
    # empty string means "no filtering".
    filter: str = ""
    # Optional "before"/"after" fragments used for dynamic title generation.
    terms: Optional[Dict[str, str]] = None
    indent: int = 0  # Indentation level for hierarchical display
53
47
 
54
48
 
55
- @dataclass
56
49
class Group(Keyword):
    """Treatment group definition.

    Maps a grouping variable (e.g. a treatment-arm column) to its levels
    and the display labels for those levels.
    """

    # Name of the dataset column holding the grouping values.
    variable: str = ""
    # Observed levels of ``variable`` included in the analysis.
    level: List[str] = Field(default_factory=list)
    # Display labels, parallel to ``level``.
    group_label: List[str] = Field(default_factory=list)

    # pyre-ignore[56]
    @field_validator("group_label", mode="before")
    @classmethod
    def set_group_label(cls, v: Any, info: Any) -> Any:
        """Normalize a missing/None ``group_label`` to an empty list.

        Falling back to another field's value (the old registry-level
        ``label`` fallback) is handled before construction by the caller;
        here we only guarantee a list-typed default.
        """
        return v or []
70
+
71
+
72
class DataSource(BaseModel):
    """Data source definition."""

    # polars.DataFrame is not a pydantic-native type; permit it as a field.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str
    path: str
    # Populated lazily when datasets are loaded; excluded from
    # serialization because it is runtime state, not plan metadata.
    dataframe: Optional[pl.DataFrame] = Field(default=None, exclude=True)
71
80
 
72
81
 
73
- @dataclass
74
- class AnalysisPlan:
82
+ class AnalysisPlan(BaseModel):
75
83
  """Individual analysis plan specification."""
76
84
 
77
85
  analysis: str
@@ -91,37 +99,48 @@ class AnalysisPlan:
91
99
  return "_".join(parts)
92
100
 
93
101
 
94
- class KeywordRegistry:
102
+ class KeywordRegistry(BaseModel):
95
103
  """Registry for managing keywords."""
96
104
 
97
- def __init__(self) -> None:
98
- self.populations: Dict[str, Population] = {}
99
- self.observations: Dict[str, Observation] = {}
100
- self.parameters: Dict[str, Parameter] = {}
101
- self.groups: Dict[str, Group] = {}
102
- self.data_sources: Dict[str, DataSource] = {}
105
+ populations: Dict[str, Population] = Field(default_factory=dict)
106
+ observations: Dict[str, Observation] = Field(default_factory=dict)
107
+ parameters: Dict[str, Parameter] = Field(default_factory=dict)
108
+ groups: Dict[str, Group] = Field(default_factory=dict)
109
+ data_sources: Dict[str, DataSource] = Field(default_factory=dict)
103
110
 
104
111
  def load_from_dict(self, data: Dict[str, Any]) -> None:
105
112
  """Load keywords from a dictionary."""
106
- self._load_keyword_type(data, "population", Population, self.populations)
107
- self._load_keyword_type(data, "observation", Observation, self.observations)
108
- self._load_keyword_type(data, "parameter", Parameter, self.parameters)
109
- self._load_keyword_type(data, "group", Group, self.groups)
110
- self._load_keyword_type(data, "data", DataSource, self.data_sources)
111
-
112
- def _load_keyword_type(
113
- self, data: Dict[str, Any], key: str, keyword_class: Any, target_dict: Dict[str, Any]
114
- ) -> None:
115
- """Generic method to load a type of keyword."""
116
- for item_data in data.get(key, []):
117
- if keyword_class == Group and "group_label" not in item_data:
118
- item_data["group_label"] = item_data.get("label", [])
119
-
120
- expected_fields = {f.name for f in fields(keyword_class) if f.init}
121
- filtered_data = {k: v for k, v in item_data.items() if k in expected_fields}
122
-
123
- instance = keyword_class(**filtered_data)
124
- target_dict[instance.name] = instance
113
+ # We manually load so we can handle the dict-to-list-of-models transformation
114
+ # and the specific logic for defaults.
115
+
116
+ for item in data.get("population", []):
117
+ pop_item = Population(**item)
118
+ self.populations[pop_item.name] = pop_item
119
+
120
+ for item in data.get("observation", []):
121
+ obs_item = Observation(**item)
122
+ self.observations[obs_item.name] = obs_item
123
+
124
+ for item in data.get("parameter", []):
125
+ param_item = Parameter(**item)
126
+ self.parameters[param_item.name] = param_item
127
+
128
+ for item in data.get("group", []):
129
+ # Special handling for Group where 'label' might be a list (for group_label)
130
+ # but Keyword.label expects a string.
131
+ if "label" in item and isinstance(item["label"], list):
132
+ if "group_label" not in item:
133
+ item["group_label"] = item["label"]
134
+ # Remove label from item to avoid validation error on Keyword.label
135
+ # or set it to a joined string if a label is really needed
136
+ del item["label"]
137
+
138
+ group_item = Group(**item)
139
+ self.groups[group_item.name] = group_item
140
+
141
+ for item in data.get("data", []):
142
+ ds_item = DataSource(**item)
143
+ self.data_sources[ds_item.name] = ds_item
125
144
 
126
145
  def get_population(self, name: str) -> Optional[Population]:
127
146
  return self.populations.get(name)
@@ -228,11 +247,10 @@ class StudyPlan:
228
247
  df = pl.read_parquet(path)
229
248
  self.datasets[name] = df
230
249
  data_source.dataframe = df
231
- print(f"Successfully loaded dataset '{name}' from '{path}'")
250
+ logger.info(f"Successfully loaded dataset '{name}' from '{path}'")
232
251
  except Exception as e:
233
- print(
234
- f"Warning: Could not load dataset '{name}' from '{data_source.path}'. "
235
- f"Reason: {e}"
252
+ logger.warning(
253
+ f"Could not load dataset '{name}' from '{data_source.path}'. Reason: {e}"
236
254
  )
237
255
 
238
256
  def get_plan_df(self) -> pl.DataFrame:
@@ -306,34 +324,28 @@ class StudyPlan:
306
324
 
307
325
  def print(self) -> None:
308
326
  """Print comprehensive study plan information using Polars DataFrames."""
309
- print("ADaM Metadata:")
327
+ logger.info("ADaM Metadata:")
310
328
 
311
329
  if (df := self.get_dataset_df()) is not None:
312
- print("\nData Sources:")
313
- print(df)
330
+ logger.info(f"\nData Sources:\n{df}")
314
331
 
315
332
  if (df := self.get_population_df()) is not None:
316
- print("\nAnalysis Population Type:")
317
- print(df)
333
+ logger.info(f"\nAnalysis Population Type:\n{df}")
318
334
 
319
335
  if (df := self.get_observation_df()) is not None:
320
- print("\nAnalysis Observation Type:")
321
- print(df)
336
+ logger.info(f"\nAnalysis Observation Type:\n{df}")
322
337
 
323
338
  if (df := self.get_parameter_df()) is not None:
324
- print("\nAnalysis Parameter Type:")
325
- print(df)
339
+ logger.info(f"\nAnalysis Parameter Type:\n{df}")
326
340
 
327
341
  if (df := self.get_group_df()) is not None:
328
- print("\nAnalysis Groups:")
329
- print(df)
342
+ logger.info(f"\nAnalysis Groups:\n{df}")
330
343
 
331
344
  if (df := self.get_plan_df()) is not None:
332
- print("\nAnalysis Plans:")
333
- print(df)
345
+ logger.info(f"\nAnalysis Plans:\n{df}")
334
346
 
335
347
  def __str__(self) -> str:
336
- study_name = self.study_data.get("study", Dict[str, Any]()).get("name", "Unknown")
348
+ study_name = self.study_data.get("study", {}).get("name", "Unknown")
337
349
  condensed_plans = len(self.study_data.get("plans", []))
338
350
  individual_analyses = len(self.get_plan_df())
339
351
  return (
csrlite/common/rtf.py ADDED
@@ -0,0 +1,85 @@
1
+ # pyre-strict
2
+ from typing import Any
3
+
4
+ import polars as pl
5
+ from rtflite import RTFBody, RTFColumnHeader, RTFDocument, RTFFootnote, RTFPage, RTFSource, RTFTitle
6
+
7
+
8
def create_rtf_table_n_pct(
    df: pl.DataFrame,
    col_header_1: list[str],
    col_header_2: list[str] | None,
    col_widths: list[float] | None,
    title: list[str] | str,
    footnote: list[str] | str | None,
    source: list[str] | str | None,
    borders_2: bool = True,
    orientation: str = "landscape",
) -> RTFDocument:
    """
    Create a standardized RTF table document with 1 or 2 header rows.

    Args:
        df: Polars DataFrame containing the table data.
        col_header_1: List of strings for the first header row.
        col_header_2: Optional list of strings for the second header row.
        col_widths: Optional list of relative column widths. Defaults to equal widths.
        title: Title string or list of title strings.
        footnote: Footnote string or list of footnote strings.
        source: Source string or list of source strings.
        borders_2: Whether to show borders for the second header row. Defaults to True.
        orientation: Page orientation, "landscape" or "portrait". Defaults to "landscape".

    Returns:
        RTFDocument object.
    """
    n_cols = len(df.columns)

    # Default to equal relative widths when none are supplied.
    if col_widths is None:
        col_widths = [1.0] * n_cols

    # First column (row labels) left-justified, all others centered.
    # Hoisted once instead of rebuilding the same list for each component.
    justification: list[str] = ["l"] + ["c"] * (n_cols - 1)

    # Normalize metadata to list form.
    title_list = [title] if isinstance(title, str) else title
    footnote_list = [footnote] if isinstance(footnote, str) else (footnote or [])
    source_list = [source] if isinstance(source, str) else (source or [])

    headers = [
        RTFColumnHeader(
            text=col_header_1,
            col_rel_width=col_widths,
            text_justification=justification,
        )
    ]

    if col_header_2:
        h2_kwargs: dict[str, Any] = {
            "text": col_header_2,
            "col_rel_width": col_widths,
            "text_justification": justification,
        }
        if borders_2:
            h2_kwargs["border_left"] = ["single"]
            h2_kwargs["border_top"] = [""]

        headers.append(RTFColumnHeader(**h2_kwargs))

    rtf_components: dict[str, Any] = {
        "df": df,
        "rtf_page": RTFPage(orientation=orientation),
        "rtf_title": RTFTitle(text=title_list),
        "rtf_column_header": headers,
        "rtf_body": RTFBody(
            col_rel_width=col_widths,
            text_justification=justification,
            border_left=["single"] * n_cols,
        ),
    }

    # Footnote/source blocks are optional; only attach them when non-empty.
    if footnote_list:
        rtf_components["rtf_footnote"] = RTFFootnote(text=footnote_list)

    if source_list:
        rtf_components["rtf_source"] = RTFSource(text=source_list)

    return RTFDocument(**rtf_components)
csrlite/common/utils.py CHANGED
@@ -4,11 +4,11 @@ import polars as pl
4
4
 
5
5
  def apply_common_filters(
6
6
  population: pl.DataFrame,
7
- observation: pl.DataFrame,
7
+ observation: pl.DataFrame | None,
8
8
  population_filter: str | None,
9
9
  observation_filter: str | None,
10
10
  parameter_filter: str | None = None,
11
- ) -> tuple[pl.DataFrame, pl.DataFrame]:
11
+ ) -> tuple[pl.DataFrame, pl.DataFrame | None]:
12
12
  """
13
13
  Apply standard population, observation, and parameter filters.
14
14
 
@@ -23,11 +23,11 @@ def apply_common_filters(
23
23
 
24
24
  # Apply observation filter
25
25
  observation_filtered = observation
26
- if observation_filter:
26
+ if observation_filter and observation_filtered is not None:
27
27
  observation_filtered = observation_filtered.filter(pl.sql_expr(observation_filter))
28
28
 
29
29
  # Apply parameter filter
30
- if parameter_filter:
30
+ if parameter_filter and observation_filtered is not None:
31
31
  observation_filtered = observation_filtered.filter(pl.sql_expr(parameter_filter))
32
32
 
33
33
  return population_filtered, observation_filtered