csrlite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/common/plan.py ADDED
@@ -0,0 +1,353 @@
1
+ # pyre-strict
2
+ """
3
+ Clean, simple TLF plan system.
4
+ This module provides a straightforward implementation for clinical TLF generation
5
+ using YAML plans with template inheritance and keyword resolution.
6
+ """
7
+
8
+ import itertools
9
+ from dataclasses import dataclass, field, fields
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, cast
12
+
13
+ import polars as pl
14
+
15
+ from .yaml_loader import YamlInheritanceLoader
16
+
17
+
18
@dataclass
class Keyword:
    """Base keyword definition.

    Common shape shared by the plan keyword types (Population, Observation,
    Parameter, Group all extend this class).
    """

    name: str  # unique identifier; used as the lookup key in KeywordRegistry
    label: Optional[str] = None  # human-readable display label
    description: Optional[str] = None  # optional longer free-text description
25
+
26
+
27
@dataclass
class Population(Keyword):
    """Population definition with filter."""

    # Row-selection expression for this population; empty string means no
    # filtering (presumably evaluated via pl.sql_expr — confirm with callers).
    filter: str = ""
32
+
33
+
34
@dataclass
class Observation(Keyword):
    """Observation/timepoint definition with filter."""

    # Row-selection expression for this observation window; "" = no filter.
    filter: str = ""
39
+
40
+
41
@dataclass
class Parameter(Keyword):
    """Parameter definition with filter.

    The terms field supports dynamic title generation:
    - terms.before: "serious" → "Serious Adverse Events"
    - terms.after: "resulting in death" → "Adverse Events Resulting in Death"
    """

    filter: str = ""  # row-selection expression for this parameter; "" = no filter
    terms: Optional[Dict[str, str]] = None  # optional "before"/"after" title fragments
    indent: int = 0  # Indentation level for hierarchical display
53
+
54
+
55
@dataclass
class Group(Keyword):
    """Treatment group definition."""

    # Name of the grouping variable (presumably a dataset column — confirm
    # against the analysis code that consumes groups).
    variable: str = ""
    level: List[str] = field(default_factory=list)  # ordered group levels
    # Display labels; parallel to `level`. Defaults to `label` when absent in
    # the input (see KeywordRegistry._load_keyword_type).
    group_label: List[str] = field(default_factory=list)
62
+
63
+
64
@dataclass
class DataSource:
    """Data source definition.

    `dataframe` stays None until StudyPlan.load_datasets successfully reads
    the parquet file at `path`.
    """

    name: str  # registry key for this data source
    path: str  # parquet file path, resolved relative to the plan's base path
    dataframe: Optional[pl.DataFrame] = None  # loaded data; None until loaded
71
+
72
+
73
@dataclass
class AnalysisPlan:
    """Individual analysis plan specification.

    One fully-expanded analysis: an analysis type applied to one population,
    optionally narrowed by an observation window and a parameter. The group
    keyword is carried along but does not participate in the ID.
    """

    analysis: str
    population: str
    observation: Optional[str] = None
    group: Optional[str] = None
    parameter: Optional[str] = None

    @property
    def id(self) -> str:
        """Generate unique analysis ID."""
        # Required components first, then whichever optional ones are set.
        components = [self.analysis, self.population]
        components.extend(part for part in (self.observation, self.parameter) if part)
        return "_".join(components)
92
+
93
+
94
class KeywordRegistry:
    """Registry for managing keywords.

    Holds one dictionary per keyword type, keyed by keyword name. Keywords
    are loaded from plain dicts (typically parsed YAML) via load_from_dict;
    unknown keys in the input are ignored so plan files may carry extra
    metadata without breaking dataclass construction.
    """

    def __init__(self) -> None:
        self.populations: Dict[str, Population] = {}
        self.observations: Dict[str, Observation] = {}
        self.parameters: Dict[str, Parameter] = {}
        self.groups: Dict[str, Group] = {}
        self.data_sources: Dict[str, DataSource] = {}

    def load_from_dict(self, data: Dict[str, Any]) -> None:
        """Load keywords from a dictionary.

        Each of the keys "population", "observation", "parameter", "group",
        and "data" may map to a list of keyword definition dicts.
        """
        self._load_keyword_type(data, "population", Population, self.populations)
        self._load_keyword_type(data, "observation", Observation, self.observations)
        self._load_keyword_type(data, "parameter", Parameter, self.parameters)
        self._load_keyword_type(data, "group", Group, self.groups)
        self._load_keyword_type(data, "data", DataSource, self.data_sources)

    def _load_keyword_type(
        self, data: Dict[str, Any], key: str, keyword_class: Any, target_dict: Dict[str, Any]
    ) -> None:
        """Generic method to load a type of keyword.

        Unknown fields are dropped before constructing the dataclass.
        The caller's input dicts are never mutated.
        """
        for item_data in data.get(key, []):
            # Copy first: the original version mutated the caller's dict when
            # defaulting group_label, which silently altered parsed YAML data.
            item_data = dict(item_data)
            # Use identity comparison for the class check, not equality.
            if keyword_class is Group and "group_label" not in item_data:
                item_data["group_label"] = item_data.get("label", [])

            # Keep only keys that correspond to init-able dataclass fields.
            expected_fields = {f.name for f in fields(keyword_class) if f.init}
            filtered_data = {k: v for k, v in item_data.items() if k in expected_fields}

            instance = keyword_class(**filtered_data)
            target_dict[instance.name] = instance

    def get_population(self, name: str) -> Optional[Population]:
        """Return the named population, or None if unknown."""
        return self.populations.get(name)

    def get_observation(self, name: str) -> Optional[Observation]:
        """Return the named observation, or None if unknown."""
        return self.observations.get(name)

    def get_parameter(self, name: str) -> Optional[Parameter]:
        """Return the named parameter, or None if unknown."""
        return self.parameters.get(name)

    def get_group(self, name: str) -> Optional[Group]:
        """Return the named group, or None if unknown."""
        return self.groups.get(name)

    def get_data_source(self, name: str) -> Optional[DataSource]:
        """Return the named data source, or None if unknown."""
        return self.data_sources.get(name)
140
+
141
+
142
class PlanExpander:
    """Expands condensed plans into individual analysis specifications."""

    def __init__(self, keywords: KeywordRegistry) -> None:
        # Registry used to resolve labels when generating titles.
        self.keywords = keywords

    def expand_plan(self, plan_data: Dict[str, Any]) -> List[AnalysisPlan]:
        """Expand a single condensed plan into individual plans.

        Takes the cross product of populations x observations x parameters;
        the group keyword is carried through to every expanded plan.
        """
        analysis = plan_data["analysis"]
        populations = self._to_list(plan_data.get("population", []))
        observations: List[Any] = self._to_list(plan_data.get("observation")) or [None]
        parameters: List[Any] = self._parse_parameters(plan_data.get("parameter")) or [None]
        group = plan_data.get("group")

        expanded: List[AnalysisPlan] = []
        for pop, obs, param in itertools.product(populations, observations, parameters):
            expanded.append(
                AnalysisPlan(
                    analysis=analysis,
                    population=pop,
                    observation=obs,
                    group=group,
                    parameter=param,
                )
            )
        return expanded

    def create_analysis_spec(self, plan: AnalysisPlan) -> Dict[str, Any]:
        """Create a summary analysis specification with keywords."""
        return {
            "analysis": plan.analysis,
            "population": plan.population,
            "observation": plan.observation,
            "parameter": plan.parameter,
            "group": plan.group,
        }

    def _to_list(self, value: Any) -> List[str]:
        """Normalize a scalar-or-list value to a list (None becomes [])."""
        if value is None:
            return []
        return [value] if isinstance(value, str) else list(value)

    def _parse_parameters(self, value: Any) -> Optional[List[str]]:
        """Normalize a parameter spec, preserving None.

        A single string is kept whole — semicolon-separated values stay one
        parameter rather than being split.
        """
        if value is None:
            return None
        return [value] if isinstance(value, str) else list(value)

    def _generate_title(self, plan: AnalysisPlan) -> str:
        """Build a human-readable title from the plan's keyword labels."""
        parts = [plan.analysis.replace("_", " ").title()]
        pop = self.keywords.get_population(plan.population)
        if pop is not None and pop.label:
            parts.append(f"- {pop.label}")
        if plan.observation:
            obs = self.keywords.get_observation(plan.observation)
            if obs and obs.label:
                parts.append(f"- {obs.label}")
        if plan.parameter:
            param = self.keywords.get_parameter(plan.parameter)
            if param and param.label:
                parts.append(f"- {param.label}")
        return " ".join(parts)
202
+
203
+
204
class StudyPlan:
    """Main study plan.

    Ties together the parsed study YAML, the keyword registry, the plan
    expander, and the datasets loaded from the declared data sources.
    """

    def __init__(self, study_data: Dict[str, Any], base_path: Optional[Path] = None) -> None:
        """Build the plan from parsed (inheritance-resolved) YAML data.

        Args:
            study_data: Parsed plan dictionary.
            base_path: Directory against which dataset paths are resolved;
                defaults to the current directory.
        """
        self.study_data = study_data
        self.base_path: Path = base_path or Path(".")
        self.datasets: Dict[str, pl.DataFrame] = {}
        self.keywords = KeywordRegistry()
        self.expander = PlanExpander(self.keywords)
        self.keywords.load_from_dict(self.study_data)
        self.load_datasets()

    @property
    def output_dir(self) -> str:
        """Get output directory from study configuration (default ".")."""
        study_config = self.study_data.get("study", {})
        return cast(str, study_config.get("output", "."))

    def load_datasets(self) -> None:
        """Load datasets from paths specified in data_sources.

        Failures are reported as warnings rather than raised, so a plan with
        missing datasets can still be inspected.
        """
        for name, data_source in self.keywords.data_sources.items():
            try:
                # Ensure the path is relative to the base_path of the plan
                path = self.base_path / data_source.path
                df = pl.read_parquet(path)
                self.datasets[name] = df
                data_source.dataframe = df
                print(f"Successfully loaded dataset '{name}' from '{path}'")
            except Exception as e:
                print(
                    f"Warning: Could not load dataset '{name}' from '{data_source.path}'. "
                    f"Reason: {e}"
                )

    def get_plan_df(self) -> pl.DataFrame:
        """Expand all condensed plans into a DataFrame of detailed specifications."""
        all_specs = [
            self.expander.create_analysis_spec(plan)
            for plan_data in self.study_data.get("plans", [])
            for plan in self.expander.expand_plan(plan_data)
        ]
        return pl.DataFrame(all_specs)

    def get_dataset_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of data sources, or None if none are defined."""
        if not self.keywords.data_sources:
            return None
        return pl.DataFrame(
            [
                {"name": name, "path": ds.path, "loaded": name in self.datasets}
                for name, ds in self.keywords.data_sources.items()
            ]
        )

    def get_population_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis populations, or None if none defined."""
        if not self.keywords.populations:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": pop.label, "filter": pop.filter}
                for name, pop in self.keywords.populations.items()
            ]
        )

    def get_observation_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis observations, or None if none defined."""
        if not self.keywords.observations:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": obs.label, "filter": obs.filter}
                for name, obs in self.keywords.observations.items()
            ]
        )

    def get_parameter_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis parameters, or None if none defined."""
        if not self.keywords.parameters:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": param.label, "filter": param.filter}
                for name, param in self.keywords.parameters.items()
            ]
        )

    def get_group_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis groups, or None if none defined."""
        if not self.keywords.groups:
            return None
        return pl.DataFrame(
            [
                {
                    "name": name,
                    "variable": group.variable,
                    # Lists are stringified so the frame holds plain strings.
                    "levels": str(group.level),
                    "labels": str(group.group_label),
                }
                for name, group in self.keywords.groups.items()
            ]
        )

    def print(self) -> None:
        """Print comprehensive study plan information using Polars DataFrames.

        Note: `print` here is only a method name; the builtin remains
        available inside the body.
        """
        print("ADaM Metadata:")

        if (df := self.get_dataset_df()) is not None:
            print("\nData Sources:")
            print(df)

        if (df := self.get_population_df()) is not None:
            print("\nAnalysis Population Type:")
            print(df)

        if (df := self.get_observation_df()) is not None:
            print("\nAnalysis Observation Type:")
            print(df)

        if (df := self.get_parameter_df()) is not None:
            print("\nAnalysis Parameter Type:")
            print(df)

        if (df := self.get_group_df()) is not None:
            print("\nAnalysis Groups:")
            print(df)

        if (df := self.get_plan_df()) is not None:
            print("\nAnalysis Plans:")
            print(df)

    def __str__(self) -> str:
        # Fixed: the dict default was previously spelled `Dict[str, Any]()`,
        # a confusing (though runtime-valid) way of writing `{}`.
        study_name = self.study_data.get("study", {}).get("name", "Unknown")
        condensed_plans = len(self.study_data.get("plans", []))
        individual_analyses = len(self.get_plan_df())
        return (
            f"StudyPlan(study='{study_name}', plans={condensed_plans}, "
            f"analyses={individual_analyses})"
        )
343
+
344
+
345
def load_plan(plan_path: str) -> StudyPlan:
    """
    Loads a study plan from a YAML file, resolving template inheritance.
    """
    plan_file = Path(plan_path)
    plan_dir = plan_file.parent
    # The loader resolves template references relative to the plan's folder.
    study_data = YamlInheritanceLoader(plan_dir).load(plan_file.name)
    return StudyPlan(study_data, plan_dir)
@@ -0,0 +1,33 @@
1
+ # pyre-strict
2
+ import polars as pl
3
+
4
+
5
def apply_common_filters(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    parameter_filter: str | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """
    Apply standard population, observation, and parameter filters.

    Each filter is a SQL-like expression string evaluated via pl.sql_expr;
    empty or None filters are skipped.

    Returns:
        Tuple of (filtered_population, filtered_observation_pre_id_match)
    """
    # Population takes a single optional filter.
    population_filtered = (
        population.filter(pl.sql_expr(population_filter)) if population_filter else population
    )

    # Observation gets the observation and parameter filters applied in
    # order, each one only if provided.
    observation_filtered = observation
    for expr in (observation_filter, parameter_filter):
        if expr:
            observation_filtered = observation_filtered.filter(pl.sql_expr(expr))

    return population_filtered, observation_filtered
@@ -0,0 +1,71 @@
1
+ # pyre-strict
2
+ from copy import deepcopy
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Optional
5
+
6
+ import yaml
7
+
8
+
9
class YamlInheritanceLoader:
    """Loads YAML files and resolves ``study.template`` inheritance.

    A plan may name one or more template files under ``study.template``.
    Templates are loaded recursively (they may declare templates of their
    own), merged left-to-right, and the plan's own data is merged last so
    it wins any conflicts.
    """

    def __init__(self, base_path: Optional[Path] = None) -> None:
        # All file names passed to load() are resolved against this directory.
        self.base_path: Path = base_path or Path(".")

    def load(self, file_name: str) -> Dict[str, Any]:
        """
        Load a YAML file by name relative to base_path and resolve inheritance.

        Raises:
            FileNotFoundError: if the file does not exist.
        """
        file_path = self.base_path / file_name
        if not file_path.exists():
            raise FileNotFoundError(f"YAML file not found: {file_path}")

        # YAML is UTF-8 by specification; be explicit so reading does not
        # depend on the platform's default locale encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}

        return self._resolve_inheritance(data)

    def _resolve_inheritance(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Merge any templates listed under ``study.template`` beneath *data*.

        NOTE(review): circular template references would recurse until
        RecursionError; assumed not to occur in plan files — confirm or add
        cycle detection if plans may be user-authored.
        """
        templates = data.get("study", {}).get("template", [])
        if isinstance(templates, str):
            templates = [templates]

        if not templates:
            return data

        merged_template_data: Dict[str, Any] = {}
        for template_file in templates:
            template_data = self.load(template_file)
            merged_template_data = self._deep_merge(merged_template_data, template_data)

        # The concrete plan's own values take precedence over template values.
        return self._deep_merge(merged_template_data, data)

    def _deep_merge(self, dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge *dict2* into a deep copy of *dict1*.

        dict2 wins scalar conflicts; nested dicts merge recursively. Lists
        whose elements are all dicts with a "name" key merge element-wise by
        name; any other lists concatenate with duplicates dropped.
        """
        merged = deepcopy(dict1)
        for key, value in dict2.items():
            if key in merged and isinstance(merged[key], list) and isinstance(value, list):
                # Heuristic to check if these are lists of keywords (dicts with a 'name')
                # This logic is specific to how this project uses YAML inheritance.
                is_keyword_list = all(isinstance(i, dict) and "name" in i for i in value) and all(
                    isinstance(i, dict) and "name" in i for i in merged[key]
                )

                if is_keyword_list:
                    merged_by_name = {item["name"]: item for item in merged[key]}
                    for item in value:
                        if item["name"] in merged_by_name:
                            # It's a dict merge, so we can recursively call _deep_merge
                            merged_by_name[item["name"]] = self._deep_merge(
                                merged_by_name[item["name"]], item
                            )
                        else:
                            merged_by_name[item["name"]] = item
                    merged[key] = list(merged_by_name.values())
                else:
                    # Fallback for simple lists: concatenate and remove duplicates
                    # Note: This is a simple approach and might not be suitable for all list types.
                    merged[key].extend([item for item in value if item not in merged[key]])

            elif key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
                merged[key] = self._deep_merge(merged[key], value)
            else:
                merged[key] = value
        return merged
@@ -0,0 +1,2 @@
1
+ # Disposition package
2
+ # Import main functions but don't re-export to avoid shadowing