csrlite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/common/plan.py ADDED
@@ -0,0 +1,353 @@
1
+ # pyre-strict
2
+ """
3
+ Clean, simple TLF plan system.
4
+ This module provides a straightforward implementation for clinical TLF generation
5
+ using YAML plans with template inheritance and keyword resolution.
6
+ """
7
+
8
+ import itertools
9
+ from dataclasses import dataclass, field, fields
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, cast
12
+
13
+ import polars as pl
14
+
15
+ from .yaml_loader import YamlInheritanceLoader
16
+
17
+
18
@dataclass
class Keyword:
    """Base keyword definition.

    Common shape shared by the plan keyword types (Population, Observation,
    Parameter, Group all extend this class).
    """

    name: str  # unique identifier; used as the lookup key in KeywordRegistry
    label: Optional[str] = None  # human-readable display label
    description: Optional[str] = None  # optional longer free-text description
25
+
26
+
27
@dataclass
class Population(Keyword):
    """Population definition with filter."""

    # Row-selection expression for this population; empty string means no
    # filtering (presumably evaluated via pl.sql_expr — confirm with callers).
    filter: str = ""
32
+
33
+
34
@dataclass
class Observation(Keyword):
    """Observation/timepoint definition with filter."""

    # Row-selection expression for this observation window; "" = no filter.
    filter: str = ""
39
+
40
+
41
@dataclass
class Parameter(Keyword):
    """Parameter definition with filter.

    The terms field supports dynamic title generation:
    - terms.before: "serious" → "Serious Adverse Events"
    - terms.after: "resulting in death" → "Adverse Events Resulting in Death"
    """

    filter: str = ""  # row-selection expression for this parameter; "" = no filter
    terms: Optional[Dict[str, str]] = None  # optional "before"/"after" title fragments
    indent: int = 0  # Indentation level for hierarchical display
53
+
54
+
55
@dataclass
class Group(Keyword):
    """Treatment group definition."""

    # Name of the grouping variable (presumably a dataset column — confirm
    # against the analysis code that consumes groups).
    variable: str = ""
    level: List[str] = field(default_factory=list)  # ordered group levels
    # Display labels; parallel to `level`. Defaults to `label` when absent in
    # the input (see KeywordRegistry._load_keyword_type).
    group_label: List[str] = field(default_factory=list)
62
+
63
+
64
@dataclass
class DataSource:
    """Data source definition.

    `dataframe` stays None until StudyPlan.load_datasets successfully reads
    the parquet file at `path`.
    """

    name: str  # registry key for this data source
    path: str  # parquet file path, resolved relative to the plan's base path
    dataframe: Optional[pl.DataFrame] = None  # loaded data; None until loaded
71
+
72
+
73
@dataclass
class AnalysisPlan:
    """Individual analysis plan specification.

    One fully-expanded analysis: an analysis type applied to one population,
    optionally narrowed by an observation window and a parameter. The group
    keyword is carried along but does not participate in the ID.
    """

    analysis: str
    population: str
    observation: Optional[str] = None
    group: Optional[str] = None
    parameter: Optional[str] = None

    @property
    def id(self) -> str:
        """Generate unique analysis ID."""
        # Required components first, then whichever optional ones are set.
        components = [self.analysis, self.population]
        components.extend(part for part in (self.observation, self.parameter) if part)
        return "_".join(components)
92
+
93
+
94
class KeywordRegistry:
    """Registry for managing keywords.

    Holds one dictionary per keyword type, keyed by keyword name. Keywords
    are loaded from plain dicts (typically parsed YAML) via load_from_dict;
    unknown keys in the input are ignored so plan files may carry extra
    metadata without breaking dataclass construction.
    """

    def __init__(self) -> None:
        self.populations: Dict[str, Population] = {}
        self.observations: Dict[str, Observation] = {}
        self.parameters: Dict[str, Parameter] = {}
        self.groups: Dict[str, Group] = {}
        self.data_sources: Dict[str, DataSource] = {}

    def load_from_dict(self, data: Dict[str, Any]) -> None:
        """Load keywords from a dictionary.

        Each of the keys "population", "observation", "parameter", "group",
        and "data" may map to a list of keyword definition dicts.
        """
        self._load_keyword_type(data, "population", Population, self.populations)
        self._load_keyword_type(data, "observation", Observation, self.observations)
        self._load_keyword_type(data, "parameter", Parameter, self.parameters)
        self._load_keyword_type(data, "group", Group, self.groups)
        self._load_keyword_type(data, "data", DataSource, self.data_sources)

    def _load_keyword_type(
        self, data: Dict[str, Any], key: str, keyword_class: Any, target_dict: Dict[str, Any]
    ) -> None:
        """Generic method to load a type of keyword.

        Unknown fields are dropped before constructing the dataclass.
        The caller's input dicts are never mutated.
        """
        for item_data in data.get(key, []):
            # Copy first: the original version mutated the caller's dict when
            # defaulting group_label, which silently altered parsed YAML data.
            item_data = dict(item_data)
            # Use identity comparison for the class check, not equality.
            if keyword_class is Group and "group_label" not in item_data:
                item_data["group_label"] = item_data.get("label", [])

            # Keep only keys that correspond to init-able dataclass fields.
            expected_fields = {f.name for f in fields(keyword_class) if f.init}
            filtered_data = {k: v for k, v in item_data.items() if k in expected_fields}

            instance = keyword_class(**filtered_data)
            target_dict[instance.name] = instance

    def get_population(self, name: str) -> Optional[Population]:
        """Return the named population, or None if unknown."""
        return self.populations.get(name)

    def get_observation(self, name: str) -> Optional[Observation]:
        """Return the named observation, or None if unknown."""
        return self.observations.get(name)

    def get_parameter(self, name: str) -> Optional[Parameter]:
        """Return the named parameter, or None if unknown."""
        return self.parameters.get(name)

    def get_group(self, name: str) -> Optional[Group]:
        """Return the named group, or None if unknown."""
        return self.groups.get(name)

    def get_data_source(self, name: str) -> Optional[DataSource]:
        """Return the named data source, or None if unknown."""
        return self.data_sources.get(name)
140
+
141
+
142
class PlanExpander:
    """Expands condensed plans into individual analysis specifications."""

    def __init__(self, keywords: KeywordRegistry) -> None:
        # Registry used to resolve labels when generating titles.
        self.keywords = keywords

    def expand_plan(self, plan_data: Dict[str, Any]) -> List[AnalysisPlan]:
        """Expand a single condensed plan into individual plans.

        Takes the cross product of populations x observations x parameters;
        the group keyword is carried through to every expanded plan.
        """
        analysis = plan_data["analysis"]
        populations = self._to_list(plan_data.get("population", []))
        observations: List[Any] = self._to_list(plan_data.get("observation")) or [None]
        parameters: List[Any] = self._parse_parameters(plan_data.get("parameter")) or [None]
        group = plan_data.get("group")

        expanded: List[AnalysisPlan] = []
        for pop, obs, param in itertools.product(populations, observations, parameters):
            expanded.append(
                AnalysisPlan(
                    analysis=analysis,
                    population=pop,
                    observation=obs,
                    group=group,
                    parameter=param,
                )
            )
        return expanded

    def create_analysis_spec(self, plan: AnalysisPlan) -> Dict[str, Any]:
        """Create a summary analysis specification with keywords."""
        return {
            "analysis": plan.analysis,
            "population": plan.population,
            "observation": plan.observation,
            "parameter": plan.parameter,
            "group": plan.group,
        }

    def _to_list(self, value: Any) -> List[str]:
        """Normalize a scalar-or-list value to a list (None becomes [])."""
        if value is None:
            return []
        return [value] if isinstance(value, str) else list(value)

    def _parse_parameters(self, value: Any) -> Optional[List[str]]:
        """Normalize a parameter spec, preserving None.

        A single string is kept whole — semicolon-separated values stay one
        parameter rather than being split.
        """
        if value is None:
            return None
        return [value] if isinstance(value, str) else list(value)

    def _generate_title(self, plan: AnalysisPlan) -> str:
        """Build a human-readable title from the plan's keyword labels."""
        parts = [plan.analysis.replace("_", " ").title()]
        pop = self.keywords.get_population(plan.population)
        if pop is not None and pop.label:
            parts.append(f"- {pop.label}")
        if plan.observation:
            obs = self.keywords.get_observation(plan.observation)
            if obs and obs.label:
                parts.append(f"- {obs.label}")
        if plan.parameter:
            param = self.keywords.get_parameter(plan.parameter)
            if param and param.label:
                parts.append(f"- {param.label}")
        return " ".join(parts)
202
+
203
+
204
class StudyPlan:
    """Main study plan.

    Ties together the parsed study YAML, the keyword registry, the plan
    expander, and the datasets loaded from the declared data sources.
    """

    def __init__(self, study_data: Dict[str, Any], base_path: Optional[Path] = None) -> None:
        """Build the plan from parsed (inheritance-resolved) YAML data.

        Args:
            study_data: Parsed plan dictionary.
            base_path: Directory against which dataset paths are resolved;
                defaults to the current directory.
        """
        self.study_data = study_data
        self.base_path: Path = base_path or Path(".")
        self.datasets: Dict[str, pl.DataFrame] = {}
        self.keywords = KeywordRegistry()
        self.expander = PlanExpander(self.keywords)
        self.keywords.load_from_dict(self.study_data)
        self.load_datasets()

    @property
    def output_dir(self) -> str:
        """Get output directory from study configuration (default ".")."""
        study_config = self.study_data.get("study", {})
        return cast(str, study_config.get("output", "."))

    def load_datasets(self) -> None:
        """Load datasets from paths specified in data_sources.

        Failures are reported as warnings rather than raised, so a plan with
        missing datasets can still be inspected.
        """
        for name, data_source in self.keywords.data_sources.items():
            try:
                # Ensure the path is relative to the base_path of the plan
                path = self.base_path / data_source.path
                df = pl.read_parquet(path)
                self.datasets[name] = df
                data_source.dataframe = df
                print(f"Successfully loaded dataset '{name}' from '{path}'")
            except Exception as e:
                print(
                    f"Warning: Could not load dataset '{name}' from '{data_source.path}'. "
                    f"Reason: {e}"
                )

    def get_plan_df(self) -> pl.DataFrame:
        """Expand all condensed plans into a DataFrame of detailed specifications."""
        all_specs = [
            self.expander.create_analysis_spec(plan)
            for plan_data in self.study_data.get("plans", [])
            for plan in self.expander.expand_plan(plan_data)
        ]
        return pl.DataFrame(all_specs)

    def get_dataset_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of data sources, or None if none are defined."""
        if not self.keywords.data_sources:
            return None
        return pl.DataFrame(
            [
                {"name": name, "path": ds.path, "loaded": name in self.datasets}
                for name, ds in self.keywords.data_sources.items()
            ]
        )

    def get_population_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis populations, or None if none defined."""
        if not self.keywords.populations:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": pop.label, "filter": pop.filter}
                for name, pop in self.keywords.populations.items()
            ]
        )

    def get_observation_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis observations, or None if none defined."""
        if not self.keywords.observations:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": obs.label, "filter": obs.filter}
                for name, obs in self.keywords.observations.items()
            ]
        )

    def get_parameter_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis parameters, or None if none defined."""
        if not self.keywords.parameters:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": param.label, "filter": param.filter}
                for name, param in self.keywords.parameters.items()
            ]
        )

    def get_group_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis groups, or None if none defined."""
        if not self.keywords.groups:
            return None
        return pl.DataFrame(
            [
                {
                    "name": name,
                    "variable": group.variable,
                    # Lists are stringified so the frame holds plain strings.
                    "levels": str(group.level),
                    "labels": str(group.group_label),
                }
                for name, group in self.keywords.groups.items()
            ]
        )

    def print(self) -> None:
        """Print comprehensive study plan information using Polars DataFrames.

        Note: `print` here is only a method name; the builtin remains
        available inside the body.
        """
        print("ADaM Metadata:")

        if (df := self.get_dataset_df()) is not None:
            print("\nData Sources:")
            print(df)

        if (df := self.get_population_df()) is not None:
            print("\nAnalysis Population Type:")
            print(df)

        if (df := self.get_observation_df()) is not None:
            print("\nAnalysis Observation Type:")
            print(df)

        if (df := self.get_parameter_df()) is not None:
            print("\nAnalysis Parameter Type:")
            print(df)

        if (df := self.get_group_df()) is not None:
            print("\nAnalysis Groups:")
            print(df)

        if (df := self.get_plan_df()) is not None:
            print("\nAnalysis Plans:")
            print(df)

    def __str__(self) -> str:
        # Fixed: the dict default was previously spelled `Dict[str, Any]()`,
        # a confusing (though runtime-valid) way of writing `{}`.
        study_name = self.study_data.get("study", {}).get("name", "Unknown")
        condensed_plans = len(self.study_data.get("plans", []))
        individual_analyses = len(self.get_plan_df())
        return (
            f"StudyPlan(study='{study_name}', plans={condensed_plans}, "
            f"analyses={individual_analyses})"
        )
343
+
344
+
345
def load_plan(plan_path: str) -> StudyPlan:
    """
    Loads a study plan from a YAML file, resolving template inheritance.
    """
    plan_file = Path(plan_path)
    plan_dir = plan_file.parent
    # The loader resolves template references relative to the plan's folder.
    study_data = YamlInheritanceLoader(plan_dir).load(plan_file.name)
    return StudyPlan(study_data, plan_dir)
@@ -0,0 +1,33 @@
1
+ # pyre-strict
2
+ import polars as pl
3
+
4
+
5
def apply_common_filters(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    parameter_filter: str | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """
    Apply standard population, observation, and parameter filters.

    Each filter is a SQL-like expression string evaluated via pl.sql_expr;
    empty or None filters are skipped.

    Returns:
        Tuple of (filtered_population, filtered_observation_pre_id_match)
    """
    # Population takes a single optional filter.
    population_filtered = (
        population.filter(pl.sql_expr(population_filter)) if population_filter else population
    )

    # Observation gets the observation and parameter filters applied in
    # order, each one only if provided.
    observation_filtered = observation
    for expr in (observation_filter, parameter_filter):
        if expr:
            observation_filtered = observation_filtered.filter(pl.sql_expr(expr))

    return population_filtered, observation_filtered
@@ -0,0 +1,71 @@
1
+ # pyre-strict
2
+ from copy import deepcopy
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Optional
5
+
6
+ import yaml
7
+
8
+
9
class YamlInheritanceLoader:
    """Loads YAML files and resolves ``study.template`` inheritance.

    A plan may name one or more template files under ``study.template``.
    Templates are loaded recursively (they may declare templates of their
    own), merged left-to-right, and the plan's own data is merged last so
    it wins any conflicts.
    """

    def __init__(self, base_path: Optional[Path] = None) -> None:
        # All file names passed to load() are resolved against this directory.
        self.base_path: Path = base_path or Path(".")

    def load(self, file_name: str) -> Dict[str, Any]:
        """
        Load a YAML file by name relative to base_path and resolve inheritance.

        Raises:
            FileNotFoundError: if the file does not exist.
        """
        file_path = self.base_path / file_name
        if not file_path.exists():
            raise FileNotFoundError(f"YAML file not found: {file_path}")

        # YAML is UTF-8 by specification; be explicit so reading does not
        # depend on the platform's default locale encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}

        return self._resolve_inheritance(data)

    def _resolve_inheritance(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Merge any templates listed under ``study.template`` beneath *data*.

        NOTE(review): circular template references would recurse until
        RecursionError; assumed not to occur in plan files — confirm or add
        cycle detection if plans may be user-authored.
        """
        templates = data.get("study", {}).get("template", [])
        if isinstance(templates, str):
            templates = [templates]

        if not templates:
            return data

        merged_template_data: Dict[str, Any] = {}
        for template_file in templates:
            template_data = self.load(template_file)
            merged_template_data = self._deep_merge(merged_template_data, template_data)

        # The concrete plan's own values take precedence over template values.
        return self._deep_merge(merged_template_data, data)

    def _deep_merge(self, dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge *dict2* into a deep copy of *dict1*.

        dict2 wins scalar conflicts; nested dicts merge recursively. Lists
        whose elements are all dicts with a "name" key merge element-wise by
        name; any other lists concatenate with duplicates dropped.
        """
        merged = deepcopy(dict1)
        for key, value in dict2.items():
            if key in merged and isinstance(merged[key], list) and isinstance(value, list):
                # Heuristic to check if these are lists of keywords (dicts with a 'name')
                # This logic is specific to how this project uses YAML inheritance.
                is_keyword_list = all(isinstance(i, dict) and "name" in i for i in value) and all(
                    isinstance(i, dict) and "name" in i for i in merged[key]
                )

                if is_keyword_list:
                    merged_by_name = {item["name"]: item for item in merged[key]}
                    for item in value:
                        if item["name"] in merged_by_name:
                            # It's a dict merge, so we can recursively call _deep_merge
                            merged_by_name[item["name"]] = self._deep_merge(
                                merged_by_name[item["name"]], item
                            )
                        else:
                            merged_by_name[item["name"]] = item
                    merged[key] = list(merged_by_name.values())
                else:
                    # Fallback for simple lists: concatenate and remove duplicates
                    # Note: This is a simple approach and might not be suitable for all list types.
                    merged[key].extend([item for item in value if item not in merged[key]])

            elif key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
                merged[key] = self._deep_merge(merged[key], value)
            else:
                merged[key] = value
        return merged
@@ -0,0 +1,2 @@
1
+ # Disposition package
2
+ # Import main functions but don't re-export to avoid shadowing