csrlite 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csrlite/__init__.py +91 -110
- csrlite/ae/__init__.py +1 -1
- csrlite/ae/ae_listing.py +494 -494
- csrlite/ae/ae_specific.py +483 -483
- csrlite/ae/ae_summary.py +401 -401
- csrlite/ae/ae_utils.py +62 -62
- csrlite/cm/cm_listing.py +497 -497
- csrlite/cm/cm_summary.py +327 -327
- csrlite/common/config.py +34 -34
- csrlite/common/count.py +293 -293
- csrlite/common/parse.py +308 -308
- csrlite/common/plan.py +365 -365
- csrlite/common/rtf.py +166 -137
- csrlite/common/utils.py +33 -33
- csrlite/common/yaml_loader.py +71 -71
- csrlite/disposition/__init__.py +2 -2
- csrlite/disposition/disposition.py +332 -332
- csrlite/ie/{ie_summary.py → ie.py} +405 -292
- csrlite/pd/pd_listing.py +461 -461
- {csrlite-0.3.0.dist-info → csrlite-0.3.2.dist-info}/METADATA +68 -68
- csrlite-0.3.2.dist-info/RECORD +23 -0
- {csrlite-0.3.0.dist-info → csrlite-0.3.2.dist-info}/WHEEL +1 -1
- csrlite/ie/ie_listing.py +0 -109
- csrlite/mh/mh_listing.py +0 -209
- csrlite/mh/mh_summary.py +0 -333
- csrlite-0.3.0.dist-info/RECORD +0 -26
- {csrlite-0.3.0.dist-info → csrlite-0.3.2.dist-info}/top_level.txt +0 -0
csrlite/common/plan.py
CHANGED
|
@@ -1,365 +1,365 @@
|
|
|
1
|
-
# pyre-strict
|
|
2
|
-
"""
|
|
3
|
-
Clean, simple TLF plan system.
|
|
4
|
-
This module provides a straightforward implementation for clinical TLF generation
|
|
5
|
-
using YAML plans with template inheritance and keyword resolution.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import itertools
|
|
9
|
-
import logging
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from typing import Any, Dict, List, Optional, cast
|
|
12
|
-
|
|
13
|
-
import polars as pl
|
|
14
|
-
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
15
|
-
|
|
16
|
-
from .yaml_loader import YamlInheritanceLoader
|
|
17
|
-
|
|
18
|
-
logger: logging.Logger = logging.getLogger(__name__)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class Keyword(BaseModel):
|
|
22
|
-
"""Base keyword definition."""
|
|
23
|
-
|
|
24
|
-
name: str
|
|
25
|
-
label: Optional[str] = None
|
|
26
|
-
description: Optional[str] = None
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class Population(Keyword):
|
|
30
|
-
"""Population definition with filter."""
|
|
31
|
-
|
|
32
|
-
filter: str = ""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class Observation(Keyword):
|
|
36
|
-
"""Observation/timepoint definition with filter."""
|
|
37
|
-
|
|
38
|
-
filter: str = ""
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class Parameter(Keyword):
|
|
42
|
-
"""Parameter definition with filter."""
|
|
43
|
-
|
|
44
|
-
filter: str = ""
|
|
45
|
-
terms: Optional[Dict[str, str]] = None
|
|
46
|
-
indent: int = 0
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class Group(Keyword):
|
|
50
|
-
"""Treatment group definition."""
|
|
51
|
-
|
|
52
|
-
variable: str = ""
|
|
53
|
-
level: List[str] = Field(default_factory=list)
|
|
54
|
-
group_label: List[str] = Field(default_factory=list)
|
|
55
|
-
|
|
56
|
-
# Allow label to be excluded if it conflicts or handled manually
|
|
57
|
-
|
|
58
|
-
# pyre-ignore[56]
|
|
59
|
-
@field_validator("group_label", mode="before")
|
|
60
|
-
@classmethod
|
|
61
|
-
def set_group_label(cls, v: Any, info: Any) -> Any:
|
|
62
|
-
# If group_label is missing, fallback to 'label' field if present in input data
|
|
63
|
-
# Note: Pydantic V2 validation context doesn't easily give access to other fields input
|
|
64
|
-
# unless using model_validator. But here we can rely on standard defaulting or
|
|
65
|
-
# fix it at the registry level like before.
|
|
66
|
-
# Actually, let's keep it simple: if not provided, it's empty.
|
|
67
|
-
# The original code did:
|
|
68
|
-
# if "group_label" not in item_data: item_data["group_label"] = item_data.get("label", [])
|
|
69
|
-
return v or []
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
class DataSource(BaseModel):
|
|
73
|
-
"""Data source definition."""
|
|
74
|
-
|
|
75
|
-
name: str
|
|
76
|
-
path: str
|
|
77
|
-
dataframe: Optional[pl.DataFrame] = Field(default=None, exclude=True)
|
|
78
|
-
|
|
79
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
class AnalysisPlan(BaseModel):
|
|
83
|
-
"""Individual analysis plan specification."""
|
|
84
|
-
|
|
85
|
-
analysis: str
|
|
86
|
-
population: str
|
|
87
|
-
observation: Optional[str] = None
|
|
88
|
-
group: Optional[str] = None
|
|
89
|
-
parameter: Optional[str] = None
|
|
90
|
-
|
|
91
|
-
@property
|
|
92
|
-
def id(self) -> str:
|
|
93
|
-
"""Generate unique analysis ID."""
|
|
94
|
-
parts = [self.analysis, self.population]
|
|
95
|
-
if self.observation:
|
|
96
|
-
parts.append(self.observation)
|
|
97
|
-
if self.parameter:
|
|
98
|
-
parts.append(self.parameter)
|
|
99
|
-
return "_".join(parts)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
class KeywordRegistry(BaseModel):
|
|
103
|
-
"""Registry for managing keywords."""
|
|
104
|
-
|
|
105
|
-
populations: Dict[str, Population] = Field(default_factory=dict)
|
|
106
|
-
observations: Dict[str, Observation] = Field(default_factory=dict)
|
|
107
|
-
parameters: Dict[str, Parameter] = Field(default_factory=dict)
|
|
108
|
-
groups: Dict[str, Group] = Field(default_factory=dict)
|
|
109
|
-
data_sources: Dict[str, DataSource] = Field(default_factory=dict)
|
|
110
|
-
|
|
111
|
-
def load_from_dict(self, data: Dict[str, Any]) -> None:
|
|
112
|
-
"""Load keywords from a dictionary."""
|
|
113
|
-
# We manually load so we can handle the dict-to-list-of-models transformation
|
|
114
|
-
# and the specific logic for defaults.
|
|
115
|
-
|
|
116
|
-
for item in data.get("population", []):
|
|
117
|
-
pop_item = Population(**item)
|
|
118
|
-
self.populations[pop_item.name] = pop_item
|
|
119
|
-
|
|
120
|
-
for item in data.get("observation", []):
|
|
121
|
-
obs_item = Observation(**item)
|
|
122
|
-
self.observations[obs_item.name] = obs_item
|
|
123
|
-
|
|
124
|
-
for item in data.get("parameter", []):
|
|
125
|
-
param_item = Parameter(**item)
|
|
126
|
-
self.parameters[param_item.name] = param_item
|
|
127
|
-
|
|
128
|
-
for item in data.get("group", []):
|
|
129
|
-
# Special handling for Group where 'label' might be a list (for group_label)
|
|
130
|
-
# but Keyword.label expects a string.
|
|
131
|
-
if "label" in item and isinstance(item["label"], list):
|
|
132
|
-
if "group_label" not in item:
|
|
133
|
-
item["group_label"] = item["label"]
|
|
134
|
-
# Remove label from item to avoid validation error on Keyword.label
|
|
135
|
-
# or set it to a joined string if a label is really needed
|
|
136
|
-
del item["label"]
|
|
137
|
-
|
|
138
|
-
group_item = Group(**item)
|
|
139
|
-
self.groups[group_item.name] = group_item
|
|
140
|
-
|
|
141
|
-
for item in data.get("data", []):
|
|
142
|
-
ds_item = DataSource(**item)
|
|
143
|
-
self.data_sources[ds_item.name] = ds_item
|
|
144
|
-
|
|
145
|
-
def get_population(self, name: str) -> Optional[Population]:
|
|
146
|
-
return self.populations.get(name)
|
|
147
|
-
|
|
148
|
-
def get_observation(self, name: str) -> Optional[Observation]:
|
|
149
|
-
return self.observations.get(name)
|
|
150
|
-
|
|
151
|
-
def get_parameter(self, name: str) -> Optional[Parameter]:
|
|
152
|
-
return self.parameters.get(name)
|
|
153
|
-
|
|
154
|
-
def get_group(self, name: str) -> Optional[Group]:
|
|
155
|
-
return self.groups.get(name)
|
|
156
|
-
|
|
157
|
-
def get_data_source(self, name: str) -> Optional[DataSource]:
|
|
158
|
-
return self.data_sources.get(name)
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
class PlanExpander:
|
|
162
|
-
"""Expands condensed plans into individual analysis specifications."""
|
|
163
|
-
|
|
164
|
-
def __init__(self, keywords: KeywordRegistry) -> None:
|
|
165
|
-
self.keywords = keywords
|
|
166
|
-
|
|
167
|
-
def expand_plan(self, plan_data: Dict[str, Any]) -> List[AnalysisPlan]:
|
|
168
|
-
"""Expand a single condensed plan into individual plans."""
|
|
169
|
-
analysis = plan_data["analysis"]
|
|
170
|
-
populations = self._to_list(plan_data.get("population", []))
|
|
171
|
-
observations: List[Any] = self._to_list(plan_data.get("observation")) or [None]
|
|
172
|
-
parameters: List[Any] = self._parse_parameters(plan_data.get("parameter")) or [None]
|
|
173
|
-
group = plan_data.get("group")
|
|
174
|
-
|
|
175
|
-
expanded_plans = [
|
|
176
|
-
AnalysisPlan(
|
|
177
|
-
analysis=analysis, population=pop, observation=obs, group=group, parameter=param
|
|
178
|
-
)
|
|
179
|
-
for pop, obs, param in itertools.product(populations, observations, parameters)
|
|
180
|
-
]
|
|
181
|
-
return expanded_plans
|
|
182
|
-
|
|
183
|
-
def create_analysis_spec(self, plan: AnalysisPlan) -> Dict[str, Any]:
|
|
184
|
-
"""Create a summary analysis specification with keywords."""
|
|
185
|
-
spec = {
|
|
186
|
-
"analysis": plan.analysis,
|
|
187
|
-
"population": plan.population,
|
|
188
|
-
"observation": plan.observation,
|
|
189
|
-
"parameter": plan.parameter,
|
|
190
|
-
"group": plan.group,
|
|
191
|
-
}
|
|
192
|
-
return spec
|
|
193
|
-
|
|
194
|
-
def _to_list(self, value: Any) -> List[str]:
|
|
195
|
-
if value is None:
|
|
196
|
-
return []
|
|
197
|
-
if isinstance(value, str):
|
|
198
|
-
return [value]
|
|
199
|
-
return list(value)
|
|
200
|
-
|
|
201
|
-
def _parse_parameters(self, value: Any) -> Optional[List[str]]:
|
|
202
|
-
if value is None:
|
|
203
|
-
return None
|
|
204
|
-
if isinstance(value, str):
|
|
205
|
-
return [value] # Keep semicolon-separated values as single parameter
|
|
206
|
-
return list(value)
|
|
207
|
-
|
|
208
|
-
def _generate_title(self, plan: AnalysisPlan) -> str:
|
|
209
|
-
parts = [plan.analysis.replace("_", " ").title()]
|
|
210
|
-
if (pop := self.keywords.get_population(plan.population)) and pop.label:
|
|
211
|
-
parts.append(f"- {pop.label}")
|
|
212
|
-
if plan.observation:
|
|
213
|
-
obs = self.keywords.get_observation(plan.observation)
|
|
214
|
-
if obs and obs.label:
|
|
215
|
-
parts.append(f"- {obs.label}")
|
|
216
|
-
if plan.parameter:
|
|
217
|
-
param = self.keywords.get_parameter(plan.parameter)
|
|
218
|
-
if param and param.label:
|
|
219
|
-
parts.append(f"- {param.label}")
|
|
220
|
-
return " ".join(parts)
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
class StudyPlan:
|
|
224
|
-
"""Main study plan."""
|
|
225
|
-
|
|
226
|
-
def __init__(self, study_data: Dict[str, Any], base_path: Optional[Path] = None) -> None:
|
|
227
|
-
self.study_data = study_data
|
|
228
|
-
self.base_path: Path = base_path or Path(".")
|
|
229
|
-
self.datasets: Dict[str, pl.DataFrame] = {}
|
|
230
|
-
self.keywords = KeywordRegistry()
|
|
231
|
-
self.expander = PlanExpander(self.keywords)
|
|
232
|
-
self.keywords.load_from_dict(self.study_data)
|
|
233
|
-
self.load_datasets()
|
|
234
|
-
|
|
235
|
-
@property
|
|
236
|
-
def output_dir(self) -> str:
|
|
237
|
-
"""Get output directory from study configuration."""
|
|
238
|
-
study_config = self.study_data.get("study", {})
|
|
239
|
-
return cast(str, study_config.get("output", "."))
|
|
240
|
-
|
|
241
|
-
def load_datasets(self) -> None:
|
|
242
|
-
"""Load datasets from paths specified in data_sources."""
|
|
243
|
-
for name, data_source in self.keywords.data_sources.items():
|
|
244
|
-
try:
|
|
245
|
-
# Ensure the path is relative to the base_path of the plan
|
|
246
|
-
path = self.base_path / data_source.path
|
|
247
|
-
df = pl.read_parquet(path)
|
|
248
|
-
self.datasets[name] = df
|
|
249
|
-
data_source.dataframe = df
|
|
250
|
-
logger.info(f"Successfully loaded dataset '{name}' from '{path}'")
|
|
251
|
-
except Exception as e:
|
|
252
|
-
logger.warning(
|
|
253
|
-
f"Could not load dataset '{name}' from '{data_source.path}'. Reason: {e}"
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
def get_plan_df(self) -> pl.DataFrame:
|
|
257
|
-
"""Expand all condensed plans into a DataFrame of detailed specifications."""
|
|
258
|
-
all_specs = [
|
|
259
|
-
self.expander.create_analysis_spec(plan)
|
|
260
|
-
for plan_data in self.study_data.get("plans", [])
|
|
261
|
-
for plan in self.expander.expand_plan(plan_data)
|
|
262
|
-
]
|
|
263
|
-
return pl.DataFrame(all_specs)
|
|
264
|
-
|
|
265
|
-
def get_dataset_df(self) -> Optional[pl.DataFrame]:
|
|
266
|
-
"""Get a DataFrame of data sources."""
|
|
267
|
-
if not self.keywords.data_sources:
|
|
268
|
-
return None
|
|
269
|
-
return pl.DataFrame(
|
|
270
|
-
[
|
|
271
|
-
{"name": name, "path": ds.path, "loaded": name in self.datasets}
|
|
272
|
-
for name, ds in self.keywords.data_sources.items()
|
|
273
|
-
]
|
|
274
|
-
)
|
|
275
|
-
|
|
276
|
-
def get_population_df(self) -> Optional[pl.DataFrame]:
|
|
277
|
-
"""Get a DataFrame of analysis populations."""
|
|
278
|
-
if not self.keywords.populations:
|
|
279
|
-
return None
|
|
280
|
-
return pl.DataFrame(
|
|
281
|
-
[
|
|
282
|
-
{"name": name, "label": pop.label, "filter": pop.filter}
|
|
283
|
-
for name, pop in self.keywords.populations.items()
|
|
284
|
-
]
|
|
285
|
-
)
|
|
286
|
-
|
|
287
|
-
def get_observation_df(self) -> Optional[pl.DataFrame]:
|
|
288
|
-
"""Get a DataFrame of analysis observations."""
|
|
289
|
-
if not self.keywords.observations:
|
|
290
|
-
return None
|
|
291
|
-
return pl.DataFrame(
|
|
292
|
-
[
|
|
293
|
-
{"name": name, "label": obs.label, "filter": obs.filter}
|
|
294
|
-
for name, obs in self.keywords.observations.items()
|
|
295
|
-
]
|
|
296
|
-
)
|
|
297
|
-
|
|
298
|
-
def get_parameter_df(self) -> Optional[pl.DataFrame]:
|
|
299
|
-
"""Get a DataFrame of analysis parameters."""
|
|
300
|
-
if not self.keywords.parameters:
|
|
301
|
-
return None
|
|
302
|
-
return pl.DataFrame(
|
|
303
|
-
[
|
|
304
|
-
{"name": name, "label": param.label, "filter": param.filter}
|
|
305
|
-
for name, param in self.keywords.parameters.items()
|
|
306
|
-
]
|
|
307
|
-
)
|
|
308
|
-
|
|
309
|
-
def get_group_df(self) -> Optional[pl.DataFrame]:
|
|
310
|
-
"""Get a DataFrame of analysis groups."""
|
|
311
|
-
if not self.keywords.groups:
|
|
312
|
-
return None
|
|
313
|
-
return pl.DataFrame(
|
|
314
|
-
[
|
|
315
|
-
{
|
|
316
|
-
"name": name,
|
|
317
|
-
"variable": group.variable,
|
|
318
|
-
"levels": str(group.level),
|
|
319
|
-
"labels": str(group.group_label),
|
|
320
|
-
}
|
|
321
|
-
for name, group in self.keywords.groups.items()
|
|
322
|
-
]
|
|
323
|
-
)
|
|
324
|
-
|
|
325
|
-
def print(self) -> None:
|
|
326
|
-
"""Print comprehensive study plan information using Polars DataFrames."""
|
|
327
|
-
logger.info("ADaM Metadata:")
|
|
328
|
-
|
|
329
|
-
if (df := self.get_dataset_df()) is not None:
|
|
330
|
-
logger.info(f"\nData Sources:\n{df}")
|
|
331
|
-
|
|
332
|
-
if (df := self.get_population_df()) is not None:
|
|
333
|
-
logger.info(f"\nAnalysis Population Type:\n{df}")
|
|
334
|
-
|
|
335
|
-
if (df := self.get_observation_df()) is not None:
|
|
336
|
-
logger.info(f"\nAnalysis Observation Type:\n{df}")
|
|
337
|
-
|
|
338
|
-
if (df := self.get_parameter_df()) is not None:
|
|
339
|
-
logger.info(f"\nAnalysis Parameter Type:\n{df}")
|
|
340
|
-
|
|
341
|
-
if (df := self.get_group_df()) is not None:
|
|
342
|
-
logger.info(f"\nAnalysis Groups:\n{df}")
|
|
343
|
-
|
|
344
|
-
if (df := self.get_plan_df()) is not None:
|
|
345
|
-
logger.info(f"\nAnalysis Plans:\n{df}")
|
|
346
|
-
|
|
347
|
-
def __str__(self) -> str:
|
|
348
|
-
study_name = self.study_data.get("study", {}).get("name", "Unknown")
|
|
349
|
-
condensed_plans = len(self.study_data.get("plans", []))
|
|
350
|
-
individual_analyses = len(self.get_plan_df())
|
|
351
|
-
return (
|
|
352
|
-
f"StudyPlan(study='{study_name}', plans={condensed_plans}, "
|
|
353
|
-
f"analyses={individual_analyses})"
|
|
354
|
-
)
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
def load_plan(plan_path: str) -> StudyPlan:
|
|
358
|
-
"""
|
|
359
|
-
Loads a study plan from a YAML file, resolving template inheritance.
|
|
360
|
-
"""
|
|
361
|
-
path = Path(plan_path)
|
|
362
|
-
base_path = path.parent
|
|
363
|
-
loader = YamlInheritanceLoader(base_path)
|
|
364
|
-
study_data = loader.load(path.name)
|
|
365
|
-
return StudyPlan(study_data, base_path)
|
|
1
|
+
# pyre-strict
|
|
2
|
+
"""
|
|
3
|
+
Clean, simple TLF plan system.
|
|
4
|
+
This module provides a straightforward implementation for clinical TLF generation
|
|
5
|
+
using YAML plans with template inheritance and keyword resolution.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import itertools
|
|
9
|
+
import logging
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Optional, cast
|
|
12
|
+
|
|
13
|
+
import polars as pl
|
|
14
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
15
|
+
|
|
16
|
+
from .yaml_loader import YamlInheritanceLoader
|
|
17
|
+
|
|
18
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Keyword(BaseModel):
    """Base keyword definition.

    Common shape shared by all plan keywords (populations, observations,
    parameters, groups): a required lookup name plus optional display text.
    """

    # Unique key used for registry lookups.
    name: str
    # Human-readable display label, if any.
    label: Optional[str] = None
    # Free-text description, if any.
    description: Optional[str] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Population(Keyword):
    """Population definition with filter."""

    # Filter expression selecting subjects in this population;
    # empty string means "no filter".
    filter: str = ""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Observation(Keyword):
    """Observation/timepoint definition with filter."""

    # Filter expression selecting records for this observation window;
    # empty string means "no filter".
    filter: str = ""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Parameter(Keyword):
    """Parameter definition with filter."""

    # Filter expression selecting records for this parameter;
    # empty string means "no filter".
    filter: str = ""
    # Optional mapping of term names to values — semantics depend on the
    # consuming analysis; presumably used for term recoding (TODO confirm).
    terms: Optional[Dict[str, str]] = None
    # Indentation level used when rendering this parameter in output tables.
    indent: int = 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Group(Keyword):
    """Treatment group definition."""

    # Dataset variable holding the group assignment (e.g. treatment arm).
    variable: str = ""
    # Ordered list of the variable's levels defining the groups.
    level: List[str] = Field(default_factory=list)
    # Display labels, parallel to `level`. May be populated from a
    # list-valued 'label' key — that normalization happens in
    # KeywordRegistry.load_from_dict, not here.
    group_label: List[str] = Field(default_factory=list)

    # pyre-ignore[56]
    @field_validator("group_label", mode="before")
    @classmethod
    def set_group_label(cls, v: Any, info: Any) -> Any:
        """Coerce a missing/None ``group_label`` into an empty list.

        Falling back to the 'label' field is intentionally NOT done here:
        Pydantic V2 ``field_validator`` does not give easy access to the
        other raw input fields, so the registry performs that fallback
        before constructing the model.
        """
        return v or []
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class DataSource(BaseModel):
    """Data source definition."""

    # Registry key for this source.
    name: str
    # File path, resolved relative to the plan's base path at load time.
    path: str
    # Loaded frame cache; excluded from serialization because a DataFrame
    # is not JSON-representable.
    dataframe: Optional[pl.DataFrame] = Field(default=None, exclude=True)

    # Required so pydantic accepts the non-pydantic pl.DataFrame type.
    model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class AnalysisPlan(BaseModel):
    """Individual analysis plan specification.

    One fully-expanded combination of analysis, population, and the
    optional observation/group/parameter keywords.
    """

    analysis: str
    population: str
    observation: Optional[str] = None
    group: Optional[str] = None
    parameter: Optional[str] = None

    @property
    def id(self) -> str:
        """Generate unique analysis ID."""
        # Required components first, then the optional ones that are set.
        components = [self.analysis, self.population]
        for optional_part in (self.observation, self.parameter):
            if optional_part:
                components.append(optional_part)
        return "_".join(components)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class KeywordRegistry(BaseModel):
    """Registry for managing keywords.

    Holds the named populations, observations, parameters, groups, and
    data sources parsed from a study plan, each indexed by its ``name``.
    A repeated name silently overwrites the earlier entry.
    """

    populations: Dict[str, Population] = Field(default_factory=dict)
    observations: Dict[str, Observation] = Field(default_factory=dict)
    parameters: Dict[str, Parameter] = Field(default_factory=dict)
    groups: Dict[str, Group] = Field(default_factory=dict)
    data_sources: Dict[str, DataSource] = Field(default_factory=dict)

    def load_from_dict(self, data: Dict[str, Any]) -> None:
        """Load keywords from a dictionary.

        Each top-level key ("population", "observation", "parameter",
        "group", "data") maps to a list of keyword definitions that are
        validated into their model types and indexed by name. Loading is
        done manually (rather than via pydantic parsing of the whole dict)
        so the group 'label' normalization below can be applied.
        """
        for item in data.get("population", []):
            pop_item = Population(**item)
            self.populations[pop_item.name] = pop_item

        for item in data.get("observation", []):
            obs_item = Observation(**item)
            self.observations[obs_item.name] = obs_item

        for item in data.get("parameter", []):
            param_item = Parameter(**item)
            self.parameters[param_item.name] = param_item

        for item in data.get("group", []):
            # A group's 'label' key may carry a list destined for
            # `group_label`, while Keyword.label expects a string.
            # Normalize on a shallow copy so the caller's input dict is
            # not mutated (the previous version deleted the key in place,
            # corrupting the caller's study data on reload).
            if "label" in item and isinstance(item["label"], list):
                item = dict(item)
                if "group_label" not in item:
                    item["group_label"] = item["label"]
                del item["label"]

            group_item = Group(**item)
            self.groups[group_item.name] = group_item

        for item in data.get("data", []):
            ds_item = DataSource(**item)
            self.data_sources[ds_item.name] = ds_item

    def get_population(self, name: str) -> Optional[Population]:
        """Return the population registered under *name*, or None."""
        return self.populations.get(name)

    def get_observation(self, name: str) -> Optional[Observation]:
        """Return the observation registered under *name*, or None."""
        return self.observations.get(name)

    def get_parameter(self, name: str) -> Optional[Parameter]:
        """Return the parameter registered under *name*, or None."""
        return self.parameters.get(name)

    def get_group(self, name: str) -> Optional[Group]:
        """Return the group registered under *name*, or None."""
        return self.groups.get(name)

    def get_data_source(self, name: str) -> Optional[DataSource]:
        """Return the data source registered under *name*, or None."""
        return self.data_sources.get(name)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class PlanExpander:
    """Expands condensed plans into individual analysis specifications."""

    def __init__(self, keywords: KeywordRegistry) -> None:
        self.keywords = keywords

    def expand_plan(self, plan_data: Dict[str, Any]) -> List[AnalysisPlan]:
        """Expand a single condensed plan into individual plans."""
        analysis = plan_data["analysis"]
        group = plan_data.get("group")
        pops = self._to_list(plan_data.get("population", []))
        # Optional axes collapse to a single None entry so the cartesian
        # product still yields at least one plan per population.
        obs_axis: List[Any] = self._to_list(plan_data.get("observation")) or [None]
        param_axis: List[Any] = self._parse_parameters(plan_data.get("parameter")) or [None]

        plans: List[AnalysisPlan] = []
        for pop, obs, param in itertools.product(pops, obs_axis, param_axis):
            plans.append(
                AnalysisPlan(
                    analysis=analysis,
                    population=pop,
                    observation=obs,
                    group=group,
                    parameter=param,
                )
            )
        return plans

    def create_analysis_spec(self, plan: AnalysisPlan) -> Dict[str, Any]:
        """Create a summary analysis specification with keywords."""
        # Key order matters downstream (it becomes DataFrame column order).
        return {
            "analysis": plan.analysis,
            "population": plan.population,
            "observation": plan.observation,
            "parameter": plan.parameter,
            "group": plan.group,
        }

    def _to_list(self, value: Any) -> List[str]:
        """Normalize None/str/iterable input to a list of strings."""
        if value is None:
            return []
        return [value] if isinstance(value, str) else list(value)

    def _parse_parameters(self, value: Any) -> Optional[List[str]]:
        """Normalize the 'parameter' entry; None stays None."""
        if value is None:
            return None
        # A semicolon-separated string is kept as a single parameter.
        return [value] if isinstance(value, str) else list(value)

    def _generate_title(self, plan: AnalysisPlan) -> str:
        """Build a display title from the plan's labelled keywords."""
        parts = [plan.analysis.replace("_", " ").title()]
        pop = self.keywords.get_population(plan.population)
        if pop and pop.label:
            parts.append(f"- {pop.label}")
        # Observation and parameter contribute only when set and labelled.
        for lookup, key in (
            (self.keywords.get_observation, plan.observation),
            (self.keywords.get_parameter, plan.parameter),
        ):
            if key:
                keyword = lookup(key)
                if keyword and keyword.label:
                    parts.append(f"- {keyword.label}")
        return " ".join(parts)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class StudyPlan:
    """Main study plan."""

    def __init__(self, study_data: Dict[str, Any], base_path: Optional[Path] = None) -> None:
        """Build a study plan from parsed YAML data.

        Loads every keyword definition into the registry and eagerly reads
        each declared dataset from disk (failures are logged, not raised).
        """
        self.study_data = study_data
        # Data-source paths are resolved relative to this directory.
        self.base_path: Path = base_path or Path(".")
        self.datasets: Dict[str, pl.DataFrame] = {}
        self.keywords = KeywordRegistry()
        self.expander = PlanExpander(self.keywords)
        self.keywords.load_from_dict(self.study_data)
        self.load_datasets()

    @property
    def output_dir(self) -> str:
        """Get output directory from study configuration."""
        study_config = self.study_data.get("study", {})
        # Defaults to the current directory when no 'output' key is set.
        return cast(str, study_config.get("output", "."))

    def load_datasets(self) -> None:
        """Load datasets from paths specified in data_sources.

        Each source is read as parquet; a failure for one source is logged
        as a warning and skipped so it does not abort plan construction.
        """
        for name, data_source in self.keywords.data_sources.items():
            try:
                # Ensure the path is relative to the base_path of the plan
                path = self.base_path / data_source.path
                df = pl.read_parquet(path)
                # Cache the frame both on the plan and on the source itself.
                self.datasets[name] = df
                data_source.dataframe = df
                logger.info(f"Successfully loaded dataset '{name}' from '{path}'")
            except Exception as e:
                logger.warning(
                    f"Could not load dataset '{name}' from '{data_source.path}'. Reason: {e}"
                )

    def get_plan_df(self) -> pl.DataFrame:
        """Expand all condensed plans into a DataFrame of detailed specifications."""
        all_specs = [
            self.expander.create_analysis_spec(plan)
            for plan_data in self.study_data.get("plans", [])
            for plan in self.expander.expand_plan(plan_data)
        ]
        return pl.DataFrame(all_specs)

    def get_dataset_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of data sources.

        Returns None when no data sources are registered.
        """
        if not self.keywords.data_sources:
            return None
        return pl.DataFrame(
            [
                {"name": name, "path": ds.path, "loaded": name in self.datasets}
                for name, ds in self.keywords.data_sources.items()
            ]
        )

    def get_population_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis populations, or None if none exist."""
        if not self.keywords.populations:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": pop.label, "filter": pop.filter}
                for name, pop in self.keywords.populations.items()
            ]
        )

    def get_observation_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis observations, or None if none exist."""
        if not self.keywords.observations:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": obs.label, "filter": obs.filter}
                for name, obs in self.keywords.observations.items()
            ]
        )

    def get_parameter_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis parameters, or None if none exist."""
        if not self.keywords.parameters:
            return None
        return pl.DataFrame(
            [
                {"name": name, "label": param.label, "filter": param.filter}
                for name, param in self.keywords.parameters.items()
            ]
        )

    def get_group_df(self) -> Optional[pl.DataFrame]:
        """Get a DataFrame of analysis groups, or None if none exist."""
        if not self.keywords.groups:
            return None
        return pl.DataFrame(
            [
                {
                    "name": name,
                    "variable": group.variable,
                    # Lists are stringified so the frame keeps simple columns.
                    "levels": str(group.level),
                    "labels": str(group.group_label),
                }
                for name, group in self.keywords.groups.items()
            ]
        )

    def print(self) -> None:
        """Print comprehensive study plan information using Polars DataFrames."""
        logger.info("ADaM Metadata:")

        if (df := self.get_dataset_df()) is not None:
            logger.info(f"\nData Sources:\n{df}")

        if (df := self.get_population_df()) is not None:
            logger.info(f"\nAnalysis Population Type:\n{df}")

        if (df := self.get_observation_df()) is not None:
            logger.info(f"\nAnalysis Observation Type:\n{df}")

        if (df := self.get_parameter_df()) is not None:
            logger.info(f"\nAnalysis Parameter Type:\n{df}")

        if (df := self.get_group_df()) is not None:
            logger.info(f"\nAnalysis Groups:\n{df}")

        # get_plan_df always returns a DataFrame; the walrus check simply
        # keeps this section uniform with the Optional-returning getters.
        if (df := self.get_plan_df()) is not None:
            logger.info(f"\nAnalysis Plans:\n{df}")

    def __str__(self) -> str:
        """Summarize the plan: study name, condensed-plan and analysis counts."""
        study_name = self.study_data.get("study", {}).get("name", "Unknown")
        condensed_plans = len(self.study_data.get("plans", []))
        individual_analyses = len(self.get_plan_df())
        return (
            f"StudyPlan(study='{study_name}', plans={condensed_plans}, "
            f"analyses={individual_analyses})"
        )
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def load_plan(plan_path: str) -> StudyPlan:
    """Load a study plan from a YAML file, resolving template inheritance.

    The plan's parent directory becomes the base path for both template
    resolution and relative data-source paths.
    """
    path = Path(plan_path)
    base = path.parent
    study_data = YamlInheritanceLoader(base).load(path.name)
    return StudyPlan(study_data, base)
|