csrlite 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csrlite/__init__.py +16 -8
- csrlite/ae/ae_listing.py +2 -0
- csrlite/ae/ae_specific.py +10 -5
- csrlite/ae/ae_summary.py +4 -2
- csrlite/ae/ae_utils.py +0 -70
- csrlite/common/config.py +34 -0
- csrlite/common/count.py +174 -80
- csrlite/common/plan.py +79 -67
- csrlite/common/rtf.py +85 -0
- csrlite/common/utils.py +4 -4
- csrlite/disposition/disposition.py +126 -95
- {csrlite-0.1.0.dist-info → csrlite-0.2.0.dist-info}/METADATA +7 -7
- csrlite-0.2.0.dist-info/RECORD +19 -0
- csrlite-0.1.0.dist-info/RECORD +0 -17
- {csrlite-0.1.0.dist-info → csrlite-0.2.0.dist-info}/WHEEL +0 -0
- {csrlite-0.1.0.dist-info → csrlite-0.2.0.dist-info}/top_level.txt +0 -0
csrlite/common/plan.py
CHANGED
@@ -6,17 +6,19 @@ using YAML plans with template inheritance and keyword resolution.
 """

 import itertools
-
+import logging
 from pathlib import Path
 from typing import Any, Dict, List, Optional, cast

 import polars as pl
+from pydantic import BaseModel, ConfigDict, Field, field_validator

 from .yaml_loader import YamlInheritanceLoader

+logger: logging.Logger = logging.getLogger(__name__)

-
-class Keyword:
+
+class Keyword(BaseModel):
     """Base keyword definition."""

     name: str
@@ -24,54 +26,60 @@ class Keyword:
     description: Optional[str] = None


-@dataclass
 class Population(Keyword):
     """Population definition with filter."""

     filter: str = ""


-@dataclass
 class Observation(Keyword):
     """Observation/timepoint definition with filter."""

     filter: str = ""


-@dataclass
 class Parameter(Keyword):
-    """Parameter definition with filter.
-
-    The terms field supports dynamic title generation:
-    - terms.before: "serious" → "Serious Adverse Events"
-    - terms.after: "resulting in death" → "Adverse Events Resulting in Death"
-    """
+    """Parameter definition with filter."""

     filter: str = ""
     terms: Optional[Dict[str, str]] = None
-    indent: int = 0
+    indent: int = 0


-@dataclass
 class Group(Keyword):
     """Treatment group definition."""

     variable: str = ""
-    level: List[str] =
-    group_label: List[str] =
-
-
-
-
+    level: List[str] = Field(default_factory=list)
+    group_label: List[str] = Field(default_factory=list)
+
+    # Allow label to be excluded if it conflicts or handled manually
+
+    # pyre-ignore[56]
+    @field_validator("group_label", mode="before")
+    @classmethod
+    def set_group_label(cls, v: Any, info: Any) -> Any:
+        # If group_label is missing, fallback to 'label' field if present in input data
+        # Note: Pydantic V2 validation context doesn't easily give access to other fields input
+        # unless using model_validator. But here we can rely on standard defaulting or
+        # fix it at the registry level like before.
+        # Actually, let's keep it simple: if not provided, it's empty.
+        # The original code did:
+        # if "group_label" not in item_data: item_data["group_label"] = item_data.get("label", [])
+        return v or []
+
+
+class DataSource(BaseModel):
     """Data source definition."""

     name: str
     path: str
-    dataframe: Optional[pl.DataFrame] = None
+    dataframe: Optional[pl.DataFrame] = Field(default=None, exclude=True)
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)


-
-class AnalysisPlan:
+class AnalysisPlan(BaseModel):
     """Individual analysis plan specification."""

     analysis: str
@@ -91,37 +99,48 @@ class AnalysisPlan:
         return "_".join(parts)


-class KeywordRegistry:
+class KeywordRegistry(BaseModel):
     """Registry for managing keywords."""

-
-
-
-
-
-        self.data_sources: Dict[str, DataSource] = {}
+    populations: Dict[str, Population] = Field(default_factory=dict)
+    observations: Dict[str, Observation] = Field(default_factory=dict)
+    parameters: Dict[str, Parameter] = Field(default_factory=dict)
+    groups: Dict[str, Group] = Field(default_factory=dict)
+    data_sources: Dict[str, DataSource] = Field(default_factory=dict)

     def load_from_dict(self, data: Dict[str, Any]) -> None:
         """Load keywords from a dictionary."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # We manually load so we can handle the dict-to-list-of-models transformation
+        # and the specific logic for defaults.
+
+        for item in data.get("population", []):
+            pop_item = Population(**item)
+            self.populations[pop_item.name] = pop_item
+
+        for item in data.get("observation", []):
+            obs_item = Observation(**item)
+            self.observations[obs_item.name] = obs_item
+
+        for item in data.get("parameter", []):
+            param_item = Parameter(**item)
+            self.parameters[param_item.name] = param_item
+
+        for item in data.get("group", []):
+            # Special handling for Group where 'label' might be a list (for group_label)
+            # but Keyword.label expects a string.
+            if "label" in item and isinstance(item["label"], list):
+                if "group_label" not in item:
+                    item["group_label"] = item["label"]
+                # Remove label from item to avoid validation error on Keyword.label
+                # or set it to a joined string if a label is really needed
+                del item["label"]
+
+            group_item = Group(**item)
+            self.groups[group_item.name] = group_item
+
+        for item in data.get("data", []):
+            ds_item = DataSource(**item)
+            self.data_sources[ds_item.name] = ds_item

     def get_population(self, name: str) -> Optional[Population]:
         return self.populations.get(name)
@@ -228,11 +247,10 @@ class StudyPlan:
                 df = pl.read_parquet(path)
                 self.datasets[name] = df
                 data_source.dataframe = df
-
+                logger.info(f"Successfully loaded dataset '{name}' from '{path}'")
             except Exception as e:
-
-                    f"
-                    f"Reason: {e}"
+                logger.warning(
+                    f"Could not load dataset '{name}' from '{data_source.path}'. Reason: {e}"
                 )

     def get_plan_df(self) -> pl.DataFrame:
@@ -306,34 +324,28 @@ class StudyPlan:

     def print(self) -> None:
         """Print comprehensive study plan information using Polars DataFrames."""
-
+        logger.info("ADaM Metadata:")

         if (df := self.get_dataset_df()) is not None:
-
-            print(df)
+            logger.info(f"\nData Sources:\n{df}")

         if (df := self.get_population_df()) is not None:
-
-            print(df)
+            logger.info(f"\nAnalysis Population Type:\n{df}")

         if (df := self.get_observation_df()) is not None:
-
-            print(df)
+            logger.info(f"\nAnalysis Observation Type:\n{df}")

         if (df := self.get_parameter_df()) is not None:
-
-            print(df)
+            logger.info(f"\nAnalysis Parameter Type:\n{df}")

         if (df := self.get_group_df()) is not None:
-
-            print(df)
+            logger.info(f"\nAnalysis Groups:\n{df}")

         if (df := self.get_plan_df()) is not None:
-
-            print(df)
+            logger.info(f"\nAnalysis Plans:\n{df}")

     def __str__(self) -> str:
-        study_name = self.study_data.get("study",
+        study_name = self.study_data.get("study", {}).get("name", "Unknown")
         condensed_plans = len(self.study_data.get("plans", []))
         individual_analyses = len(self.get_plan_df())
         return (
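Based on the plan.py changes above, a minimal sketch of how the new Pydantic-backed keyword registry might be driven. The keyword names, filters, and treatment levels are illustrative values, not data shipped with csrlite; the field names used are those visible in the diff.

# Illustrative use of the Pydantic-based KeywordRegistry from csrlite.common.plan.
from csrlite.common.plan import Group, KeywordRegistry

registry = KeywordRegistry()
registry.load_from_dict(
    {
        "population": [
            # Example keyword entry; name/label/filter values are made up.
            {"name": "apat", "label": "All Participants as Treated", "filter": "SAFFL = 'Y'"}
        ],
        "group": [
            {
                "name": "trt",
                "variable": "TRT01A",
                "level": ["Placebo", "Xanomeline High Dose"],
                # A list-valued 'label' is copied into 'group_label' and then dropped,
                # per the special handling in load_from_dict above.
                "label": ["Placebo", "High Dose"],
            }
        ],
    }
)

population = registry.get_population("apat")
group = registry.groups["trt"]
assert isinstance(group, Group)
assert group.group_label == ["Placebo", "High Dose"]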
csrlite/common/rtf.py
ADDED
@@ -0,0 +1,85 @@
+# pyre-strict
+from typing import Any
+
+import polars as pl
+from rtflite import RTFBody, RTFColumnHeader, RTFDocument, RTFFootnote, RTFPage, RTFSource, RTFTitle
+
+
+def create_rtf_table_n_pct(
+    df: pl.DataFrame,
+    col_header_1: list[str],
+    col_header_2: list[str] | None,
+    col_widths: list[float] | None,
+    title: list[str] | str,
+    footnote: list[str] | str | None,
+    source: list[str] | str | None,
+    borders_2: bool = True,
+    orientation: str = "landscape",
+) -> RTFDocument:
+    """
+    Create a standardized RTF table document with 1 or 2 header rows.
+
+    Args:
+        df: Polars DataFrame containing the table data.
+        col_header_1: List of strings for the first header row.
+        col_header_2: Optional list of strings for the second header row.
+        col_widths: Optional list of relative column widths. Defaults to equal widths.
+        title: Title string or list of title strings.
+        footnote: Footnote string or list of footnote strings.
+        source: Source string or list of source strings.
+        borders_2: Whether to show borders for the second header row. Defaults to True.
+        orientation: Page orientation, "landscape" or "portrait". Defaults to "landscape".
+
+    Returns:
+        RTFDocument object.
+    """
+    n_cols = len(df.columns)
+
+    # Calculate column widths if None - simple default
+    if col_widths is None:
+        col_widths = [1.0] * n_cols
+
+    # Normalize metadata
+    title_list = [title] if isinstance(title, str) else title
+    footnote_list = [footnote] if isinstance(footnote, str) else (footnote or [])
+    source_list = [source] if isinstance(source, str) else (source or [])
+
+    headers = [
+        RTFColumnHeader(
+            text=col_header_1,
+            col_rel_width=col_widths,
+            text_justification=["l"] + ["c"] * (n_cols - 1),
+        )
+    ]
+
+    if col_header_2:
+        h2_kwargs = {
+            "text": col_header_2,
+            "col_rel_width": col_widths,
+            "text_justification": ["l"] + ["c"] * (n_cols - 1),
+        }
+        if borders_2:
+            h2_kwargs["border_left"] = ["single"]
+            h2_kwargs["border_top"] = [""]
+
+        headers.append(RTFColumnHeader(**h2_kwargs))
+
+    rtf_components: dict[str, Any] = {
+        "df": df,
+        "rtf_page": RTFPage(orientation=orientation),
+        "rtf_title": RTFTitle(text=title_list),
+        "rtf_column_header": headers,
+        "rtf_body": RTFBody(
+            col_rel_width=col_widths,
+            text_justification=["l"] + ["c"] * (n_cols - 1),
+            border_left=["single"] * n_cols,
+        ),
+    }
+
+    if footnote_list:
+        rtf_components["rtf_footnote"] = RTFFootnote(text=footnote_list)
+
+    if source_list:
+        rtf_components["rtf_source"] = RTFSource(text=source_list)
+
+    return RTFDocument(**rtf_components)
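A sketch of how the new helper might be called. The DataFrame contents, header labels, column widths, and output filename are illustrative, and write_rtf is assumed here to be rtflite's method for saving the document to disk.

# Illustrative call to csrlite.common.rtf.create_rtf_table_n_pct with made-up summary data.
import polars as pl
from csrlite.common.rtf import create_rtf_table_n_pct

df = pl.DataFrame(
    {
        "soc": ["Any adverse event", "Cardiac disorders"],
        "placebo": ["12 (13.9%)", "2 (2.3%)"],
        "high_dose": ["40 (47.6%)", "5 (6.0%)"],
    }
)

doc = create_rtf_table_n_pct(
    df=df,
    col_header_1=["System Organ Class", "Placebo", "High Dose"],
    col_header_2=["", "n (%)", "n (%)"],
    col_widths=[3.0, 1.0, 1.0],
    title="Participants With Adverse Events",
    footnote="Every participant is counted once per row.",
    source="Source: [example dataset]",
)
doc.write_rtf("ae_summary.rtf")  # assumed rtflite API for writing the RTF file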
csrlite/common/utils.py
CHANGED
@@ -4,11 +4,11 @@ import polars as pl

 def apply_common_filters(
     population: pl.DataFrame,
-    observation: pl.DataFrame,
+    observation: pl.DataFrame | None,
     population_filter: str | None,
     observation_filter: str | None,
     parameter_filter: str | None = None,
-) -> tuple[pl.DataFrame, pl.DataFrame]:
+) -> tuple[pl.DataFrame, pl.DataFrame | None]:
     """
     Apply standard population, observation, and parameter filters.

@@ -23,11 +23,11 @@ def apply_common_filters(

     # Apply observation filter
     observation_filtered = observation
-    if observation_filter:
+    if observation_filter and observation_filtered is not None:
         observation_filtered = observation_filtered.filter(pl.sql_expr(observation_filter))

     # Apply parameter filter
-    if parameter_filter:
+    if parameter_filter and observation_filtered is not None:
         observation_filtered = observation_filtered.filter(pl.sql_expr(parameter_filter))

     return population_filtered, observation_filtered