csrlite 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csrlite/__init__.py +71 -58
- csrlite/ae/__init__.py +1 -1
- csrlite/ae/ae_listing.py +494 -494
- csrlite/ae/ae_specific.py +483 -483
- csrlite/ae/ae_summary.py +401 -401
- csrlite/ae/ae_utils.py +62 -62
- csrlite/common/config.py +34 -34
- csrlite/common/count.py +293 -293
- csrlite/common/parse.py +308 -308
- csrlite/common/plan.py +365 -365
- csrlite/common/rtf.py +137 -85
- csrlite/common/utils.py +33 -33
- csrlite/common/yaml_loader.py +71 -71
- csrlite/disposition/__init__.py +2 -2
- csrlite/disposition/disposition.py +332 -332
- csrlite/ie/ie.py +405 -0
- {csrlite-0.2.0.dist-info → csrlite-0.2.1.dist-info}/METADATA +68 -68
- csrlite-0.2.1.dist-info/RECORD +20 -0
- csrlite-0.2.0.dist-info/RECORD +0 -19
- {csrlite-0.2.0.dist-info → csrlite-0.2.1.dist-info}/WHEEL +0 -0
- {csrlite-0.2.0.dist-info → csrlite-0.2.1.dist-info}/top_level.txt +0 -0
csrlite/common/parse.py
CHANGED
|
@@ -1,308 +1,308 @@
|
|
|
1
|
-
# pyre-strict
|
|
2
|
-
"""
|
|
3
|
-
StudyPlan Parsing Utilities
|
|
4
|
-
|
|
5
|
-
This module provides utilities for parsing and extracting information from StudyPlan objects,
|
|
6
|
-
including filter conversion, parameter parsing, and keyword resolution.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import re
|
|
10
|
-
from typing import Any
|
|
11
|
-
|
|
12
|
-
import polars as pl
|
|
13
|
-
|
|
14
|
-
from .plan import StudyPlan
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def parse_filter_to_sql(filter_str: str) -> str:
    """
    Parse custom filter syntax to SQL WHERE clause.

    Converts:
    - "adsl:saffl == 'Y'" -> "SAFFL = 'Y'"
    - "adae:trtemfl == 'Y' and adae:aeser == 'Y'" -> "TRTEMFL = 'Y' AND AESER = 'Y'"
    - "adae:aerel in ['A', 'B']" -> "AEREL IN ('A', 'B')"
    - "adae:aerel not in ['A']" -> "AEREL NOT IN ('A')"

    Only the text outside single-quoted string literals is rewritten, so a value
    such as 'nausea and vomiting' or 'Grade:3 [mild]' reaches SQL unchanged.

    Args:
        filter_str: Custom filter string with dataset:column format

    Returns:
        SQL WHERE clause string; "1=1" when the filter is empty or blank
    """
    if not filter_str or not filter_str.strip():
        return "1=1"  # Always true

    def _to_upper(match: re.Match[str]) -> str:
        return match.group(0).upper()

    def _convert(code: str) -> str:
        # Remove dataset prefixes (adsl:, adae:)
        code = re.sub(r"\w+:", "", code)
        # Python equality to SQL
        code = code.replace("==", "=")
        # Python boolean / membership keywords to SQL
        code = re.sub(r"\b(?:and|or|in)\b", _to_upper, code)
        # Python list syntax to SQL IN list: [ ... ] -> ( ... )
        code = code.replace("[", "(").replace("]", ")")
        # Uppercase column names (assuming ADaM standard): any identifier that is
        # directly followed by a comparison operator, IN, or NOT IN.
        return re.sub(
            r"\b[a-z]\w*\b(?=\s*[=<>!]|\s+(?:NOT\s+)?IN\b)",
            _to_upper,
            code,
            flags=re.IGNORECASE,
        )

    # re.split with a capturing group keeps the quoted literals at odd indices;
    # they are copied through verbatim and only the code around them is converted.
    pieces = re.split(r"('[^']*')", filter_str)
    return "".join(
        piece if index % 2 else _convert(piece) for index, piece in enumerate(pieces)
    )
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def apply_filter_sql(df: pl.DataFrame, filter_str: str) -> pl.DataFrame:
    """
    Filter a DataFrame with a custom filter string.

    The filter is converted with parse_filter_to_sql() and evaluated through
    pl.sql_expr(). If that raises, a warning is printed and the filter is
    re-parsed with _parse_filter_expr() instead.

    Args:
        df: DataFrame to filter
        filter_str: Custom filter string

    Returns:
        Filtered DataFrame (the input DataFrame itself when the filter is empty or blank)
    """
    has_filter = bool(filter_str) and filter_str.strip() != ""
    if has_filter:
        sql_condition = parse_filter_to_sql(filter_str)
        try:
            # Preferred path: let Polars evaluate the SQL expression directly.
            filtered = df.filter(pl.sql_expr(sql_condition))
        except Exception as err:
            # Fallback to manual parsing if SQL fails
            print(f"Warning: SQL filter failed ({err}), using fallback method")
            filtered = df.filter(_parse_filter_expr(filter_str))
        return filtered

    # Nothing to filter on.
    return df
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def _parse_filter_expr(filter_str: str) -> Any:
    """
    Fallback filter parser using Polars expressions.
    Used if SQL parsing fails.

    The filter is rewritten into Python source for a Polars expression, e.g.
    "adsl:saffl == 'Y'" -> "(pl.col('SAFFL') == 'Y')", and then evaluated.
    Text inside single-quoted literals is never rewritten.

    Args:
        filter_str: Filter string

    Returns:
        Polars expression
    """
    if not filter_str or not filter_str.strip():
        return pl.lit(True)

    def _outside_literals(text: str, transform: Any) -> str:
        # re.split with a capturing group keeps quoted literals at odd indices;
        # only the even (non-literal) pieces are passed through `transform`.
        pieces = re.split(r"('[^']*')", text)
        return "".join(
            piece if index % 2 else transform(piece) for index, piece in enumerate(pieces)
        )

    # Remove dataset prefixes (but not "word:" text inside quoted values)
    expr = _outside_literals(filter_str, lambda code: re.sub(r"\w+:", "", code))

    # Handle 'in' / 'not in': column in ['A', 'B'] -> pl.col(column).is_in(['A', 'B'])
    in_pattern = r"(\w+)\s+(not\s+)?in\s+\[([^\]]+)\]"

    def _membership_to_polars(match: re.Match[str]) -> str:
        col = match.group(1).upper()
        negate = "~" if match.group(2) else ""
        values = match.group(3)
        return f"({negate}pl.col('{col}').is_in([{values}]))"

    expr = re.sub(in_pattern, _membership_to_polars, expr)

    # Handle equality/inequality against a quoted value (empty string allowed)
    cmp_pattern = r"(\w+)\s*(==|!=|>=|<=|>|<)\s*'([^']*)'"

    def _comparison_to_polars(match: re.Match[str]) -> str:
        col = match.group(1).upper()
        op = match.group(2)
        val = match.group(3)
        return f"(pl.col('{col}') {op} '{val}')"

    expr = re.sub(cmp_pattern, _comparison_to_polars, expr)

    # Replace 'and'/'or' with the element-wise Polars operators, outside literals only
    expr = _outside_literals(
        expr, lambda code: code.replace(" and ", " & ").replace(" or ", " | ")
    )

    # NOTE(security): eval() executes the rewritten filter text as Python. Filters
    # must come from a trusted study plan. Only `pl` is exposed to the expression
    # instead of this function's locals and the module globals.
    return eval(expr, {"pl": pl})
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def parse_parameter(parameter_str: str) -> list[str]:
    """
    Parse semicolon-separated parameter string.

    Surrounding whitespace is removed from every name, and empty entries
    (for example from a trailing ";") are dropped.

    Args:
        parameter_str: Single parameter or semicolon-separated (e.g., "any;rel;ser")

    Returns:
        List of parameter names; empty list when no name is given
    """
    if not parameter_str:
        return []
    names = (piece.strip() for piece in parameter_str.split(";"))
    return [name for name in names if name]
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
class StudyPlanParser:
    """
    Parser class for extracting and resolving information from StudyPlan objects.

    This class provides methods to extract filters, labels, and other configuration
    from StudyPlan keywords and convert them to analysis-ready formats.
    """

    def __init__(self, study_plan: StudyPlan) -> None:
        """
        Initialize parser with a StudyPlan object.

        Args:
            study_plan: StudyPlan object with loaded datasets and keywords
        """
        self.study_plan = study_plan

    def get_population_filter(self, population: str) -> str:
        """
        Get population filter as SQL WHERE clause.

        Args:
            population: Population keyword name

        Returns:
            SQL WHERE clause string

        Raises:
            ValueError: If population keyword not found
        """
        pop = self.study_plan.keywords.get_population(population)
        if pop is None:
            raise ValueError(f"Population '{population}' not found")
        return parse_filter_to_sql(pop.filter)

    def get_observation_filter(self, observation: str | None) -> str | None:
        """
        Get observation filter as SQL WHERE clause.

        Args:
            observation: Optional observation keyword name

        Returns:
            SQL WHERE clause string, or None if no observation is given or the
            keyword lookup returns nothing
        """
        if not observation:
            return None
        obs = self.study_plan.keywords.get_observation(observation)
        if not obs:
            return None
        return parse_filter_to_sql(obs.filter)

    def get_parameter_info(
        self, parameter: str
    ) -> tuple[list[str], list[str], list[str], list[int]]:
        """
        Get parameter names, filters, labels, and indent levels.

        Args:
            parameter: Parameter keyword, can be semicolon-separated (e.g., "any;rel;ser")

        Returns:
            Tuple of (parameter_names, parameter_filters, parameter_labels, parameter_indents)

        Raises:
            ValueError: If any parameter keyword not found
        """
        param_names = parse_parameter(parameter)
        param_filters = []
        param_labels = []
        param_indents = []

        for param_name in param_names:
            param = self.study_plan.keywords.get_parameter(param_name)
            if param is None:
                raise ValueError(f"Parameter '{param_name}' not found")
            param_filters.append(parse_filter_to_sql(param.filter))
            # Fall back to the keyword name when no label is configured.
            param_labels.append(param.label or param_name)
            param_indents.append(param.indent)

        return param_names, param_filters, param_labels, param_indents

    def get_single_parameter_info(self, parameter: str) -> tuple[str, str]:
        """
        Get single parameter filter and label (NOT semicolon-separated).

        Args:
            parameter: Single parameter keyword name

        Returns:
            Tuple of (parameter_filter, parameter_label)

        Raises:
            ValueError: If parameter keyword not found
        """
        param = self.study_plan.keywords.get_parameter(parameter)
        if param is None:
            raise ValueError(f"Parameter '{parameter}' not found")
        return parse_filter_to_sql(param.filter), param.label or parameter

    def get_group_info(self, group: str) -> tuple[str, list[str]]:
        """
        Get group variable name and labels.

        Args:
            group: Group keyword name

        Returns:
            Tuple of (group_variable, group_labels)

        Raises:
            ValueError: If group keyword not found
        """
        grp = self.study_plan.keywords.get_group(group)
        if grp is None:
            raise ValueError(f"Group '{group}' not found")

        # "adsl:trt01a" -> "TRT01A": drop the dataset prefix, uppercase the column.
        group_var = grp.variable.split(":")[-1].upper()
        group_labels = grp.group_label if grp.group_label else []

        return group_var, group_labels

    def get_datasets(self, *dataset_names: str) -> tuple[pl.DataFrame, ...]:
        """
        Get multiple datasets from StudyPlan.

        Args:
            *dataset_names: Names of datasets to retrieve (e.g., "adsl", "adae")

        Returns:
            Tuple of DataFrames in the order requested

        Raises:
            ValueError: If any dataset not found
        """
        datasets = []
        for name in dataset_names:
            ds = self.study_plan.datasets.get(name)
            if ds is None:
                raise ValueError(f"Dataset '{name}' not found in study plan")
            datasets.append(ds)
        return tuple(datasets)

    def get_population_data(self, population: str, group: str) -> tuple[pl.DataFrame, str]:
        """
        Get filtered population dataset and group variable.

        Args:
            population: Population keyword name
            group: Group keyword name

        Returns:
            Tuple of (filtered_adsl, group_variable)

        Raises:
            ValueError: If the "adsl" dataset, the population keyword, or the
                group keyword is not found
        """
        # Get ADSL dataset
        (adsl,) = self.get_datasets("adsl")

        # Apply population filter. The raw keyword filter is passed on because
        # apply_filter_sql() performs the SQL conversion itself, and its fallback
        # parser only understands the original "dataset:column == 'value'" syntax.
        pop = self.study_plan.keywords.get_population(population)
        if pop is None:
            raise ValueError(f"Population '{population}' not found")
        adsl_pop = apply_filter_sql(adsl, pop.filter)

        # Get group variable
        group_var, _ = self.get_group_info(group)

        return adsl_pop, group_var
|
|
1
|
+
# pyre-strict
|
|
2
|
+
"""
|
|
3
|
+
StudyPlan Parsing Utilities
|
|
4
|
+
|
|
5
|
+
This module provides utilities for parsing and extracting information from StudyPlan objects,
|
|
6
|
+
including filter conversion, parameter parsing, and keyword resolution.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import polars as pl
|
|
13
|
+
|
|
14
|
+
from .plan import StudyPlan
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_filter_to_sql(filter_str: str) -> str:
    """
    Parse custom filter syntax to SQL WHERE clause.

    Converts:
    - "adsl:saffl == 'Y'" -> "SAFFL = 'Y'"
    - "adae:trtemfl == 'Y' and adae:aeser == 'Y'" -> "TRTEMFL = 'Y' AND AESER = 'Y'"
    - "adae:aerel in ['A', 'B']" -> "AEREL IN ('A', 'B')"
    - "adae:aerel not in ['A']" -> "AEREL NOT IN ('A')"

    Only the text outside single-quoted string literals is rewritten, so a value
    such as 'nausea and vomiting' or 'Grade:3 [mild]' reaches SQL unchanged.

    Args:
        filter_str: Custom filter string with dataset:column format

    Returns:
        SQL WHERE clause string; "1=1" when the filter is empty or blank
    """
    if not filter_str or not filter_str.strip():
        return "1=1"  # Always true

    def _to_upper(match: re.Match[str]) -> str:
        return match.group(0).upper()

    def _convert(code: str) -> str:
        # Remove dataset prefixes (adsl:, adae:)
        code = re.sub(r"\w+:", "", code)
        # Python equality to SQL
        code = code.replace("==", "=")
        # Python boolean / membership keywords to SQL
        code = re.sub(r"\b(?:and|or|in)\b", _to_upper, code)
        # Python list syntax to SQL IN list: [ ... ] -> ( ... )
        code = code.replace("[", "(").replace("]", ")")
        # Uppercase column names (assuming ADaM standard): any identifier that is
        # directly followed by a comparison operator, IN, or NOT IN.
        return re.sub(
            r"\b[a-z]\w*\b(?=\s*[=<>!]|\s+(?:NOT\s+)?IN\b)",
            _to_upper,
            code,
            flags=re.IGNORECASE,
        )

    # re.split with a capturing group keeps the quoted literals at odd indices;
    # they are copied through verbatim and only the code around them is converted.
    pieces = re.split(r"('[^']*')", filter_str)
    return "".join(
        piece if index % 2 else _convert(piece) for index, piece in enumerate(pieces)
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def apply_filter_sql(df: pl.DataFrame, filter_str: str) -> pl.DataFrame:
    """
    Filter a DataFrame with a custom filter string.

    The filter is converted with parse_filter_to_sql() and evaluated through
    pl.sql_expr(). If that raises, a warning is printed and the filter is
    re-parsed with _parse_filter_expr() instead.

    Args:
        df: DataFrame to filter
        filter_str: Custom filter string

    Returns:
        Filtered DataFrame (the input DataFrame itself when the filter is empty or blank)
    """
    has_filter = bool(filter_str) and filter_str.strip() != ""
    if has_filter:
        sql_condition = parse_filter_to_sql(filter_str)
        try:
            # Preferred path: let Polars evaluate the SQL expression directly.
            filtered = df.filter(pl.sql_expr(sql_condition))
        except Exception as err:
            # Fallback to manual parsing if SQL fails
            print(f"Warning: SQL filter failed ({err}), using fallback method")
            filtered = df.filter(_parse_filter_expr(filter_str))
        return filtered

    # Nothing to filter on.
    return df
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _parse_filter_expr(filter_str: str) -> Any:
    """
    Fallback filter parser using Polars expressions.
    Used if SQL parsing fails.

    The filter is rewritten into Python source for a Polars expression, e.g.
    "adsl:saffl == 'Y'" -> "(pl.col('SAFFL') == 'Y')", and then evaluated.
    Text inside single-quoted literals is never rewritten.

    Args:
        filter_str: Filter string

    Returns:
        Polars expression
    """
    if not filter_str or not filter_str.strip():
        return pl.lit(True)

    def _outside_literals(text: str, transform: Any) -> str:
        # re.split with a capturing group keeps quoted literals at odd indices;
        # only the even (non-literal) pieces are passed through `transform`.
        pieces = re.split(r"('[^']*')", text)
        return "".join(
            piece if index % 2 else transform(piece) for index, piece in enumerate(pieces)
        )

    # Remove dataset prefixes (but not "word:" text inside quoted values)
    expr = _outside_literals(filter_str, lambda code: re.sub(r"\w+:", "", code))

    # Handle 'in' / 'not in': column in ['A', 'B'] -> pl.col(column).is_in(['A', 'B'])
    in_pattern = r"(\w+)\s+(not\s+)?in\s+\[([^\]]+)\]"

    def _membership_to_polars(match: re.Match[str]) -> str:
        col = match.group(1).upper()
        negate = "~" if match.group(2) else ""
        values = match.group(3)
        return f"({negate}pl.col('{col}').is_in([{values}]))"

    expr = re.sub(in_pattern, _membership_to_polars, expr)

    # Handle equality/inequality against a quoted value (empty string allowed)
    cmp_pattern = r"(\w+)\s*(==|!=|>=|<=|>|<)\s*'([^']*)'"

    def _comparison_to_polars(match: re.Match[str]) -> str:
        col = match.group(1).upper()
        op = match.group(2)
        val = match.group(3)
        return f"(pl.col('{col}') {op} '{val}')"

    expr = re.sub(cmp_pattern, _comparison_to_polars, expr)

    # Replace 'and'/'or' with the element-wise Polars operators, outside literals only
    expr = _outside_literals(
        expr, lambda code: code.replace(" and ", " & ").replace(" or ", " | ")
    )

    # NOTE(security): eval() executes the rewritten filter text as Python. Filters
    # must come from a trusted study plan. Only `pl` is exposed to the expression
    # instead of this function's locals and the module globals.
    return eval(expr, {"pl": pl})
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def parse_parameter(parameter_str: str) -> list[str]:
    """
    Parse semicolon-separated parameter string.

    Surrounding whitespace is removed from every name, and empty entries
    (for example from a trailing ";") are dropped.

    Args:
        parameter_str: Single parameter or semicolon-separated (e.g., "any;rel;ser")

    Returns:
        List of parameter names; empty list when no name is given
    """
    if not parameter_str:
        return []
    names = (piece.strip() for piece in parameter_str.split(";"))
    return [name for name in names if name]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class StudyPlanParser:
    """
    Parser class for extracting and resolving information from StudyPlan objects.

    This class provides methods to extract filters, labels, and other configuration
    from StudyPlan keywords and convert them to analysis-ready formats.
    """

    def __init__(self, study_plan: StudyPlan) -> None:
        """
        Initialize parser with a StudyPlan object.

        Args:
            study_plan: StudyPlan object with loaded datasets and keywords
        """
        self.study_plan = study_plan

    def get_population_filter(self, population: str) -> str:
        """
        Get population filter as SQL WHERE clause.

        Args:
            population: Population keyword name

        Returns:
            SQL WHERE clause string

        Raises:
            ValueError: If population keyword not found
        """
        pop = self.study_plan.keywords.get_population(population)
        if pop is None:
            raise ValueError(f"Population '{population}' not found")
        return parse_filter_to_sql(pop.filter)

    def get_observation_filter(self, observation: str | None) -> str | None:
        """
        Get observation filter as SQL WHERE clause.

        Args:
            observation: Optional observation keyword name

        Returns:
            SQL WHERE clause string, or None if no observation is given or the
            keyword lookup returns nothing
        """
        if not observation:
            return None
        obs = self.study_plan.keywords.get_observation(observation)
        if not obs:
            return None
        return parse_filter_to_sql(obs.filter)

    def get_parameter_info(
        self, parameter: str
    ) -> tuple[list[str], list[str], list[str], list[int]]:
        """
        Get parameter names, filters, labels, and indent levels.

        Args:
            parameter: Parameter keyword, can be semicolon-separated (e.g., "any;rel;ser")

        Returns:
            Tuple of (parameter_names, parameter_filters, parameter_labels, parameter_indents)

        Raises:
            ValueError: If any parameter keyword not found
        """
        param_names = parse_parameter(parameter)
        param_filters = []
        param_labels = []
        param_indents = []

        for param_name in param_names:
            param = self.study_plan.keywords.get_parameter(param_name)
            if param is None:
                raise ValueError(f"Parameter '{param_name}' not found")
            param_filters.append(parse_filter_to_sql(param.filter))
            # Fall back to the keyword name when no label is configured.
            param_labels.append(param.label or param_name)
            param_indents.append(param.indent)

        return param_names, param_filters, param_labels, param_indents

    def get_single_parameter_info(self, parameter: str) -> tuple[str, str]:
        """
        Get single parameter filter and label (NOT semicolon-separated).

        Args:
            parameter: Single parameter keyword name

        Returns:
            Tuple of (parameter_filter, parameter_label)

        Raises:
            ValueError: If parameter keyword not found
        """
        param = self.study_plan.keywords.get_parameter(parameter)
        if param is None:
            raise ValueError(f"Parameter '{parameter}' not found")
        return parse_filter_to_sql(param.filter), param.label or parameter

    def get_group_info(self, group: str) -> tuple[str, list[str]]:
        """
        Get group variable name and labels.

        Args:
            group: Group keyword name

        Returns:
            Tuple of (group_variable, group_labels)

        Raises:
            ValueError: If group keyword not found
        """
        grp = self.study_plan.keywords.get_group(group)
        if grp is None:
            raise ValueError(f"Group '{group}' not found")

        # "adsl:trt01a" -> "TRT01A": drop the dataset prefix, uppercase the column.
        group_var = grp.variable.split(":")[-1].upper()
        group_labels = grp.group_label if grp.group_label else []

        return group_var, group_labels

    def get_datasets(self, *dataset_names: str) -> tuple[pl.DataFrame, ...]:
        """
        Get multiple datasets from StudyPlan.

        Args:
            *dataset_names: Names of datasets to retrieve (e.g., "adsl", "adae")

        Returns:
            Tuple of DataFrames in the order requested

        Raises:
            ValueError: If any dataset not found
        """
        datasets = []
        for name in dataset_names:
            ds = self.study_plan.datasets.get(name)
            if ds is None:
                raise ValueError(f"Dataset '{name}' not found in study plan")
            datasets.append(ds)
        return tuple(datasets)

    def get_population_data(self, population: str, group: str) -> tuple[pl.DataFrame, str]:
        """
        Get filtered population dataset and group variable.

        Args:
            population: Population keyword name
            group: Group keyword name

        Returns:
            Tuple of (filtered_adsl, group_variable)

        Raises:
            ValueError: If the "adsl" dataset, the population keyword, or the
                group keyword is not found
        """
        # Get ADSL dataset
        (adsl,) = self.get_datasets("adsl")

        # Apply population filter. The raw keyword filter is passed on because
        # apply_filter_sql() performs the SQL conversion itself, and its fallback
        # parser only understands the original "dataset:column == 'value'" syntax.
        pop = self.study_plan.keywords.get_population(population)
        if pop is None:
            raise ValueError(f"Population '{population}' not found")
        adsl_pop = apply_filter_sql(adsl, pop.filter)

        # Get group variable
        group_var, _ = self.get_group_info(group)

        return adsl_pop, group_var
|