csrlite 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/ae/ae_specific.py CHANGED
@@ -1,483 +1,483 @@
# pyre-strict
"""
Adverse Event (AE) Specific Analysis Functions

This module provides functions for AE specific analysis showing detailed event listings
organized by System Organ Class (SOC) and Preferred Term (PT), following metalite.ae patterns.

The three-step pipeline:
- ae_specific_ard: Generate Analysis Results Data with SOC/PT hierarchy
- ae_specific_df: Transform to display format
- ae_specific_rtf: Generate formatted RTF output
- ae_specific: Complete pipeline wrapper
- study_plan_to_ae_specific: Batch generation from StudyPlan

Uses Polars native SQL capabilities for data manipulation, count.py utilities for subject counting,
and parse.py utilities for StudyPlan parsing.
"""

from pathlib import Path

import polars as pl
from rtflite import RTFDocument

from ..common.count import count_subject, count_subject_with_observation
from ..common.parse import StudyPlanParser
from ..common.plan import StudyPlan
from ..common.rtf import create_rtf_table_n_pct
from ..common.utils import apply_common_filters
from .ae_utils import get_ae_parameter_row_labels, get_ae_parameter_title


def ae_specific_ard(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    parameter_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str],
    ae_term: tuple[str, str],
    total: bool = True,
    missing_group: str = "error",
    n_with_label: str = " with one or more adverse events",
    n_without_label: str = " with no adverse events",
) -> pl.DataFrame:
    """
    Generate Analysis Results Data (ARD) for AE specific analysis.

    Creates a long-format DataFrame showing the number and percentage
    of subjects experiencing specific adverse events.

    Args:
        population: Population DataFrame (subject-level data, e.g., ADSL)
        observation: Observation DataFrame (event data, e.g., ADAE)
        population_filter: SQL WHERE clause for population (can be None)
        observation_filter: SQL WHERE clause for observation (can be None)
        parameter_filter: SQL WHERE clause for parameter filtering (can be None)
        id: Tuple (variable_name, label) for ID column
        group: Tuple (variable_name, label) for grouping variable
        ae_term: Tuple (variable_name, label) for AE term column
        total: Whether to include total column in counts
        missing_group: How to handle missing group values: "error", "ignore", or "fill"
        n_with_label: Label for "with one or more" row (dynamic based on parameter)
        n_without_label: Label for "with no" row (dynamic based on parameter)

    Returns:
        pl.DataFrame: Long-format ARD with columns __index__, __group__, __value__
    """
    # Extract variable names
    pop_var_name = "Participants in population"
    id_var_name, id_var_label = id
    group_var_name, group_var_label = group
    ae_term_var_name, ae_term_var_label = ae_term

    # Apply common filters
    population_filtered, observation_to_filter = apply_common_filters(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
        parameter_filter=parameter_filter,
    )

    assert observation_to_filter is not None

    # Filter observation to include only subjects in filtered population
    observation_filtered = observation_to_filter.filter(
        pl.col(id_var_name).is_in(population_filtered[id_var_name].to_list())
    ).with_columns(pl.col(ae_term_var_name).alias("__index__"))

    # Note: We'll extract categories from concatenated result later for both __index__ and __group__

    # Population counts - keep original for denominator calculations
    n_pop_counts = count_subject(
        population=population_filtered,
        id=id_var_name,
        group=group_var_name,
        total=total,
        missing_group=missing_group,
    )

    # Transform population counts for display
    n_pop = n_pop_counts.select(
        pl.lit(pop_var_name).alias("__index__"),
        pl.col(group_var_name).cast(pl.String).alias("__group__"),
        pl.col("n_subj_pop").cast(pl.String).alias("__value__"),
    )

    # Empty separator row
    n_empty = n_pop.select(
        pl.lit("").alias("__index__"), pl.col("__group__"), pl.lit("").alias("__value__")
    )

    # Summary rows: "with one or more" and "with no" adverse events
    # Count subjects with at least one event
    subjects_with_events = observation_filtered.select(id_var_name).unique()

    # Get population with event indicator
    pop_with_indicator = population_filtered.with_columns(
        pl.col(id_var_name)
        .is_in(subjects_with_events[id_var_name].to_list())
        .alias("__has_event__")
    )

    # Count subjects with and without events using count_subject_with_observation
    event_counts = count_subject_with_observation(
        population=population_filtered,
        observation=pop_with_indicator,
        id=id_var_name,
        group=group_var_name,
        variable="__has_event__",
        total=total,
        missing_group=missing_group,
    )

    # Extract 'with' counts
    n_with = event_counts.filter(pl.col("__has_event__") == "true").select(
        [
            pl.lit(n_with_label).alias("__index__"),
            pl.col(group_var_name).cast(pl.String).alias("__group__"),
            pl.col("n_pct_subj_fmt").alias("__value__"),
        ]
    )

    # Extract 'without' counts
    n_without = event_counts.filter(pl.col("__has_event__") == "false").select(
        [
            pl.lit(n_without_label).alias("__index__"),
            pl.col(group_var_name).cast(pl.String).alias("__group__"),
            pl.col("n_pct_subj_fmt").alias("__value__"),
        ]
    )

    # AE term counts
    n_index = count_subject_with_observation(
        population=population_filtered,
        observation=observation_filtered,
        id=id_var_name,
        group=group_var_name,
        total=total,
        variable="__index__",
        missing_group=missing_group,
    )

    n_index = n_index.select(
        (
            pl.col("__index__").cast(pl.String).str.slice(0, 1).str.to_uppercase()
            + pl.col("__index__").cast(pl.String).str.slice(1).str.to_lowercase()
        ).alias("__index__"),
        pl.col(group_var_name).cast(pl.String).alias("__group__"),
        pl.col("n_pct_subj_fmt").alias("__value__"),
    )

    # Concatenate all parts
    parts = [n_pop, n_with, n_without, n_empty, n_index]

    res = pl.concat(parts)

    # Extract unique categories from concatenated result in order of appearance
    index_categories = res.select("__index__").unique(maintain_order=True).to_series().to_list()
    group_categories = res.select("__group__").unique(maintain_order=True).to_series().to_list()

    # Convert to Enum types for proper categorical ordering and sorting
    res = res.with_columns(
        [
            pl.col("__index__").cast(pl.Enum(index_categories)),
            pl.col("__group__").cast(pl.Enum(group_categories)),
        ]
    )

    # Sort by index and group using categorical ordering
    res = res.sort("__index__", "__group__")

    return res


def ae_specific_df(ard: pl.DataFrame) -> pl.DataFrame:
    """
    Transform AE specific ARD to display-ready DataFrame.

    Converts the long-format ARD with __index__, __group__, __value__ columns
    into a wide-format display table where groups become columns.

    Args:
        ard: Analysis Results Data DataFrame with __index__, __group__, __value__ columns

    Returns:
        pl.DataFrame: Wide-format display table with index rows and groups as columns
    """
    # Pivot from long to wide format
    df_wide = ard.pivot(index="__index__", on="__group__", values="__value__")

    # Rename __index__ to display column name
    df_wide = df_wide.rename({"__index__": "Term"}).select(pl.col("Term"), pl.exclude("Term"))

    return df_wide


def ae_specific_rtf(
    df: pl.DataFrame,
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    col_rel_width: list[float] | None = None,
) -> RTFDocument:
    """
    Generate RTF table from AE specific display DataFrame.

    Creates a formatted RTF table with two-level column headers showing
    treatment groups with "n (%)" values.

    Args:
        df: Display DataFrame from ae_specific_df (wide format)
        title: Title(s) for the table as list of strings
        footnote: Optional footnote(s) as list of strings
        source: Optional source note(s) as list of strings
        col_rel_width: Optional list of relative column widths. If None, auto-calculated
            as [n_cols-1, 1, 1, 1, ...] where n_cols is total column count

    Returns:
        RTFDocument: RTF document object that can be written to file
    """

    # Rename Term to empty string for display
    df_rtf = df.rename({"Term": ""})

    # Calculate number of columns
    n_cols = len(df_rtf.columns)

    # Build first-level column headers
    col_header_1 = list(df_rtf.columns)

    # Build second-level column headers
    col_header_2 = [""] + ["n (%)"] * (n_cols - 1)

    # Calculate column widths
    if col_rel_width is None:
        col_widths = [n_cols / 1.5] + [1] * (n_cols - 1)
    else:
        col_widths = col_rel_width

    return create_rtf_table_n_pct(
        df=df_rtf,
        col_header_1=col_header_1,
        col_header_2=col_header_2,
        col_widths=col_widths,
        title=title,
        footnote=footnote,
        source=source,
    )


def ae_specific(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    parameter_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str],
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    output_file: str,
    ae_term: tuple[str, str],
    total: bool = True,
    col_rel_width: list[float] | None = None,
    missing_group: str = "error",
    n_with_label: str = " with one or more adverse events",
    n_without_label: str = " with no adverse events",
) -> str:
    """
    Complete AE specific pipeline wrapper.

    This function orchestrates the three-step pipeline:
    1. ae_specific_ard: Generate Analysis Results Data
    2. ae_specific_df: Transform to display format
    3. ae_specific_rtf: Generate RTF output and write to file

    Args:
        population: Population DataFrame (subject-level data, e.g., ADSL)
        observation: Observation DataFrame (event data, e.g., ADAE)
        population_filter: SQL WHERE clause for population (can be None)
        observation_filter: SQL WHERE clause for observation (can be None)
        parameter_filter: SQL WHERE clause for parameter filtering (can be None)
        id: Tuple (variable_name, label) for ID column
        group: Tuple (variable_name, label) for grouping variable
        title: Title for RTF output as list of strings
        footnote: Optional footnote for RTF output as list of strings
        source: Optional source for RTF output as list of strings
        output_file: File path to write RTF output
        ae_term: Tuple (variable_name, label) for AE term column
            (default: ("AEDECOD", "Adverse Event"))
        total: Whether to include total column (default: True)
        col_rel_width: Optional column widths for RTF output
        missing_group: How to handle missing group values (default: "error")
        n_with_label: Label for "with one or more" row (dynamic based on parameter)
        n_without_label: Label for "with no" row (dynamic based on parameter)

    Returns:
        str: Path to the generated RTF file
    """
    # Step 1: Generate ARD
    ard = ae_specific_ard(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
        parameter_filter=parameter_filter,
        id=id,
        group=group,
        ae_term=ae_term,
        total=total,
        missing_group=missing_group,
        n_with_label=n_with_label,
        n_without_label=n_without_label,
    )

    # Step 2: Transform to display format
    df = ae_specific_df(ard)

    # Step 3: Generate RTF and write to file
    rtf_doc = ae_specific_rtf(
        df=df,
        title=title,
        footnote=footnote,
        source=source,
        col_rel_width=col_rel_width,
    )
    rtf_doc.write_rtf(output_file)

    return output_file


def study_plan_to_ae_specific(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Generate AE specific RTF outputs for all analyses defined in StudyPlan.

    This function reads the expanded plan from StudyPlan and generates
    an RTF table for each ae_specific analysis specification automatically.

    Args:
        study_plan: StudyPlan object with loaded datasets and analysis specifications

    Returns:
        list[str]: List of paths to generated RTF files
    """

    # Meta data
    analysis = "ae_specific"
    output_dir = study_plan.output_dir
    footnote = ["Every participant is counted a single time for each applicable row and column."]
    source = None

    population_df_name = "adsl"
    observation_df_name = "adae"

    id = ("USUBJID", "Subject ID")
    ae_term = ("AEDECOD", "Adverse Event")
    total = True
    missing_group = "error"

    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Initialize parser
    parser = StudyPlanParser(study_plan)

    # Get expanded plan DataFrame
    plan_df = study_plan.get_plan_df()

    # Filter for AE specific analyses
    ae_plans = plan_df.filter(pl.col("analysis") == analysis)

    rtf_files = []

    # Generate RTF for each analysis
    for row in ae_plans.iter_rows(named=True):
        population = row["population"]
        observation = row.get("observation")
        parameter = row.get("parameter")
        group = row.get("group")

        # Validate group is specified
        if group is None:
            raise ValueError(
                f"Group not specified in YAML for analysis: population={population}, "
                f"observation={observation}, parameter={parameter}. "
                f"Please add group to your YAML plan."
            )

        # Get datasets using parser
        population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)

        # Get filters and configuration using parser
        population_filter = parser.get_population_filter(population)
        obs_filter = parser.get_observation_filter(observation)

        # Get parameter filter if parameter is specified
        parameter_filter = None
        if parameter:
            param_names, param_filters, param_labels, _ = parser.get_parameter_info(
                parameter
            )  # Ignore indent for AE
            # For ae_specific, use the first (and typically only) filter
            parameter_filter = param_filters[0] if param_filters else None

        group_var_name, group_labels = parser.get_group_info(group)

        # Build group tuple
        group_var_label = group_labels[0] if group_labels else group_var_name
        group_tuple = (group_var_name, group_var_label)

        # Build dynamic title and row labels based on parameter
        param = study_plan.keywords.get_parameter(parameter) if parameter else None
        dynamic_title = get_ae_parameter_title(param)
        n_with_label, n_without_label = get_ae_parameter_row_labels(param)

        # Build title with population and observation context
        title_parts = [dynamic_title]
        if observation:
            obs_kw = study_plan.keywords.observations.get(observation)
            if obs_kw and obs_kw.label:
                title_parts.append(obs_kw.label)

        pop_kw = study_plan.keywords.populations.get(population)
        if pop_kw and pop_kw.label:
            title_parts.append(pop_kw.label)

        # Build output filename
        filename = f"{analysis}_{population}"
        if observation:
            filename += f"_{observation}"
        if parameter:
            filename += f"_{parameter.replace(';', '_')}"
        filename += ".rtf"
        output_file = str(Path(output_dir) / filename)

        # Generate RTF
        rtf_path = ae_specific(
            population=population_df,
            observation=observation_df,
            population_filter=population_filter,
            observation_filter=obs_filter,
            parameter_filter=parameter_filter,
            id=id,
            group=group_tuple,
            ae_term=ae_term,
            title=title_parts,
            footnote=footnote,
            source=source,
            output_file=output_file,
            total=total,
            missing_group=missing_group,
            n_with_label=n_with_label,
            n_without_label=n_without_label,
        )

        rtf_files.append(rtf_path)

    return rtf_files
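
The module above wires the three steps together (ARD, display DataFrame, RTF). Below is a minimal usage sketch of that pipeline; it is not part of the diff, and the toy ADSL/ADAE data, column names (USUBJID, TRT01A, AEDECOD), titles, and output path are illustrative assumptions only.

import polars as pl

from csrlite.ae.ae_specific import ae_specific_ard, ae_specific_df, ae_specific_rtf

# Hypothetical subject-level (ADSL-like) and event-level (ADAE-like) data
adsl = pl.DataFrame(
    {
        "USUBJID": ["01", "02", "03", "04"],
        "TRT01A": ["Placebo", "Placebo", "Drug A", "Drug A"],
    }
)
adae = pl.DataFrame(
    {
        "USUBJID": ["01", "03", "03"],
        "TRT01A": ["Placebo", "Drug A", "Drug A"],
        "AEDECOD": ["HEADACHE", "NAUSEA", "HEADACHE"],
    }
)

# Step 1: long-format ARD with __index__, __group__, __value__ columns
ard = ae_specific_ard(
    population=adsl,
    observation=adae,
    population_filter=None,
    observation_filter=None,
    parameter_filter=None,
    id=("USUBJID", "Subject ID"),
    group=("TRT01A", "Treatment"),
    ae_term=("AEDECOD", "Adverse Event"),
)

# Step 2: pivot groups into columns for display
df = ae_specific_df(ard)

# Step 3: render the RTF table and write it to disk (path is illustrative)
rtf_doc = ae_specific_rtf(
    df=df,
    title=["Participants With Adverse Events"],
    footnote=None,
    source=None,
)
rtf_doc.write_rtf("ae_specific_example.rtf")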