csrlite 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/ae/ae_listing.py CHANGED
@@ -1,494 +1,494 @@
1
- # pyre-strict
2
- """
3
- Adverse Event (AE) Listing Functions
4
-
5
- This module provides functions for generating detailed AE listings showing individual
6
- adverse event records with key details like severity, relationship, and outcomes.
7
-
8
- The two-step pipeline:
9
- - ae_listing_ard: Filter, select, sort, and rename columns (returns display-ready data)
10
- - ae_listing_rtf: Generate formatted RTF output
11
- - ae_listing: Complete pipeline wrapper
12
- - study_plan_to_ae_listing: Batch generation from StudyPlan
13
-
14
- Uses Polars native SQL capabilities for data manipulation and parse.py utilities
15
- for StudyPlan parsing.
16
- """
17
-
18
- from pathlib import Path
19
- from typing import Any
20
-
21
- import polars as pl
22
- from rtflite import RTFBody, RTFColumnHeader, RTFDocument, RTFFootnote, RTFPage, RTFSource, RTFTitle
23
-
24
- from ..common.parse import StudyPlanParser
25
- from ..common.plan import StudyPlan
26
- from ..common.utils import apply_common_filters
27
- from .ae_utils import get_ae_parameter_title
28
-
29
-
30
- def ae_listing_ard(
31
- population: pl.DataFrame,
32
- observation: pl.DataFrame,
33
- population_filter: str | None,
34
- observation_filter: str | None,
35
- parameter_filter: str | None,
36
- id: tuple[str, str],
37
- population_columns: list[tuple[str, str]] | None = None,
38
- observation_columns: list[tuple[str, str]] | None = None,
39
- sort_columns: list[str] | None = None,
40
- page_by: list[str] | None = None,
41
- ) -> pl.DataFrame:
42
- """
43
- Generate Analysis Results Data (ARD) for AE listing.
44
-
45
- Filters and joins population and observation data, then selects relevant columns.
46
-
47
- Args:
48
- population: Population DataFrame (subject-level data, e.g., ADSL)
49
- observation: Observation DataFrame (event data, e.g., ADAE)
50
- population_filter: SQL WHERE clause for population (can be None)
51
- observation_filter: SQL WHERE clause for observation (can be None)
52
- parameter_filter: SQL WHERE clause for parameter filtering (can be None)
53
- id: Tuple (variable_name, label) for ID column
54
- population_columns: List of tuples (variable_name, label) from population
55
- (e.g., [("SEX", "Sex"), ("RACE", "Race")])
56
- observation_columns: List of tuples (variable_name, label) from observation
57
- (e.g., [("AEDECOD", "Adverse Event")])
58
- sort_columns: List of column names to sort by. If None, sorts by id column.
59
-
60
- Returns:
61
- pl.DataFrame: Filtered and joined records with selected columns
62
- """
63
- id_var_name, id_var_label = id
64
-
65
- # Apply common filters
66
- population_filtered, observation_to_filter = apply_common_filters(
67
- population=population,
68
- observation=observation,
69
- population_filter=population_filter,
70
- observation_filter=observation_filter,
71
- parameter_filter=parameter_filter,
72
- )
73
-
74
- assert observation_to_filter is not None
75
-
76
- # Filter observation to include only subjects in filtered population
77
- observation_filtered = observation_to_filter.filter(
78
- pl.col(id_var_name).is_in(population_filtered[id_var_name].to_list())
79
- )
80
-
81
- # Determine which observation columns to select
82
- if observation_columns is None:
83
- # Default: select id column only
84
- obs_cols = [id_var_name]
85
- else:
86
- # Extract variable names from tuples
87
- obs_col_names = [var_name for var_name, _ in observation_columns]
88
- # Ensure id is included
89
- obs_cols = [id_var_name] + [col for col in obs_col_names if col != id_var_name]
90
-
91
- # Select available observation columns
92
- obs_cols_available = [col for col in obs_cols if col in observation_filtered.columns]
93
- result = observation_filtered.select(obs_cols_available)
94
-
95
- # Join with population to add population columns
96
- if population_columns is not None:
97
- # Extract variable names from tuples
98
- pop_col_names = [var_name for var_name, _ in population_columns]
99
- # Select id + requested population columns
100
- pop_cols = [id_var_name] + [col for col in pop_col_names if col != id_var_name]
101
- pop_cols_available = [col for col in pop_cols if col in population_filtered.columns]
102
- population_subset = population_filtered.select(pop_cols_available)
103
-
104
- # Left join to preserve all observation records
105
- result = result.join(population_subset, on=id_var_name, how="left")
106
-
107
- # Create __index__ column for pagination
108
- # Default to using the id column as the index
109
- if id_var_name in result.columns:
110
- result = result.with_columns(
111
- (pl.lit(f"{id_var_label} = ") + pl.col(id_var_name).cast(pl.Utf8)).alias("__index__")
112
- )
113
-
114
- # Use page_by columns if provided and they exist
115
- existing_page_by_cols = [col for col in page_by if col in result.columns] if page_by else []
116
-
117
- if existing_page_by_cols:
118
- # Create a mapping from column name to label
119
- column_labels = {id_var_name: id_var_label}
120
- if population_columns:
121
- for var_name, var_label in population_columns:
122
- column_labels[var_name] = var_label
123
-
124
- # Ensure the order of labels matches the order of columns in page_by
125
- index_expressions = []
126
- for col_name in existing_page_by_cols:
127
- label = column_labels.get(col_name, col_name)
128
- index_expressions.append(pl.lit(f"{label} = ") + pl.col(col_name).cast(pl.Utf8))
129
-
130
- result = result.with_columns(
131
- pl.concat_str(index_expressions, separator=", ").alias("__index__")
132
- )
133
-
134
- page_by_remove = [col for col in (page_by or []) if col != id_var_name]
135
- result = result.drop(page_by_remove)
136
-
137
- if "__index__" in result.columns:
138
- # Get all columns except __index__
139
- other_columns = [col for col in result.columns if col != "__index__"]
140
- # Reorder to have __index__ first
141
- result = result.select(["__index__"] + other_columns)
142
-
143
- # Sort by specified columns or default to id column
144
- if sort_columns is None:
145
- # Default: sort by id column if it exists in result
146
- if id_var_name in result.columns:
147
- result = result.sort(id_var_name)
148
- else:
149
- # Sort by specified columns that exist in result
150
- cols_to_sort = [col for col in sort_columns if col in result.columns]
151
- if cols_to_sort:
152
- result = result.sort(cols_to_sort)
153
-
154
- return result
155
-
156
-
157
- def ae_listing_rtf(
158
- df: pl.DataFrame,
159
- column_labels: dict[str, str],
160
- title: list[str],
161
- footnote: list[str] | None,
162
- source: list[str] | None,
163
- col_rel_width: list[float] | None = None,
164
- group_by: list[str] | None = None,
165
- page_by: list[str] | None = None,
166
- orientation: str = "landscape",
167
- ) -> RTFDocument:
168
- """
169
- Generate RTF table from AE listing display DataFrame.
170
-
171
- Creates a formatted RTF table with column headers and optional section grouping/pagination.
172
-
173
- Args:
174
- df: Display DataFrame from ae_listing_ard
175
- column_labels: Dictionary mapping column names to display labels
176
- title: Title(s) for the table as list of strings
177
- footnote: Optional footnote(s) as list of strings
178
- source: Optional source note(s) as list of strings
179
- col_rel_width: Optional list of relative column widths. If None, auto-calculated
180
- as equal widths for all columns
181
- group_by: Optional list of column names to group by for section headers within pages.
182
- Should only contain population columns (e.g., ["TRT01A", "USUBJID"])
183
- page_by: Optional list of column names to trigger new pages when values change.
184
- Should only contain population columns (e.g., ["TRT01A"])
185
- orientation: Page orientation ("portrait" or "landscape"), default is "landscape"
186
-
187
- Returns:
188
- RTFDocument: RTF document object that can be written to file
189
- """
190
- # Calculate number of columns
191
- n_cols = len(df.columns)
192
-
193
- # Build column headers using labels
194
- col_header = [column_labels.get(col, col) for col in df.columns]
195
-
196
- # Calculate column widths
197
- if col_rel_width is None:
198
- col_widths = [1.0] * n_cols
199
- else:
200
- col_widths = col_rel_width
201
-
202
- # Normalize title, footnote, source to lists
203
- title_list = title
204
- footnote_list: list[str] = footnote or []
205
- source_list: list[str] = source or []
206
-
207
- # Build RTF document
208
- rtf_components: dict[str, Any] = {
209
- "df": df,
210
- "rtf_page": RTFPage(orientation=orientation),
211
- "rtf_title": RTFTitle(text=title_list),
212
- "rtf_column_header": [
213
- RTFColumnHeader(
214
- text=col_header[1:],
215
- col_rel_width=col_widths[1:],
216
- text_justification=["l"] + ["c"] * (n_cols - 1),
217
- ),
218
- ],
219
- "rtf_body": RTFBody(
220
- col_rel_width=col_widths,
221
- text_justification=["l"] * n_cols,
222
- border_left=["single"],
223
- border_top=["single"] + [""] * (n_cols - 1),
224
- border_bottom=["single"] + [""] * (n_cols - 1),
225
- group_by=group_by,
226
- page_by=page_by,
227
- ),
228
- }
229
-
230
- # Add optional footnote
231
- if footnote_list:
232
- rtf_components["rtf_footnote"] = RTFFootnote(text=footnote_list)
233
-
234
- # Add optional source
235
- if source_list:
236
- rtf_components["rtf_source"] = RTFSource(text=source_list)
237
-
238
- # Create RTF document
239
- doc = RTFDocument(**rtf_components)
240
-
241
- return doc
242
-
243
-
244
- def ae_listing(
245
- population: pl.DataFrame,
246
- observation: pl.DataFrame,
247
- population_filter: str | None,
248
- observation_filter: str | None,
249
- parameter_filter: str | None,
250
- id: tuple[str, str],
251
- title: list[str],
252
- footnote: list[str] | None,
253
- source: list[str] | None,
254
- output_file: str,
255
- population_columns: list[tuple[str, str]] | None = None,
256
- observation_columns: list[tuple[str, str]] | None = None,
257
- sort_columns: list[str] | None = None,
258
- group_by: list[str] | None = None,
259
- page_by: list[str] | None = None,
260
- col_rel_width: list[float] | None = None,
261
- orientation: str = "landscape",
262
- ) -> str:
263
- """
264
- Complete AE listing pipeline wrapper.
265
-
266
- This function orchestrates the two-step pipeline:
267
- 1. ae_listing_ard: Filter, join, select, and sort columns
268
- 2. ae_listing_rtf: Generate RTF output with optional grouping/pagination
269
-
270
- Args:
271
- population: Population DataFrame (subject-level data, e.g., ADSL)
272
- observation: Observation DataFrame (event data, e.g., ADAE)
273
- population_filter: SQL WHERE clause for population (can be None)
274
- observation_filter: SQL WHERE clause for observation (can be None)
275
- parameter_filter: SQL WHERE clause for parameter filtering (can be None)
276
- id: Tuple (variable_name, label) for ID column
277
- title: Title for RTF output as list of strings
278
- footnote: Optional footnote for RTF output as list of strings
279
- source: Optional source for RTF output as list of strings
280
- output_file: File path to write RTF output
281
- population_columns: Optional list of tuples (variable_name, label) from population
282
- observation_columns: Optional list of tuples (variable_name, label) from observation
283
- sort_columns: Optional list of column names to sort by. If None, sorts by id column.
284
- group_by: Optional list of column names to group by for section headers
285
- (population columns only)
286
- page_by: Optional list of column names to trigger new pages (population columns only)
287
- col_rel_width: Optional column widths for RTF output
288
- orientation: Page orientation ("portrait" or "landscape"), default is "landscape"
289
-
290
- Returns:
291
- str: Path to the generated RTF file
292
- """
293
- # Step 1: Generate ARD (includes filtering, joining, and selecting)
294
- df = ae_listing_ard(
295
- population=population,
296
- observation=observation,
297
- population_filter=population_filter,
298
- observation_filter=observation_filter,
299
- parameter_filter=parameter_filter,
300
- id=id,
301
- population_columns=population_columns,
302
- observation_columns=observation_columns,
303
- sort_columns=sort_columns,
304
- page_by=page_by,
305
- )
306
-
307
- # Build column labels from tuples
308
- id_var_name, id_var_label = id
309
- column_labels = {id_var_name: id_var_label}
310
-
311
- # Add observation column labels
312
- if observation_columns is not None:
313
- for var_name, var_label in observation_columns:
314
- column_labels[var_name] = var_label
315
-
316
- # Add population column labels
317
- if population_columns is not None:
318
- for var_name, var_label in population_columns:
319
- column_labels[var_name] = var_label
320
-
321
- # Set __index__ header to empty string
322
- column_labels["__index__"] = ""
323
-
324
- # Step 2: Generate RTF and write to file
325
- rtf_doc = ae_listing_rtf(
326
- df=df,
327
- column_labels=column_labels,
328
- title=title,
329
- footnote=footnote,
330
- source=source,
331
- col_rel_width=col_rel_width,
332
- group_by=group_by,
333
- page_by=["__index__"],
334
- orientation=orientation,
335
- )
336
- rtf_doc.write_rtf(output_file)
337
-
338
- return output_file
339
-
340
-
341
- def study_plan_to_ae_listing(
342
- study_plan: StudyPlan,
343
- ) -> list[str]:
344
- """
345
- Generate AE listing RTF outputs for all analyses defined in StudyPlan.
346
-
347
- This function reads the expanded plan from StudyPlan and generates
348
- an RTF listing for each ae_listing analysis specification automatically.
349
-
350
- Args:
351
- study_plan: StudyPlan object with loaded datasets and analysis specifications
352
-
353
- Returns:
354
- list[str]: List of paths to generated RTF files
355
- """
356
-
357
- # Meta data
358
- analysis = "ae_listing"
359
- output_dir = study_plan.output_dir
360
- col_rel_width = [1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0]
361
- footnote = None
362
- source = None
363
-
364
- population_df_name = "adsl"
365
- observation_df_name = "adae"
366
-
367
- id = ("USUBJID", "Subject ID")
368
- # Column configuration with labels - easy to customize
369
- # Population columns (demographics) - group variable will be added dynamically
370
- population_columns_base = [
371
- ("AGE", "Age"),
372
- ("SEX", "Sex"),
373
- ("RACE", "Race"),
374
- ]
375
-
376
- # Observation columns (event details)
377
- observation_columns_base = [
378
- ("AEDECOD", "Adverse Event"),
379
- ("ASTDY", "Study Day"),
380
- ("ADURN", "Duration (days)"),
381
- ("AESEV", "Severity"),
382
- ("AESER", "Serious"),
383
- ("AEREL", "Related"),
384
- ("AEACN", "Action Taken"),
385
- ("AEOUT", "Outcome"),
386
- ]
387
-
388
- # Sorting configuration
389
- sort_columns = ["TRT01A", "USUBJID", "ASTDY"]
390
- page_by = ["USUBJID", "SEX", "RACE", "AGE", "TRT01A"]
391
- group_by = ["USUBJID"]
392
-
393
- # Create output directory if it doesn't exist
394
- Path(output_dir).mkdir(parents=True, exist_ok=True)
395
-
396
- # Initialize parser
397
- parser = StudyPlanParser(study_plan)
398
-
399
- # Get expanded plan DataFrame
400
- plan_df = study_plan.get_plan_df()
401
-
402
- # Filter for AE listing analyses
403
- ae_plans = plan_df.filter(pl.col("analysis") == analysis)
404
-
405
- rtf_files = []
406
-
407
- # Generate RTF for each analysis
408
- for row in ae_plans.iter_rows(named=True):
409
- population = row["population"]
410
- observation = row.get("observation")
411
- parameter = row.get("parameter")
412
- group = row.get("group")
413
-
414
- # Validate group is specified
415
- if group is None:
416
- raise ValueError(
417
- f"Group not specified in YAML for analysis: population={population}, "
418
- f"observation={observation}, parameter={parameter}. "
419
- "Please add group to your YAML plan."
420
- )
421
-
422
- # Get datasets using parser
423
- population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)
424
-
425
- # Get filters using parser
426
- population_filter = parser.get_population_filter(population)
427
- obs_filter = parser.get_observation_filter(observation)
428
-
429
- # Get group variable name from YAML
430
- group_var_name, group_labels = parser.get_group_info(group)
431
-
432
- # Determine group variable label
433
- group_var_label = group_labels[0] if group_labels else "Treatment"
434
-
435
- # Build columns dynamically from base configuration with labels
436
- population_columns = population_columns_base + [(group_var_name, group_var_label)]
437
- observation_columns = observation_columns_base
438
-
439
- # Get parameter filter if parameter is specified
440
- parameter_filter = None
441
- if parameter:
442
- param_names, param_filters, param_labels, _ = parser.get_parameter_info(
443
- parameter
444
- ) # Ignore indent for AE
445
- # For ae_listing, use the first (and typically only) filter
446
- parameter_filter = param_filters[0] if param_filters else None
447
-
448
- # Build dynamic title based on parameter
449
- param = study_plan.keywords.get_parameter(parameter) if parameter else None
450
- dynamic_title = get_ae_parameter_title(param, prefix="Listing of Participants With")
451
-
452
- # Build title with population and observation context
453
- title_parts = [dynamic_title]
454
- if observation:
455
- obs_kw = study_plan.keywords.observations.get(observation)
456
- if obs_kw and obs_kw.label:
457
- title_parts.append(obs_kw.label)
458
-
459
- pop_kw = study_plan.keywords.populations.get(population)
460
- if pop_kw and pop_kw.label:
461
- title_parts.append(pop_kw.label)
462
-
463
- # Build output filename
464
- filename = f"{analysis}_{population}"
465
- if observation:
466
- filename += f"_{observation}"
467
- if parameter:
468
- filename += f"_{parameter.replace(';', '_')}"
469
- filename += ".rtf"
470
- output_file = str(Path(output_dir) / filename)
471
-
472
- # Generate RTF
473
- rtf_path = ae_listing(
474
- population=population_df,
475
- observation=observation_df,
476
- population_filter=population_filter,
477
- observation_filter=obs_filter,
478
- parameter_filter=parameter_filter,
479
- id=id,
480
- title=title_parts,
481
- footnote=footnote,
482
- source=source,
483
- output_file=output_file,
484
- population_columns=population_columns,
485
- observation_columns=observation_columns,
486
- sort_columns=sort_columns,
487
- col_rel_width=col_rel_width,
488
- group_by=group_by,
489
- page_by=page_by,
490
- )
491
-
492
- rtf_files.append(rtf_path)
493
-
494
- return rtf_files
1
+ # pyre-strict
2
+ """
3
+ Adverse Event (AE) Listing Functions
4
+
5
+ This module provides functions for generating detailed AE listings showing individual
6
+ adverse event records with key details like severity, relationship, and outcomes.
7
+
8
+ The two-step pipeline:
9
+ - ae_listing_ard: Filter, select, sort, and rename columns (returns display-ready data)
10
+ - ae_listing_rtf: Generate formatted RTF output
11
+ - ae_listing: Complete pipeline wrapper
12
+ - study_plan_to_ae_listing: Batch generation from StudyPlan
13
+
14
+ Uses Polars native SQL capabilities for data manipulation and parse.py utilities
15
+ for StudyPlan parsing.
16
+ """
17
+
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ import polars as pl
22
+ from rtflite import RTFBody, RTFColumnHeader, RTFDocument, RTFFootnote, RTFPage, RTFSource, RTFTitle
23
+
24
+ from ..common.parse import StudyPlanParser
25
+ from ..common.plan import StudyPlan
26
+ from ..common.utils import apply_common_filters
27
+ from .ae_utils import get_ae_parameter_title
28
+
29
+
30
+ def ae_listing_ard(
31
+ population: pl.DataFrame,
32
+ observation: pl.DataFrame,
33
+ population_filter: str | None,
34
+ observation_filter: str | None,
35
+ parameter_filter: str | None,
36
+ id: tuple[str, str],
37
+ population_columns: list[tuple[str, str]] | None = None,
38
+ observation_columns: list[tuple[str, str]] | None = None,
39
+ sort_columns: list[str] | None = None,
40
+ page_by: list[str] | None = None,
41
+ ) -> pl.DataFrame:
42
+ """
43
+ Generate Analysis Results Data (ARD) for AE listing.
44
+
45
+ Filters and joins population and observation data, then selects relevant columns.
46
+
47
+ Args:
48
+ population: Population DataFrame (subject-level data, e.g., ADSL)
49
+ observation: Observation DataFrame (event data, e.g., ADAE)
50
+ population_filter: SQL WHERE clause for population (can be None)
51
+ observation_filter: SQL WHERE clause for observation (can be None)
52
+ parameter_filter: SQL WHERE clause for parameter filtering (can be None)
53
+ id: Tuple (variable_name, label) for ID column
54
+ population_columns: List of tuples (variable_name, label) from population
55
+ (e.g., [("SEX", "Sex"), ("RACE", "Race")])
56
+ observation_columns: List of tuples (variable_name, label) from observation
57
+ (e.g., [("AEDECOD", "Adverse Event")])
58
+ sort_columns: List of column names to sort by. If None, sorts by id column.
59
+
60
+ Returns:
61
+ pl.DataFrame: Filtered and joined records with selected columns
62
+ """
63
+ id_var_name, id_var_label = id
64
+
65
+ # Apply common filters
66
+ population_filtered, observation_to_filter = apply_common_filters(
67
+ population=population,
68
+ observation=observation,
69
+ population_filter=population_filter,
70
+ observation_filter=observation_filter,
71
+ parameter_filter=parameter_filter,
72
+ )
73
+
74
+ assert observation_to_filter is not None
75
+
76
+ # Filter observation to include only subjects in filtered population
77
+ observation_filtered = observation_to_filter.filter(
78
+ pl.col(id_var_name).is_in(population_filtered[id_var_name].to_list())
79
+ )
80
+
81
+ # Determine which observation columns to select
82
+ if observation_columns is None:
83
+ # Default: select id column only
84
+ obs_cols = [id_var_name]
85
+ else:
86
+ # Extract variable names from tuples
87
+ obs_col_names = [var_name for var_name, _ in observation_columns]
88
+ # Ensure id is included
89
+ obs_cols = [id_var_name] + [col for col in obs_col_names if col != id_var_name]
90
+
91
+ # Select available observation columns
92
+ obs_cols_available = [col for col in obs_cols if col in observation_filtered.columns]
93
+ result = observation_filtered.select(obs_cols_available)
94
+
95
+ # Join with population to add population columns
96
+ if population_columns is not None:
97
+ # Extract variable names from tuples
98
+ pop_col_names = [var_name for var_name, _ in population_columns]
99
+ # Select id + requested population columns
100
+ pop_cols = [id_var_name] + [col for col in pop_col_names if col != id_var_name]
101
+ pop_cols_available = [col for col in pop_cols if col in population_filtered.columns]
102
+ population_subset = population_filtered.select(pop_cols_available)
103
+
104
+ # Left join to preserve all observation records
105
+ result = result.join(population_subset, on=id_var_name, how="left")
106
+
107
+ # Create __index__ column for pagination
108
+ # Default to using the id column as the index
109
+ if id_var_name in result.columns:
110
+ result = result.with_columns(
111
+ (pl.lit(f"{id_var_label} = ") + pl.col(id_var_name).cast(pl.Utf8)).alias("__index__")
112
+ )
113
+
114
+ # Use page_by columns if provided and they exist
115
+ existing_page_by_cols = [col for col in page_by if col in result.columns] if page_by else []
116
+
117
+ if existing_page_by_cols:
118
+ # Create a mapping from column name to label
119
+ column_labels = {id_var_name: id_var_label}
120
+ if population_columns:
121
+ for var_name, var_label in population_columns:
122
+ column_labels[var_name] = var_label
123
+
124
+ # Ensure the order of labels matches the order of columns in page_by
125
+ index_expressions = []
126
+ for col_name in existing_page_by_cols:
127
+ label = column_labels.get(col_name, col_name)
128
+ index_expressions.append(pl.lit(f"{label} = ") + pl.col(col_name).cast(pl.Utf8))
129
+
130
+ result = result.with_columns(
131
+ pl.concat_str(index_expressions, separator=", ").alias("__index__")
132
+ )
133
+
134
+ page_by_remove = [col for col in (page_by or []) if col != id_var_name]
135
+ result = result.drop(page_by_remove)
136
+
137
+ if "__index__" in result.columns:
138
+ # Get all columns except __index__
139
+ other_columns = [col for col in result.columns if col != "__index__"]
140
+ # Reorder to have __index__ first
141
+ result = result.select(["__index__"] + other_columns)
142
+
143
+ # Sort by specified columns or default to id column
144
+ if sort_columns is None:
145
+ # Default: sort by id column if it exists in result
146
+ if id_var_name in result.columns:
147
+ result = result.sort(id_var_name)
148
+ else:
149
+ # Sort by specified columns that exist in result
150
+ cols_to_sort = [col for col in sort_columns if col in result.columns]
151
+ if cols_to_sort:
152
+ result = result.sort(cols_to_sort)
153
+
154
+ return result
155
+
156
+
157
+ def ae_listing_rtf(
158
+ df: pl.DataFrame,
159
+ column_labels: dict[str, str],
160
+ title: list[str],
161
+ footnote: list[str] | None,
162
+ source: list[str] | None,
163
+ col_rel_width: list[float] | None = None,
164
+ group_by: list[str] | None = None,
165
+ page_by: list[str] | None = None,
166
+ orientation: str = "landscape",
167
+ ) -> RTFDocument:
168
+ """
169
+ Generate RTF table from AE listing display DataFrame.
170
+
171
+ Creates a formatted RTF table with column headers and optional section grouping/pagination.
172
+
173
+ Args:
174
+ df: Display DataFrame from ae_listing_ard
175
+ column_labels: Dictionary mapping column names to display labels
176
+ title: Title(s) for the table as list of strings
177
+ footnote: Optional footnote(s) as list of strings
178
+ source: Optional source note(s) as list of strings
179
+ col_rel_width: Optional list of relative column widths. If None, auto-calculated
180
+ as equal widths for all columns
181
+ group_by: Optional list of column names to group by for section headers within pages.
182
+ Should only contain population columns (e.g., ["TRT01A", "USUBJID"])
183
+ page_by: Optional list of column names to trigger new pages when values change.
184
+ Should only contain population columns (e.g., ["TRT01A"])
185
+ orientation: Page orientation ("portrait" or "landscape"), default is "landscape"
186
+
187
+ Returns:
188
+ RTFDocument: RTF document object that can be written to file
189
+ """
190
+ # Calculate number of columns
191
+ n_cols = len(df.columns)
192
+
193
+ # Build column headers using labels
194
+ col_header = [column_labels.get(col, col) for col in df.columns]
195
+
196
+ # Calculate column widths
197
+ if col_rel_width is None:
198
+ col_widths = [1.0] * n_cols
199
+ else:
200
+ col_widths = col_rel_width
201
+
202
+ # Normalize title, footnote, source to lists
203
+ title_list = title
204
+ footnote_list: list[str] = footnote or []
205
+ source_list: list[str] = source or []
206
+
207
+ # Build RTF document
208
+ rtf_components: dict[str, Any] = {
209
+ "df": df,
210
+ "rtf_page": RTFPage(orientation=orientation),
211
+ "rtf_title": RTFTitle(text=title_list),
212
+ "rtf_column_header": [
213
+ RTFColumnHeader(
214
+ text=col_header[1:],
215
+ col_rel_width=col_widths[1:],
216
+ text_justification=["l"] + ["c"] * (n_cols - 1),
217
+ ),
218
+ ],
219
+ "rtf_body": RTFBody(
220
+ col_rel_width=col_widths,
221
+ text_justification=["l"] * n_cols,
222
+ border_left=["single"],
223
+ border_top=["single"] + [""] * (n_cols - 1),
224
+ border_bottom=["single"] + [""] * (n_cols - 1),
225
+ group_by=group_by,
226
+ page_by=page_by,
227
+ ),
228
+ }
229
+
230
+ # Add optional footnote
231
+ if footnote_list:
232
+ rtf_components["rtf_footnote"] = RTFFootnote(text=footnote_list)
233
+
234
+ # Add optional source
235
+ if source_list:
236
+ rtf_components["rtf_source"] = RTFSource(text=source_list)
237
+
238
+ # Create RTF document
239
+ doc = RTFDocument(**rtf_components)
240
+
241
+ return doc
242
+
243
+
244
+ def ae_listing(
245
+ population: pl.DataFrame,
246
+ observation: pl.DataFrame,
247
+ population_filter: str | None,
248
+ observation_filter: str | None,
249
+ parameter_filter: str | None,
250
+ id: tuple[str, str],
251
+ title: list[str],
252
+ footnote: list[str] | None,
253
+ source: list[str] | None,
254
+ output_file: str,
255
+ population_columns: list[tuple[str, str]] | None = None,
256
+ observation_columns: list[tuple[str, str]] | None = None,
257
+ sort_columns: list[str] | None = None,
258
+ group_by: list[str] | None = None,
259
+ page_by: list[str] | None = None,
260
+ col_rel_width: list[float] | None = None,
261
+ orientation: str = "landscape",
262
+ ) -> str:
263
+ """
264
+ Complete AE listing pipeline wrapper.
265
+
266
+ This function orchestrates the two-step pipeline:
267
+ 1. ae_listing_ard: Filter, join, select, and sort columns
268
+ 2. ae_listing_rtf: Generate RTF output with optional grouping/pagination
269
+
270
+ Args:
271
+ population: Population DataFrame (subject-level data, e.g., ADSL)
272
+ observation: Observation DataFrame (event data, e.g., ADAE)
273
+ population_filter: SQL WHERE clause for population (can be None)
274
+ observation_filter: SQL WHERE clause for observation (can be None)
275
+ parameter_filter: SQL WHERE clause for parameter filtering (can be None)
276
+ id: Tuple (variable_name, label) for ID column
277
+ title: Title for RTF output as list of strings
278
+ footnote: Optional footnote for RTF output as list of strings
279
+ source: Optional source for RTF output as list of strings
280
+ output_file: File path to write RTF output
281
+ population_columns: Optional list of tuples (variable_name, label) from population
282
+ observation_columns: Optional list of tuples (variable_name, label) from observation
283
+ sort_columns: Optional list of column names to sort by. If None, sorts by id column.
284
+ group_by: Optional list of column names to group by for section headers
285
+ (population columns only)
286
+ page_by: Optional list of column names to trigger new pages (population columns only)
287
+ col_rel_width: Optional column widths for RTF output
288
+ orientation: Page orientation ("portrait" or "landscape"), default is "landscape"
289
+
290
+ Returns:
291
+ str: Path to the generated RTF file
292
+ """
293
+ # Step 1: Generate ARD (includes filtering, joining, and selecting)
294
+ df = ae_listing_ard(
295
+ population=population,
296
+ observation=observation,
297
+ population_filter=population_filter,
298
+ observation_filter=observation_filter,
299
+ parameter_filter=parameter_filter,
300
+ id=id,
301
+ population_columns=population_columns,
302
+ observation_columns=observation_columns,
303
+ sort_columns=sort_columns,
304
+ page_by=page_by,
305
+ )
306
+
307
+ # Build column labels from tuples
308
+ id_var_name, id_var_label = id
309
+ column_labels = {id_var_name: id_var_label}
310
+
311
+ # Add observation column labels
312
+ if observation_columns is not None:
313
+ for var_name, var_label in observation_columns:
314
+ column_labels[var_name] = var_label
315
+
316
+ # Add population column labels
317
+ if population_columns is not None:
318
+ for var_name, var_label in population_columns:
319
+ column_labels[var_name] = var_label
320
+
321
+ # Set __index__ header to empty string
322
+ column_labels["__index__"] = ""
323
+
324
+ # Step 2: Generate RTF and write to file
325
+ rtf_doc = ae_listing_rtf(
326
+ df=df,
327
+ column_labels=column_labels,
328
+ title=title,
329
+ footnote=footnote,
330
+ source=source,
331
+ col_rel_width=col_rel_width,
332
+ group_by=group_by,
333
+ page_by=["__index__"],
334
+ orientation=orientation,
335
+ )
336
+ rtf_doc.write_rtf(output_file)
337
+
338
+ return output_file
339
+
340
+
341
def study_plan_to_ae_listing(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Generate AE listing RTF outputs for all analyses defined in StudyPlan.

    The expanded plan is read from ``study_plan.get_plan_df()`` and filtered to
    rows whose ``analysis`` column equals ``"ae_listing"``. One RTF file is
    written per remaining row by delegating to ``ae_listing``.

    The treatment/group column is taken from the plan's ``group`` keyword and
    is used consistently for the displayed columns, the sort order and the
    page-by header, so plans that group by a variable other than ``TRT01A``
    sort and paginate on the column that is actually selected.

    Args:
        study_plan: StudyPlan object with loaded datasets and analysis specifications

    Returns:
        list[str]: List of paths to generated RTF files, in plan order

    Raises:
        ValueError: If a plan row for this analysis does not specify a group.
    """

    # Meta data
    analysis = "ae_listing"
    output_dir = study_plan.output_dir
    col_rel_width = [1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0]
    footnote = None
    source = None

    population_df_name = "adsl"
    observation_df_name = "adae"

    # (variable name, display label) of the subject identifier.
    # Named id_column rather than id to avoid shadowing the builtin.
    id_column = ("USUBJID", "Subject ID")
    id_var_name = id_column[0]

    # Column configuration with labels - easy to customize
    # Population columns (demographics) - group variable is appended per analysis
    population_columns_base = [
        ("AGE", "Age"),
        ("SEX", "Sex"),
        ("RACE", "Race"),
    ]

    # Observation columns (event details)
    observation_columns_base = [
        ("AEDECOD", "Adverse Event"),
        ("ASTDY", "Study Day"),
        ("ADURN", "Duration (days)"),
        ("AESEV", "Severity"),
        ("AESER", "Serious"),
        ("AEREL", "Related"),
        ("AEACN", "Action Taken"),
        ("AEOUT", "Outcome"),
    ]

    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Initialize parser
    parser = StudyPlanParser(study_plan)

    # Get expanded plan DataFrame and keep only the AE listing analyses
    plan_df = study_plan.get_plan_df()
    ae_plans = plan_df.filter(pl.col("analysis") == analysis)

    rtf_files: list[str] = []

    # Generate RTF for each analysis
    for row in ae_plans.iter_rows(named=True):
        population = row["population"]
        observation = row.get("observation")
        parameter = row.get("parameter")
        group = row.get("group")

        # Validate group is specified
        if group is None:
            raise ValueError(
                f"Group not specified in YAML for analysis: population={population}, "
                f"observation={observation}, parameter={parameter}. "
                "Please add group to your YAML plan."
            )

        # Get datasets using parser
        population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)

        # Get filters using parser
        population_filter = parser.get_population_filter(population)
        obs_filter = parser.get_observation_filter(observation)

        # Get group variable name from YAML
        group_var_name, group_labels = parser.get_group_info(group)

        # Determine group variable label
        # NOTE(review): this uses the first entry of group_labels as the column
        # header - confirm it is meant as the variable label and not the label
        # of the first group level.
        group_var_label = group_labels[0] if group_labels else "Treatment"

        # Build columns dynamically from base configuration with labels.
        # Fresh lists are created per analysis so no call shares mutable state.
        population_columns = population_columns_base + [(group_var_name, group_var_label)]
        observation_columns = list(observation_columns_base)

        # Sorting / paging configuration follows the group variable resolved
        # above instead of a hard-coded "TRT01A", so it always refers to a
        # column that is part of population_columns.
        sort_columns = [group_var_name, id_var_name, "ASTDY"]
        page_by = [id_var_name, "SEX", "RACE", "AGE", group_var_name]
        group_by = [id_var_name]

        # Get parameter filter if parameter is specified
        parameter_filter = None
        if parameter:
            # Only the filters are needed here; names, labels and indent are ignored
            _, param_filters, _, _ = parser.get_parameter_info(parameter)
            # For ae_listing, use the first (and typically only) filter
            parameter_filter = param_filters[0] if param_filters else None

        # Build dynamic title based on parameter
        param = study_plan.keywords.get_parameter(parameter) if parameter else None
        dynamic_title = get_ae_parameter_title(param, prefix="Listing of Participants With")

        # Build title with population and observation context
        title_parts = [dynamic_title]
        if observation:
            obs_kw = study_plan.keywords.observations.get(observation)
            if obs_kw and obs_kw.label:
                title_parts.append(obs_kw.label)

        pop_kw = study_plan.keywords.populations.get(population)
        if pop_kw and pop_kw.label:
            title_parts.append(pop_kw.label)

        # Build output filename: <analysis>_<population>[_<observation>][_<parameter>].rtf
        filename = f"{analysis}_{population}"
        if observation:
            filename += f"_{observation}"
        if parameter:
            # ';' separates combined parameters in the plan; keep the file name flat
            filename += f"_{parameter.replace(';', '_')}"
        filename += ".rtf"
        output_file = str(Path(output_dir) / filename)

        # Generate RTF
        rtf_path = ae_listing(
            population=population_df,
            observation=observation_df,
            population_filter=population_filter,
            observation_filter=obs_filter,
            parameter_filter=parameter_filter,
            id=id_column,
            title=title_parts,
            footnote=footnote,
            source=source,
            output_file=output_file,
            population_columns=population_columns,
            observation_columns=observation_columns,
            sort_columns=sort_columns,
            col_rel_width=col_rel_width,
            group_by=group_by,
            page_by=page_by,
        )

        rtf_files.append(rtf_path)

    return rtf_files