csrlite 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/ae/ae_summary.py CHANGED
@@ -1,401 +1,401 @@
1
- # pyre-strict
2
- """
3
- Adverse Event (AE) Analysis Functions
4
-
5
- This module provides a three-step pipeline for AE summary analysis:
6
- - ae_summary_ard: Generate Analysis Results Data (ARD) in long format
7
- - ae_summary_df: Transform ARD to wide display format
8
- - ae_summary_rtf: Generate formatted RTF output
9
- - ae_summary: Complete pipeline wrapper
10
- - study_plan_to_ae_summary: Batch generation from StudyPlan
11
-
12
- Uses Polars native SQL capabilities for data manipulation, count.py utilities for subject counting,
13
- and parse.py utilities for StudyPlan parsing.
14
- """
15
-
16
- from pathlib import Path
17
-
18
- import polars as pl
19
- from rtflite import RTFDocument
20
-
21
- from ..common.count import count_subject, count_subject_with_observation
22
- from ..common.parse import StudyPlanParser
23
- from ..common.plan import StudyPlan
24
- from ..common.rtf import create_rtf_table_n_pct
25
- from ..common.utils import apply_common_filters
26
-
27
-
28
- def study_plan_to_ae_summary(
29
- study_plan: StudyPlan,
30
- ) -> list[str]:
31
- """
32
- Generate AE summary RTF outputs for all analyses defined in StudyPlan.
33
-
34
- This function reads the expanded plan from StudyPlan and generates
35
- an RTF table for each analysis specification automatically.
36
-
37
- Args:
38
- study_plan: StudyPlan object with loaded datasets and analysis specifications
39
-
40
- Returns:
41
- list[str]: List of paths to generated RTF files
42
- """
43
-
44
- # Meta data
45
- analysis = "ae_summary"
46
- analysis_label = "Analysis of Adverse Event Summary"
47
- output_dir = study_plan.output_dir
48
- footnote = ["Every participant is counted a single time for each applicable row and column."]
49
- source = None
50
-
51
- population_df_name = "adsl"
52
- observation_df_name = "adae"
53
-
54
- id = ("USUBJID", "Subject ID")
55
- total = True
56
- missing_group = "error"
57
-
58
- # Create output directory if it doesn't exist
59
- Path(output_dir).mkdir(parents=True, exist_ok=True)
60
-
61
- # Initialize parser
62
- parser = StudyPlanParser(study_plan)
63
-
64
- # Get expanded plan DataFrame
65
- plan_df = study_plan.get_plan_df()
66
-
67
- # Filter for AE summary analyses
68
- ae_plans = plan_df.filter(pl.col("analysis") == analysis)
69
-
70
- rtf_files = []
71
-
72
- # Generate RTF for each analysis
73
- for row in ae_plans.iter_rows(named=True):
74
- population = row["population"]
75
- observation = row.get("observation")
76
- parameter = row["parameter"]
77
- group = row.get("group")
78
-
79
- # Validate group is specified
80
- if group is None:
81
- raise ValueError(
82
- f"Group not specified in YAML "
83
- f"population={population}, observation={observation}, parameter={parameter}. "
84
- "Please add group to your YAML plan."
85
- )
86
-
87
- # Get datasets using parser
88
- population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)
89
-
90
- # Get filters and configuration using parser
91
- population_filter = parser.get_population_filter(population)
92
- param_names, param_filters, param_labels, _ = parser.get_parameter_info(
93
- parameter
94
- ) # Ignore indent for AE
95
- obs_filter = parser.get_observation_filter(observation)
96
- group_var_name, group_labels = parser.get_group_info(group)
97
-
98
- # Build variables as list of tuples [(filter, label)]
99
- variables_list = list(zip(param_filters, param_labels))
100
-
101
- # Build group tuple (variable_name, label)
102
- group_var_label = group_labels[0] if group_labels else group_var_name
103
- group_tuple = (group_var_name, group_var_label)
104
-
105
- # Build title with population and observation context
106
- title_parts = [analysis_label]
107
- if observation:
108
- obs_kw = study_plan.keywords.observations.get(observation)
109
- if obs_kw and obs_kw.label:
110
- title_parts.append(obs_kw.label)
111
-
112
- pop_kw = study_plan.keywords.populations.get(population)
113
- if pop_kw and pop_kw.label:
114
- title_parts.append(pop_kw.label)
115
-
116
- # Build output filename
117
- filename = f"{analysis}_{population}"
118
- if observation:
119
- filename += f"_{observation}"
120
- filename += f"_{parameter.replace(';', '_')}.rtf"
121
- output_file = str(Path(output_dir) / filename)
122
-
123
- # Generate RTF using the new ae_summary signature
124
- rtf_path = ae_summary(
125
- population=population_df,
126
- observation=observation_df,
127
- population_filter=population_filter,
128
- observation_filter=obs_filter,
129
- id=id,
130
- group=group_tuple,
131
- variables=variables_list,
132
- title=title_parts,
133
- footnote=footnote,
134
- source=source,
135
- output_file=output_file,
136
- total=total,
137
- missing_group=missing_group,
138
- )
139
-
140
- rtf_files.append(rtf_path)
141
-
142
- return rtf_files
143
-
144
-
145
- def ae_summary(
146
- population: pl.DataFrame,
147
- observation: pl.DataFrame,
148
- population_filter: str | None,
149
- observation_filter: str | None,
150
- id: tuple[str, str],
151
- group: tuple[str, str],
152
- variables: list[tuple[str, str]],
153
- title: list[str],
154
- footnote: list[str] | None,
155
- source: list[str] | None,
156
- output_file: str,
157
- total: bool = True,
158
- col_rel_width: list[float] | None = None,
159
- missing_group: str = "error",
160
- ) -> str:
161
- """
162
- Complete AE summary pipeline wrapper.
163
-
164
- This function orchestrates the three-step pipeline:
165
- 1. ae_summary_ard: Generate Analysis Results Data
166
- 2. ae_summary_df: Transform to display format
167
- 3. ae_summary_rtf: Generate RTF output and write to file
168
-
169
- Args:
170
- population: Population DataFrame (subject-level data, e.g., ADSL)
171
- observation: Observation DataFrame (event data, e.g., ADAE)
172
- population_filter: SQL WHERE clause for population (can be None)
173
- observation_filter: SQL WHERE clause for observation (can be None)
174
- id: Tuple (variable_name, label) for ID column
175
- group: Tuple (variable_name, label) for grouping variable
176
- variables: List of tuples [(filter, label)] for analysis variables
177
- title: Title for RTF output as list of strings
178
- footnote: Optional footnote for RTF output as list of strings
179
- source: Optional source for RTF output as list of strings
180
- output_file: File path to write RTF output
181
- total: Whether to include total column (default: True)
182
- col_rel_width: Optional column widths for RTF output
183
- missing_group: How to handle missing group values (default: "error")
184
-
185
- Returns:
186
- str: Path to the generated RTF file
187
- """
188
- # Step 1: Generate ARD
189
- ard = ae_summary_ard(
190
- population=population,
191
- observation=observation,
192
- population_filter=population_filter,
193
- observation_filter=observation_filter,
194
- id=id,
195
- group=group,
196
- variables=variables,
197
- total=total,
198
- missing_group=missing_group,
199
- )
200
-
201
- # Step 2: Transform to display format
202
- df = ae_summary_df(ard)
203
-
204
- # Step 3: Generate RTF and write to file
205
- rtf_doc = ae_summary_rtf(
206
- df=df,
207
- title=title,
208
- footnote=footnote,
209
- source=source,
210
- col_rel_width=col_rel_width,
211
- )
212
- rtf_doc.write_rtf(output_file)
213
-
214
- return output_file
215
-
216
-
217
- def ae_summary_ard(
218
- population: pl.DataFrame,
219
- observation: pl.DataFrame,
220
- population_filter: str | None,
221
- observation_filter: str | None,
222
- id: tuple[str, str],
223
- group: tuple[str, str],
224
- variables: list[tuple[str, str]],
225
- total: bool,
226
- missing_group: str,
227
- ) -> pl.DataFrame:
228
- """
229
- Generate Analysis Results Data (ARD) for AE summary analysis.
230
-
231
- Creates a long-format DataFrame with standardized structure (__index__, __group__, __value__)
232
- containing population counts and observation statistics for each analysis variable.
233
-
234
- Args:
235
- population: Population DataFrame (subject-level data, e.g., ADSL)
236
- observation: Observation DataFrame (event data, e.g., ADAE)
237
- population_filter: SQL WHERE clause for population (can be None)
238
- observation_filter: SQL WHERE clause for observation (can be None)
239
- id: Tuple (variable_name, label) for ID column
240
- group: Tuple (variable_name, label) for grouping variable
241
- variables: List of tuples [(filter, label)] for analysis variables
242
- total: Whether to include total column in counts
243
- missing_group: How to handle missing group values: "error", "ignore", or "fill"
244
-
245
- Returns:
246
- pl.DataFrame: Long-format ARD with columns __index__, __group__, __value__
247
- """
248
- # Extract group variable name (label is in tuple but not needed separately)
249
- pop_var_name = "Participants in population"
250
- id_var_name, id_var_label = id
251
- group_var_name, group_var_label = group
252
-
253
- # Apply common filters (parameter_filter is handled inside the loop, so None here)
254
- population_filtered, observation_to_filter = apply_common_filters(
255
- population=population,
256
- observation=observation,
257
- population_filter=population_filter,
258
- observation_filter=observation_filter,
259
- )
260
-
261
- assert observation_to_filter is not None
262
-
263
- # Filter observation data to include only subjects in the filtered population
264
- # Process all variables in the list
265
- observation_filtered_list = []
266
- for variable_filter, variable_label in variables:
267
- obs_filtered = (
268
- observation_to_filter.filter(
269
- pl.col(id_var_name).is_in(population_filtered[id_var_name].to_list())
270
- )
271
- .filter(pl.sql_expr(variable_filter))
272
- .with_columns(pl.lit(variable_label).alias("__index__"))
273
- )
274
-
275
- observation_filtered_list.append(obs_filtered)
276
-
277
- # Concatenate all filtered observations
278
- observation_filtered = pl.concat(observation_filtered_list)
279
-
280
- # Population
281
- n_pop = count_subject(
282
- population=population_filtered,
283
- id=id_var_name,
284
- group=group_var_name,
285
- total=total,
286
- missing_group=missing_group,
287
- )
288
-
289
- n_pop = n_pop.select(
290
- pl.lit(pop_var_name).alias("__index__"),
291
- pl.col(group_var_name).alias("__group__"),
292
- pl.col("n_subj_pop").cast(pl.String).alias("__value__"),
293
- )
294
-
295
- # Empty row with same structure as n_pop but with empty strings
296
- n_empty = n_pop.select(
297
- pl.lit("").alias("__index__"), pl.col("__group__"), pl.lit("").alias("__value__")
298
- )
299
-
300
- # Observation
301
- n_obs = count_subject_with_observation(
302
- population=population_filtered,
303
- observation=observation_filtered,
304
- id=id_var_name,
305
- group=group_var_name,
306
- total=total,
307
- variable="__index__",
308
- missing_group=missing_group,
309
- )
310
-
311
- n_obs = n_obs.select(
312
- pl.col("__index__"),
313
- pl.col(group_var_name).alias("__group__"),
314
- pl.col("n_pct_subj_fmt").alias("__value__"),
315
- )
316
-
317
- res = pl.concat([n_pop, n_empty, n_obs])
318
-
319
- # Convert __index__ to ordered Enum based on appearance
320
- # Build the ordered categories list: population name, empty string, then variable labels
321
- variable_labels = [label for _, label in variables]
322
- ordered_categories = [pop_var_name, ""] + variable_labels
323
-
324
- res = res.with_columns(pl.col("__index__").cast(pl.Enum(ordered_categories))).sort(
325
- "__index__", "__group__"
326
- )
327
-
328
- return res
329
-
330
-
331
- def ae_summary_df(ard: pl.DataFrame) -> pl.DataFrame:
332
- """
333
- Transform AE summary ARD (Analysis Results Data) into display-ready DataFrame.
334
-
335
- Converts the long-format ARD with __index__, __group__, and __value__ columns
336
- into a wide-format display table where groups become columns.
337
-
338
- Args:
339
- ard: Analysis Results Data DataFrame with __index__, __group__, __value__ columns
340
-
341
- Returns:
342
- pl.DataFrame: Wide-format display table with groups as columns
343
- """
344
- # Pivot from long to wide format: __group__ values become columns
345
- df_wide = ard.pivot(index="__index__", on="__group__", values="__value__")
346
-
347
- return df_wide
348
-
349
-
350
- def ae_summary_rtf(
351
- df: pl.DataFrame,
352
- title: list[str],
353
- footnote: list[str] | None,
354
- source: list[str] | None,
355
- col_rel_width: list[float] | None = None,
356
- ) -> RTFDocument:
357
- """
358
- Generate RTF table from AE summary display DataFrame.
359
-
360
- Creates a formatted RTF table with two-level column headers showing
361
- treatment groups with "n (%)" values.
362
-
363
- Args:
364
- df: Display DataFrame from ae_summary_df (wide format with __index__ column)
365
- title: Title(s) for the table as list of strings
366
- footnote: Optional footnote(s) as list of strings
367
- source: Optional source note(s) as list of strings
368
- col_rel_width: Optional list of relative column widths. If None, auto-calculated
369
- as [n_cols-1, 1, 1, 1, ...] where n_cols is total column count
370
-
371
- Returns:
372
- RTFDocument: RTF document object that can be written to file
373
- """
374
-
375
- # Rename __index__ to empty string for display
376
- df_rtf = df.rename({"__index__": ""})
377
-
378
- # Calculate number of columns
379
- n_cols = len(df_rtf.columns)
380
-
381
- # Build first-level column headers (use actual column names)
382
- col_header_1 = list(df_rtf.columns)
383
-
384
- # Build second-level column headers (empty for first, "n (%)" for groups)
385
- col_header_2 = [""] + ["n (%)"] * (n_cols - 1)
386
-
387
- # Calculate column widths - auto-calculate if not provided
388
- if col_rel_width is None:
389
- col_widths = [float(n_cols - 1)] + [1.0] * (n_cols - 1)
390
- else:
391
- col_widths = col_rel_width
392
-
393
- return create_rtf_table_n_pct(
394
- df=df_rtf,
395
- col_header_1=col_header_1,
396
- col_header_2=col_header_2,
397
- col_widths=col_widths,
398
- title=title,
399
- footnote=footnote,
400
- source=source,
401
- )
1
+ # pyre-strict
2
+ """
3
+ Adverse Event (AE) Analysis Functions
4
+
5
+ This module provides a three-step pipeline for AE summary analysis:
6
+ - ae_summary_ard: Generate Analysis Results Data (ARD) in long format
7
+ - ae_summary_df: Transform ARD to wide display format
8
+ - ae_summary_rtf: Generate formatted RTF output
9
+ - ae_summary: Complete pipeline wrapper
10
+ - study_plan_to_ae_summary: Batch generation from StudyPlan
11
+
12
+ Uses Polars native SQL capabilities for data manipulation, count.py utilities for subject counting,
13
+ and parse.py utilities for StudyPlan parsing.
14
+ """
15
+
16
+ from pathlib import Path
17
+
18
+ import polars as pl
19
+ from rtflite import RTFDocument
20
+
21
+ from ..common.count import count_subject, count_subject_with_observation
22
+ from ..common.parse import StudyPlanParser
23
+ from ..common.plan import StudyPlan
24
+ from ..common.rtf import create_rtf_table_n_pct
25
+ from ..common.utils import apply_common_filters
26
+
27
+
28
+ def study_plan_to_ae_summary(
29
+ study_plan: StudyPlan,
30
+ ) -> list[str]:
31
+ """
32
+ Generate AE summary RTF outputs for all analyses defined in StudyPlan.
33
+
34
+ This function reads the expanded plan from StudyPlan and generates
35
+ an RTF table for each analysis specification automatically.
36
+
37
+ Args:
38
+ study_plan: StudyPlan object with loaded datasets and analysis specifications
39
+
40
+ Returns:
41
+ list[str]: List of paths to generated RTF files
42
+ """
43
+
44
+ # Meta data
45
+ analysis = "ae_summary"
46
+ analysis_label = "Analysis of Adverse Event Summary"
47
+ output_dir = study_plan.output_dir
48
+ footnote = ["Every participant is counted a single time for each applicable row and column."]
49
+ source = None
50
+
51
+ population_df_name = "adsl"
52
+ observation_df_name = "adae"
53
+
54
+ id = ("USUBJID", "Subject ID")
55
+ total = True
56
+ missing_group = "error"
57
+
58
+ # Create output directory if it doesn't exist
59
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
60
+
61
+ # Initialize parser
62
+ parser = StudyPlanParser(study_plan)
63
+
64
+ # Get expanded plan DataFrame
65
+ plan_df = study_plan.get_plan_df()
66
+
67
+ # Filter for AE summary analyses
68
+ ae_plans = plan_df.filter(pl.col("analysis") == analysis)
69
+
70
+ rtf_files = []
71
+
72
+ # Generate RTF for each analysis
73
+ for row in ae_plans.iter_rows(named=True):
74
+ population = row["population"]
75
+ observation = row.get("observation")
76
+ parameter = row["parameter"]
77
+ group = row.get("group")
78
+
79
+ # Validate group is specified
80
+ if group is None:
81
+ raise ValueError(
82
+ f"Group not specified in YAML "
83
+ f"population={population}, observation={observation}, parameter={parameter}. "
84
+ "Please add group to your YAML plan."
85
+ )
86
+
87
+ # Get datasets using parser
88
+ population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)
89
+
90
+ # Get filters and configuration using parser
91
+ population_filter = parser.get_population_filter(population)
92
+ param_names, param_filters, param_labels, _ = parser.get_parameter_info(
93
+ parameter
94
+ ) # Ignore indent for AE
95
+ obs_filter = parser.get_observation_filter(observation)
96
+ group_var_name, group_labels = parser.get_group_info(group)
97
+
98
+ # Build variables as list of tuples [(filter, label)]
99
+ variables_list = list(zip(param_filters, param_labels))
100
+
101
+ # Build group tuple (variable_name, label)
102
+ group_var_label = group_labels[0] if group_labels else group_var_name
103
+ group_tuple = (group_var_name, group_var_label)
104
+
105
+ # Build title with population and observation context
106
+ title_parts = [analysis_label]
107
+ if observation:
108
+ obs_kw = study_plan.keywords.observations.get(observation)
109
+ if obs_kw and obs_kw.label:
110
+ title_parts.append(obs_kw.label)
111
+
112
+ pop_kw = study_plan.keywords.populations.get(population)
113
+ if pop_kw and pop_kw.label:
114
+ title_parts.append(pop_kw.label)
115
+
116
+ # Build output filename
117
+ filename = f"{analysis}_{population}"
118
+ if observation:
119
+ filename += f"_{observation}"
120
+ filename += f"_{parameter.replace(';', '_')}.rtf"
121
+ output_file = str(Path(output_dir) / filename)
122
+
123
+ # Generate RTF using the new ae_summary signature
124
+ rtf_path = ae_summary(
125
+ population=population_df,
126
+ observation=observation_df,
127
+ population_filter=population_filter,
128
+ observation_filter=obs_filter,
129
+ id=id,
130
+ group=group_tuple,
131
+ variables=variables_list,
132
+ title=title_parts,
133
+ footnote=footnote,
134
+ source=source,
135
+ output_file=output_file,
136
+ total=total,
137
+ missing_group=missing_group,
138
+ )
139
+
140
+ rtf_files.append(rtf_path)
141
+
142
+ return rtf_files
143
+
144
+
145
+ def ae_summary(
146
+ population: pl.DataFrame,
147
+ observation: pl.DataFrame,
148
+ population_filter: str | None,
149
+ observation_filter: str | None,
150
+ id: tuple[str, str],
151
+ group: tuple[str, str],
152
+ variables: list[tuple[str, str]],
153
+ title: list[str],
154
+ footnote: list[str] | None,
155
+ source: list[str] | None,
156
+ output_file: str,
157
+ total: bool = True,
158
+ col_rel_width: list[float] | None = None,
159
+ missing_group: str = "error",
160
+ ) -> str:
161
+ """
162
+ Complete AE summary pipeline wrapper.
163
+
164
+ This function orchestrates the three-step pipeline:
165
+ 1. ae_summary_ard: Generate Analysis Results Data
166
+ 2. ae_summary_df: Transform to display format
167
+ 3. ae_summary_rtf: Generate RTF output and write to file
168
+
169
+ Args:
170
+ population: Population DataFrame (subject-level data, e.g., ADSL)
171
+ observation: Observation DataFrame (event data, e.g., ADAE)
172
+ population_filter: SQL WHERE clause for population (can be None)
173
+ observation_filter: SQL WHERE clause for observation (can be None)
174
+ id: Tuple (variable_name, label) for ID column
175
+ group: Tuple (variable_name, label) for grouping variable
176
+ variables: List of tuples [(filter, label)] for analysis variables
177
+ title: Title for RTF output as list of strings
178
+ footnote: Optional footnote for RTF output as list of strings
179
+ source: Optional source for RTF output as list of strings
180
+ output_file: File path to write RTF output
181
+ total: Whether to include total column (default: True)
182
+ col_rel_width: Optional column widths for RTF output
183
+ missing_group: How to handle missing group values (default: "error")
184
+
185
+ Returns:
186
+ str: Path to the generated RTF file
187
+ """
188
+ # Step 1: Generate ARD
189
+ ard = ae_summary_ard(
190
+ population=population,
191
+ observation=observation,
192
+ population_filter=population_filter,
193
+ observation_filter=observation_filter,
194
+ id=id,
195
+ group=group,
196
+ variables=variables,
197
+ total=total,
198
+ missing_group=missing_group,
199
+ )
200
+
201
+ # Step 2: Transform to display format
202
+ df = ae_summary_df(ard)
203
+
204
+ # Step 3: Generate RTF and write to file
205
+ rtf_doc = ae_summary_rtf(
206
+ df=df,
207
+ title=title,
208
+ footnote=footnote,
209
+ source=source,
210
+ col_rel_width=col_rel_width,
211
+ )
212
+ rtf_doc.write_rtf(output_file)
213
+
214
+ return output_file
215
+
216
+
217
+ def ae_summary_ard(
218
+ population: pl.DataFrame,
219
+ observation: pl.DataFrame,
220
+ population_filter: str | None,
221
+ observation_filter: str | None,
222
+ id: tuple[str, str],
223
+ group: tuple[str, str],
224
+ variables: list[tuple[str, str]],
225
+ total: bool,
226
+ missing_group: str,
227
+ ) -> pl.DataFrame:
228
+ """
229
+ Generate Analysis Results Data (ARD) for AE summary analysis.
230
+
231
+ Creates a long-format DataFrame with standardized structure (__index__, __group__, __value__)
232
+ containing population counts and observation statistics for each analysis variable.
233
+
234
+ Args:
235
+ population: Population DataFrame (subject-level data, e.g., ADSL)
236
+ observation: Observation DataFrame (event data, e.g., ADAE)
237
+ population_filter: SQL WHERE clause for population (can be None)
238
+ observation_filter: SQL WHERE clause for observation (can be None)
239
+ id: Tuple (variable_name, label) for ID column
240
+ group: Tuple (variable_name, label) for grouping variable
241
+ variables: List of tuples [(filter, label)] for analysis variables
242
+ total: Whether to include total column in counts
243
+ missing_group: How to handle missing group values: "error", "ignore", or "fill"
244
+
245
+ Returns:
246
+ pl.DataFrame: Long-format ARD with columns __index__, __group__, __value__
247
+ """
248
+ # Extract group variable name (label is in tuple but not needed separately)
249
+ pop_var_name = "Participants in population"
250
+ id_var_name, id_var_label = id
251
+ group_var_name, group_var_label = group
252
+
253
+ # Apply common filters (parameter_filter is handled inside the loop, so None here)
254
+ population_filtered, observation_to_filter = apply_common_filters(
255
+ population=population,
256
+ observation=observation,
257
+ population_filter=population_filter,
258
+ observation_filter=observation_filter,
259
+ )
260
+
261
+ assert observation_to_filter is not None
262
+
263
+ # Filter observation data to include only subjects in the filtered population
264
+ # Process all variables in the list
265
+ observation_filtered_list = []
266
+ for variable_filter, variable_label in variables:
267
+ obs_filtered = (
268
+ observation_to_filter.filter(
269
+ pl.col(id_var_name).is_in(population_filtered[id_var_name].to_list())
270
+ )
271
+ .filter(pl.sql_expr(variable_filter))
272
+ .with_columns(pl.lit(variable_label).alias("__index__"))
273
+ )
274
+
275
+ observation_filtered_list.append(obs_filtered)
276
+
277
+ # Concatenate all filtered observations
278
+ observation_filtered = pl.concat(observation_filtered_list)
279
+
280
+ # Population
281
+ n_pop = count_subject(
282
+ population=population_filtered,
283
+ id=id_var_name,
284
+ group=group_var_name,
285
+ total=total,
286
+ missing_group=missing_group,
287
+ )
288
+
289
+ n_pop = n_pop.select(
290
+ pl.lit(pop_var_name).alias("__index__"),
291
+ pl.col(group_var_name).alias("__group__"),
292
+ pl.col("n_subj_pop").cast(pl.String).alias("__value__"),
293
+ )
294
+
295
+ # Empty row with same structure as n_pop but with empty strings
296
+ n_empty = n_pop.select(
297
+ pl.lit("").alias("__index__"), pl.col("__group__"), pl.lit("").alias("__value__")
298
+ )
299
+
300
+ # Observation
301
+ n_obs = count_subject_with_observation(
302
+ population=population_filtered,
303
+ observation=observation_filtered,
304
+ id=id_var_name,
305
+ group=group_var_name,
306
+ total=total,
307
+ variable="__index__",
308
+ missing_group=missing_group,
309
+ )
310
+
311
+ n_obs = n_obs.select(
312
+ pl.col("__index__"),
313
+ pl.col(group_var_name).alias("__group__"),
314
+ pl.col("n_pct_subj_fmt").alias("__value__"),
315
+ )
316
+
317
+ res = pl.concat([n_pop, n_empty, n_obs])
318
+
319
+ # Convert __index__ to ordered Enum based on appearance
320
+ # Build the ordered categories list: population name, empty string, then variable labels
321
+ variable_labels = [label for _, label in variables]
322
+ ordered_categories = [pop_var_name, ""] + variable_labels
323
+
324
+ res = res.with_columns(pl.col("__index__").cast(pl.Enum(ordered_categories))).sort(
325
+ "__index__", "__group__"
326
+ )
327
+
328
+ return res
329
+
330
+
331
+ def ae_summary_df(ard: pl.DataFrame) -> pl.DataFrame:
332
+ """
333
+ Transform AE summary ARD (Analysis Results Data) into display-ready DataFrame.
334
+
335
+ Converts the long-format ARD with __index__, __group__, and __value__ columns
336
+ into a wide-format display table where groups become columns.
337
+
338
+ Args:
339
+ ard: Analysis Results Data DataFrame with __index__, __group__, __value__ columns
340
+
341
+ Returns:
342
+ pl.DataFrame: Wide-format display table with groups as columns
343
+ """
344
+ # Pivot from long to wide format: __group__ values become columns
345
+ df_wide = ard.pivot(index="__index__", on="__group__", values="__value__")
346
+
347
+ return df_wide
348
+
349
+
350
+ def ae_summary_rtf(
351
+ df: pl.DataFrame,
352
+ title: list[str],
353
+ footnote: list[str] | None,
354
+ source: list[str] | None,
355
+ col_rel_width: list[float] | None = None,
356
+ ) -> RTFDocument:
357
+ """
358
+ Generate RTF table from AE summary display DataFrame.
359
+
360
+ Creates a formatted RTF table with two-level column headers showing
361
+ treatment groups with "n (%)" values.
362
+
363
+ Args:
364
+ df: Display DataFrame from ae_summary_df (wide format with __index__ column)
365
+ title: Title(s) for the table as list of strings
366
+ footnote: Optional footnote(s) as list of strings
367
+ source: Optional source note(s) as list of strings
368
+ col_rel_width: Optional list of relative column widths. If None, auto-calculated
369
+ as [n_cols-1, 1, 1, 1, ...] where n_cols is total column count
370
+
371
+ Returns:
372
+ RTFDocument: RTF document object that can be written to file
373
+ """
374
+
375
+ # Rename __index__ to empty string for display
376
+ df_rtf = df.rename({"__index__": ""})
377
+
378
+ # Calculate number of columns
379
+ n_cols = len(df_rtf.columns)
380
+
381
+ # Build first-level column headers (use actual column names)
382
+ col_header_1 = list(df_rtf.columns)
383
+
384
+ # Build second-level column headers (empty for first, "n (%)" for groups)
385
+ col_header_2 = [""] + ["n (%)"] * (n_cols - 1)
386
+
387
+ # Calculate column widths - auto-calculate if not provided
388
+ if col_rel_width is None:
389
+ col_widths = [float(n_cols - 1)] + [1.0] * (n_cols - 1)
390
+ else:
391
+ col_widths = col_rel_width
392
+
393
+ return create_rtf_table_n_pct(
394
+ df=df_rtf,
395
+ col_header_1=col_header_1,
396
+ col_header_2=col_header_2,
397
+ col_widths=col_widths,
398
+ title=title,
399
+ footnote=footnote,
400
+ source=source,
401
+ )