csrlite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/__init__.py ADDED
@@ -0,0 +1,50 @@
1
+ from .ae.ae_listing import (
2
+ # AE listing functions
3
+ ae_listing,
4
+ study_plan_to_ae_listing,
5
+ )
6
+ from .ae.ae_specific import (
7
+ # AE specific functions
8
+ ae_specific,
9
+ study_plan_to_ae_specific,
10
+ )
11
+ from .ae.ae_summary import (
12
+ # AE summary functions
13
+ ae_summary,
14
+ study_plan_to_ae_summary,
15
+ )
16
+ from .common.count import (
17
+ count_subject,
18
+ count_subject_with_observation,
19
+ )
20
+ from .common.parse import (
21
+ StudyPlanParser,
22
+ parse_filter_to_sql,
23
+ )
24
+ from .common.plan import (
25
+ # Core classes
26
+ load_plan,
27
+ )
28
+ from .disposition.disposition import study_plan_to_disposition_summary
29
+
30
+ # Main exports for common usage
31
# Public API of the package: the names exported by `from csrlite import *`.
# Every entry is imported at the top of this module.
__all__ = [
    # Primary user interface
    "load_plan",
    # AE analysis (direct pipeline wrappers)
    "ae_summary",
    "ae_specific",
    "ae_listing",
    # AE analysis (StudyPlan integration)
    "study_plan_to_ae_summary",
    "study_plan_to_ae_specific",
    "study_plan_to_ae_listing",
    # Disposition analysis
    "study_plan_to_disposition_summary",
    # Count functions
    "count_subject",
    "count_subject_with_observation",
    # Parse utilities
    "StudyPlanParser",
    "parse_filter_to_sql",
]
csrlite/ae/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # pyre-strict
@@ -0,0 +1,492 @@
1
+ # pyre-strict
2
+ """
3
+ Adverse Event (AE) Listing Functions
4
+
5
+ This module provides functions for generating detailed AE listings showing individual
6
+ adverse event records with key details like severity, relationship, and outcomes.
7
+
8
+ Pipeline functions (two steps, plus convenience wrappers):
9
+ - ae_listing_ard: Filter, join, select, and sort columns (returns display-ready data)
10
+ - ae_listing_rtf: Generate formatted RTF output
11
+ - ae_listing: Complete pipeline wrapper
12
+ - study_plan_to_ae_listing: Batch generation from StudyPlan
13
+
14
+ Uses Polars native SQL capabilities for data manipulation and parse.py utilities
15
+ for StudyPlan parsing.
16
+ """
17
+
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ import polars as pl
22
+ from rtflite import RTFBody, RTFColumnHeader, RTFDocument, RTFFootnote, RTFPage, RTFSource, RTFTitle
23
+
24
+ from ..common.parse import StudyPlanParser
25
+ from ..common.plan import StudyPlan
26
+ from ..common.utils import apply_common_filters
27
+ from .ae_utils import get_ae_parameter_title
28
+
29
+
30
def ae_listing_ard(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    parameter_filter: str | None,
    id: tuple[str, str],
    population_columns: list[tuple[str, str]] | None = None,
    observation_columns: list[tuple[str, str]] | None = None,
    sort_columns: list[str] | None = None,
    page_by: list[str] | None = None,
) -> pl.DataFrame:
    """
    Generate Analysis Results Data (ARD) for AE listing.

    Filters population and observation data, keeps only observation records of
    subjects in the filtered population, selects the requested columns, adds an
    ``__index__`` paging column, sorts, and finally drops the page-by columns.

    Args:
        population: Population DataFrame (subject-level data, e.g., ADSL)
        observation: Observation DataFrame (event data, e.g., ADAE)
        population_filter: SQL WHERE clause for population (can be None)
        observation_filter: SQL WHERE clause for observation (can be None)
        parameter_filter: SQL WHERE clause for parameter filtering (can be None)
        id: Tuple (variable_name, label) for ID column
        population_columns: List of tuples (variable_name, label) from population
            (e.g., [("SEX", "Sex"), ("RACE", "Race")])
        observation_columns: List of tuples (variable_name, label) from observation
            (e.g., [("AEDECOD", "Adverse Event")])
        sort_columns: List of column names to sort by. If None, sorts by id column.
            Names that are not present in the joined data are ignored. Sorting is
            applied before page-by columns are dropped, so page-by columns may be
            used as sort keys.
        page_by: Optional list of column names used to build the ``__index__``
            paging label ("<label> = <value>, ..."). Names not present in the
            joined data are ignored. The page-by columns (except the id column)
            are removed from the result after ``__index__`` is built.

    Returns:
        pl.DataFrame: Filtered and joined records with ``__index__`` as the first
        column followed by the remaining selected columns.
    """
    id_var_name, id_var_label = id

    # Apply common filters
    population_filtered, observation_to_filter = apply_common_filters(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
        parameter_filter=parameter_filter,
    )

    # Filter observation to include only subjects in filtered population
    observation_filtered = observation_to_filter.filter(
        pl.col(id_var_name).is_in(population_filtered[id_var_name].to_list())
    )

    # Determine which observation columns to select; the id column always comes first
    if observation_columns is None:
        obs_cols = [id_var_name]
    else:
        obs_col_names = [var_name for var_name, _ in observation_columns]
        obs_cols = [id_var_name] + [col for col in obs_col_names if col != id_var_name]

    # Requested columns that do not exist in the data are skipped silently
    obs_cols_available = [col for col in obs_cols if col in observation_filtered.columns]
    result = observation_filtered.select(obs_cols_available)

    # Join with population to add population columns
    if population_columns is not None:
        pop_col_names = [var_name for var_name, _ in population_columns]
        pop_cols = [id_var_name] + [col for col in pop_col_names if col != id_var_name]
        pop_cols_available = [col for col in pop_cols if col in population_filtered.columns]
        population_subset = population_filtered.select(pop_cols_available)

        # Left join to preserve all observation records
        result = result.join(population_subset, on=id_var_name, how="left")

    # Create __index__ column for pagination; default to the id column
    if id_var_name in result.columns:
        result = result.with_columns(
            (pl.lit(f"{id_var_label} = ") + pl.col(id_var_name).cast(pl.Utf8)).alias("__index__")
        )

    # Use page_by columns if provided and they exist
    existing_page_by_cols = [col for col in page_by if col in result.columns] if page_by else []

    if existing_page_by_cols:
        # Map column name -> display label (falls back to the column name itself)
        column_labels = {id_var_name: id_var_label}
        if population_columns:
            for var_name, var_label in population_columns:
                column_labels[var_name] = var_label

        # Keep the label order identical to the order of columns in page_by
        index_expressions = [
            pl.lit(f"{column_labels.get(col_name, col_name)} = ") + pl.col(col_name).cast(pl.Utf8)
            for col_name in existing_page_by_cols
        ]

        result = result.with_columns(
            pl.concat_str(index_expressions, separator=", ").alias("__index__")
        )

    # Sort BEFORE dropping the page-by columns: a sort key that is also a page-by
    # column (e.g. the treatment variable) would otherwise be silently ignored.
    if sort_columns is None:
        # Default: sort by id column if it exists in result
        if id_var_name in result.columns:
            result = result.sort(id_var_name)
    else:
        # Sort by specified columns that exist in result
        cols_to_sort = [col for col in sort_columns if col in result.columns]
        if cols_to_sort:
            result = result.sort(cols_to_sort)

    # Drop only page-by columns that are actually present; dropping a missing
    # column raises in polars. The id column is always kept.
    page_by_remove = [col for col in existing_page_by_cols if col != id_var_name]
    if page_by_remove:
        result = result.drop(page_by_remove)

    # Reorder to have __index__ first
    if "__index__" in result.columns:
        other_columns = [col for col in result.columns if col != "__index__"]
        result = result.select(["__index__"] + other_columns)

    return result
153
+
154
+
155
def ae_listing_rtf(
    df: pl.DataFrame,
    column_labels: dict[str, str],
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    col_rel_width: list[float] | None = None,
    group_by: list[str] | None = None,
    page_by: list[str] | None = None,
    orientation: str = "landscape",
) -> RTFDocument:
    """
    Build an RTF document for an AE listing from a display DataFrame.

    Args:
        df: Display DataFrame from ae_listing_ard
        column_labels: Mapping of column name to header label; columns without
            an entry use the column name as header
        title: Title line(s) for the table
        footnote: Optional footnote line(s); omitted from the document when
            None or empty
        source: Optional source line(s); omitted from the document when None
            or empty
        col_rel_width: Optional relative widths, one per column of ``df``.
            If None, every column gets width 1.0
        group_by: Optional column names passed to the table body for grouping
        page_by: Optional column names passed to the table body for paging
        orientation: Page orientation ("portrait" or "landscape"), default is
            "landscape"

    Returns:
        RTFDocument: RTF document object that can be written to file
    """
    names = df.columns
    n_cols = len(names)

    # Header text per column, falling back to the raw column name
    headers = [column_labels.get(name, name) for name in names]

    # Equal widths unless the caller supplied explicit ones
    widths = [1.0] * n_cols if col_rel_width is None else col_rel_width

    # The header skips the first column's label and width.
    # NOTE(review): presumably because the first column is the `__index__`
    # page-by column, which is not rendered as a regular table column - confirm.
    # The justification list has one more entry than the header text; kept as-is.
    components: dict[str, Any] = {
        "df": df,
        "rtf_page": RTFPage(orientation=orientation),
        "rtf_title": RTFTitle(text=title),
        "rtf_column_header": [
            RTFColumnHeader(
                text=headers[1:],
                col_rel_width=widths[1:],
                text_justification=["l"] + ["c"] * (n_cols - 1),
            ),
        ],
        "rtf_body": RTFBody(
            col_rel_width=widths,
            text_justification=["l"] * n_cols,
            border_left=["single"],
            border_top=["single"] + [""] * (n_cols - 1),
            border_bottom=["single"] + [""] * (n_cols - 1),
            group_by=group_by,
            page_by=page_by,
        ),
    }

    # Footnote and source are attached only when they carry at least one line
    if footnote:
        components["rtf_footnote"] = RTFFootnote(text=footnote)
    if source:
        components["rtf_source"] = RTFSource(text=source)

    return RTFDocument(**components)
240
+
241
+
242
def ae_listing(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    parameter_filter: str | None,
    id: tuple[str, str],
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    output_file: str,
    population_columns: list[tuple[str, str]] | None = None,
    observation_columns: list[tuple[str, str]] | None = None,
    sort_columns: list[str] | None = None,
    group_by: list[str] | None = None,
    page_by: list[str] | None = None,
    col_rel_width: list[float] | None = None,
    orientation: str = "landscape",
) -> str:
    """
    Run the full AE listing pipeline and write the result to an RTF file.

    Chains the two pipeline steps:
    1. ae_listing_ard: builds the display DataFrame (including ``__index__``)
    2. ae_listing_rtf: renders it, always paging on ``__index__``

    Args:
        population: Population DataFrame (subject-level data, e.g., ADSL)
        observation: Observation DataFrame (event data, e.g., ADAE)
        population_filter: SQL WHERE clause for population (can be None)
        observation_filter: SQL WHERE clause for observation (can be None)
        parameter_filter: SQL WHERE clause for parameter filtering (can be None)
        id: Tuple (variable_name, label) for ID column
        title: Title for RTF output as list of strings
        footnote: Optional footnote for RTF output as list of strings
        source: Optional source for RTF output as list of strings
        output_file: File path to write RTF output
        population_columns: Optional list of tuples (variable_name, label) from population
        observation_columns: Optional list of tuples (variable_name, label) from observation
        sort_columns: Optional list of column names to sort by. If None, sorts by id column.
        group_by: Optional list of column names to group by for section headers
        page_by: Optional list of column names forwarded to ae_listing_ard, where
            they are used to build the ``__index__`` paging label
        col_rel_width: Optional column widths for RTF output
        orientation: Page orientation ("portrait" or "landscape"), default is "landscape"

    Returns:
        str: Path to the generated RTF file (``output_file`` unchanged)
    """
    # Step 1: display-ready data
    listing_df = ae_listing_ard(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
        parameter_filter=parameter_filter,
        id=id,
        population_columns=population_columns,
        observation_columns=observation_columns,
        sort_columns=sort_columns,
        page_by=page_by,
    )

    # Header labels. Later sources win on a name clash: id label first, then
    # observation labels, then population labels; `__index__` gets a blank header.
    id_name, id_label = id
    labels: dict[str, str] = (
        {id_name: id_label}
        | dict(observation_columns or [])
        | dict(population_columns or [])
        | {"__index__": ""}
    )

    # Step 2: render and write. Paging is driven by the `__index__` column,
    # not by the caller's raw `page_by` list.
    document = ae_listing_rtf(
        df=listing_df,
        column_labels=labels,
        title=title,
        footnote=footnote,
        source=source,
        col_rel_width=col_rel_width,
        group_by=group_by,
        page_by=["__index__"],
        orientation=orientation,
    )
    document.write_rtf(output_file)

    return output_file
337
+
338
+
339
def study_plan_to_ae_listing(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Generate AE listing RTF outputs for all analyses defined in StudyPlan.

    Reads the expanded plan from ``study_plan``, keeps the rows whose
    ``analysis`` is ``"ae_listing"``, and writes one RTF file per row into
    ``study_plan.output_dir`` (created if missing).

    Args:
        study_plan: StudyPlan object with loaded datasets and analysis specifications

    Returns:
        list[str]: List of paths to generated RTF files, in plan order

    Raises:
        ValueError: If an ae_listing row of the plan has no group specified
    """
    # Meta data
    analysis = "ae_listing"
    output_dir = study_plan.output_dir
    # Ten widths: `__index__`, the id column and the eight observation columns
    # below. The population columns are consumed as page-by columns and dropped
    # by ae_listing_ard, so they need no width.
    col_rel_width = [1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0]
    footnote = None
    source = None

    population_df_name = "adsl"
    observation_df_name = "adae"

    # Named `id_column` locally to avoid shadowing the builtin `id`
    id_column = ("USUBJID", "Subject ID")

    # Column configuration with labels - easy to customize
    # Population columns (demographics) - group variable will be added dynamically
    population_columns_base = [
        ("AGE", "Age"),
        ("SEX", "Sex"),
        ("RACE", "Race"),
    ]

    # Observation columns (event details)
    observation_columns_base = [
        ("AEDECOD", "Adverse Event"),
        ("ASTDY", "Study Day"),
        ("ADURN", "Duration (days)"),
        ("AESEV", "Severity"),
        ("AESER", "Serious"),
        ("AEREL", "Related"),
        ("AEACN", "Action Taken"),
        ("AEOUT", "Outcome"),
    ]

    group_by = ["USUBJID"]

    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Initialize parser
    parser = StudyPlanParser(study_plan)

    # Get expanded plan DataFrame and keep only the AE listing analyses
    plan_df = study_plan.get_plan_df()
    ae_plans = plan_df.filter(pl.col("analysis") == analysis)

    rtf_files: list[str] = []

    # Generate RTF for each analysis
    for row in ae_plans.iter_rows(named=True):
        population = row["population"]
        observation = row.get("observation")
        parameter = row.get("parameter")
        group = row.get("group")

        # Validate group is specified
        if group is None:
            raise ValueError(
                f"Group not specified in YAML for analysis: population={population}, "
                f"observation={observation}, parameter={parameter}. "
                "Please add group to your YAML plan."
            )

        # Get datasets using parser
        population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)

        # Get filters using parser
        population_filter = parser.get_population_filter(population)
        obs_filter = parser.get_observation_filter(observation)

        # Get group variable name from YAML
        group_var_name, group_labels = parser.get_group_info(group)

        # NOTE(review): the first group label is used as the label of the group
        # variable itself - confirm this is the intended label source.
        group_var_label = group_labels[0] if group_labels else "Treatment"

        # Build columns dynamically from base configuration with labels
        population_columns = population_columns_base + [(group_var_name, group_var_label)]
        observation_columns = observation_columns_base

        # Sort and page by the group variable resolved from the plan rather than
        # a hard-coded "TRT01A", so plans using another group variable work too.
        sort_columns = [group_var_name, "USUBJID", "ASTDY"]
        page_by = ["USUBJID", "SEX", "RACE", "AGE", group_var_name]

        # Get parameter filter if parameter is specified
        parameter_filter = None
        if parameter:
            # Only the filters are needed here; names, labels and indent are ignored
            _, param_filters, _, _ = parser.get_parameter_info(parameter)
            # For ae_listing, use the first (and typically only) filter
            parameter_filter = param_filters[0] if param_filters else None

        # Build dynamic title based on parameter
        param = study_plan.keywords.get_parameter(parameter) if parameter else None
        dynamic_title = get_ae_parameter_title(param, prefix="Listing of Participants With")

        # Build title with population and observation context
        title_parts = [dynamic_title]
        if observation:
            obs_kw = study_plan.keywords.observations.get(observation)
            if obs_kw and obs_kw.label:
                title_parts.append(obs_kw.label)

        pop_kw = study_plan.keywords.populations.get(population)
        if pop_kw and pop_kw.label:
            title_parts.append(pop_kw.label)

        # Build output filename: <analysis>_<population>[_<observation>][_<parameter>].rtf
        filename = f"{analysis}_{population}"
        if observation:
            filename += f"_{observation}"
        if parameter:
            # ';' in the parameter keyword is replaced to keep the filename simple
            filename += f"_{parameter.replace(';', '_')}"
        filename += ".rtf"
        output_file = str(Path(output_dir) / filename)

        # Generate RTF
        rtf_path = ae_listing(
            population=population_df,
            observation=observation_df,
            population_filter=population_filter,
            observation_filter=obs_filter,
            parameter_filter=parameter_filter,
            id=id_column,
            title=title_parts,
            footnote=footnote,
            source=source,
            output_file=output_file,
            population_columns=population_columns,
            observation_columns=observation_columns,
            sort_columns=sort_columns,
            col_rel_width=col_rel_width,
            group_by=group_by,
            page_by=page_by,
        )

        rtf_files.append(rtf_path)

    return rtf_files