csrlite 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csrlite/__init__.py +91 -110
- csrlite/ae/__init__.py +1 -1
- csrlite/ae/ae_listing.py +494 -494
- csrlite/ae/ae_specific.py +483 -483
- csrlite/ae/ae_summary.py +401 -401
- csrlite/ae/ae_utils.py +62 -62
- csrlite/cm/cm_listing.py +497 -497
- csrlite/cm/cm_summary.py +327 -327
- csrlite/common/config.py +34 -34
- csrlite/common/count.py +293 -293
- csrlite/common/parse.py +308 -308
- csrlite/common/plan.py +365 -365
- csrlite/common/rtf.py +166 -137
- csrlite/common/utils.py +33 -33
- csrlite/common/yaml_loader.py +71 -71
- csrlite/disposition/__init__.py +2 -2
- csrlite/disposition/disposition.py +332 -332
- csrlite/ie/{ie_summary.py → ie.py} +405 -292
- csrlite/pd/pd_listing.py +461 -461
- {csrlite-0.3.0.dist-info → csrlite-0.3.2.dist-info}/METADATA +68 -68
- csrlite-0.3.2.dist-info/RECORD +23 -0
- {csrlite-0.3.0.dist-info → csrlite-0.3.2.dist-info}/WHEEL +1 -1
- csrlite/ie/ie_listing.py +0 -109
- csrlite/mh/mh_listing.py +0 -209
- csrlite/mh/mh_summary.py +0 -333
- csrlite-0.3.0.dist-info/RECORD +0 -26
- {csrlite-0.3.0.dist-info → csrlite-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -1,332 +1,332 @@
|
|
|
1
|
-
# pyre-strict
|
|
2
|
-
"""
|
|
3
|
-
Disposition Table 1.1 Analysis Functions
|
|
4
|
-
|
|
5
|
-
This module provides a pipeline for Disposition Table 1.1 summary analysis:
|
|
6
|
-
- disposition_ard: Generate Analysis Results Data (ARD)
|
|
7
|
-
- disposition_df: Transform ARD to display format
|
|
8
|
-
- disposition_rtf: Generate formatted RTF output
|
|
9
|
-
- disposition: Complete pipeline wrapper
|
|
10
|
-
- study_plan_to_disposition_summary: Batch generation from StudyPlan
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
|
|
15
|
-
import polars as pl
|
|
16
|
-
from rtflite import RTFDocument
|
|
17
|
-
|
|
18
|
-
from ..common.count import count_subject, count_subject_with_observation
|
|
19
|
-
from ..common.parse import StudyPlanParser
|
|
20
|
-
from ..common.plan import StudyPlan
|
|
21
|
-
from ..common.rtf import create_rtf_table_n_pct
|
|
22
|
-
from ..common.utils import apply_common_filters
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def study_plan_to_disposition_summary(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Produce one Disposition Summary RTF per ``disposition_summary`` plan row.

    The expanded plan of ``study_plan`` is filtered to rows whose ``analysis``
    column equals ``"disposition_summary"``. For each such row the ``adsl``
    dataset is run through ``disposition`` and the result is written to
    ``<output_dir>/disposition_summary_<population>[_<group>].rtf``. The
    output directory is created when it does not exist.

    Args:
        study_plan: Study plan supplying ``output_dir``, the expanded plan
            table (``get_plan_df``) and the population keywords.

    Returns:
        The values returned by ``disposition`` (the output file paths), one
        per matching plan row, in the order the rows are iterated.
    """
    analysis_name = "disposition_summary"
    base_title = "Disposition of Participants"
    # One list object is reused for every generated table.
    shared_footnote = ["Percentages are based on the number of enrolled participants."]
    dataset_name = "adsl"

    # (variable name, display label) pairs handed to the pipeline.
    subject_id = ("USUBJID", "Subject ID")
    status_term = ("EOSSTT", "Disposition Status")
    reason_term = ("DCSREAS", "Discontinued Reason")

    out_dir = study_plan.output_dir
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    plan_parser = StudyPlanParser(study_plan)
    matching_rows = study_plan.get_plan_df().filter(pl.col("analysis") == analysis_name)

    written = []
    for plan_row in matching_rows.iter_rows(named=True):
        pop_name = plan_row["population"]
        group_name = plan_row.get("group")

        (adsl_df,) = plan_parser.get_datasets(dataset_name)
        pop_filter = plan_parser.get_population_filter(pop_name)

        # Without a group the pipeline falls back to a single overall column.
        group_pair = None
        if group_name is not None:
            var_name, labels = plan_parser.get_group_info(group_name)
            group_pair = (var_name, labels[0] if labels else var_name)

        # Title lines: fixed heading, then the population label when defined.
        titles = [base_title]
        pop_keyword = study_plan.keywords.populations.get(pop_name)
        if pop_keyword and pop_keyword.label:
            titles.append(pop_keyword.label)

        suffix = f"_{group_name}" if group_name else ""
        target = Path(out_dir) / f"{analysis_name}_{pop_name}{suffix}.rtf"

        written.append(
            disposition(
                population=adsl_df,
                population_filter=pop_filter,
                id=subject_id,
                group=group_pair,
                ds_term=status_term,
                dist_reason_term=reason_term,
                title=titles,
                footnote=shared_footnote,
                source=None,
                output_file=str(target),
                total=True,
                missing_group="error",
            )
        )

    return written
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def disposition(
    population: pl.DataFrame,
    population_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str] | None,
    ds_term: tuple[str, str],
    dist_reason_term: tuple[str, str],
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    output_file: str,
    total: bool = True,
    col_rel_width: list[float] | None = None,
    missing_group: str = "error",
) -> str:
    """
    Run the whole disposition pipeline and write the table to ``output_file``.

    Chains ``disposition_ard`` -> ``disposition_df`` -> ``disposition_rtf`` and
    then calls ``write_rtf`` on the resulting document.

    Args:
        population: Subject-level data passed to ``disposition_ard``.
        population_filter: Optional filter forwarded to ``disposition_ard``.
        id: Pair describing the subject identifier column.
        group: Pair describing the grouping column, or ``None``.
        ds_term: Pair describing the disposition status column.
        dist_reason_term: Pair describing the discontinuation reason column.
        title: Title lines for the RTF table.
        footnote: Footnote lines, or ``None``.
        source: Source lines, or ``None``.
        output_file: Path the RTF document is written to.
        total: Forwarded to ``disposition_ard``.
        col_rel_width: Optional relative column widths forwarded to
            ``disposition_rtf``.
        missing_group: Forwarded to ``disposition_ard``.

    Returns:
        ``output_file``, unchanged.
    """
    # Steps 1 and 2: analysis results data, then the wide display table.
    display_table = disposition_df(
        disposition_ard(
            population=population,
            population_filter=population_filter,
            id=id,
            group=group,
            ds_term=ds_term,
            dist_reason_term=dist_reason_term,
            total=total,
            missing_group=missing_group,
        )
    )

    # Step 3: render and persist.
    document = disposition_rtf(
        df=display_table,
        title=title,
        footnote=footnote,
        source=source,
        col_rel_width=col_rel_width,
    )
    document.write_rtf(output_file)

    return output_file
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
def _validate_disposition_data(df: pl.DataFrame, ds_var: str, reason_var: str) -> None:
    """
    Check that the status and reason columns are mutually consistent.

    The checks run in this order and the first failure raises:

    1. ``ds_var`` has no nulls and only holds Completed, Ongoing or
       Discontinued.
    2. Rows with status Completed/Ongoing have a reason that is null or
       equal to the status.
    3. Rows with status Discontinued have a non-null reason that is neither
       Completed nor Ongoing.

    Args:
        df: Data frame containing both columns.
        ds_var: Name of the disposition status column.
        reason_var: Name of the discontinuation reason column.

    Raises:
        ValueError: When any of the rules above is violated.
    """
    valid_statuses = ["Completed", "Ongoing", "Discontinued"]
    finished_or_active = ["Completed", "Ongoing"]

    status = pl.col(ds_var)
    reason = pl.col(reason_var)

    # Rule 1a: status must always be present.
    if df[ds_var].is_null().any():
        raise ValueError(f"Found null values in disposition status column '{ds_var}'")

    # Rule 1b: status must come from the allowed vocabulary.
    unknown_rows = df.filter(~status.is_in(valid_statuses))
    if not unknown_rows.is_empty():
        bad_values = unknown_rows[ds_var].unique().to_list()
        raise ValueError(
            f"Invalid disposition statuses found: {bad_values}. Must be one of {valid_statuses}"
        )

    # Rule 2: a Completed/Ongoing subject may only repeat the status as reason.
    mismatched_rows = df.filter(
        status.is_in(finished_or_active) & reason.is_not_null() & (reason != status)
    )
    if not mismatched_rows.is_empty():
        raise ValueError(
            "Found subjects with status 'Completed' or 'Ongoing' with mismatched "
            f"discontinuation reason in '{reason_var}'. Reason must be Null or match Status."
        )

    # Rule 3: a Discontinued subject needs a genuine discontinuation reason.
    reason_unusable = reason.is_null() | reason.is_in(finished_or_active)
    unexplained_rows = df.filter((status == "Discontinued") & reason_unusable)
    if not unexplained_rows.is_empty():
        raise ValueError(
            "Found subjects with status 'Discontinued' but missing or invalid "
            f"discontinuation reason in '{reason_var}'"
        )
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
def disposition_ard(
    population: pl.DataFrame,
    population_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str] | None,
    ds_term: tuple[str, str],
    dist_reason_term: tuple[str, str],
    total: bool,
    missing_group: str,
    pop_var_name: str = "Enrolled",
) -> pl.DataFrame:
    """
    Build the long-format analysis results data for the disposition table.

    The unfiltered ``population`` is validated first, then the population
    filter is applied. The result stacks one population-count row per group
    (labelled ``pop_var_name``) on top of the status/reason rows returned by
    ``count_subject_with_observation``.

    Args:
        population: Subject-level data containing the id, status and reason
            columns (and the group column when ``group`` is given).
        population_filter: Optional filter passed to ``apply_common_filters``.
        id: Pair whose first element is the subject id column name.
        group: Pair whose first element is the group column name. When falsy
            a constant ``"Overall"`` column is added and used instead, and no
            total column is requested.
        ds_term: Pair whose first element is the status column name.
        dist_reason_term: Pair whose first element is the reason column name.
        total: Forwarded to the counting helpers when ``group`` is given.
        missing_group: Forwarded to the counting helpers.
        pop_var_name: Row label used for the population-count row.

    Returns:
        A frame with string columns ``__index__``, ``__group__`` and
        ``__value__``.

    Raises:
        ValueError: Propagated from ``_validate_disposition_data``.
    """
    # Only the column names (first element of each pair) are needed here.
    status_col, _ = ds_term
    reason_col, _ = dist_reason_term
    id_col, _ = id

    _validate_disposition_data(population, status_col, reason_col)

    analysis_pop, _ = apply_common_filters(
        population=population,
        observation=None,
        population_filter=population_filter,
        observation_filter=None,
    )

    if group:
        group_col, _ = group
        with_total = total
    else:
        # No grouping requested: a constant column yields one overall column,
        # so an additional total would be redundant.
        group_col = "Overall"
        with_total = False
        analysis_pop = analysis_pop.with_columns(pl.lit("Overall").alias(group_col))

    # Population-count row (count only, no percentage).
    enrolled_counts = count_subject(
        population=analysis_pop,
        id=id_col,
        group=group_col,
        total=with_total,
        missing_group=missing_group,
    )
    enrolled_rows = enrolled_counts.select(
        pl.lit(pop_var_name).alias("__index__"),
        pl.col(group_col).cast(pl.String).alias("__group__"),
        pl.col("n_subj_pop").cast(pl.String).alias("__value__"),
    )

    # Nested counts: status first, reason within status. The population frame
    # doubles as the observation frame.
    nested_counts = count_subject_with_observation(
        population=analysis_pop,
        observation=analysis_pop,
        id=id_col,
        group=group_col,
        variable=[status_col, reason_col],
        total=with_total,
        missing_group=missing_group,
    )

    deduplicated = nested_counts.unique([group_col, status_col, reason_col, "__id__"])

    # Drop reason rows that merely repeat their status (e.g. "Completed"
    # listed under "Completed").
    informative = deduplicated.filter(pl.col(reason_col) != pl.col(status_col))

    status_rows = informative.sort("__id__").select(
        pl.col("__variable__").alias("__index__"),
        pl.col(group_col).cast(pl.String).alias("__group__"),
        pl.col("n_pct_subj_fmt").cast(pl.String).alias("__value__"),
    )

    return pl.concat([enrolled_rows, status_rows])
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
def disposition_df(ard: pl.DataFrame) -> pl.DataFrame:
    """
    Reshape the long ARD into the wide table that is displayed.

    Args:
        ard: Frame with ``__index__``, ``__group__`` and ``__value__`` columns.

    Returns:
        A frame with a leading ``Term`` column (the former ``__index__``)
        followed by one column per distinct ``__group__`` value.
    """
    # Long -> wide: one row per index value, one column per group.
    wide = ard.pivot(index="__index__", on="__group__", values="__value__")

    # Give the row-label column its display name and keep it first.
    labelled = wide.rename({"__index__": "Term"})
    return labelled.select(pl.col("Term"), pl.exclude("Term"))
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
def disposition_rtf(
    df: pl.DataFrame,
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    col_rel_width: list[float] | None = None,
) -> RTFDocument:
    """
    Render the display table as an RTF document.

    Args:
        df: Wide display table; the first column holds the row labels and
            every remaining column is one group.
        title: Title lines.
        footnote: Footnote lines, or ``None``.
        source: Source lines, or ``None``.
        col_rel_width: Relative column widths. When ``None`` the label column
            gets 2.5 and every other column gets 1.

    Returns:
        The document built by ``create_rtf_table_n_pct``.
    """
    column_names = df.columns
    n_value_cols = len(column_names) - 1

    # Two header rows: group names on top, the statistic label underneath.
    # The label column has an empty header in both rows.
    group_header = ["", *column_names[1:]]
    stat_header = ["", *(["n (%)"] * n_value_cols)]

    widths = ([2.5] + [1] * n_value_cols) if col_rel_width is None else col_rel_width

    return create_rtf_table_n_pct(
        df=df,
        col_header_1=group_header,
        col_header_2=stat_header,
        col_widths=widths,
        title=title,
        footnote=footnote,
        source=source,
    )
|
|
1
|
+
# pyre-strict
|
|
2
|
+
"""
|
|
3
|
+
Disposition Table 1.1 Analysis Functions
|
|
4
|
+
|
|
5
|
+
This module provides a pipeline for Disposition Table 1.1 summary analysis:
|
|
6
|
+
- disposition_ard: Generate Analysis Results Data (ARD)
|
|
7
|
+
- disposition_df: Transform ARD to display format
|
|
8
|
+
- disposition_rtf: Generate formatted RTF output
|
|
9
|
+
- disposition: Complete pipeline wrapper
|
|
10
|
+
- study_plan_to_disposition_summary: Batch generation from StudyPlan
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import polars as pl
|
|
16
|
+
from rtflite import RTFDocument
|
|
17
|
+
|
|
18
|
+
from ..common.count import count_subject, count_subject_with_observation
|
|
19
|
+
from ..common.parse import StudyPlanParser
|
|
20
|
+
from ..common.plan import StudyPlan
|
|
21
|
+
from ..common.rtf import create_rtf_table_n_pct
|
|
22
|
+
from ..common.utils import apply_common_filters
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def study_plan_to_disposition_summary(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Produce one Disposition Summary RTF per ``disposition_summary`` plan row.

    The expanded plan of ``study_plan`` is filtered to rows whose ``analysis``
    column equals ``"disposition_summary"``. For each such row the ``adsl``
    dataset is run through ``disposition`` and the result is written to
    ``<output_dir>/disposition_summary_<population>[_<group>].rtf``. The
    output directory is created when it does not exist.

    Args:
        study_plan: Study plan supplying ``output_dir``, the expanded plan
            table (``get_plan_df``) and the population keywords.

    Returns:
        The values returned by ``disposition`` (the output file paths), one
        per matching plan row, in the order the rows are iterated.
    """
    analysis_name = "disposition_summary"
    base_title = "Disposition of Participants"
    # One list object is reused for every generated table.
    shared_footnote = ["Percentages are based on the number of enrolled participants."]
    dataset_name = "adsl"

    # (variable name, display label) pairs handed to the pipeline.
    subject_id = ("USUBJID", "Subject ID")
    status_term = ("EOSSTT", "Disposition Status")
    reason_term = ("DCSREAS", "Discontinued Reason")

    out_dir = study_plan.output_dir
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    plan_parser = StudyPlanParser(study_plan)
    matching_rows = study_plan.get_plan_df().filter(pl.col("analysis") == analysis_name)

    written = []
    for plan_row in matching_rows.iter_rows(named=True):
        pop_name = plan_row["population"]
        group_name = plan_row.get("group")

        (adsl_df,) = plan_parser.get_datasets(dataset_name)
        pop_filter = plan_parser.get_population_filter(pop_name)

        # Without a group the pipeline falls back to a single overall column.
        group_pair = None
        if group_name is not None:
            var_name, labels = plan_parser.get_group_info(group_name)
            group_pair = (var_name, labels[0] if labels else var_name)

        # Title lines: fixed heading, then the population label when defined.
        titles = [base_title]
        pop_keyword = study_plan.keywords.populations.get(pop_name)
        if pop_keyword and pop_keyword.label:
            titles.append(pop_keyword.label)

        suffix = f"_{group_name}" if group_name else ""
        target = Path(out_dir) / f"{analysis_name}_{pop_name}{suffix}.rtf"

        written.append(
            disposition(
                population=adsl_df,
                population_filter=pop_filter,
                id=subject_id,
                group=group_pair,
                ds_term=status_term,
                dist_reason_term=reason_term,
                title=titles,
                footnote=shared_footnote,
                source=None,
                output_file=str(target),
                total=True,
                missing_group="error",
            )
        )

    return written
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def disposition(
    population: pl.DataFrame,
    population_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str] | None,
    ds_term: tuple[str, str],
    dist_reason_term: tuple[str, str],
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    output_file: str,
    total: bool = True,
    col_rel_width: list[float] | None = None,
    missing_group: str = "error",
) -> str:
    """
    Run the whole disposition pipeline and write the table to ``output_file``.

    Chains ``disposition_ard`` -> ``disposition_df`` -> ``disposition_rtf`` and
    then calls ``write_rtf`` on the resulting document.

    Args:
        population: Subject-level data passed to ``disposition_ard``.
        population_filter: Optional filter forwarded to ``disposition_ard``.
        id: Pair describing the subject identifier column.
        group: Pair describing the grouping column, or ``None``.
        ds_term: Pair describing the disposition status column.
        dist_reason_term: Pair describing the discontinuation reason column.
        title: Title lines for the RTF table.
        footnote: Footnote lines, or ``None``.
        source: Source lines, or ``None``.
        output_file: Path the RTF document is written to.
        total: Forwarded to ``disposition_ard``.
        col_rel_width: Optional relative column widths forwarded to
            ``disposition_rtf``.
        missing_group: Forwarded to ``disposition_ard``.

    Returns:
        ``output_file``, unchanged.
    """
    # Steps 1 and 2: analysis results data, then the wide display table.
    display_table = disposition_df(
        disposition_ard(
            population=population,
            population_filter=population_filter,
            id=id,
            group=group,
            ds_term=ds_term,
            dist_reason_term=dist_reason_term,
            total=total,
            missing_group=missing_group,
        )
    )

    # Step 3: render and persist.
    document = disposition_rtf(
        df=display_table,
        title=title,
        footnote=footnote,
        source=source,
        col_rel_width=col_rel_width,
    )
    document.write_rtf(output_file)

    return output_file
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _validate_disposition_data(df: pl.DataFrame, ds_var: str, reason_var: str) -> None:
    """
    Check that the status and reason columns are mutually consistent.

    The checks run in this order and the first failure raises:

    1. ``ds_var`` has no nulls and only holds Completed, Ongoing or
       Discontinued.
    2. Rows with status Completed/Ongoing have a reason that is null or
       equal to the status.
    3. Rows with status Discontinued have a non-null reason that is neither
       Completed nor Ongoing.

    Args:
        df: Data frame containing both columns.
        ds_var: Name of the disposition status column.
        reason_var: Name of the discontinuation reason column.

    Raises:
        ValueError: When any of the rules above is violated.
    """
    valid_statuses = ["Completed", "Ongoing", "Discontinued"]
    finished_or_active = ["Completed", "Ongoing"]

    status = pl.col(ds_var)
    reason = pl.col(reason_var)

    # Rule 1a: status must always be present.
    if df[ds_var].is_null().any():
        raise ValueError(f"Found null values in disposition status column '{ds_var}'")

    # Rule 1b: status must come from the allowed vocabulary.
    unknown_rows = df.filter(~status.is_in(valid_statuses))
    if not unknown_rows.is_empty():
        bad_values = unknown_rows[ds_var].unique().to_list()
        raise ValueError(
            f"Invalid disposition statuses found: {bad_values}. Must be one of {valid_statuses}"
        )

    # Rule 2: a Completed/Ongoing subject may only repeat the status as reason.
    mismatched_rows = df.filter(
        status.is_in(finished_or_active) & reason.is_not_null() & (reason != status)
    )
    if not mismatched_rows.is_empty():
        raise ValueError(
            "Found subjects with status 'Completed' or 'Ongoing' with mismatched "
            f"discontinuation reason in '{reason_var}'. Reason must be Null or match Status."
        )

    # Rule 3: a Discontinued subject needs a genuine discontinuation reason.
    reason_unusable = reason.is_null() | reason.is_in(finished_or_active)
    unexplained_rows = df.filter((status == "Discontinued") & reason_unusable)
    if not unexplained_rows.is_empty():
        raise ValueError(
            "Found subjects with status 'Discontinued' but missing or invalid "
            f"discontinuation reason in '{reason_var}'"
        )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def disposition_ard(
    population: pl.DataFrame,
    population_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str] | None,
    ds_term: tuple[str, str],
    dist_reason_term: tuple[str, str],
    total: bool,
    missing_group: str,
    pop_var_name: str = "Enrolled",
) -> pl.DataFrame:
    """
    Build the long-format analysis results data for the disposition table.

    The unfiltered ``population`` is validated first, then the population
    filter is applied. The result stacks one population-count row per group
    (labelled ``pop_var_name``) on top of the status/reason rows returned by
    ``count_subject_with_observation``.

    Args:
        population: Subject-level data containing the id, status and reason
            columns (and the group column when ``group`` is given).
        population_filter: Optional filter passed to ``apply_common_filters``.
        id: Pair whose first element is the subject id column name.
        group: Pair whose first element is the group column name. When falsy
            a constant ``"Overall"`` column is added and used instead, and no
            total column is requested.
        ds_term: Pair whose first element is the status column name.
        dist_reason_term: Pair whose first element is the reason column name.
        total: Forwarded to the counting helpers when ``group`` is given.
        missing_group: Forwarded to the counting helpers.
        pop_var_name: Row label used for the population-count row.

    Returns:
        A frame with string columns ``__index__``, ``__group__`` and
        ``__value__``.

    Raises:
        ValueError: Propagated from ``_validate_disposition_data``.
    """
    # Only the column names (first element of each pair) are needed here.
    status_col, _ = ds_term
    reason_col, _ = dist_reason_term
    id_col, _ = id

    _validate_disposition_data(population, status_col, reason_col)

    analysis_pop, _ = apply_common_filters(
        population=population,
        observation=None,
        population_filter=population_filter,
        observation_filter=None,
    )

    if group:
        group_col, _ = group
        with_total = total
    else:
        # No grouping requested: a constant column yields one overall column,
        # so an additional total would be redundant.
        group_col = "Overall"
        with_total = False
        analysis_pop = analysis_pop.with_columns(pl.lit("Overall").alias(group_col))

    # Population-count row (count only, no percentage).
    enrolled_counts = count_subject(
        population=analysis_pop,
        id=id_col,
        group=group_col,
        total=with_total,
        missing_group=missing_group,
    )
    enrolled_rows = enrolled_counts.select(
        pl.lit(pop_var_name).alias("__index__"),
        pl.col(group_col).cast(pl.String).alias("__group__"),
        pl.col("n_subj_pop").cast(pl.String).alias("__value__"),
    )

    # Nested counts: status first, reason within status. The population frame
    # doubles as the observation frame.
    nested_counts = count_subject_with_observation(
        population=analysis_pop,
        observation=analysis_pop,
        id=id_col,
        group=group_col,
        variable=[status_col, reason_col],
        total=with_total,
        missing_group=missing_group,
    )

    deduplicated = nested_counts.unique([group_col, status_col, reason_col, "__id__"])

    # Drop reason rows that merely repeat their status (e.g. "Completed"
    # listed under "Completed").
    informative = deduplicated.filter(pl.col(reason_col) != pl.col(status_col))

    status_rows = informative.sort("__id__").select(
        pl.col("__variable__").alias("__index__"),
        pl.col(group_col).cast(pl.String).alias("__group__"),
        pl.col("n_pct_subj_fmt").cast(pl.String).alias("__value__"),
    )

    return pl.concat([enrolled_rows, status_rows])
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def disposition_df(ard: pl.DataFrame) -> pl.DataFrame:
    """
    Reshape the long ARD into the wide table that is displayed.

    Args:
        ard: Frame with ``__index__``, ``__group__`` and ``__value__`` columns.

    Returns:
        A frame with a leading ``Term`` column (the former ``__index__``)
        followed by one column per distinct ``__group__`` value.
    """
    # Long -> wide: one row per index value, one column per group.
    wide = ard.pivot(index="__index__", on="__group__", values="__value__")

    # Give the row-label column its display name and keep it first.
    labelled = wide.rename({"__index__": "Term"})
    return labelled.select(pl.col("Term"), pl.exclude("Term"))
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def disposition_rtf(
    df: pl.DataFrame,
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    col_rel_width: list[float] | None = None,
) -> RTFDocument:
    """
    Render the display table as an RTF document.

    Args:
        df: Wide display table; the first column holds the row labels and
            every remaining column is one group.
        title: Title lines.
        footnote: Footnote lines, or ``None``.
        source: Source lines, or ``None``.
        col_rel_width: Relative column widths. When ``None`` the label column
            gets 2.5 and every other column gets 1.

    Returns:
        The document built by ``create_rtf_table_n_pct``.
    """
    column_names = df.columns
    n_value_cols = len(column_names) - 1

    # Two header rows: group names on top, the statistic label underneath.
    # The label column has an empty header in both rows.
    group_header = ["", *column_names[1:]]
    stat_header = ["", *(["n (%)"] * n_value_cols)]

    widths = ([2.5] + [1] * n_value_cols) if col_rel_width is None else col_rel_width

    return create_rtf_table_n_pct(
        df=df,
        col_header_1=group_header,
        col_header_2=stat_header,
        col_widths=widths,
        title=title,
        footnote=footnote,
        source=source,
    )
|