csrlite 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,10 +15,10 @@ from pathlib import Path
15
15
  import polars as pl
16
16
  from rtflite import RTFDocument
17
17
 
18
- from ..ae.ae_utils import create_ae_rtf_table
19
- from ..common.count import count_subject_with_observation
18
+ from ..common.count import count_subject, count_subject_with_observation
20
19
  from ..common.parse import StudyPlanParser
21
20
  from ..common.plan import StudyPlan
21
+ from ..common.rtf import create_rtf_table_n_pct
22
22
  from ..common.utils import apply_common_filters
23
23
 
24
24
 
@@ -26,18 +26,21 @@ def study_plan_to_disposition_summary(
26
26
  study_plan: StudyPlan,
27
27
  ) -> list[str]:
28
28
  """
29
- Generate Disposition Table 1.1 RTF outputs for all analyses defined in StudyPlan.
29
+ Generate Disposition Summary Table outputs for all analyses defined in StudyPlan.
30
30
  """
31
31
  # Meta data
32
32
  analysis_type = "disposition_summary"
33
33
  output_dir = study_plan.output_dir
34
+ title = "Disposition of Participants"
34
35
  footnote = ["Percentages are based on the number of enrolled participants."]
35
36
  source = None
36
37
 
37
38
  population_df_name = "adsl"
38
- observation_df_name = "ds" # As per plan_ds_xyz123.yaml
39
39
 
40
40
  id = ("USUBJID", "Subject ID")
41
+ ds_term = ("EOSSTT", "Disposition Status")
42
+ dist_reason_term = ("DCREASCD", "Discontinued Reason")
43
+
41
44
  total = True
42
45
  missing_group = "error"
43
46
 
@@ -57,32 +60,14 @@ def study_plan_to_disposition_summary(
57
60
 
58
61
  for row in disp_plans.iter_rows(named=True):
59
62
  population = row["population"]
60
- observation = row.get("observation")
61
- parameter = row["parameter"]
62
63
  group = row.get("group")
63
- title_text = row.get(
64
- "title", "Disposition of Participants"
65
- ) # Allow title override from plan if we supported it in parser, else default
64
+ title_text = title
66
65
 
67
66
  # Get datasets
68
- population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)
67
+ (population_df,) = parser.get_datasets(population_df_name)
69
68
 
70
69
  # Get filters
71
70
  population_filter = parser.get_population_filter(population)
72
- obs_filter = parser.get_observation_filter(observation)
73
-
74
- # Get parameters with indent levels
75
- param_names, param_filters, param_labels, param_indents = parser.get_parameter_info(
76
- parameter
77
- )
78
-
79
- # Apply indentation to labels
80
- indented_labels = []
81
- for label, indent_level in zip(param_labels, param_indents):
82
- indent_str = " " * indent_level # 4 spaces per indent level
83
- indented_labels.append(f"{indent_str}{label}")
84
-
85
- variables_list = list(zip(param_filters, indented_labels))
86
71
 
87
72
  # Get group info (optional)
88
73
  if group is not None:
@@ -106,12 +91,11 @@ def study_plan_to_disposition_summary(
106
91
 
107
92
  rtf_path = disposition(
108
93
  population=population_df,
109
- observation=observation_df,
110
94
  population_filter=population_filter,
111
- observation_filter=obs_filter,
112
95
  id=id,
113
96
  group=group_tuple,
114
- variables=variables_list,
97
+ ds_term=ds_term,
98
+ dist_reason_term=dist_reason_term,
115
99
  title=title_parts,
116
100
  footnote=footnote,
117
101
  source=source,
@@ -126,12 +110,11 @@ def study_plan_to_disposition_summary(
126
110
 
127
111
  def disposition(
128
112
  population: pl.DataFrame,
129
- observation: pl.DataFrame,
130
113
  population_filter: str | None,
131
- observation_filter: str | None,
132
114
  id: tuple[str, str],
133
115
  group: tuple[str, str] | None,
134
- variables: list[tuple[str, str]],
116
+ ds_term: tuple[str, str],
117
+ dist_reason_term: tuple[str, str],
135
118
  title: list[str],
136
119
  footnote: list[str] | None,
137
120
  source: list[str] | None,
@@ -141,17 +124,16 @@ def disposition(
141
124
  missing_group: str = "error",
142
125
  ) -> str:
143
126
  """
144
- Complete Disposition Table 1.1 pipeline wrapper.
127
+ Complete Disposition Summary Table pipeline wrapper.
145
128
  """
146
129
  # Step 1: Generate ARD
147
130
  ard = disposition_ard(
148
131
  population=population,
149
- observation=observation,
150
132
  population_filter=population_filter,
151
- observation_filter=observation_filter,
152
133
  id=id,
153
134
  group=group,
154
- variables=variables,
135
+ ds_term=ds_term,
136
+ dist_reason_term=dist_reason_term,
155
137
  total=total,
156
138
  missing_group=missing_group,
157
139
  )
@@ -172,87 +154,135 @@ def disposition(
172
154
  return output_file
173
155
 
174
156
 
157
+ def _validate_disposition_data(df: pl.DataFrame, ds_var: str, reason_var: str) -> None:
158
+ """
159
+ Validate disposition data integrity.
160
+
161
+ Rules:
162
+ 1. ds_var must be {Completed, Ongoing, Discontinued} and non-null.
163
+ 2. If ds_var is Completed/Ongoing, reason_var must be the same as ds_var or null.
164
+ 3. If ds_var is Discontinued, reason_var must be non-null and not Completed/Ongoing.
165
+ """
166
+ # Rule 1: Valid Statuses
167
+ valid_statuses = ["Completed", "Ongoing", "Discontinued"]
168
+ if df[ds_var].is_null().any():
169
+ raise ValueError(f"Found null values in disposition status column '{ds_var}'")
170
+
171
+ invalid_status = df.filter(~pl.col(ds_var).is_in(valid_statuses))
172
+ if not invalid_status.is_empty():
173
+ bad_values = invalid_status[ds_var].unique().to_list()
174
+ raise ValueError(
175
+ f"Invalid disposition statuses found: {bad_values}. Must be one of {valid_statuses}"
176
+ )
177
+
178
+ # Rule 2: Completed/Ongoing implies Reason is Null OR equal to Status
179
+ inconsistent_completed = df.filter(
180
+ (pl.col(ds_var).is_in(["Completed", "Ongoing"]))
181
+ & (~pl.col(reason_var).is_null())
182
+ & (pl.col(reason_var) != pl.col(ds_var))
183
+ )
184
+ if not inconsistent_completed.is_empty():
185
+ raise ValueError(
186
+ f"Found subjects with status 'Completed' or 'Ongoing' with mismatched "
187
+ f"discontinuation reason in '{reason_var}'. Reason must be Null or match Status."
188
+ )
189
+
190
+ # Rule 3: Discontinued implies Reason is NOT Null AND NOT {Completed, Ongoing}
191
+ invalid_discontinued = df.filter(
192
+ (pl.col(ds_var) == "Discontinued")
193
+ & ((pl.col(reason_var).is_null()) | (pl.col(reason_var).is_in(["Completed", "Ongoing"])))
194
+ )
195
+ if not invalid_discontinued.is_empty():
196
+ raise ValueError(
197
+ f"Found subjects with status 'Discontinued' but missing or invalid "
198
+ f"discontinuation reason in '{reason_var}'"
199
+ )
200
+
201
+
175
202
  def disposition_ard(
176
203
  population: pl.DataFrame,
177
- observation: pl.DataFrame,
178
204
  population_filter: str | None,
179
- observation_filter: str | None,
180
205
  id: tuple[str, str],
181
206
  group: tuple[str, str] | None,
182
- variables: list[tuple[str, str]],
207
+ ds_term: tuple[str, str],
208
+ dist_reason_term: tuple[str, str],
183
209
  total: bool,
184
210
  missing_group: str,
211
+ pop_var_name: str = "Enrolled",
185
212
  ) -> pl.DataFrame:
186
213
  """
187
- Generate ARD for Disposition Table 1.1.
214
+ Generate ARD for Summary Table.
188
215
  """
216
+ # Unpack variables
217
+ ds_var_name, _ = ds_term
218
+ dist_reason_var_name, _ = dist_reason_term
189
219
  id_var_name, _ = id
190
220
 
191
- # Handle optional group
192
- if group is not None:
193
- group_var_name, _ = group
194
- else:
195
- # Create a dummy group column for overall counts
196
- group_var_name = "__all__"
197
- population = population.with_columns(pl.lit("All Subjects").alias(group_var_name))
198
- observation = observation.with_columns(pl.lit("All Subjects").alias(group_var_name))
199
- total = False # No need for total column when there's only one group
221
+ # Validate Data
222
+ _validate_disposition_data(population, ds_var_name, dist_reason_var_name)
200
223
 
201
224
  # Apply common filters
202
- population_filtered, observation_to_filter = apply_common_filters(
225
+ population_filtered, _ = apply_common_filters(
203
226
  population=population,
204
- observation=observation,
227
+ observation=None,
205
228
  population_filter=population_filter,
206
- observation_filter=observation_filter,
229
+ observation_filter=None,
207
230
  )
208
231
 
209
- # For each parameter, we create an "observation" dataset and use
210
- # count_subject_with_observation. This approach works for both ADSL-based
211
- # filters (e.g., "Enrolled") and DS-based filters (e.g., "Discontinued")
212
-
213
- results = []
214
-
215
- for var_filter, var_label in variables:
216
- # Try to apply the filter to population first, then observation
217
- # This handles both ADSL-based and DS-based parameter filters
218
- try:
219
- target_obs = population_filtered.filter(pl.sql_expr(var_filter))
220
- except Exception:
221
- target_obs = observation_to_filter.filter(pl.sql_expr(var_filter))
222
-
223
- # Add the parameter label as a variable for counting
224
- target_obs = target_obs.with_columns(pl.lit(var_label).alias("__index__"))
225
-
226
- # Use count_subject_with_observation to get n (%) for each group
227
- counts = count_subject_with_observation(
228
- population=population_filtered,
229
- observation=target_obs,
230
- id=id_var_name,
231
- group=group_var_name,
232
- variable="__index__",
233
- total=total,
234
- missing_group=missing_group,
232
+ if group:
233
+ group_var_name, _ = group
234
+ else:
235
+ # Create dummy group for overall analysis
236
+ group_var_name = "Overall"
237
+ total = False
238
+ population_filtered = population_filtered.with_columns(
239
+ pl.lit("Overall").alias(group_var_name)
235
240
  )
236
241
 
237
- results.append(
238
- counts.select(
239
- pl.col("__index__"),
240
- pl.col(group_var_name).alias("__group__"),
241
- pl.col("n_pct_subj_fmt").alias("__value__"),
242
- )
243
- )
242
+ # Enrolled Subjects
243
+ n_pop_counts = count_subject(
244
+ population=population_filtered,
245
+ id=id_var_name,
246
+ group=group_var_name,
247
+ total=total,
248
+ missing_group=missing_group,
249
+ )
250
+
251
+ n_pop = n_pop_counts.select(
252
+ pl.lit(pop_var_name).alias("__index__"),
253
+ pl.col(group_var_name).cast(pl.String).alias("__group__"),
254
+ pl.col("n_subj_pop").cast(pl.String).alias("__value__"),
255
+ )
256
+
257
+ # Hierarchical Counts for Status and Reason
258
+ # Level 1: Status (Completed, Ongoing, Discontinued)
259
+ # Level 2: Status + Reason (Only relevant for Discontinued)
260
+ n_dict = count_subject_with_observation(
261
+ population=population_filtered,
262
+ observation=population_filtered,
263
+ id=id_var_name,
264
+ group=group_var_name,
265
+ variable=[ds_var_name, dist_reason_var_name],
266
+ total=total,
267
+ missing_group=missing_group,
268
+ )
269
+
270
+ # Filter and format
271
+ # Identify rows:
272
+ # 1. Status rows: Where reason is "__all__"
273
+ # 2. Reason rows: Where reason is specific value (indented)
274
+ n_dict = n_dict.unique([group_var_name, ds_var_name, dist_reason_var_name, "__id__"])
244
275
 
245
- # Combine all results
246
- ard = pl.concat(results)
276
+ # Filter out redundant nested rows (e.g., "Completed" under "Completed")
277
+ n_dict = n_dict.filter(pl.col(dist_reason_var_name) != pl.col(ds_var_name))
247
278
 
248
- # Sort by the order of variables in the list
249
- # Create an Enum for __index__
250
- var_labels = [label for _, label in variables]
251
- ard = ard.with_columns(pl.col("__index__").cast(pl.Enum(var_labels))).sort(
252
- "__index__", "__group__"
279
+ n_final = n_dict.sort("__id__").select(
280
+ pl.col("__variable__").alias("__index__"),
281
+ pl.col(group_var_name).cast(pl.String).alias("__group__"),
282
+ pl.col("n_pct_subj_fmt").cast(pl.String).alias("__value__"),
253
283
  )
254
284
 
255
- return ard
285
+ return pl.concat([n_pop, n_final])
256
286
 
257
287
 
258
288
  def disposition_df(ard: pl.DataFrame) -> pl.DataFrame:
@@ -260,10 +290,11 @@ def disposition_df(ard: pl.DataFrame) -> pl.DataFrame:
260
290
  Transform ARD to display format.
261
291
  """
262
292
  # Pivot
293
+ # Pivot from long to wide format
263
294
  df_wide = ard.pivot(index="__index__", on="__group__", values="__value__")
264
295
 
265
- # Rename index
266
- df_wide = df_wide.rename({"__index__": "Disposition Status"})
296
+ # Rename __index__ to display column name
297
+ df_wide = df_wide.rename({"__index__": "Term"}).select(pl.col("Term"), pl.exclude("Term"))
267
298
 
268
299
  return df_wide
269
300
 
@@ -282,7 +313,7 @@ def disposition_rtf(
282
313
  # Columns: Disposition Status, Group 1, Group 2, ... Total
283
314
 
284
315
  n_cols = len(df.columns)
285
- col_header_1 = list(df.columns)
316
+ col_header_1 = [""] + list(df.columns[1:])
286
317
  col_header_2 = [""] + ["n (%)"] * (n_cols - 1)
287
318
 
288
319
  if col_rel_width is None:
@@ -290,7 +321,7 @@ def disposition_rtf(
290
321
  else:
291
322
  col_widths = col_rel_width
292
323
 
293
- return create_ae_rtf_table(
324
+ return create_rtf_table_n_pct(
294
325
  df=df,
295
326
  col_header_1=col_header_1,
296
327
  col_header_2=col_header_2,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csrlite
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A hierarchical YAML-based framework for generating Tables, Listings, and Figures in clinical trials
5
5
  Author-email: Clinical Biostatistics Team <biostat@example.com>
6
6
  License: MIT
@@ -28,17 +28,17 @@ Provides-Extra: plotting
28
28
  Requires-Dist: matplotlib>=3.5.0; extra == "plotting"
29
29
  Requires-Dist: plotly>=5.0.0; extra == "plotting"
30
30
  Provides-Extra: dev
31
- Requires-Dist: pytest>=7.0.0; extra == "dev"
32
31
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
33
- Requires-Dist: black>=22.0.0; extra == "dev"
34
- Requires-Dist: isort>=5.0.0; extra == "dev"
35
- Requires-Dist: mypy>=1.0.0; extra == "dev"
36
32
  Requires-Dist: pytest>=9.0.1; extra == "dev"
33
+ Requires-Dist: black>=22.0.0; extra == "dev"
34
+ Requires-Dist: isort>=7.0.0; extra == "dev"
35
+ Requires-Dist: ruff>=0.14.8; extra == "dev"
36
+ Requires-Dist: mypy>=1.19.0; extra == "dev"
37
+ Requires-Dist: quarto>=0.1.0; extra == "dev"
38
+ Requires-Dist: pyre-check>=0.9.18; extra == "dev"
37
39
  Requires-Dist: jupyter>=1.1.1; extra == "dev"
38
40
  Requires-Dist: jupyter-cache>=1.0.1; extra == "dev"
39
41
  Requires-Dist: nbformat>=5.10.4; extra == "dev"
40
- Requires-Dist: ruff>=0.1.0; extra == "dev"
41
- Requires-Dist: pyre-check>=0.9.18; extra == "dev"
42
42
  Provides-Extra: all
43
43
  Requires-Dist: rtflite; extra == "all"
44
44
  Requires-Dist: matplotlib>=3.5.0; extra == "all"
@@ -0,0 +1,19 @@
1
+ csrlite/__init__.py,sha256=w18H3dEZE_HZFdsr0Qqo0xmJPksx764BFAg8XPo9tGI,1417
2
+ csrlite/ae/__init__.py,sha256=gZHPLATRF9f8QBwwQtEjQRtXMsqOJsUK2sbUMLjiE5U,14
3
+ csrlite/ae/ae_listing.py,sha256=R4g8JnJRCx4u60xRC2IEu8EPzEIgxj8VgDhBaaQ5eZE,18389
4
+ csrlite/ae/ae_specific.py,sha256=_CDAgF4vMmjpqFTppL2LgmExVsEhA57z4jYj4Y1zLfY,17022
5
+ csrlite/ae/ae_summary.py,sha256=46IyuqHGdn0dLOrz7XffKNcNjscA0Y8OZFiZ6akisB4,13692
6
+ csrlite/ae/ae_utils.py,sha256=ew5Mm_zNdflc_MRYvYSChXhRhGQ1oZcz7H_TZPVvFBk,2011
7
+ csrlite/common/config.py,sha256=FUnUL1BtQO52U0ag1U_d2K3UP5L_vA_KifonANHLv_c,878
8
+ csrlite/common/count.py,sha256=k1W-LdQv63s-B-Oeq2SvYsXctrT1YMVWs93CtIaGpVw,8785
9
+ csrlite/common/parse.py,sha256=Vz9C7ljkDygT2qkP6TlY3T3p71D6BD5GtIwRKv6p8ps,9319
10
+ csrlite/common/plan.py,sha256=XXUGpzNxC6oS66c7NYnDPmE0CwXMhIQzlJCga1nDktw,12928
11
+ csrlite/common/rtf.py,sha256=gah-M-WdvMk52R-AEacM79P18jc2OFnCH7-I0B91Fhk,2825
12
+ csrlite/common/utils.py,sha256=It0aHqPfXDmCte2uVAO2Lkb3U_jDLrjNihAL8gziTQk,1110
13
+ csrlite/common/yaml_loader.py,sha256=_v9pkbAUVshTqVoMLqMiEn17awL2K0kFR4pdDArMSOM,3071
14
+ csrlite/disposition/__init__.py,sha256=KMtGoBjN4aKNYvXHmZ0GX-f4RnmQ3coYbUrkFeU8Es0,85
15
+ csrlite/disposition/disposition.py,sha256=r0R53ozVFYNXI0JBzR14sghQayLVUvXJBiS0wNLAtB0,10348
16
+ csrlite-0.2.0.dist-info/METADATA,sha256=SzOst7_7EmNn6FB29KO-oFZpGZ93yZCQ9lGBhG-p9bE,2801
17
+ csrlite-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ csrlite-0.2.0.dist-info/top_level.txt,sha256=59zJTvGH5zx2FY4vCl4kgnH8awT0cZrg21Mace7IFlU,8
19
+ csrlite-0.2.0.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- csrlite/__init__.py,sha256=o7HOFA9KKbyfq8l_26dqNHBDz2jqDJm8lQBvXYfBYdQ,1164
2
- csrlite/ae/__init__.py,sha256=gZHPLATRF9f8QBwwQtEjQRtXMsqOJsUK2sbUMLjiE5U,14
3
- csrlite/ae/ae_listing.py,sha256=EwmU5CTmqmkuiOsA7FedEF83S9MJ1YPlmf5AMsksUCU,18343
4
- csrlite/ae/ae_specific.py,sha256=s-Zj6WQhKKHARMt5LkoSw74iThPVd-_92l8eYhUGiPc,16898
5
- csrlite/ae/ae_summary.py,sha256=NlqbuW0N0aiJ6i3fLCDJJPxPuqk1mv6i5svPsIT1xD0,13637
6
- csrlite/ae/ae_utils.py,sha256=6UhUrTkyOgpxpl5YFoNjteLBgkf0Gtw5lgQApCkwf3c,4121
7
- csrlite/common/count.py,sha256=gdTSlA-nr5B6e3fuP9pelASf_FdaeRKYzujpE0bbzvA,6925
8
- csrlite/common/parse.py,sha256=Vz9C7ljkDygT2qkP6TlY3T3p71D6BD5GtIwRKv6p8ps,9319
9
- csrlite/common/plan.py,sha256=QhsBD7b-AU_mc-JScLHM1Oiw7FJ4AKN1iHWX80-ukuw,11988
10
- csrlite/common/utils.py,sha256=SAqEnwDtE32LuQqnMVQr_1Xfdp-z54wIrwbwwPBE9lU,1022
11
- csrlite/common/yaml_loader.py,sha256=_v9pkbAUVshTqVoMLqMiEn17awL2K0kFR4pdDArMSOM,3071
12
- csrlite/disposition/__init__.py,sha256=KMtGoBjN4aKNYvXHmZ0GX-f4RnmQ3coYbUrkFeU8Es0,85
13
- csrlite/disposition/disposition.py,sha256=UMm4Z1fFQ6VJ-KSqSaMP7qEzLoSa399kRSZx-oPKEqM,9274
14
- csrlite-0.1.0.dist-info/METADATA,sha256=HNbQnMH5LqJ2Vq_KMky8u9_ofP0YOe40AynZM9384FI,2799
15
- csrlite-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- csrlite-0.1.0.dist-info/top_level.txt,sha256=59zJTvGH5zx2FY4vCl4kgnH8awT0cZrg21Mace7IFlU,8
17
- csrlite-0.1.0.dist-info/RECORD,,