csrlite 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csrlite/__init__.py +50 -0
- csrlite/ae/__init__.py +1 -0
- csrlite/ae/ae_listing.py +492 -0
- csrlite/ae/ae_specific.py +478 -0
- csrlite/ae/ae_summary.py +399 -0
- csrlite/ae/ae_utils.py +132 -0
- csrlite/common/count.py +199 -0
- csrlite/common/parse.py +308 -0
- csrlite/common/plan.py +353 -0
- csrlite/common/utils.py +33 -0
- csrlite/common/yaml_loader.py +71 -0
- csrlite/disposition/__init__.py +2 -0
- csrlite/disposition/disposition.py +301 -0
- csrlite-0.1.0.dist-info/METADATA +68 -0
- csrlite-0.1.0.dist-info/RECORD +17 -0
- csrlite-0.1.0.dist-info/WHEEL +5 -0
- csrlite-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# pyre-strict
|
|
2
|
+
"""
|
|
3
|
+
Disposition Table 1.1 Analysis Functions
|
|
4
|
+
|
|
5
|
+
This module provides a pipeline for Disposition Table 1.1 summary analysis:
|
|
6
|
+
- disposition_ard: Generate Analysis Results Data (ARD)
|
|
7
|
+
- disposition_df: Transform ARD to display format
|
|
8
|
+
- disposition_rtf: Generate formatted RTF output
|
|
9
|
+
- disposition: Complete pipeline wrapper
|
|
10
|
+
- study_plan_to_disposition_summary: Batch generation from StudyPlan
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import polars as pl
|
|
16
|
+
from rtflite import RTFDocument
|
|
17
|
+
|
|
18
|
+
from ..ae.ae_utils import create_ae_rtf_table
|
|
19
|
+
from ..common.count import count_subject_with_observation
|
|
20
|
+
from ..common.parse import StudyPlanParser
|
|
21
|
+
from ..common.plan import StudyPlan
|
|
22
|
+
from ..common.utils import apply_common_filters
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def study_plan_to_disposition_summary(
    study_plan: StudyPlan,
) -> list[str]:
    """
    Generate Disposition Table 1.1 RTF outputs for all analyses defined in StudyPlan.

    Every row of the expanded plan whose ``analysis`` column equals
    ``"disposition_summary"`` is rendered to one RTF file inside
    ``study_plan.output_dir`` (the directory is created if it does not exist).

    Args:
        study_plan: Study plan providing the output directory, the expanded
            plan DataFrame and the population keywords used for the title.

    Returns:
        Paths of the generated RTF files, in the order the plan rows were
        processed.
    """
    # Meta data
    analysis_type = "disposition_summary"
    default_title = "Disposition of Participants"
    output_dir = study_plan.output_dir
    footnote = ["Percentages are based on the number of enrolled participants."]
    source = None

    population_df_name = "adsl"
    observation_df_name = "ds"  # As per plan_ds_xyz123.yaml

    # Named `id_var` (not `id`) so the builtin is not shadowed inside this function.
    id_var = ("USUBJID", "Subject ID")
    total = True
    missing_group = "error"
    indent_unit = "    "  # 4 spaces per indent level

    # Create output directory
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Initialize parser
    parser = StudyPlanParser(study_plan)

    # Get expanded plan DataFrame
    plan_df = study_plan.get_plan_df()

    # Filter for disposition analyses
    disp_plans = plan_df.filter(pl.col("analysis") == analysis_type)

    rtf_files = []

    for row in disp_plans.iter_rows(named=True):
        population = row["population"]
        observation = row.get("observation")
        parameter = row["parameter"]
        group = row.get("group")
        # `dict.get` only applies its default when the key is absent; a plan row
        # can carry a "title" key whose value is null, so fall back explicitly.
        title_text = row.get("title") or default_title

        # Get datasets
        population_df, observation_df = parser.get_datasets(population_df_name, observation_df_name)

        # Get filters
        population_filter = parser.get_population_filter(population)
        obs_filter = parser.get_observation_filter(observation)

        # Get parameters with indent levels (the parameter names are not needed here)
        _, param_filters, param_labels, param_indents = parser.get_parameter_info(parameter)

        # Apply indentation to labels
        indented_labels = [
            f"{indent_unit * indent_level}{label}"
            for label, indent_level in zip(param_labels, param_indents)
        ]

        variables_list = list(zip(param_filters, indented_labels))

        # Get group info (optional)
        group_tuple: tuple[str, str] | None
        if group is not None:
            group_var_name, group_labels = parser.get_group_info(group)
            group_var_label = group_labels[0] if group_labels else group_var_name
            group_tuple = (group_var_name, group_var_label)
        else:
            # No group specified: disposition_ard adds a dummy group column
            # for overall counts when it receives None.
            group_tuple = None

        # Build title: base title plus the population label when one is defined
        title_parts = [title_text]
        pop_kw = study_plan.keywords.populations.get(population)
        if pop_kw and pop_kw.label:
            title_parts.append(pop_kw.label)

        # Build output filename
        group_suffix = f"_{group}" if group else ""
        filename = f"{analysis_type}_{population}{group_suffix}.rtf"
        output_file = str(Path(output_dir) / filename)

        rtf_path = disposition(
            population=population_df,
            observation=observation_df,
            population_filter=population_filter,
            observation_filter=obs_filter,
            id=id_var,
            group=group_tuple,
            variables=variables_list,
            title=title_parts,
            footnote=footnote,
            source=source,
            output_file=output_file,
            total=total,
            missing_group=missing_group,
        )
        rtf_files.append(rtf_path)

    return rtf_files
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def disposition(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str] | None,
    variables: list[tuple[str, str]],
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    output_file: str,
    total: bool = True,
    col_rel_width: list[float] | None = None,
    missing_group: str = "error",
) -> str:
    """
    Run the full Disposition Table 1.1 pipeline and write the RTF file.

    The steps are chained in order: ``disposition_ard`` builds the analysis
    results data, ``disposition_df`` reshapes it for display, and
    ``disposition_rtf`` turns it into an RTF document, which is then written
    to ``output_file``.

    Returns:
        ``output_file``, unchanged, once the document has been written.
    """
    # Steps 1 and 2: build the ARD and reshape it into the display table.
    display_table = disposition_df(
        disposition_ard(
            population=population,
            observation=observation,
            population_filter=population_filter,
            observation_filter=observation_filter,
            id=id,
            group=group,
            variables=variables,
            total=total,
            missing_group=missing_group,
        )
    )

    # Step 3: render the display table and persist it.
    document = disposition_rtf(
        df=display_table,
        title=title,
        footnote=footnote,
        source=source,
        col_rel_width=col_rel_width,
    )
    document.write_rtf(output_file)

    return output_file
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def disposition_ard(
    population: pl.DataFrame,
    observation: pl.DataFrame,
    population_filter: str | None,
    observation_filter: str | None,
    id: tuple[str, str],
    group: tuple[str, str] | None,
    variables: list[tuple[str, str]],
    total: bool,
    missing_group: str,
) -> pl.DataFrame:
    """
    Generate ARD for Disposition Table 1.1.

    Args:
        population: Population-level dataset.
        observation: Observation-level dataset.
        population_filter: Filter forwarded to ``apply_common_filters`` for
            the population data, or None.
        observation_filter: Filter forwarded to ``apply_common_filters`` for
            the observation data, or None.
        id: ``(column name, label)`` of the subject identifier; only the
            column name is used here.
        group: ``(column name, label)`` of the grouping variable, or None to
            count all subjects under a single "All Subjects" group.
        variables: ``(filter, label)`` pairs, one per table row. Each filter
            is a SQL expression string evaluated with ``pl.sql_expr``.
        total: Forwarded to ``count_subject_with_observation``; forced to
            False when ``group`` is None.
        missing_group: Forwarded to ``count_subject_with_observation``.

    Returns:
        Long-format DataFrame with columns ``__index__`` (Enum ordered like
        ``variables``), ``__group__`` and ``__value__`` (taken from the
        ``n_pct_subj_fmt`` column), sorted by ``__index__`` then ``__group__``.

    Raises:
        ValueError: If ``variables`` is empty.
    """
    # Fail early with a clear message instead of the opaque error that
    # concatenating an empty list of results would produce.
    if not variables:
        raise ValueError("disposition_ard requires at least one (filter, label) pair in variables")

    id_var_name, _ = id

    # Handle optional group
    if group is not None:
        group_var_name, _ = group
    else:
        # Create a dummy group column for overall counts
        group_var_name = "__all__"
        all_subjects = pl.lit("All Subjects").alias(group_var_name)
        population = population.with_columns(all_subjects)
        observation = observation.with_columns(all_subjects)
        total = False  # No need for total column when there's only one group

    # Apply common filters
    population_filtered, observation_to_filter = apply_common_filters(
        population=population,
        observation=observation,
        population_filter=population_filter,
        observation_filter=observation_filter,
    )

    # For each parameter, we create an "observation" dataset and use
    # count_subject_with_observation. This approach works for both ADSL-based
    # filters (e.g., "Enrolled") and DS-based filters (e.g., "Discontinued")

    results = []

    for var_filter, var_label in variables:
        # Try to apply the filter to population first, then observation.
        # The broad catch is a deliberate fallback: any failure on the
        # population data is treated as "this filter targets the observation
        # data". If the observation filter fails as well, that error propagates.
        try:
            target_obs = population_filtered.filter(pl.sql_expr(var_filter))
        except Exception:
            target_obs = observation_to_filter.filter(pl.sql_expr(var_filter))

        # Add the parameter label as a variable for counting
        target_obs = target_obs.with_columns(pl.lit(var_label).alias("__index__"))

        # Use count_subject_with_observation to get n (%) for each group
        counts = count_subject_with_observation(
            population=population_filtered,
            observation=target_obs,
            id=id_var_name,
            group=group_var_name,
            variable="__index__",
            total=total,
            missing_group=missing_group,
        )

        results.append(
            counts.select(
                pl.col("__index__"),
                pl.col(group_var_name).alias("__group__"),
                pl.col("n_pct_subj_fmt").alias("__value__"),
            )
        )

    # Combine all results
    ard = pl.concat(results)

    # Sort by the order of variables in the list: casting __index__ to an Enum
    # built from the labels makes the sort follow list order, not string order.
    var_labels = [label for _, label in variables]
    ard = ard.with_columns(pl.col("__index__").cast(pl.Enum(var_labels))).sort(
        "__index__", "__group__"
    )

    return ard
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def disposition_df(ard: pl.DataFrame) -> pl.DataFrame:
    """
    Reshape the long-format ARD into the wide display table.

    The result has one row per ``__index__`` value and one column per
    ``__group__`` value, with cells taken from ``__value__``. The index
    column is renamed to "Disposition Status".
    """
    # Pivot to wide format, then give the index column its display name.
    return ard.pivot(index="__index__", on="__group__", values="__value__").rename(
        {"__index__": "Disposition Status"}
    )
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def disposition_rtf(
    df: pl.DataFrame,
    title: list[str],
    footnote: list[str] | None,
    source: list[str] | None,
    col_rel_width: list[float] | None = None,
) -> RTFDocument:
    """
    Build the RTF document for a disposition display table.

    The first header row repeats the DataFrame column names; the second row
    is blank over the first column and "n (%)" over every other column.
    When ``col_rel_width`` is None the first column gets relative width 2.5
    and each remaining column gets 1; otherwise ``col_rel_width`` is used as
    given. Table construction is delegated to ``create_ae_rtf_table``.
    """
    # Columns: Disposition Status, Group 1, Group 2, ... Total
    header_top = list(df.columns)
    value_col_count = len(df.columns) - 1
    header_bottom = [""] + ["n (%)"] * value_col_count

    col_widths = ([2.5] + [1] * value_col_count) if col_rel_width is None else col_rel_width

    # Reuse generic table creation
    return create_ae_rtf_table(
        df=df,
        col_header_1=header_top,
        col_header_2=header_bottom,
        col_widths=col_widths,
        title=title,
        footnote=footnote,
        source=source,
    )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: csrlite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A hierarchical YAML-based framework for generating Tables, Listings, and Figures in clinical trials
|
|
5
|
+
Author-email: Clinical Biostatistics Team <biostat@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/elong0527/csrlite
|
|
8
|
+
Project-URL: Documentation, https://elong0527.github.io/csrlite
|
|
9
|
+
Project-URL: Repository, https://github.com/elong0527/csrlite.git
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/elong0527/csrlite/issues
|
|
11
|
+
Keywords: clinical-trials,biostatistics,yaml,tlf,tables,listings,figures
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: pydantic>=2.0.0
|
|
22
|
+
Requires-Dist: pyyaml>=6.0
|
|
23
|
+
Requires-Dist: polars>=0.20.0
|
|
24
|
+
Requires-Dist: rtflite>=2.1.1
|
|
25
|
+
Provides-Extra: rtf
|
|
26
|
+
Requires-Dist: rtflite; extra == "rtf"
|
|
27
|
+
Provides-Extra: plotting
|
|
28
|
+
Requires-Dist: matplotlib>=3.5.0; extra == "plotting"
|
|
29
|
+
Requires-Dist: plotly>=5.0.0; extra == "plotting"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: isort>=5.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest>=9.0.1; extra == "dev"
|
|
37
|
+
Requires-Dist: jupyter>=1.1.1; extra == "dev"
|
|
38
|
+
Requires-Dist: jupyter-cache>=1.0.1; extra == "dev"
|
|
39
|
+
Requires-Dist: nbformat>=5.10.4; extra == "dev"
|
|
40
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pyre-check>=0.9.18; extra == "dev"
|
|
42
|
+
Provides-Extra: all
|
|
43
|
+
Requires-Dist: rtflite; extra == "all"
|
|
44
|
+
Requires-Dist: matplotlib>=3.5.0; extra == "all"
|
|
45
|
+
Requires-Dist: plotly>=5.0.0; extra == "all"
|
|
46
|
+
|
|
47
|
+
# csrlite
|
|
48
|
+
|
|
49
|
+
[CI](https://github.com/elong0527/csrlite/actions/workflows/ci.yml)
|
|
50
|
+
[Code coverage](https://codecov.io/gh/elong0527/csrlite)
|
|
51
|
+
[PyPI version](https://badge.fury.io/py/csrlite)
|
|
52
|
+
[Python downloads](https://www.python.org/downloads/)
|
|
53
|
+
|
|
54
|
+
A hierarchical YAML-based framework for generating Tables, Listings, and Figures in clinical trials.
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install csrlite
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Documentation
|
|
63
|
+
|
|
64
|
+
Visit [https://elong0527.github.io/csrlite](https://elong0527.github.io/csrlite) for full documentation.
|
|
65
|
+
|
|
66
|
+
## License
|
|
67
|
+
|
|
68
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
csrlite/__init__.py,sha256=o7HOFA9KKbyfq8l_26dqNHBDz2jqDJm8lQBvXYfBYdQ,1164
|
|
2
|
+
csrlite/ae/__init__.py,sha256=gZHPLATRF9f8QBwwQtEjQRtXMsqOJsUK2sbUMLjiE5U,14
|
|
3
|
+
csrlite/ae/ae_listing.py,sha256=EwmU5CTmqmkuiOsA7FedEF83S9MJ1YPlmf5AMsksUCU,18343
|
|
4
|
+
csrlite/ae/ae_specific.py,sha256=s-Zj6WQhKKHARMt5LkoSw74iThPVd-_92l8eYhUGiPc,16898
|
|
5
|
+
csrlite/ae/ae_summary.py,sha256=NlqbuW0N0aiJ6i3fLCDJJPxPuqk1mv6i5svPsIT1xD0,13637
|
|
6
|
+
csrlite/ae/ae_utils.py,sha256=6UhUrTkyOgpxpl5YFoNjteLBgkf0Gtw5lgQApCkwf3c,4121
|
|
7
|
+
csrlite/common/count.py,sha256=gdTSlA-nr5B6e3fuP9pelASf_FdaeRKYzujpE0bbzvA,6925
|
|
8
|
+
csrlite/common/parse.py,sha256=Vz9C7ljkDygT2qkP6TlY3T3p71D6BD5GtIwRKv6p8ps,9319
|
|
9
|
+
csrlite/common/plan.py,sha256=QhsBD7b-AU_mc-JScLHM1Oiw7FJ4AKN1iHWX80-ukuw,11988
|
|
10
|
+
csrlite/common/utils.py,sha256=SAqEnwDtE32LuQqnMVQr_1Xfdp-z54wIrwbwwPBE9lU,1022
|
|
11
|
+
csrlite/common/yaml_loader.py,sha256=_v9pkbAUVshTqVoMLqMiEn17awL2K0kFR4pdDArMSOM,3071
|
|
12
|
+
csrlite/disposition/__init__.py,sha256=KMtGoBjN4aKNYvXHmZ0GX-f4RnmQ3coYbUrkFeU8Es0,85
|
|
13
|
+
csrlite/disposition/disposition.py,sha256=UMm4Z1fFQ6VJ-KSqSaMP7qEzLoSa399kRSZx-oPKEqM,9274
|
|
14
|
+
csrlite-0.1.0.dist-info/METADATA,sha256=HNbQnMH5LqJ2Vq_KMky8u9_ofP0YOe40AynZM9384FI,2799
|
|
15
|
+
csrlite-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
16
|
+
csrlite-0.1.0.dist-info/top_level.txt,sha256=59zJTvGH5zx2FY4vCl4kgnH8awT0cZrg21Mace7IFlU,8
|
|
17
|
+
csrlite-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
csrlite
|