meta-edc 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meta_analytics/dataframes/__init__.py +3 -0
- meta_analytics/dataframes/constants.py +1 -1
- meta_analytics/dataframes/enrolled/__init__.py +0 -1
- meta_analytics/dataframes/get_eos_df.py +15 -2
- meta_analytics/dataframes/get_glucose_df.py +149 -0
- meta_analytics/dataframes/get_glucose_fbg_df.py +27 -0
- meta_analytics/dataframes/get_glucose_fbg_ogtt_df.py +22 -0
- meta_analytics/dataframes/glucose_endpoints/endpoint_by_date.py +106 -120
- meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +36 -227
- meta_analytics/dataframes/utils.py +18 -4
- meta_analytics/notebooks/hiv_regimens.ipynb +425 -0
- meta_analytics/notebooks/monitoring_report.ipynb +1561 -0
- meta_analytics/notebooks/pharmacy.ipynb +971 -0
- meta_analytics/utils.py +81 -0
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/METADATA +4 -3
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/RECORD +32 -18
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/WHEEL +1 -1
- meta_edc-1.0.7.dist-info/licenses/AUTHORS.rst +8 -0
- meta_reports/migrations/0054_auto_20250422_2003.py +81 -0
- meta_reports/migrations/0055_alter_glucosesummary_table.py +17 -0
- meta_reports/migrations/0056_auto_20250422_2214.py +54 -0
- meta_reports/migrations/0057_auto_20250422_2224.py +54 -0
- meta_reports/migrations/0058_auto_20250422_2232.py +54 -0
- meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +13 -1
- meta_reports/models/dbviews/glucose_summary/view_definition.py +8 -5
- meta_subject/form_validators/glucose_form_validator.py +16 -1
- meta_subject/forms/study_medication_form.py +5 -3
- meta_subject/migrations/0221_auto_20250402_1913.py +42 -0
- meta_subject/migrations/0222_alter_historicalstudymedication_stock_codes_and_more.py +46 -0
- meta_analytics/dataframes/enrolled/get_glucose_df.py +0 -122
- /meta_edc-1.0.6.dist-info/AUTHORS → /meta_analytics/dataframes/glucose_endpoints/utils.py +0 -0
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info/licenses}/LICENSE +0 -0
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1561 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"id": "28e21930-b943-4a08-a79a-ff2712ae9215",
|
6
|
+
"metadata": {},
|
7
|
+
"source": [
|
8
|
+
"%%capture\n",
|
9
|
+
"import os\n",
|
10
|
+
"from pathlib import Path\n",
|
11
|
+
"import pandas as pd\n",
|
12
|
+
"from dj_notebook import activate\n",
|
13
|
+
"import numpy as np\n",
|
14
|
+
"from django_pandas.io import read_frame\n",
|
15
|
+
"\n",
|
16
|
+
"env_file = os.environ[\"META_ENV\"]\n",
|
17
|
+
"reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
|
18
|
+
"analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
|
19
|
+
"pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
|
20
|
+
"plus = activate(dotenv_file=env_file)\n",
|
21
|
+
"pd.set_option('future.no_silent_downcasting', True)"
|
22
|
+
],
|
23
|
+
"outputs": [],
|
24
|
+
"execution_count": null
|
25
|
+
},
|
26
|
+
{
|
27
|
+
"cell_type": "code",
|
28
|
+
"id": "52d4bb98-18a5-4528-be8d-e8370b1b5d1e",
|
29
|
+
"metadata": {},
|
30
|
+
"source": [
|
31
|
+
"\n",
|
32
|
+
"import pdfkit\n",
|
33
|
+
"from datetime import date\n",
|
34
|
+
"from edc_pdutils.dataframes import get_subject_visit\n",
|
35
|
+
"from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39\n",
|
36
|
+
"from meta_analytics.dataframes import GlucoseEndpointsByDate\n",
|
37
|
+
"from scipy.stats import chi2\n",
|
38
|
+
"from great_tables import loc, style, md\n",
|
39
|
+
"from meta_analytics.dataframes import get_eos_df\n",
|
40
|
+
"from meta_analytics.utils import df_as_great_table, df_as_great_table2\n",
|
41
|
+
"from meta_prn.models import LossToFollowup\n",
|
42
|
+
"from edc_visit_schedule.models import SubjectScheduleHistory\n",
|
43
|
+
"from edc_appointment.analytics import get_appointment_df\n",
|
44
|
+
"from edc_appointment.constants import NEW_APPT, CANCELLED_APPT, ONTIME_APPT, MISSED_APPT\n",
|
45
|
+
"from meta_consent.models import SubjectConsentV1Ext\n",
|
46
|
+
"from meta_analytics.dataframes import get_glucose_df\n",
|
47
|
+
"\n",
|
48
|
+
"from edc_appointment.constants import SCHEDULED_APPT, UNSCHEDULED_APPT # noqa\n",
|
49
|
+
"from edc_constants.constants import YES # noqa\n"
|
50
|
+
],
|
51
|
+
"outputs": [],
|
52
|
+
"execution_count": null
|
53
|
+
},
|
54
|
+
{
|
55
|
+
"metadata": {},
|
56
|
+
"cell_type": "code",
|
57
|
+
"source": [
|
58
|
+
"html_data = []\n",
|
59
|
+
"cutoff_date = date(2025,3, 31)\n",
|
60
|
+
"end_of_trial_date= date(2026,3, 1)\n",
|
61
|
+
"document_title = f\"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2><h5>Data Download: {cutoff_date.strftime('%d %B %Y')}</h5>\"\n",
|
62
|
+
"study_title = 'META3 - Metformin treatment for diabetes prevention in Africa'\n",
|
63
|
+
"pdf_filename = f\"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf\"\n"
|
64
|
+
],
|
65
|
+
"id": "b255fd34cd6f50c0",
|
66
|
+
"outputs": [],
|
67
|
+
"execution_count": null
|
68
|
+
},
|
69
|
+
{
|
70
|
+
"metadata": {},
|
71
|
+
"cell_type": "code",
|
72
|
+
"source": [
|
73
|
+
"\n",
|
74
|
+
"df_visit = get_subject_visit(\"meta_subject.subjectvisit\")\n",
|
75
|
+
"late_exlusion_offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
|
76
|
+
"df_eos = get_eos_df()\n",
|
77
|
+
"df_eos_excluded = (\n",
|
78
|
+
" df_eos\n",
|
79
|
+
" .query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")\n",
|
80
|
+
" .copy()\n",
|
81
|
+
" .reset_index()\n",
|
82
|
+
")\n",
|
83
|
+
"df_visit = (\n",
|
84
|
+
" df_visit\n",
|
85
|
+
" .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
86
|
+
" .query(\"_merge=='left_only'\")\n",
|
87
|
+
" .drop(columns=[\"_merge\"])\n",
|
88
|
+
")\n",
|
89
|
+
"\n",
|
90
|
+
"df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]\n",
|
91
|
+
"\n",
|
92
|
+
"df_appointments = get_appointment_df()\n",
|
93
|
+
"df_appointments[\"site_id\"] = df_appointments.site_id.astype(str)\n",
|
94
|
+
"df_appointments = (\n",
|
95
|
+
" df_appointments\n",
|
96
|
+
" .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
97
|
+
" .query(\"_merge=='left_only'\")\n",
|
98
|
+
" .drop(columns=[\"_merge\"])\n",
|
99
|
+
")\n",
|
100
|
+
"\n",
|
101
|
+
"\n",
|
102
|
+
"cls = GlucoseEndpointsByDate()\n",
|
103
|
+
"cls.run()\n",
|
104
|
+
"df_endpoint = cls.endpoint_only_df.copy()\n",
|
105
|
+
"df_glucose = get_glucose_df()\n",
|
106
|
+
"# df_glucose_fbg = get_glucose_fbg_df()\n",
|
107
|
+
"# df_glucose = pd.concat([df_glucose, df_glucose_fbg])\n",
|
108
|
+
"\n",
|
109
|
+
"\n",
|
110
|
+
"enrolled = df_visit.copy()\n",
|
111
|
+
"enrolled[\"site_id\"] = enrolled[\"site_id\"].astype(str)\n",
|
112
|
+
"enrolled_pivot = (\n",
|
113
|
+
" enrolled\n",
|
114
|
+
" .query(\"visit_code==1000.0\").groupby([\"site_id\"])\n",
|
115
|
+
" .size()\n",
|
116
|
+
" .reset_index()\n",
|
117
|
+
" .pivot_table(columns=\"site_id\", values=0, observed=True)\n",
|
118
|
+
")\n",
|
119
|
+
"enrolled_pivot.columns.name=\"\"\n",
|
120
|
+
"enrolled_pivot[\"total\"] = enrolled_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
|
121
|
+
"\n"
|
122
|
+
],
|
123
|
+
"id": "215212f9d44e79df",
|
124
|
+
"outputs": [],
|
125
|
+
"execution_count": null
|
126
|
+
},
|
127
|
+
{
|
128
|
+
"metadata": {},
|
129
|
+
"cell_type": "code",
|
130
|
+
"source": [
|
131
|
+
"column_headers = {\"label\": \"Label\", \"visit_code\": \"Visit code\", \"10\": \"Hindu Mandal\", \"20\": \"Amana\", \"30\": \"Temeke\", \"40\": \"Mwananyamala\", \"60\": \"Mnazi Moja\", \"total\": \"Total\"}\n",
|
132
|
+
"column_headers_with_str = {\"label\": \"Label\", \"10_str\": \"Hindu Mandal\", \"20_str\": \"Amana\", \"30_str\": \"Temeke\", \"40_str\": \"Mwananyamala\", \"60_str\": \"Mnazi Moja\", \"total_str\": \"Total\"}"
|
133
|
+
],
|
134
|
+
"id": "fe90271ff1799692",
|
135
|
+
"outputs": [],
|
136
|
+
"execution_count": null
|
137
|
+
},
|
138
|
+
{
|
139
|
+
"metadata": {},
|
140
|
+
"cell_type": "code",
|
141
|
+
"source": [
|
142
|
+
"# Table 1a Visits completed to date\n",
|
143
|
+
"\n",
|
144
|
+
"df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
|
145
|
+
"\n",
|
146
|
+
"df_tbl1.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
|
147
|
+
"df1 = df_tbl1.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
|
148
|
+
"df1.columns.name = None\n",
|
149
|
+
"df1.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
|
150
|
+
"df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)\n",
|
151
|
+
"df1.fillna(0, inplace=True)\n",
|
152
|
+
"df_attended = df1.copy().reset_index(drop=True)\n",
|
153
|
+
"df_attended = df_attended.fillna(0.0)"
|
154
|
+
],
|
155
|
+
"id": "9e3d608809eea5",
|
156
|
+
"outputs": [],
|
157
|
+
"execution_count": null
|
158
|
+
},
|
159
|
+
{
|
160
|
+
"metadata": {},
|
161
|
+
"cell_type": "code",
|
162
|
+
"source": [
|
163
|
+
"gt = df_as_great_table(\n",
|
164
|
+
" df_attended[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]],\n",
|
165
|
+
" title=\"Table 1a: Visits completed to date\"\n",
|
166
|
+
")\n",
|
167
|
+
"gt = (\n",
|
168
|
+
" gt\n",
|
169
|
+
" .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
|
170
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
171
|
+
" .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
|
172
|
+
" .data_color(\n",
|
173
|
+
" columns=[\"visit_code\"],\n",
|
174
|
+
" palette=[\"lavender\", \"thistle\"],\n",
|
175
|
+
" domain=[2000, 5000],\n",
|
176
|
+
" na_color=\"white\"\n",
|
177
|
+
" )\n",
|
178
|
+
" .tab_source_note(source_note=f\"Excludes visit reports submitted for participants eventually withdrawn on late exclusion criteria.\")\n",
|
179
|
+
")\n",
|
180
|
+
"html_data.append(gt.as_raw_html())\n",
|
181
|
+
"gt.show()"
|
182
|
+
],
|
183
|
+
"id": "a43c2fbd8a7a692c",
|
184
|
+
"outputs": [],
|
185
|
+
"execution_count": null
|
186
|
+
},
|
187
|
+
{
|
188
|
+
"metadata": {},
|
189
|
+
"cell_type": "code",
|
190
|
+
"source": [
|
191
|
+
"# Table 1b Total scheduled appointments\n",
|
192
|
+
"df_appt_pivot = (\n",
|
193
|
+
" df_appointments.query(\"appt_reason==@SCHEDULED_APPT\")\n",
|
194
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
195
|
+
" .query(\"_merge=='left_only'\")\n",
|
196
|
+
" .drop(columns=[\"_merge\"])\n",
|
197
|
+
" .reset_index(drop=True)\n",
|
198
|
+
" .groupby([\"visit_code\", \"site_id\"])\n",
|
199
|
+
" .size()\n",
|
200
|
+
" .to_frame()\n",
|
201
|
+
" .reset_index()\n",
|
202
|
+
" .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
|
203
|
+
" .reset_index()\n",
|
204
|
+
" .fillna(0)\n",
|
205
|
+
")\n",
|
206
|
+
"\n",
|
207
|
+
"df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
|
208
|
+
"df_appt_pivot.columns.name = None\n",
|
209
|
+
"gt = df_as_great_table(\n",
|
210
|
+
" df_appt_pivot,\n",
|
211
|
+
" title=\"Table 1b: Total appointments\",\n",
|
212
|
+
" subtitle=\"Total possible appointments not including unscheduled appointments\"\n",
|
213
|
+
"\n",
|
214
|
+
")\n",
|
215
|
+
"gt = (\n",
|
216
|
+
" gt\n",
|
217
|
+
" .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
|
218
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
219
|
+
" .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
|
220
|
+
" .data_color(\n",
|
221
|
+
" columns=[\"visit_code\"],\n",
|
222
|
+
" palette=[\"lavender\", \"thistle\"],\n",
|
223
|
+
" domain=[2000, 5000],\n",
|
224
|
+
" na_color=\"white\"\n",
|
225
|
+
" )\n",
|
226
|
+
")\n",
|
227
|
+
"html_data.append(gt.as_raw_html())\n",
|
228
|
+
"gt.show()"
|
229
|
+
],
|
230
|
+
"id": "70eb34a139ff7095",
|
231
|
+
"outputs": [],
|
232
|
+
"execution_count": null
|
233
|
+
},
|
234
|
+
{
|
235
|
+
"metadata": {},
|
236
|
+
"cell_type": "code",
|
237
|
+
"source": [
|
238
|
+
"# Table 1c Past scheduled appointments -- no information provided\n",
|
239
|
+
"df_appt_pivot = (\n",
|
240
|
+
" df_appointments.query(\"appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
|
241
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
242
|
+
" .query(\"_merge=='left_only'\")\n",
|
243
|
+
" .drop(columns=[\"_merge\"])\n",
|
244
|
+
" .reset_index(drop=True)\n",
|
245
|
+
" .groupby([\"visit_code\", \"site_id\"])\n",
|
246
|
+
" .size()\n",
|
247
|
+
" .to_frame()\n",
|
248
|
+
" .reset_index()\n",
|
249
|
+
" .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
|
250
|
+
" .reset_index()\n",
|
251
|
+
" .fillna(0)\n",
|
252
|
+
")\n",
|
253
|
+
"df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
|
254
|
+
"df_appt_pivot.columns.name = None\n",
|
255
|
+
"gt = df_as_great_table(\n",
|
256
|
+
" df_appt_pivot,\n",
|
257
|
+
" title=\"Table 1c: Past appointments not attended/not reported\",\n",
|
258
|
+
" subtitle=\"Expected by now but no information provided by site\",\n",
|
259
|
+
")\n",
|
260
|
+
"gt = (\n",
|
261
|
+
" gt\n",
|
262
|
+
" .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
|
263
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
264
|
+
" .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
|
265
|
+
" .data_color(\n",
|
266
|
+
" columns=[\"visit_code\"],\n",
|
267
|
+
" palette=[\"lavender\", \"thistle\"],\n",
|
268
|
+
" domain=[2000, 5000],\n",
|
269
|
+
" na_color=\"white\"\n",
|
270
|
+
" )\n",
|
271
|
+
" .tab_source_note(source_note=f\"Scheduled appointment date is before {cutoff_date.strftime('%d %B %Y')}.\")\n",
|
272
|
+
")\n",
|
273
|
+
"html_data.append(gt.as_raw_html())\n",
|
274
|
+
"gt.show()"
|
275
|
+
],
|
276
|
+
"id": "f243552177b216d7",
|
277
|
+
"outputs": [],
|
278
|
+
"execution_count": null
|
279
|
+
},
|
280
|
+
{
|
281
|
+
"metadata": {},
|
282
|
+
"cell_type": "code",
|
283
|
+
"source": [
|
284
|
+
"# Table 1d Unscheduled appointments\n",
|
285
|
+
"df_appt = (\n",
|
286
|
+
" df_appointments.query(\"appt_reason==@UNSCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status!=@NEW_APPT\")\n",
|
287
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
288
|
+
" .query(\"_merge=='left_only'\")\n",
|
289
|
+
" .drop(columns=[\"_merge\"])\n",
|
290
|
+
" .reset_index(drop=True)\n",
|
291
|
+
" .copy()\n",
|
292
|
+
" .reset_index(drop=True)\n",
|
293
|
+
")\n",
|
294
|
+
"df_appt['visit_code'] = df_appt['visit_code'].astype(int)\n",
|
295
|
+
"df_appt['visit_code'] = df_appt['visit_code'].astype(str)\n",
|
296
|
+
"\n",
|
297
|
+
"subjects_with_unscheduled = df_appt.subject_identifier.nunique()\n",
|
298
|
+
"\n",
|
299
|
+
"df_appt_pivot = (\n",
|
300
|
+
" df_appt\n",
|
301
|
+
" .groupby([\"visit_code\", \"site_id\"])\n",
|
302
|
+
" .size()\n",
|
303
|
+
" .to_frame()\n",
|
304
|
+
" .reset_index()\n",
|
305
|
+
" .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
|
306
|
+
" .reset_index()\n",
|
307
|
+
" .fillna(0)\n",
|
308
|
+
")\n",
|
309
|
+
"df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
|
310
|
+
"df_appt_pivot.columns.name = None\n",
|
311
|
+
"df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]] = df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].astype('float64')\n",
|
312
|
+
"\n",
|
313
|
+
"\n",
|
314
|
+
"# add totals row\n",
|
315
|
+
"sum_row = df_appt_pivot.select_dtypes(include='float64').sum()\n",
|
316
|
+
"sum_row['visit_code'] = 'Total'\n",
|
317
|
+
"sum_row_df = pd.DataFrame(sum_row).T\n",
|
318
|
+
"df_appt_pivot = pd.concat([df_appt_pivot, sum_row_df], axis=0).reset_index(drop=True)\n",
|
319
|
+
"\n",
|
320
|
+
"gt = df_as_great_table(\n",
|
321
|
+
" df_appt_pivot,\n",
|
322
|
+
" title=\"Table 1d: Unscheduled appointments\",\n",
|
323
|
+
" subtitle=\"Appointments with sequence>0 grouped by visit code\",\n",
|
324
|
+
")\n",
|
325
|
+
"gt = (\n",
|
326
|
+
" gt\n",
|
327
|
+
" .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
|
328
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
329
|
+
" .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
|
330
|
+
" .data_color(\n",
|
331
|
+
" columns=[\"visit_code\"],\n",
|
332
|
+
" palette=[\"lavender\", \"thistle\"],\n",
|
333
|
+
" domain=[2000, 5000],\n",
|
334
|
+
" na_color=\"white\"\n",
|
335
|
+
" )\n",
|
336
|
+
" .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
|
337
|
+
" .tab_source_note(source_note=f\"{subjects_with_unscheduled} participants had at least one unscheduled appointment.\")\n",
|
338
|
+
")\n",
|
339
|
+
"html_data.append(gt.as_raw_html())\n",
|
340
|
+
"gt.show()"
|
341
|
+
],
|
342
|
+
"id": "6e55569e322370a",
|
343
|
+
"outputs": [],
|
344
|
+
"execution_count": null
|
345
|
+
},
|
346
|
+
{
|
347
|
+
"metadata": {},
|
348
|
+
"cell_type": "code",
|
349
|
+
"source": [
|
350
|
+
"# Table 1e Future scheduled appointments\n",
|
351
|
+
"df_appt_pivot = (\n",
|
352
|
+
" df_appointments.query(\"@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
|
353
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
354
|
+
" .query(\"_merge=='left_only'\")\n",
|
355
|
+
" .drop(columns=[\"_merge\"])\n",
|
356
|
+
" .reset_index(drop=True)\n",
|
357
|
+
" .groupby([\"visit_code\", \"site_id\"])\n",
|
358
|
+
" .size()\n",
|
359
|
+
" .to_frame()\n",
|
360
|
+
" .reset_index()\n",
|
361
|
+
" .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
|
362
|
+
" .reset_index()\n",
|
363
|
+
" .fillna(0)\n",
|
364
|
+
")\n",
|
365
|
+
"df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
|
366
|
+
"df_appt_pivot.columns.name = None\n",
|
367
|
+
"gt = df_as_great_table(\n",
|
368
|
+
" df_appt_pivot,\n",
|
369
|
+
" title=\"Table 1e: Future appointments\",\n",
|
370
|
+
")\n",
|
371
|
+
"gt = (\n",
|
372
|
+
" gt\n",
|
373
|
+
" .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
|
374
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
375
|
+
" .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
|
376
|
+
" .data_color(\n",
|
377
|
+
" columns=[\"visit_code\"],\n",
|
378
|
+
" palette=[\"lavender\", \"thistle\"],\n",
|
379
|
+
" domain=[2000, 5000],\n",
|
380
|
+
" na_color=\"white\"\n",
|
381
|
+
" )\n",
|
382
|
+
" .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
|
383
|
+
" .tab_source_note(source_note=f\"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.\")\n",
|
384
|
+
")\n",
|
385
|
+
"html_data.append(gt.as_raw_html())\n",
|
386
|
+
"gt.show()"
|
387
|
+
],
|
388
|
+
"id": "8193005de33cae6f",
|
389
|
+
"outputs": [],
|
390
|
+
"execution_count": null
|
391
|
+
},
|
392
|
+
{
|
393
|
+
"metadata": {},
|
394
|
+
"cell_type": "code",
|
395
|
+
"source": [
|
396
|
+
"# Table 2 Visits Missed to Date as % of Visits Attended + Visits Missed\n",
|
397
|
+
"subject_count = (\n",
|
398
|
+
" df_visit\n",
|
399
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
400
|
+
" .query(\"_merge=='left_only'\")\n",
|
401
|
+
" .drop(columns=[\"_merge\"])\n",
|
402
|
+
" .reset_index(drop=True)\n",
|
403
|
+
" .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
|
404
|
+
").subject_identifier.nunique()\n",
|
405
|
+
"df_tbl = (\n",
|
406
|
+
" df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
|
407
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
408
|
+
" .query(\"_merge=='left_only'\")\n",
|
409
|
+
" .drop(columns=[\"_merge\"])\n",
|
410
|
+
" .reset_index(drop=True)\n",
|
411
|
+
" .groupby(by=[\"visit_code\", \"site_id\"])\n",
|
412
|
+
" .size()\n",
|
413
|
+
" .to_frame()\n",
|
414
|
+
" .reset_index()\n",
|
415
|
+
")\n",
|
416
|
+
"df_tbl.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
|
417
|
+
"df_tbl_pivot = df_tbl.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
|
418
|
+
"df_tbl_pivot.columns.name = None\n",
|
419
|
+
"df_tbl_pivot.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
|
420
|
+
"df_tbl_pivot['total'] = df_tbl_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
|
421
|
+
"df_missed = (\n",
|
422
|
+
" df_tbl_pivot\n",
|
423
|
+
" .fillna(0)\n",
|
424
|
+
" .copy()\n",
|
425
|
+
" .set_index([\"visit_code\"])\n",
|
426
|
+
")\n",
|
427
|
+
"\n",
|
428
|
+
"df_attended_display = df_attended.copy()\n",
|
429
|
+
"df_attended_display = (\n",
|
430
|
+
" df_attended_display\n",
|
431
|
+
" .set_index([\"visit_code\"])\n",
|
432
|
+
")\n",
|
433
|
+
"\n",
|
434
|
+
"attended_and_missed = df_attended_display.add(df_missed, fill_value=0)\n",
|
435
|
+
"attended_and_missed = (\n",
|
436
|
+
" attended_and_missed\n",
|
437
|
+
" .fillna(0)\n",
|
438
|
+
" .reset_index()\n",
|
439
|
+
" .set_index([\"visit_code\"])\n",
|
440
|
+
")\n",
|
441
|
+
"\n",
|
442
|
+
"attended_and_missed_perc = df_missed/attended_and_missed\n",
|
443
|
+
"attended_and_missed_perc = (\n",
|
444
|
+
" attended_and_missed_perc\n",
|
445
|
+
" .fillna(0)\n",
|
446
|
+
" .reset_index()\n",
|
447
|
+
" .set_index([\"visit_code\"])\n",
|
448
|
+
")\n",
|
449
|
+
"\n",
|
450
|
+
"df_result = df_missed.merge(attended_and_missed_perc, on=[\"visit_code\"], suffixes=(\"\", \"_perc\"))\n",
|
451
|
+
"for col in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
|
452
|
+
" col_perc = f\"{col}_perc\"\n",
|
453
|
+
" df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc]*100:.2f})\", axis=1)\n",
|
454
|
+
"df_result = df_result.reset_index().sort_values(by=[\"visit_code\"], ascending=True)\n",
|
455
|
+
"df_result = df_result.fillna(0.0)"
|
456
|
+
],
|
457
|
+
"id": "c86c5f0ffe59e951",
|
458
|
+
"outputs": [],
|
459
|
+
"execution_count": null
|
460
|
+
},
|
461
|
+
{
|
462
|
+
"metadata": {},
|
463
|
+
"cell_type": "code",
|
464
|
+
"source": [
|
465
|
+
"df_table = df_result[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].copy()\n",
|
466
|
+
"gt = df_as_great_table(\n",
|
467
|
+
" df_table,\n",
|
468
|
+
" title=\"Table 2a: Visits Missed to Date\",\n",
|
469
|
+
" subtitle=\"as % of Visits Attended + Visits Missed\"\n",
|
470
|
+
")\n",
|
471
|
+
"gt = (\n",
|
472
|
+
" gt\n",
|
473
|
+
" .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
|
474
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
475
|
+
" .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
|
476
|
+
" .tab_style(\n",
|
477
|
+
" style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
|
478
|
+
" locations=loc.body(\n",
|
479
|
+
" columns=[0],\n",
|
480
|
+
" rows=list(range(0, len(df_table))),\n",
|
481
|
+
" ),\n",
|
482
|
+
" )\n",
|
483
|
+
" .tab_source_note(source_note=f\"{subject_count} participants had at least one missed visit.\")\n",
|
484
|
+
"\n",
|
485
|
+
")\n",
|
486
|
+
"html_data.append(gt.as_raw_html())\n",
|
487
|
+
"gt.show()\n"
|
488
|
+
],
|
489
|
+
"id": "3cd8b1290091660c",
|
490
|
+
"outputs": [],
|
491
|
+
"execution_count": null
|
492
|
+
},
|
493
|
+
{
|
494
|
+
"metadata": {},
|
495
|
+
"cell_type": "code",
|
496
|
+
"source": [
|
497
|
+
"# Table 2b: Number of missed visits by participant\n",
|
498
|
+
"subject_count = (\n",
|
499
|
+
" df_visit\n",
|
500
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
501
|
+
" .query(\"_merge=='left_only'\")\n",
|
502
|
+
" .drop(columns=[\"_merge\"])\n",
|
503
|
+
" .reset_index(drop=True)\n",
|
504
|
+
" .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
|
505
|
+
").subject_identifier.nunique()\n",
|
506
|
+
"df_tbl = (\n",
|
507
|
+
" df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
|
508
|
+
" .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
|
509
|
+
" .query(\"_merge=='left_only'\")\n",
|
510
|
+
" .drop(columns=[\"_merge\"])\n",
|
511
|
+
" .reset_index(drop=True)\n",
|
512
|
+
" .groupby(by=[\"subject_identifier\", \"site_id\"])\n",
|
513
|
+
" .size()\n",
|
514
|
+
" .to_frame()\n",
|
515
|
+
" .reset_index()\n",
|
516
|
+
")\n",
|
517
|
+
"df_tbl.columns = [\"subject_identifier\", \"site_id\", \"missed_count\"]\n",
|
518
|
+
"df_tbl[\"category\"] = pd.cut(df_tbl[\"missed_count\"], bins=[0, 1, 3, 5, 7, 100], labels=[\"Missed at least 1\", \"2 to 3\", \"4 to 5\", \"6 to 7\", \"missed more than 7\"])\n",
|
519
|
+
"df_tbl_pivot = df_tbl.pivot_table(index=\"category\", columns=\"site_id\", values=\"missed_count\", observed=False, aggfunc=\"count\").reset_index()\n",
|
520
|
+
"\n",
|
521
|
+
"df_tbl_pivot['total'] = df_tbl_pivot.select_dtypes(include='int').sum(axis=1, skipna=True)\n",
|
522
|
+
"\n",
|
523
|
+
"sum_row = df_tbl_pivot.select_dtypes(include='int64').sum()\n",
|
524
|
+
"sum_row['category'] = 'Total'\n",
|
525
|
+
"\n",
|
526
|
+
"\n",
|
527
|
+
"df_tbl_pivot = (\n",
|
528
|
+
" pd.concat([df_tbl_pivot, sum_row.to_frame().T], axis=0)\n",
|
529
|
+
" .rename(columns={10: \"10\", 20: \"20\", 30: \"30\", 40: \"40\", 60: \"60\"})\n",
|
530
|
+
")\n",
|
531
|
+
"\n",
|
532
|
+
"gt = df_as_great_table(\n",
|
533
|
+
" df_tbl_pivot,\n",
|
534
|
+
" title=\"Table 2b: Number of participants who missed one or more visits\",\n",
|
535
|
+
")\n",
|
536
|
+
"gt = (\n",
|
537
|
+
" gt\n",
|
538
|
+
" .cols_label({\"category\": \"Category\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
|
539
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
|
540
|
+
" .cols_align(align=\"left\", columns=[\"category\"])\n",
|
541
|
+
" .tab_style(\n",
|
542
|
+
" style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
|
543
|
+
" locations=loc.body(\n",
|
544
|
+
" columns=[0],\n",
|
545
|
+
"            rows=list(range(0, len(df_tbl_pivot))),\n",
|
546
|
+
" ),\n",
|
547
|
+
" )\n",
|
548
|
+
")\n",
|
549
|
+
"html_data.append(gt.as_raw_html())\n",
|
550
|
+
"gt.show()\n",
|
551
|
+
"\n"
|
552
|
+
],
|
553
|
+
"id": "b18088e16e0bf7f7",
|
554
|
+
"outputs": [],
|
555
|
+
"execution_count": null
|
556
|
+
},
|
557
|
+
{
|
558
|
+
"metadata": {},
|
559
|
+
"cell_type": "code",
|
560
|
+
"source": [
|
561
|
+
"# func for tables 3,4,5\n",
|
562
|
+
"def get_row_df(row_df:pd.DataFrame, label:str)->pd.DataFrame:\n",
|
563
|
+
" row_df = row_df.groupby(by=[\"site_id\"]).site_id.count().to_frame(name=\"n\")\n",
|
564
|
+
" row_df[\"label\"] = label\n",
|
565
|
+
" row_df = row_df.reset_index()\n",
|
566
|
+
" row_df = row_df.pivot(index=\"label\", values=\"n\", columns=\"site_id\").reset_index()\n",
|
567
|
+
" row_df.columns.name = \"\"\n",
|
568
|
+
" all_sites = [10, 20, 30, 40, 60]\n",
|
569
|
+
" for site in all_sites:\n",
|
570
|
+
" if site not in row_df.columns:\n",
|
571
|
+
" row_df[site] = None\n",
|
572
|
+
" row_df = row_df.reset_index(drop=True)\n",
|
573
|
+
" return row_df\n",
|
574
|
+
"\n",
|
575
|
+
"\n",
|
576
|
+
"def get_table_df(df_source:pd.DataFrame, visit_code:float|None=None, month_label:str|None=None)->pd.DataFrame:\n",
|
577
|
+
" if visit_code:\n",
|
578
|
+
" df_month = df_source[df_source.visit_code==visit_code].copy()\n",
|
579
|
+
" elif month_label:\n",
|
580
|
+
" df_month = df_source.copy()\n",
|
581
|
+
"\n",
|
582
|
+
" \n",
|
583
|
+
" row_df = df_month.copy()\n",
|
584
|
+
" table_df = get_row_df(row_df, \"Total (n)\")\n",
|
585
|
+
" \n",
|
586
|
+
" row_df = df_month.query(\"ogtt_value<7.8 and fbg_value<6.1\").copy()\n",
|
587
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT <7.8; FBG <6.1\")])\n",
|
588
|
+
" \n",
|
589
|
+
" row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
|
590
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT <7.8; FBG >=6.1 <7.0\")])\n",
|
591
|
+
" \n",
|
592
|
+
" row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=7.0)].copy()\n",
|
593
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT <7.8; FBG >=7.0\")])\n",
|
594
|
+
" \n",
|
595
|
+
" row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value<6.1)].copy()\n",
|
596
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥7.8 to <11.1; FBG <6.1\")])\n",
|
597
|
+
" \n",
|
598
|
+
" row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
|
599
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0\")])\n",
|
600
|
+
" \n",
|
601
|
+
" row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=7.0)].copy()\n",
|
602
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥7.8 to <11.1; FBG >=7.0\")])\n",
|
603
|
+
" \n",
|
604
|
+
" row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value<6.1)].copy()\n",
|
605
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥11.1; FBG <6.1\")])\n",
|
606
|
+
" \n",
|
607
|
+
" row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
|
608
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥11.1; FBG >=6.1 <7.0\")])\n",
|
609
|
+
" \n",
|
610
|
+
" row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=7.0)].copy()\n",
|
611
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥11.1; FBG >=7.0\")])\n",
|
612
|
+
"\n",
|
613
|
+
" row_df = df_month[(df_month.ogtt_value.isna())].copy()\n",
|
614
|
+
" table_df = pd.concat([table_df, get_row_df(row_df, \"Missing OGTT\")])\n",
|
615
|
+
" return table_df\n",
|
616
|
+
"\n",
|
617
|
+
"\n",
|
618
|
"def format_table_df(tbl_df, add_totals:bool|None=None):\n",
"    \"\"\"Format a per-site count table as `count (percent)` display strings.\n",
"\n",
"    The first column of `tbl_df` is treated as the row label and the remaining\n",
"    columns are summed into a `total` column; all columns are then renamed\n",
"    positionally to `label`, `10`, `20`, `30`, `40`, `60`, `total`.\n",
"    Percentages are relative to the first row. Rows labelled `Total (n)` are\n",
"    overwritten with the first row's plain counts (no percentage).\n",
"\n",
"    :param tbl_df: label column followed by one count column per site.\n",
"    :param add_totals: append a `Totals` row summing every row after the first;\n",
"        treated as True when None.\n",
"    :return: a new frame with `label` plus one `<site>_str` column per site and total.\n",
"    \"\"\"\n",
"    sites = [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]\n",
"    add_totals = True if add_totals is None else add_totals\n",
"    tbl_df = tbl_df.fillna(0.0)\n",
"    tbl_df[\"total\"] = tbl_df.iloc[:, 1:].sum(axis=1)\n",
"    tbl_df = tbl_df.reset_index(drop=True)\n",
"\n",
"    if add_totals:\n",
"        # column sums over every row except the first (denominator) row\n",
"        df_last = tbl_df[1:].sum().to_frame()\n",
"        df_last.loc[\"label\"] = np.nan\n",
"        df_last = df_last.reset_index()\n",
"        df_last.columns = [\"label\", \"value\"]\n",
"        df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
"        df_last.columns.name = \"\"\n",
"        df_last[\"label\"] = \"Totals\"\n",
"\n",
"        tbl_df = pd.concat([tbl_df, df_last])\n",
"        tbl_df = tbl_df.reset_index(drop=True)\n",
"\n",
"    tbl_df.columns = [\"label\", *sites]\n",
"\n",
"    for site in sites:\n",
"        # the first row holds the denominator for this column\n",
"        denominator = tbl_df.iloc[0][site]\n",
"        tbl_df[f\"{site}_perc\"] = (tbl_df[site] / denominator) * 100 if denominator > 0 else 0\n",
"        tbl_df[f\"{site}_perc_str\"] = tbl_df[f\"{site}_perc\"].map('{:.1f}'.format)\n",
"        tbl_df[f\"{site}_str\"] = tbl_df[[site, f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
"        tbl_df[f\"{site}_str\"] = tbl_df[f\"{site}_str\"] + \")\"\n",
"\n",
"    cols = [\"label\", *[f\"{site}_str\" for site in sites]]\n",
"    # copy so the assignment below writes to an independent frame, not to a selection of tbl_df\n",
"    tbl_df1 = tbl_df[cols].copy()\n",
"    tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", *sites]].to_list()\n",
"    return tbl_df1"
|
651
|
+
],
|
652
|
+
"id": "e1bcc6507b1e49c7",
|
653
|
+
"outputs": [],
|
654
|
+
"execution_count": null
|
655
|
+
},
|
656
|
+
{
|
657
|
+
"metadata": {},
|
658
|
+
"cell_type": "code",
|
659
|
+
"source": [
|
660
|
"# Table 3: OGTT and FBG at 12-month visit\n",
"# build the counts for visit code 1120.0, format them, then render and collect the HTML\n",
"df_table3 = format_table_df(get_table_df(df_glucose, 1120.0)).fillna(0.0)\n",
"gt = (\n",
"    df_as_great_table(df_table3, title=\"Table 3: OGTT and FBG at 12-month visit\")\n",
"    .cols_label(column_headers_with_str)\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
"    .cols_width(cases={\"label\": \"35%\"})\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()\n"
|
674
|
+
],
|
675
|
+
"id": "9a9616a118ae674d",
|
676
|
+
"outputs": [],
|
677
|
+
"execution_count": null
|
678
|
+
},
|
679
|
+
{
|
680
|
+
"metadata": {},
|
681
|
+
"cell_type": "code",
|
682
|
+
"source": [
|
683
|
"# Table 4: OGTT and FBG at 24-month visit\n",
"# build the counts for visit code 1240.0, format them, then render and collect the HTML\n",
"df_table4 = format_table_df(get_table_df(df_glucose, 1240.0)).fillna(0.0)\n",
"gt = (\n",
"    df_as_great_table(df_table4, title=\"Table 4: OGTT and FBG at 24-month visit\")\n",
"    .cols_label(column_headers_with_str)\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
"    .cols_width(cases={\"label\": \"35%\"})\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
697
|
+
],
|
698
|
+
"id": "ec0988364166e130",
|
699
|
+
"outputs": [],
|
700
|
+
"execution_count": null
|
701
|
+
},
|
702
|
+
{
|
703
|
+
"metadata": {},
|
704
|
+
"cell_type": "code",
|
705
|
+
"source": [
|
706
|
"# Table 5: OGTT and FBG at 36-month visit\n",
"# build the counts for visit code 1360.0, format them, then render and collect the HTML\n",
"df_table5 = format_table_df(get_table_df(df_glucose, 1360.0)).fillna(0.0)\n",
"gt = (\n",
"    df_as_great_table(df_table5, title=\"Table 5: OGTT and FBG at 36-month visit\")\n",
"    .cols_label(column_headers_with_str)\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
"    .cols_width(cases={\"label\": \"35%\"})\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
720
|
+
],
|
721
|
+
"id": "59be72121202df15",
|
722
|
+
"outputs": [],
|
723
|
+
"execution_count": null
|
724
|
+
},
|
725
|
+
{
|
726
|
+
"metadata": {},
|
727
|
+
"cell_type": "code",
|
728
|
+
"source": [
|
729
|
"# Table 6: Any OGTT ≥11.1 ever\n",
"# the filter is inclusive (>=11.1), so the title says ≥ like the other OGTT row labels\n",
"row_df = df_glucose[df_glucose.ogtt_value>=11.1].copy()\n",
"table_df = get_row_df(row_df, \"Total (n)\")\n",
"df_table6 = format_table_df(table_df)\n",
"# only the first (Total (n)) row is reported\n",
"df_table = df_table6[:1].fillna(0.0).copy().reset_index(drop=True)\n",
"gt = df_as_great_table(df_table, title=\"Table 6: Any OGTT ≥11.1 ever\")\n",
"gt = (\n",
"    gt\n",
"    .cols_label(column_headers_with_str)\n",
"    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_width(cases={\"label\": \"35%\"})\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
744
|
+
],
|
745
|
+
"id": "f016ddbe736c2f93",
|
746
|
+
"outputs": [],
|
747
|
+
"execution_count": null
|
748
|
+
},
|
749
|
+
{
|
750
|
+
"metadata": {},
|
751
|
+
"cell_type": "code",
|
752
|
+
"source": [
|
753
|
+
"# func for table 7\n",
|
754
|
"def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:\n",
"    \"\"\"Return the FBG band rows for one visit, headed by a `Total (n)` row.\n",
"\n",
"    Rows of `df_source` are kept when their visit_code lies in\n",
"    [visit_code, visit_code + 0.9]; each band is summarised with `get_row_df`.\n",
"    \"\"\"\n",
"    in_window = (df_source.visit_code>=visit_code) & (df_source.visit_code<=visit_code + 0.9)\n",
"    df_month = df_source[in_window].copy()\n",
"\n",
"    fbg = df_month.fbg_value\n",
"    # (label, selected rows) in display order; the first entry is the whole visit\n",
"    bands = [\n",
"        (\"Total (n)\", df_month),\n",
"        (\"FBG <6.1\", df_month[(fbg<6.1)]),\n",
"        (\"FBG >=6.1 <7.0\", df_month[(fbg>=6.1) & (fbg<7.0)]),\n",
"        (\"FBG >=7.0\", df_month[(fbg>=7.0)]),\n",
"    ]\n",
"    return pd.concat([get_row_df(band_df.copy(), label) for label, band_df in bands])"
|
769
|
+
],
|
770
|
+
"id": "6193907cc12f5b5c",
|
771
|
+
"outputs": [],
|
772
|
+
"execution_count": null
|
773
|
+
},
|
774
|
+
{
|
775
|
+
"metadata": {},
|
776
|
+
"cell_type": "code",
|
777
|
+
"source": [
|
778
|
"# Table 7: Interim FBG results\n",
"# one formatted block per interim visit, each tagged with its visit label\n",
"df_table7 = format_table_df(get_table7_df(df_glucose, 1150.0), add_totals=False).assign(visit_code=MONTH15)\n",
"df_table71 = format_table_df(get_table7_df(df_glucose, 1180.0), add_totals=False).assign(visit_code=MONTH18)\n",
"df_table72 = format_table_df(get_table7_df(df_glucose, 1210.0), add_totals=False).assign(visit_code=MONTH21)\n",
"df_table73 = format_table_df(get_table7_df(df_glucose, 1270.0), add_totals=False).assign(visit_code=MONTH27)\n",
"df_table74 = format_table_df(get_table7_df(df_glucose, 1300.0), add_totals=False).assign(visit_code=MONTH30)\n",
"df_table75 = format_table_df(get_table7_df(df_glucose, 1330.0), add_totals=False).assign(visit_code=MONTH33)\n",
"df_table76 = format_table_df(get_table7_df(df_glucose, 1390.0), add_totals=False).assign(visit_code=MONTH39)\n",
"\n",
"# stack the visits into a single frame\n",
"df_table = (\n",
"    pd.concat([df_table7, df_table71, df_table72, df_table73, df_table74, df_table75, df_table76])\n",
"    .reset_index(drop=True)\n",
"    .fillna(0.0)\n",
")"
|
810
|
+
],
|
811
|
+
"id": "16adb1f965081358",
|
812
|
+
"outputs": [],
|
813
|
+
"execution_count": null
|
814
|
+
},
|
815
|
+
{
|
816
|
+
"metadata": {},
|
817
|
+
"cell_type": "code",
|
818
|
+
"source": [
|
819
|
"column_headers_with_str = {\"visit_code\": \"Visit Code\", **column_headers_with_str}\n",
"# render the stacked interim tables; row-group headers get bold black text on light gray\n",
"gt = (\n",
"    df_as_great_table2(df_table, title=\"Table 7: Interim FBG results\")\n",
"    .cols_label(column_headers_with_str)\n",
"    .cols_move_to_start(columns=\"visit_code\")\n",
"    .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
"    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
"    .cols_width(cases={\"label\": \"15%\"})\n",
"    .tab_style(\n",
"        style=[\n",
"            style.text(color=\"black\", weight=\"bold\"),\n",
"            style.fill(color=\"lightgray\"),\n",
"        ],\n",
"        locations=loc.row_groups(),\n",
"    )\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
838
|
+
],
|
839
|
+
"id": "81bcfe52d364b646",
|
840
|
+
"outputs": [],
|
841
|
+
"execution_count": null
|
842
|
+
},
|
843
|
+
{
|
844
|
+
"metadata": {},
|
845
|
+
"cell_type": "code",
|
846
|
+
"source": [
|
847
|
"# Table 8: Primary Endpoint met\n",
"sites = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
"df_endpoint_grp = df_endpoint.groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
"df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
"df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
"# align columns by site name rather than by position, so a site with no endpoints\n",
"# becomes an empty column instead of breaking the column assignment\n",
"df_endpoint_pivot.columns = [str(col) for col in df_endpoint_pivot.columns]\n",
"df_endpoint_pivot = df_endpoint_pivot.reindex(columns=[\"label\", *sites])\n",
"# append the per-site totals row, then the per-row total column\n",
"df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[sites].sum().to_dict()\n",
"df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
"df_endpoint_pivot['total'] = df_endpoint_pivot[sites].sum(axis=1)\n",
"df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
"\n",
"gt = df_as_great_table(\n",
"    df_endpoint_pivot,\n",
"    title=\"Table 8: Primary Endpoint met\"\n",
")\n",
"gt = (\n",
"    gt\n",
"    .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
"    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_width(cases={\"label\": \"25%\"})\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
871
|
+
],
|
872
|
+
"id": "37904c7ce49724e6",
|
873
|
+
"outputs": [],
|
874
|
+
"execution_count": null
|
875
|
+
},
|
876
|
+
{
|
877
|
+
"metadata": {},
|
878
|
+
"cell_type": "code",
|
879
|
+
"source": "#read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\"), verbose=False).rename(columns={\"site\": \"site_id\"})",
|
880
|
+
"id": "562a1cc911a71255",
|
881
|
+
"outputs": [],
|
882
|
+
"execution_count": null
|
883
|
+
},
|
884
|
+
{
|
885
|
+
"metadata": {},
|
886
|
+
"cell_type": "code",
|
887
|
+
"source": [
|
888
|
"from great_tables import html\n",
"\n",
"# Table 8a: Primary Endpoint no EOS or DM Referral\n",
"sites = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
"df_subjecthistory = read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\", offschedule_datetime__isnull=False), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
"df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
"df_endpoint_no_off = df_endpoint.merge(df_subjecthistory[[\"subject_identifier\", \"offschedule_datetime\"]], on=[\"subject_identifier\"], how=\"left\")\n",
"# endpoint rows with no matching offschedule datetime; reused for the counts and the source note\n",
"df_still_on = df_endpoint_no_off.query(\"offschedule_datetime.isna()\")\n",
"df_endpoint_grp = df_still_on.groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
"df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
"df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
"# align columns by site name: a positional rename after appending missing sites\n",
"# would put counts under the wrong site\n",
"df_endpoint_pivot.columns = [str(col) for col in df_endpoint_pivot.columns]\n",
"df_endpoint_pivot = df_endpoint_pivot.reindex(columns=[\"label\", *sites])\n",
"df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[sites].sum().to_dict()\n",
"df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
"df_endpoint_pivot['total'] = df_endpoint_pivot[sites].sum(axis=1)\n",
"df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
"subjects = df_still_on.subject_identifier.to_list()\n",
"\n",
"gt = df_as_great_table(\n",
"    df_endpoint_pivot,\n",
"    title=\"Table 8a: Primary Endpoint met -- participant not referred\"\n",
")\n",
"gt = (\n",
"    gt\n",
"    .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
"    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_width(cases={\"label\": \"25%\"})\n",
"    # list the counted subject identifiers under the table, one per line\n",
"    .tab_source_note(source_note=html(\"<BR>\".join(subjects)))\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
922
|
+
],
|
923
|
+
"id": "a74cd253ec5827f9",
|
924
|
+
"outputs": [],
|
925
|
+
"execution_count": null
|
926
|
+
},
|
927
|
+
{
|
928
|
+
"metadata": {},
|
929
|
+
"cell_type": "code",
|
930
|
+
"source": "",
|
931
|
+
"id": "4df949cb48b088d7",
|
932
|
+
"outputs": [],
|
933
|
+
"execution_count": null
|
934
|
+
},
|
935
|
+
{
|
936
|
+
"metadata": {},
|
937
|
+
"cell_type": "code",
|
938
|
+
"source": [
|
939
|
+
"# Table 9: Incident Rate per 1000 person years\n",
|
940
|
+
"\n",
|
941
|
"def get_df_main(df_visit:pd.DataFrame, lower_days:float|None=None, upper_days:float|None=None):\n",
"    \"\"\"Build one row per subject for a follow-up window and count endpoints inside it.\n",
"\n",
"    Visits are kept when lower_days < followup_days <= upper_days, are not\n",
"    `missed`, and have visit_code < 2000.0. Subjects whose end-of-study record\n",
"    falls in the window with the late-exclusion reason are dropped. Endpoint\n",
"    columns are left-joined from the module-level `df_endpoint`.\n",
"\n",
"    :param df_visit: visit rows with subject_identifier, baseline_datetime,\n",
"        visit_datetime, followup_days, reason and visit_code columns.\n",
"    :param lower_days: exclusive lower bound; a falsy value (None or 0) becomes -1.\n",
"    :param upper_days: inclusive upper bound; None means no upper bound.\n",
"    :return: tuple of (df_main, number of rows in df_main, number of rows with\n",
"        an endpoint whose days_to_endpoint lies inside the window).\n",
"    \"\"\"\n",
"    if not lower_days:\n",
"        lower_days = -1\n",
"    if upper_days is None:\n",
"        # keep the window open-ended instead of comparing against None in query()\n",
"        upper_days = float(\"inf\")\n",
"    # exclude subjects for this reason\n",
"    offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
"\n",
"    df_eos = get_eos_df()\n",
"    df_eos_excluded = (\n",
"        df_eos\n",
"        .query(\"followup_days>@lower_days and followup_days<=@upper_days and offstudy_reason.isin(@offstudy_reasons)\")\n",
"        .copy()\n",
"        .reset_index()\n",
"    )\n",
"    # anti-join: keep only visits of subjects that are NOT in the excluded set\n",
"    df_visit_final = (\n",
"        df_visit.query(\"@lower_days<followup_days<=@upper_days and reason!='missed' and visit_code<2000.0\")\n",
"        .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", suffixes=(\"\", \"_y\"), indicator=True)\n",
"        .query(\"_merge=='left_only'\")\n",
"        .drop(columns=[\"_merge\"])\n",
"    )\n",
"    # per subject, the maximum of each column within the window\n",
"    df_main = (\n",
"        df_visit_final\n",
"        .groupby(by=[\"subject_identifier\"])[[\"baseline_datetime\", \"visit_datetime\", \"followup_days\"]]\n",
"        .max()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"    df_main = (\n",
"        df_main\n",
"        .merge(\n",
"            df_endpoint.query(\"days_to_endpoint>@lower_days\")[[\"subject_identifier\", \"endpoint_label\", \"endpoint_type\", \"days_to_endpoint\"]],\n",
"            how=\"left\",\n",
"            on=[\"subject_identifier\"])\n",
"        .reset_index(drop=True)\n",
"    )\n",
"    if lower_days>=365.25:\n",
"        # later windows only count the days accrued after the window opens\n",
"        df_main[\"followup_days\"] = df_main[\"followup_days\"] - lower_days\n",
"    df_main[\"followup_years\"] = df_main[\"followup_days\"]/365.25\n",
"    return df_main, len(df_main), len(df_main.query(\"@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()\"))\n",
|
980
|
+
"\n",
|
981
|
"def get_rate_and_ci(events, person_years_total):\n",
"    \"\"\"Return (rate, lower_ci, upper_ci) per 1000 person-years.\n",
"\n",
"    The limits are the 2.5% and 97.5% chi-square quantiles with 2*events and\n",
"    2*(events+1) degrees of freedom, divided by twice the person-years.\n",
"\n",
"    :param events: number of events in the period.\n",
"    :param person_years_total: total person-years of follow-up.\n",
"    :return: three floats; all NaN when person_years_total is not positive.\n",
"    \"\"\"\n",
"    if not person_years_total > 0:\n",
"        # no follow-up time: a rate is undefined, so avoid dividing by zero\n",
"        nan = float(\"nan\")\n",
"        return nan, nan, nan\n",
"    if events > 0:\n",
"        lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000\n",
"    else:\n",
"        # chi2.ppf is NaN for zero degrees of freedom; the lower limit for zero events is 0\n",
"        lower_ci = 0.0\n",
"    upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000\n",
"    return events/person_years_total*1000, lower_ci, upper_ci\n",
|
985
|
+
"\n",
|
986
|
"def get_incidence_data(term:str, lower_days:float, upper_days:float):\n",
"    \"\"\"Return `{term: [person_years, subjects, events, rate, lower_ci, upper_ci]}`.\n",
"\n",
"    Uses the module-level `df_visit` restricted to the given follow-up window.\n",
"    \"\"\"\n",
"    df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)\n",
"    person_years_total = df_main.followup_years.sum()\n",
"    rate, lower_ci, upper_ci = get_rate_and_ci(events, person_years_total)\n",
"    return {term: [person_years_total, subjects, events, rate, lower_ci, upper_ci]}"
|
992
|
+
],
|
993
|
+
"id": "920db81ad440edab",
|
994
|
+
"outputs": [],
|
995
|
+
"execution_count": null
|
996
|
+
},
|
997
|
+
{
|
998
|
+
"metadata": {},
|
999
|
+
"cell_type": "code",
|
1000
|
+
"source": [
|
1001
|
"# incidence per follow-up window, keyed by the row label\n",
"incidence_data = {\n",
"    **get_incidence_data(\"total\", lower_days=-1, upper_days=10000),\n",
"    **get_incidence_data(\"0-1 years\", lower_days=-1, upper_days=365.25),\n",
"    **get_incidence_data(\"1-2 years\", lower_days=365.25, upper_days=2 * 365.25),\n",
"    **get_incidence_data(\"2-3 years\", lower_days=2 * 365.25, upper_days=3 * 365.25),\n",
"    **get_incidence_data(\"3+ years\", lower_days=3 * 365.25, upper_days=10 * 365.25),\n",
"}\n",
"\n",
"# transpose the per-window value lists into one list per output column\n",
"value_fields = [\"person_years\", \"subjects\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"]\n",
"data = dict(label=list(incidence_data))\n",
"for position, field in enumerate(value_fields):\n",
"    data[field] = [values[position] for values in incidence_data.values()]\n",
"\n",
"# Table 9 leaves out the subject counts\n",
"df_table9 = pd.DataFrame(data={k:v for k,v in data.items() if k!=\"subjects\"})"
|
1020
|
+
],
|
1021
|
+
"id": "44651e865641b75d",
|
1022
|
+
"outputs": [],
|
1023
|
+
"execution_count": null
|
1024
|
+
},
|
1025
|
+
{
|
1026
|
+
"metadata": {},
|
1027
|
+
"cell_type": "code",
|
1028
|
+
"source": [
|
1029
|
"# render Table 9 with the two confidence limits grouped under one spanner\n",
"numeric_columns = [\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"]\n",
"gt = (\n",
"    df_as_great_table(\n",
"        df_table9,\n",
"        title=\"Table 9: Incident Rate per 1000 person years\",\n",
"        subtitle=md(\"using randomisation to diabetes/last seen\"),\n",
"    )\n",
"    .fmt_number(columns=numeric_columns, decimals=2)\n",
"    .cols_label({\"label\": \"Label\", \"person_years\": \"Person years\", \"failures\": \"Failures\", \"rate\": \"Rate\", \"lower_ci\": \"Lower\", \"upper_ci\": \"Upper\"})\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_align(align=\"center\", columns=numeric_columns)\n",
"    .tab_spanner(\n",
"        label=\"95%CI\",\n",
"        columns=[\"lower_ci\", \"upper_ci\"],\n",
"    )\n",
"    .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
")\n",
"gt.show()\n",
"html_data.append(gt.as_raw_html())"
|
1047
|
+
],
|
1048
|
+
"id": "da4e67d83522768a",
|
1049
|
+
"outputs": [],
|
1050
|
+
"execution_count": null
|
1051
|
+
},
|
1052
|
+
{
|
1053
|
+
"metadata": {},
|
1054
|
+
"cell_type": "code",
|
1055
|
+
"source": [
|
1056
|
"# Table 10: Proportion meeting primary endpoint\n",
"# percentage of subjects in each window that reached the endpoint\n",
"df_table10 = pd.DataFrame(data=data).assign(\n",
"    proportion=lambda frame: frame[\"failures\"]/frame[\"subjects\"]*100\n",
")\n",
"gt = (\n",
"    df_as_great_table(\n",
"        df_table10[[\"label\", \"subjects\", 'failures', \"proportion\"]],\n",
"        title=\"Table 10: Proportion meeting primary endpoint\",\n",
"    )\n",
"    .fmt_number(columns=[\"failures\", \"proportion\"], decimals=2)\n",
"    .cols_label({\"label\": \"Label\", \"subjects\": \"Participants\", \"failures\": \"Failures\", \"proportion\": \"%\"})\n",
"    .cols_align(align=\"left\", columns=[\"label\"])\n",
"    .cols_align(align=\"center\", columns=[\"subjects\", \"failures\", \"proportion\"])\n",
"    .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()\n"
|
1073
|
+
],
|
1074
|
+
"id": "2376a30803fbc743",
|
1075
|
+
"outputs": [],
|
1076
|
+
"execution_count": null
|
1077
|
+
},
|
1078
|
+
{
|
1079
|
+
"metadata": {},
|
1080
|
+
"cell_type": "code",
|
1081
|
+
"source": "",
|
1082
|
+
"id": "afc730c0bd9d03aa",
|
1083
|
+
"outputs": [],
|
1084
|
+
"execution_count": null
|
1085
|
+
},
|
1086
|
+
{
|
1087
|
+
"metadata": {},
|
1088
|
+
"cell_type": "code",
|
1089
|
+
"source": [
|
1090
|
"# Table 11a: End of Study Table (for those who have completed an end of study form)\n",
"sites = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
"df_eos = get_eos_df()\n",
"# long off-study reason text -> short label shown in the table\n",
"offstudy_reasons = {\n",
"    \"Delivered / Completed followup from pregnancy\": \"Pregnancy\",\n",
"    \"Patient completed 36 months of follow-up\": \"Completed 36m\",\n",
"    \"Patient developed diabetes\": \"Developed diabetes\",\n",
"    \"Other reason (specify below)\": \"Other\",\n",
"    \"Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment\": \"Late exclusion\",\n",
"    \"Patient has been transferred to another health centre\": \"Transferred out\",\n",
"    \"Patient is withdrawn on CLINICAL grounds ...\": \"Withdrawal: Clinical grounds\",\n",
"    \"Patient lost to follow-up\": \"LTFU\",\n",
"    \"Patient reported/known to have died\": \"Died\",\n",
"    \"Patient withdrew consent to participate further\": \"Withdrawal: Consent\",\n",
"}\n",
"df_eos[\"offstudy_reason\"] = df_eos[\"offstudy_reason\"].map(offstudy_reasons)\n",
"df_eos[\"offstudy_reason\"] = pd.Categorical(df_eos[\"offstudy_reason\"], categories=sorted(offstudy_reasons.values()), ordered=True)\n",
"df_eos[\"site_id\"] = df_eos[\"site_id\"].astype(str)\n",
"df_eos_pivot = (\n",
"    df_eos\n",
"    .groupby(by=[\"offstudy_reason\", \"site_id\"],observed=True)\n",
"    .size()\n",
"    .reset_index()\n",
"    .pivot_table(index=\"offstudy_reason\", columns=\"site_id\", values=0, observed=True)\n",
"    .fillna(0)\n",
"    .astype(int)\n",
"    .reset_index()\n",
")\n",
"df_eos_pivot[\"total\"] = df_eos_pivot[sites].sum(axis=1)\n",
"df_eos_pivot.columns.name=\"\"\n",
"# select every numeric column: the integer width produced by astype(int) is\n",
"# platform dependent, so matching 'int64' alone can leave the totals row empty\n",
"sum_row = df_eos_pivot.select_dtypes(include='number').sum()\n",
"sum_row['offstudy_reason'] = 'Total'\n",
"sum_row_df = pd.DataFrame(sum_row).T\n",
"enrolled_pivot[\"offstudy_reason\"] = \"Enrolled\"\n",
"enrolled_pivot = enrolled_pivot[[*df_eos_pivot.columns]]\n",
"# first row: enrolled counts; last row: totals over the off-study reasons\n",
"df_eos_pivot = pd.concat([enrolled_pivot, df_eos_pivot, sum_row_df], ignore_index=True)\n",
"\n",
"gt = df_as_great_table(\n",
"    df_eos_pivot,\n",
"    title=\"Table 11a: End of study report\",\n",
"    subtitle=md(\"for those who have completed an End of study report\"),\n",
")\n",
"gt = (\n",
"    gt\n",
"    .cols_label({\"offstudy_reason\": \"Reason\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
"    .cols_align(align=\"left\", columns=[\"offstudy_reason\"])\n",
"    .cols_align(align=\"center\", columns=[*sites, \"total\"])\n",
"    .tab_style(\n",
"        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
"        locations=loc.body(\n",
"            columns=[0],\n",
"            rows=[len(df_eos_pivot)-1]),\n",
"    )\n",
"    .tab_style(\n",
"        style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
"        locations=loc.body(\n",
"            columns=sites,\n",
"            rows=[len(df_eos_pivot)-1],\n",
"        ),\n",
"    )\n",
"    .tab_style(\n",
"        style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
"        locations=loc.body(\n",
"            columns=[\"total\"],\n",
"            rows=[len(df_eos_pivot)-1],\n",
"        ),\n",
"    )\n",
"    .tab_style(\n",
"        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
"        locations=loc.body(\n",
"            columns=[\"offstudy_reason\"],\n",
"            rows=[0],\n",
"        ),\n",
"    )\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()\n"
|
1166
|
+
],
|
1167
|
+
"id": "37dcd320411bd9c5",
|
1168
|
+
"outputs": [],
|
1169
|
+
"execution_count": null
|
1170
|
+
},
|
1171
|
+
{
|
1172
|
+
"metadata": {},
|
1173
|
+
"cell_type": "code",
|
1174
|
+
"source": "",
|
1175
|
+
"id": "9acdb7515d1490a3",
|
1176
|
+
"outputs": [],
|
1177
|
+
"execution_count": null
|
1178
|
+
},
|
1179
|
+
{
|
1180
|
+
"metadata": {},
|
1181
|
+
"cell_type": "code",
|
1182
|
+
"source": [
|
1183
|
+
"# Table 11b: Study status\n",
|
1184
|
"def get_schedule_df(df_subjecthistory:pd.DataFrame, onschedule_model:str, offschedule_model:str, mode:str)->pd.DataFrame:\n",
"    \"\"\"Count schedule-history rows per site for one on/off schedule model pair.\n",
"\n",
"    With mode `on` only rows without an offschedule_datetime are counted; any\n",
"    other mode counts rows that have one. The result has a `schedule` column\n",
"    and the site columns 10, 20, 30, 40, 60 renamed to `<site>_<mode>`.\n",
"    \"\"\"\n",
"    null_check = \"isna\" if mode == \"on\" else \"notna\"\n",
"    selection = f\"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{null_check}()\"\n",
"    renamed = {\"onschedule_model\": \"schedule\"}\n",
"    for site in [\"10\", \"20\", \"30\", \"40\", \"60\"]:\n",
"        renamed[site] = f\"{site}_{mode}\"\n",
"\n",
"    counts = (\n",
"        df_subjecthistory\n",
"        .query(selection)\n",
"        .groupby(by=[\"onschedule_model\", \"site_id\"])\n",
"        .size()\n",
"        .reset_index()\n",
"    )\n",
"    # one row per schedule, one column per site\n",
"    wide = counts.pivot_table(index=\"onschedule_model\", columns=\"site_id\", values=0, observed=True).reset_index()\n",
"    df_schedule = wide.rename(columns=renamed).fillna(0).copy()\n",
"    df_schedule.columns.name = \"\"\n",
"    return df_schedule\n",
|
1200
|
+
"\n",
|
1201
|
"df_subjecthistory = read_frame(SubjectScheduleHistory.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
"df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
"\n",
"sites = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
"# (onschedule model, offschedule model) pairs reported as table rows\n",
"schedule_models = [\n",
"    ('meta_prn.onschedule', 'meta_prn.offschedule'),\n",
"    ('meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral'),\n",
"    ('meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy'),\n",
"]\n",
"\n",
"df_on = (\n",
"    pd.concat([get_schedule_df(df_subjecthistory, on_model, off_model, \"on\") for on_model, off_model in schedule_models])\n",
"    .fillna(0)\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"df_off = (\n",
"    pd.concat([get_schedule_df(df_subjecthistory, on_model, off_model, \"off\") for on_model, off_model in schedule_models])\n",
"    .fillna(0)\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"# a schedule present on only one side of the outer merge would otherwise show NaN\n",
"df_status = pd.merge(df_on, df_off, on=[\"schedule\"], how=\"outer\").fillna(0)\n",
"columns = [f\"{site}_{mode}\" for site in sites for mode in (\"on\", \"off\")]\n",
"# copy so the total columns are added to an independent frame\n",
"df_status = df_status[[\"schedule\", *columns]].copy()\n",
"# match on the exact suffix rather than a substring\n",
"df_status[\"total_on\"] = df_status[[col for col in columns if col.endswith(\"_on\")]].sum(axis=1)\n",
"df_status[\"total_off\"] = df_status[[col for col in columns if col.endswith(\"_off\")]].sum(axis=1)\n",
"df_status[\"total\"] = df_status[columns].sum(axis=1)\n",
"df_status[\"schedule\"] = df_status.schedule.map({\"meta_prn.onschedule\": \"Main trial\", \"meta_prn.onscheduledmreferral\": \"Diabetes\", \"meta_prn.onschedulepregnancy\": \"Pregnancy\"})\n",
"\n",
"gt = df_as_great_table(\n",
"    df_status,\n",
"    title=\"Table 11b: Study status\",\n",
"    subtitle=md(\"Calculated from Offschedule form; not End of study report\"),\n",
")\n",
"gt = (gt\n",
"    .tab_source_note(\n",
"        source_note=(\n",
"            \"Note: Offschedule form is always submitted before the End of study report. \"\n",
"            \"When the Offschedule form is submitted, future appointments for the schedule are removed and \"\n",
"            \"the site staff are actioned to submit the End of study report.\"\n",
"        )\n",
"    )\n",
"    .cols_label({\n",
"        \"10_on\": \"On\", \"10_off\": \"Off\",\n",
"        \"20_on\": \"On\", \"20_off\": \"Off\",\n",
"        \"30_on\": \"On\", \"30_off\": \"Off\",\n",
"        \"40_on\": \"On\", \"40_off\": \"Off\",\n",
"        \"60_on\": \"On\", \"60_off\": \"Off\",\n",
"        \"total_on\": \"On\", \"total_off\": \"Off\",\n",
"        \"schedule\": \"Schedule\", \"total\": \"Total\"})\n",
"    .cols_align(align=\"center\")\n",
"    # the text column of this table is `schedule`; df_status has no `label` column\n",
"    .cols_align(align=\"left\", columns=[\"schedule\"])\n",
"    .tab_spanner(\n",
"        label=\"Hindu mandal\",\n",
"        columns=[\"10_on\", \"10_off\"],\n",
"    )\n",
"    .tab_spanner(\n",
"        label=\"Amana\",\n",
"        columns=[\"20_on\", \"20_off\"],\n",
"    )\n",
"    .tab_spanner(\n",
"        label=\"Temeke\",\n",
"        columns=[\"30_on\", \"30_off\"],\n",
"    )\n",
"    .tab_spanner(\n",
"        label=\"Mwananyamala\",\n",
"        columns=[\"40_on\", \"40_off\"],\n",
"    )\n",
"    .tab_spanner(\n",
"        label=\"Mnazi Moja\",\n",
"        columns=[\"60_on\", \"60_off\"],\n",
"    )\n",
"    .tab_spanner(\n",
"        label=\"Total\",\n",
"        columns=[\"total_on\", \"total_off\"],\n",
"    )\n",
"    # highlight the off-schedule counts in the first row only\n",
"    .tab_style(\n",
"        style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
"        locations=loc.body(\n",
"            columns=[\"10_off\", \"20_off\", \"30_off\", \"40_off\", \"60_off\"],\n",
"            rows=[0],\n",
"        ),\n",
"    )\n",
"    .tab_style(\n",
"        style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
"        locations=loc.body(\n",
"            columns=[\"total_off\"],\n",
"            rows=[0],\n",
"        ),\n",
"    )\n",
"    .fmt_number(columns=[*[c for c in df_status.columns if c not in [\"schedule\"]]], decimals=0)\n",
")\n",
"html_data.append(gt.as_raw_html())\n",
"gt.show()"
|
1303
|
+
],
|
1304
|
+
"id": "35840b9a7971e4cf",
|
1305
|
+
"outputs": [],
|
1306
|
+
"execution_count": null
|
1307
|
+
},
|
1308
|
+
{
|
1309
|
+
"metadata": {},
|
1310
|
+
"cell_type": "code",
|
1311
|
+
"source": [
|
1312
|
+
"# Table 12: Loss to Follow Up\n",
|
1313
|
+
"df_ltfu = read_frame(LossToFollowup.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
|
1314
|
+
"df_ltfu_pivot = (\n",
|
1315
|
+
" df_ltfu\n",
|
1316
|
+
" .groupby(by=[\"loss_category\", \"site_id\"],observed=True,dropna=False)\n",
|
1317
|
+
" .size()\n",
|
1318
|
+
" .reset_index()\n",
|
1319
|
+
" .pivot_table(index=\"loss_category\", columns=\"site_id\", values=0, observed=True,dropna=False)\n",
|
1320
|
+
" .fillna(0)\n",
|
1321
|
+
" .astype(int)\n",
|
1322
|
+
" .reset_index()\n",
|
1323
|
+
")\n",
|
1324
|
+
"# Row total is taken from this pivot's own site columns (everything except loss_category), not from df_eos_pivot\n",
"df_ltfu_pivot[\"total\"] = df_ltfu_pivot.drop(columns=[\"loss_category\"]).sum(axis=1)\n",
|
1325
|
+
"df_ltfu_pivot.columns.name=\"\"\n",
|
1326
|
+
"sum_row = df_ltfu_pivot.select_dtypes(include='int64').sum()\n",
|
1327
|
+
"sum_row['loss_category'] = 'Total'\n",
|
1328
|
+
"sum_row_df = pd.DataFrame(sum_row).T\n",
|
1329
|
+
"df_ltfu_pivot = pd.concat([df_ltfu_pivot, sum_row_df], ignore_index=True)\n",
|
1330
|
+
"df_ltfu_pivot\n"
|
1331
|
+
],
|
1332
|
+
"id": "534c51e7321e2ef3",
|
1333
|
+
"outputs": [],
|
1334
|
+
"execution_count": null
|
1335
|
+
},
|
1336
|
+
{
|
1337
|
+
"metadata": {},
|
1338
|
+
"cell_type": "code",
|
1339
|
+
"source": [
|
1340
|
+
"# Table 11c: End of study report not submitted\n",
|
1341
|
+
"\n",
|
1342
|
+
"df1 = (\n",
|
1343
|
+
" df_status\n",
|
1344
|
+
" .query(\"schedule=='Main trial'\")[[col for col in columns if \"off\" in col]]\n",
|
1345
|
+
" .rename(columns=dict(zip([col for col in columns if \"off\" in col], [\"10\", \"20\",\"30\",\"40\",\"60\"])))\n",
|
1346
|
+
" .reset_index(drop=True)\n",
|
1347
|
+
")\n",
|
1348
|
+
"df2 = (\n",
|
1349
|
+
" df_eos_pivot\n",
|
1350
|
+
" .query(\"offstudy_reason=='Total'\")[[\"10\", \"20\",\"30\",\"40\",\"60\"]]\n",
|
1351
|
+
" .reset_index(drop=True)\n",
|
1352
|
+
")\n",
|
1353
|
+
"\n",
|
1354
|
+
"df_eos_not_reported = df1-df2\n",
|
1355
|
+
"df_eos_not_reported[\"schedule\"] = 'Main trial'\n",
|
1356
|
+
"df_eos_not_reported[\"total\"] = df_eos_not_reported[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
|
1357
|
+
"df_eos_not_reported = df_eos_not_reported[[\"schedule\", \"10\", \"20\",\"30\",\"40\",\"60\", \"total\"]]\n",
|
1358
|
+
"\n",
|
1359
|
+
"gt = df_as_great_table(\n",
|
1360
|
+
" df_eos_not_reported,\n",
|
1361
|
+
" title=\"Table 11c: End of study report not submitted\",\n",
|
1362
|
+
" subtitle=md(\"End of study report expected based on Offschedule form\"),\n",
|
1363
|
+
")\n",
|
1364
|
+
"gt = (\n",
|
1365
|
+
" gt\n",
|
1366
|
+
" .cols_label({\"schedule\": \"Schedule\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
|
1367
|
+
" .cols_align(align=\"left\", columns=[\"schedule\"])\n",
|
1368
|
+
" .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
|
1369
|
+
" .tab_style(\n",
|
1370
|
+
" style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
|
1371
|
+
" locations=loc.body(\n",
|
1372
|
+
" columns=[0],\n",
|
1373
|
+
" rows=[len(df_eos_not_reported)-1]),\n",
|
1374
|
+
" )\n",
|
1375
|
+
" .tab_style(\n",
|
1376
|
+
" style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
|
1377
|
+
" locations=loc.body(\n",
|
1378
|
+
" columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
|
1379
|
+
" rows=[len(df_eos_not_reported)-1],\n",
|
1380
|
+
" ),\n",
|
1381
|
+
" )\n",
|
1382
|
+
" .tab_style(\n",
|
1383
|
+
" style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
|
1384
|
+
" locations=loc.body(\n",
|
1385
|
+
" columns=[\"total\"],\n",
|
1386
|
+
" rows=[len(df_eos_not_reported)-1],\n",
|
1387
|
+
" ),\n",
|
1388
|
+
" )\n",
|
1389
|
+
")\n",
|
1390
|
+
"html_data.append(gt.as_raw_html())\n",
|
1391
|
+
"gt.show()\n"
|
1392
|
+
],
|
1393
|
+
"id": "25d05831ef76f267",
|
1394
|
+
"outputs": [],
|
1395
|
+
"execution_count": null
|
1396
|
+
},
|
1397
|
+
{
|
1398
|
+
"metadata": {},
|
1399
|
+
"cell_type": "code",
|
1400
|
+
"source": "# Table 13: Baseline Sample",
|
1401
|
+
"id": "b607a436749cc3b2",
|
1402
|
+
"outputs": [],
|
1403
|
+
"execution_count": null
|
1404
|
+
},
|
1405
|
+
{
|
1406
|
+
"metadata": {},
|
1407
|
+
"cell_type": "code",
|
1408
|
+
"source": [
|
1409
|
+
"# Table 15: Consented to extended followup\n",
|
1410
|
+
"df_consented = (\n",
|
1411
|
+
" read_frame(SubjectConsentV1Ext.objects.all(), verbose=False)\n",
|
1412
|
+
" .query(\"agrees_to_extension==@YES\")\n",
|
1413
|
+
" .rename(columns={\"site\": \"site_id\"})\n",
|
1414
|
+
")\n",
|
1415
|
+
"df_consented[\"site_id\"] = df_consented.site_id.astype(str)\n",
|
1416
|
+
"df_consented[\"month\"] = df_consented.report_datetime.dt.strftime(\"%m\")\n",
|
1417
|
+
"df_consented[\"year\"] = df_consented.report_datetime.dt.strftime(\"%Y\")\n",
|
1418
|
+
"df_consented_grp = (\n",
|
1419
|
+
" df_consented.groupby(by=[\"site_id\", \"year\", \"month\"]).\n",
|
1420
|
+
" size()\n",
|
1421
|
+
" .reset_index()\n",
|
1422
|
+
" .sort_values(by=[\"site_id\", \"year\", \"month\"], ascending=True)\n",
|
1423
|
+
" .reset_index(drop=True)\n",
|
1424
|
+
")\n",
|
1425
|
+
"df_consented_pivot = (\n",
|
1426
|
+
" df_consented_grp\n",
|
1427
|
+
" .pivot_table(index=[\"year\", \"month\"], columns=\"site_id\", values=0, aggfunc=\"sum\")\n",
|
1428
|
+
" .reset_index()\n",
|
1429
|
+
" .fillna(0)\n",
|
1430
|
+
")\n",
|
1431
|
+
"if \"60\" not in df_consented_pivot.columns:\n",
|
1432
|
+
" df_consented_pivot[\"60\"] = 0.0\n",
|
1433
|
+
"df_consented_pivot.columns.name=\"\"\n",
|
1434
|
+
"df_consented_pivot[\"year\"] = df_consented_pivot[\"year\"].astype(str)\n",
|
1435
|
+
"df_consented_pivot[\"month\"] = df_consented_pivot[\"month\"].astype(str)\n",
|
1436
|
+
"\n",
|
1437
|
+
"sum_row = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum()\n",
|
1438
|
+
"sum_row['year'] = \"Total\"\n",
|
1439
|
+
"sum_row['month'] = \"\"\n",
|
1440
|
+
"df_consented_pivot = pd.concat([df_consented_pivot, sum_row.to_frame().T], ignore_index=True)\n",
|
1441
|
+
"df_consented_pivot[\"total\"] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1).astype(int)\n",
|
1442
|
+
"df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].astype(int)\n",
|
1443
|
+
"gt = df_as_great_table2(\n",
|
1444
|
+
" df_consented_pivot,\n",
|
1445
|
+
" title=\"Table 15: Consented to extended followup\",\n",
|
1446
|
+
" rowname_col=\"month\",\n",
|
1447
|
+
" groupname_col=\"year\",\n",
|
1448
|
+
")\n",
|
1449
|
+
"gt = (\n",
|
1450
|
+
" gt\n",
|
1451
|
+
" .cols_label({\"year\": \"Year\", \"month\": \"Month\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
|
1452
|
+
" .cols_align(align=\"center\")\n",
|
1453
|
+
" .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
|
1454
|
+
" .tab_stubhead(label=\"Consented\")\n",
|
1455
|
+
" .tab_style(\n",
|
1456
|
+
" style=[\n",
|
1457
|
+
" style.text(color=\"black\", weight=\"bold\"),\n",
|
1458
|
+
" style.fill(color=\"lightgray\")\n",
|
1459
|
+
" ],\n",
|
1460
|
+
" locations=loc.row_groups()\n",
|
1461
|
+
" )\n",
|
1462
|
+
")\n",
|
1463
|
+
"html_data.append(gt.as_raw_html())\n",
|
1464
|
+
"gt.show()"
|
1465
|
+
],
|
1466
|
+
"id": "8acd2dd7e5a958e9",
|
1467
|
+
"outputs": [],
|
1468
|
+
"execution_count": null
|
1469
|
+
},
|
1470
|
+
{
|
1471
|
+
"metadata": {},
|
1472
|
+
"cell_type": "code",
|
1473
|
+
"source": "",
|
1474
|
+
"id": "45ee71bc4a06f8f7",
|
1475
|
+
"outputs": [],
|
1476
|
+
"execution_count": null
|
1477
|
+
},
|
1478
|
+
{
|
1479
|
+
"metadata": {},
|
1480
|
+
"cell_type": "code",
|
1481
|
+
"source": [
|
1482
|
+
"# gather raw html\n",
|
1483
|
+
"raw_html = [f'<div class=\"page-break\">{s}</div>' for s in html_data]\n",
|
1484
|
+
"style_css = \"\"\"\n",
|
1485
|
+
"<style>\n",
|
1486
|
+
" .page-break {\n",
|
1487
|
+
" page-break-inside: avoid; /* Avoid page breaks inside this element */\n",
|
1488
|
+
" }\n",
|
1489
|
+
" .table-header {\n",
|
1490
|
+
" font-weight: bold;\n",
|
1491
|
+
" font-size: 18px;\n",
|
1492
|
+
" text-align: center;\n",
|
1493
|
+
" border-bottom: None;\n",
|
1494
|
+
" }\n",
|
1495
|
+
"</style>\n",
|
1496
|
+
"\"\"\"\n",
|
1497
|
+
"raw_html = ''.join(raw_html)\n",
|
1498
|
+
"# The <style> block is emitted inside <head>, where HTML expects it\n",
"raw_html = f'<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n<meta charset=\"utf-8\"/>\\n{style_css}\\n</head>\\n<body>\\n' + document_title + raw_html + '\\n</body>\\n</html>\\n'"
|
1499
|
+
],
|
1500
|
+
"id": "a38e9d7ba59d063b",
|
1501
|
+
"outputs": [],
|
1502
|
+
"execution_count": null
|
1503
|
+
},
|
1504
|
+
{
|
1505
|
+
"metadata": {},
|
1506
|
+
"cell_type": "code",
|
1507
|
+
"source": [
|
1508
|
+
"# render html to PDF\n",
|
1509
|
+
"pdfkit.from_string(raw_html, str(analysis_folder / pdf_filename),\n",
|
1510
|
+
"options={\n",
|
1511
|
+
" 'footer-center': 'Page [page] of [topage]',\n",
|
1512
|
+
" 'footer-font-size': '8',\n",
|
1513
|
+
" 'footer-spacing': '5',\n",
|
1514
|
+
" 'encoding': \"UTF-8\",\n",
|
1515
|
+
" 'margin-top':'10mm',\n",
|
1516
|
+
" 'margin-right':'15mm',\n",
|
1517
|
+
" 'margin-bottom':'15mm',\n",
|
1518
|
+
" 'margin-left':'15mm',\n",
|
1519
|
+
" 'header-center': study_title,\n",
|
1520
|
+
" 'header-font-size': '6',\n",
|
1521
|
+
" 'header-spacing': '0',\n",
|
1522
|
+
" 'disable-javascript': None,\n",
|
1523
|
+
" 'no-outline': None,\n",
|
1524
|
+
"},\n",
|
1525
|
+
"verbose=True)"
|
1526
|
+
],
|
1527
|
+
"id": "792243aad557cc86",
|
1528
|
+
"outputs": [],
|
1529
|
+
"execution_count": null
|
1530
|
+
},
|
1531
|
+
{
|
1532
|
+
"metadata": {},
|
1533
|
+
"cell_type": "code",
|
1534
|
+
"source": "",
|
1535
|
+
"id": "4cfdfa6f69c3916a",
|
1536
|
+
"outputs": [],
|
1537
|
+
"execution_count": null
|
1538
|
+
}
|
1539
|
+
],
|
1540
|
+
"metadata": {
|
1541
|
+
"kernelspec": {
|
1542
|
+
"display_name": "Python 3 (ipykernel)",
|
1543
|
+
"language": "python",
|
1544
|
+
"name": "python3"
|
1545
|
+
},
|
1546
|
+
"language_info": {
|
1547
|
+
"codemirror_mode": {
|
1548
|
+
"name": "ipython",
|
1549
|
+
"version": 3
|
1550
|
+
},
|
1551
|
+
"file_extension": ".py",
|
1552
|
+
"mimetype": "text/x-python",
|
1553
|
+
"name": "python",
|
1554
|
+
"nbconvert_exporter": "python",
|
1555
|
+
"pygments_lexer": "ipython3",
|
1556
|
+
"version": "3.12.4"
|
1557
|
+
}
|
1558
|
+
},
|
1559
|
+
"nbformat": 4,
|
1560
|
+
"nbformat_minor": 5
|
1561
|
+
}
|