meta-edc 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. meta_analytics/dataframes/__init__.py +3 -0
  2. meta_analytics/dataframes/constants.py +1 -1
  3. meta_analytics/dataframes/enrolled/__init__.py +0 -1
  4. meta_analytics/dataframes/get_eos_df.py +15 -2
  5. meta_analytics/dataframes/get_glucose_df.py +149 -0
  6. meta_analytics/dataframes/get_glucose_fbg_df.py +27 -0
  7. meta_analytics/dataframes/get_glucose_fbg_ogtt_df.py +22 -0
  8. meta_analytics/dataframes/glucose_endpoints/endpoint_by_date.py +106 -120
  9. meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +36 -227
  10. meta_analytics/dataframes/utils.py +18 -4
  11. meta_analytics/notebooks/hiv_regimens.ipynb +425 -0
  12. meta_analytics/notebooks/monitoring_report.ipynb +1561 -0
  13. meta_analytics/notebooks/pharmacy.ipynb +971 -0
  14. meta_analytics/utils.py +81 -0
  15. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/METADATA +4 -3
  16. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/RECORD +32 -18
  17. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/WHEEL +1 -1
  18. meta_edc-1.0.7.dist-info/licenses/AUTHORS.rst +8 -0
  19. meta_reports/migrations/0054_auto_20250422_2003.py +81 -0
  20. meta_reports/migrations/0055_alter_glucosesummary_table.py +17 -0
  21. meta_reports/migrations/0056_auto_20250422_2214.py +54 -0
  22. meta_reports/migrations/0057_auto_20250422_2224.py +54 -0
  23. meta_reports/migrations/0058_auto_20250422_2232.py +54 -0
  24. meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +13 -1
  25. meta_reports/models/dbviews/glucose_summary/view_definition.py +8 -5
  26. meta_subject/form_validators/glucose_form_validator.py +16 -1
  27. meta_subject/forms/study_medication_form.py +5 -3
  28. meta_subject/migrations/0221_auto_20250402_1913.py +42 -0
  29. meta_subject/migrations/0222_alter_historicalstudymedication_stock_codes_and_more.py +46 -0
  30. meta_analytics/dataframes/enrolled/get_glucose_df.py +0 -122
  31. /meta_edc-1.0.6.dist-info/AUTHORS → /meta_analytics/dataframes/glucose_endpoints/utils.py +0 -0
  32. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info/licenses}/LICENSE +0 -0
  33. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1561 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "28e21930-b943-4a08-a79a-ff2712ae9215",
6
+ "metadata": {},
7
+ "source": [
8
+ "%%capture\n",
9
+ "import os\n",
10
+ "from pathlib import Path\n",
11
+ "import pandas as pd\n",
12
+ "from dj_notebook import activate\n",
13
+ "import numpy as np\n",
14
+ "from django_pandas.io import read_frame\n",
15
+ "\n",
16
+ "env_file = os.environ[\"META_ENV\"]\n",
17
+ "reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
18
+ "analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
19
+ "pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
20
+ "plus = activate(dotenv_file=env_file)\n",
21
+ "pd.set_option('future.no_silent_downcasting', True)"
22
+ ],
23
+ "outputs": [],
24
+ "execution_count": null
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "id": "52d4bb98-18a5-4528-be8d-e8370b1b5d1e",
29
+ "metadata": {},
30
+ "source": [
31
+ "\n",
32
+ "import pdfkit\n",
33
+ "from datetime import date\n",
34
+ "from edc_pdutils.dataframes import get_subject_visit\n",
35
+ "from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39\n",
36
+ "from meta_analytics.dataframes import GlucoseEndpointsByDate\n",
37
+ "from scipy.stats import chi2\n",
38
+ "from great_tables import loc, style, md\n",
39
+ "from meta_analytics.dataframes import get_eos_df\n",
40
+ "from meta_analytics.utils import df_as_great_table, df_as_great_table2\n",
41
+ "from meta_prn.models import LossToFollowup\n",
42
+ "from edc_visit_schedule.models import SubjectScheduleHistory\n",
43
+ "from edc_appointment.analytics import get_appointment_df\n",
44
+ "from edc_appointment.constants import NEW_APPT, CANCELLED_APPT, ONTIME_APPT, MISSED_APPT\n",
45
+ "from meta_consent.models import SubjectConsentV1Ext\n",
46
+ "from meta_analytics.dataframes import get_glucose_df\n",
47
+ "\n",
48
+ "from edc_appointment.constants import SCHEDULED_APPT, UNSCHEDULED_APPT # noqa\n",
49
+ "from edc_constants.constants import YES # noqa\n"
50
+ ],
51
+ "outputs": [],
52
+ "execution_count": null
53
+ },
54
+ {
55
+ "metadata": {},
56
+ "cell_type": "code",
57
+ "source": [
58
+ "html_data = []\n",
59
+ "cutoff_date = date(2025,3, 31)\n",
60
+ "end_of_trial_date= date(2026,3, 1)\n",
61
+ "document_title = f\"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2><h5>Data Download: {cutoff_date.strftime('%d %B %Y')}</h5>\"\n",
62
+ "study_title = 'META3 - Metformin treatment for diabetes prevention in Africa'\n",
63
+ "pdf_filename = f\"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf\"\n"
64
+ ],
65
+ "id": "b255fd34cd6f50c0",
66
+ "outputs": [],
67
+ "execution_count": null
68
+ },
69
+ {
70
+ "metadata": {},
71
+ "cell_type": "code",
72
+ "source": [
73
+ "\n",
74
+ "df_visit = get_subject_visit(\"meta_subject.subjectvisit\")\n",
75
+ "late_exlusion_offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
76
+ "df_eos = get_eos_df()\n",
77
+ "df_eos_excluded = (\n",
78
+ " df_eos\n",
79
+ " .query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")\n",
80
+ " .copy()\n",
81
+ " .reset_index()\n",
82
+ ")\n",
83
+ "df_visit = (\n",
84
+ " df_visit\n",
85
+ " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
86
+ " .query(\"_merge=='left_only'\")\n",
87
+ " .drop(columns=[\"_merge\"])\n",
88
+ ")\n",
89
+ "\n",
90
+ "df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]\n",
91
+ "\n",
92
+ "df_appointments = get_appointment_df()\n",
93
+ "df_appointments[\"site_id\"] = df_appointments.site_id.astype(str)\n",
94
+ "df_appointments = (\n",
95
+ " df_appointments\n",
96
+ " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
97
+ " .query(\"_merge=='left_only'\")\n",
98
+ " .drop(columns=[\"_merge\"])\n",
99
+ ")\n",
100
+ "\n",
101
+ "\n",
102
+ "cls = GlucoseEndpointsByDate()\n",
103
+ "cls.run()\n",
104
+ "df_endpoint = cls.endpoint_only_df.copy()\n",
105
+ "df_glucose = get_glucose_df()\n",
106
+ "# df_glucose_fbg = get_glucose_fbg_df()\n",
107
+ "# df_glucose = pd.concat([df_glucose, df_glucose_fbg])\n",
108
+ "\n",
109
+ "\n",
110
+ "enrolled = df_visit.copy()\n",
111
+ "enrolled[\"site_id\"] = enrolled[\"site_id\"].astype(str)\n",
112
+ "enrolled_pivot = (\n",
113
+ " enrolled\n",
114
+ " .query(\"visit_code==1000.0\").groupby([\"site_id\"])\n",
115
+ " .size()\n",
116
+ " .reset_index()\n",
117
+ " .pivot_table(columns=\"site_id\", values=0, observed=True)\n",
118
+ ")\n",
119
+ "enrolled_pivot.columns.name=\"\"\n",
120
+ "enrolled_pivot[\"total\"] = enrolled_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
121
+ "\n"
122
+ ],
123
+ "id": "215212f9d44e79df",
124
+ "outputs": [],
125
+ "execution_count": null
126
+ },
127
+ {
128
+ "metadata": {},
129
+ "cell_type": "code",
130
+ "source": [
131
+ "column_headers = {\"label\": \"Label\", \"visit_code\": \"Visit code\", \"10\": \"Hindu Mandal\", \"20\": \"Amana\", \"30\": \"Temeke\", \"40\": \"Mwananyamala\", \"60\": \"Mnazi Moja\", \"total\": \"Total\"}\n",
132
+ "column_headers_with_str = {\"label\": \"Label\", \"10_str\": \"Hindu Mandal\", \"20_str\": \"Amana\", \"30_str\": \"Temeke\", \"40_str\": \"Mwananyamala\", \"60_str\": \"Mnazi Moja\", \"total_str\": \"Total\"}"
133
+ ],
134
+ "id": "fe90271ff1799692",
135
+ "outputs": [],
136
+ "execution_count": null
137
+ },
138
+ {
139
+ "metadata": {},
140
+ "cell_type": "code",
141
+ "source": [
142
+ "# Table 1a Visits completed to date\n",
143
+ "\n",
144
+ "df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
145
+ "\n",
146
+ "df_tbl1.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
147
+ "df1 = df_tbl1.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
148
+ "df1.columns.name = None\n",
149
+ "df1.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
150
+ "df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)\n",
151
+ "df1.fillna(0, inplace=True)\n",
152
+ "df_attended = df1.copy().reset_index(drop=True)\n",
153
+ "df_attended = df_attended.fillna(0.0)"
154
+ ],
155
+ "id": "9e3d608809eea5",
156
+ "outputs": [],
157
+ "execution_count": null
158
+ },
159
+ {
160
+ "metadata": {},
161
+ "cell_type": "code",
162
+ "source": [
163
+ "gt = df_as_great_table(\n",
164
+ " df_attended[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]],\n",
165
+ " title=\"Table 1a: Visits completed to date\"\n",
166
+ ")\n",
167
+ "gt = (\n",
168
+ " gt\n",
169
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
170
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
171
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
172
+ " .data_color(\n",
173
+ " columns=[\"visit_code\"],\n",
174
+ " palette=[\"lavender\", \"thistle\"],\n",
175
+ " domain=[2000, 5000],\n",
176
+ " na_color=\"white\"\n",
177
+ " )\n",
178
+ " .tab_source_note(source_note=f\"Excludes visit reports submitted for participants eventually withdrawn on late exclusion criteria.\")\n",
179
+ ")\n",
180
+ "html_data.append(gt.as_raw_html())\n",
181
+ "gt.show()"
182
+ ],
183
+ "id": "a43c2fbd8a7a692c",
184
+ "outputs": [],
185
+ "execution_count": null
186
+ },
187
+ {
188
+ "metadata": {},
189
+ "cell_type": "code",
190
+ "source": [
191
+ "# Table 1b Total scheduled appointments\n",
192
+ "df_appt_pivot = (\n",
193
+ " df_appointments.query(\"appt_reason==@SCHEDULED_APPT\")\n",
194
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
195
+ " .query(\"_merge=='left_only'\")\n",
196
+ " .drop(columns=[\"_merge\"])\n",
197
+ " .reset_index(drop=True)\n",
198
+ " .groupby([\"visit_code\", \"site_id\"])\n",
199
+ " .size()\n",
200
+ " .to_frame()\n",
201
+ " .reset_index()\n",
202
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
203
+ " .reset_index()\n",
204
+ " .fillna(0)\n",
205
+ ")\n",
206
+ "\n",
207
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
208
+ "df_appt_pivot.columns.name = None\n",
209
+ "gt = df_as_great_table(\n",
210
+ " df_appt_pivot,\n",
211
+ " title=\"Table 1b: Total appointments\",\n",
212
+ " subtitle=\"Total possible appointments not including unscheduled appointments\"\n",
213
+ "\n",
214
+ ")\n",
215
+ "gt = (\n",
216
+ " gt\n",
217
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
218
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
219
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
220
+ " .data_color(\n",
221
+ " columns=[\"visit_code\"],\n",
222
+ " palette=[\"lavender\", \"thistle\"],\n",
223
+ " domain=[2000, 5000],\n",
224
+ " na_color=\"white\"\n",
225
+ " )\n",
226
+ ")\n",
227
+ "html_data.append(gt.as_raw_html())\n",
228
+ "gt.show()"
229
+ ],
230
+ "id": "70eb34a139ff7095",
231
+ "outputs": [],
232
+ "execution_count": null
233
+ },
234
+ {
235
+ "metadata": {},
236
+ "cell_type": "code",
237
+ "source": [
238
+ "# Table 1c Past scheduled appointments -- no information provided\n",
239
+ "df_appt_pivot = (\n",
240
+ " df_appointments.query(\"appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
241
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
242
+ " .query(\"_merge=='left_only'\")\n",
243
+ " .drop(columns=[\"_merge\"])\n",
244
+ " .reset_index(drop=True)\n",
245
+ " .groupby([\"visit_code\", \"site_id\"])\n",
246
+ " .size()\n",
247
+ " .to_frame()\n",
248
+ " .reset_index()\n",
249
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
250
+ " .reset_index()\n",
251
+ " .fillna(0)\n",
252
+ ")\n",
253
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
254
+ "df_appt_pivot.columns.name = None\n",
255
+ "gt = df_as_great_table(\n",
256
+ " df_appt_pivot,\n",
257
+ " title=\"Table 1c: Past appointments not attended/not reported\",\n",
258
+ " subtitle=\"Expected by now but no information provided by site\",\n",
259
+ ")\n",
260
+ "gt = (\n",
261
+ " gt\n",
262
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
263
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
264
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
265
+ " .data_color(\n",
266
+ " columns=[\"visit_code\"],\n",
267
+ " palette=[\"lavender\", \"thistle\"],\n",
268
+ " domain=[2000, 5000],\n",
269
+ " na_color=\"white\"\n",
270
+ " )\n",
271
+ " .tab_source_note(source_note=f\"Scheduled appointment date is before {cutoff_date.strftime('%d %B %Y')}.\")\n",
272
+ ")\n",
273
+ "html_data.append(gt.as_raw_html())\n",
274
+ "gt.show()"
275
+ ],
276
+ "id": "f243552177b216d7",
277
+ "outputs": [],
278
+ "execution_count": null
279
+ },
280
+ {
281
+ "metadata": {},
282
+ "cell_type": "code",
283
+ "source": [
284
+ "# Table 1d Unscheduled appointments\n",
285
+ "df_appt = (\n",
286
+ " df_appointments.query(\"appt_reason==@UNSCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status!=@NEW_APPT\")\n",
287
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
288
+ " .query(\"_merge=='left_only'\")\n",
289
+ " .drop(columns=[\"_merge\"])\n",
290
+ " .reset_index(drop=True)\n",
291
+ " .copy()\n",
292
+ " .reset_index(drop=True)\n",
293
+ ")\n",
294
+ "df_appt['visit_code'] = df_appt['visit_code'].astype(int)\n",
295
+ "df_appt['visit_code'] = df_appt['visit_code'].astype(str)\n",
296
+ "\n",
297
+ "subjects_with_unscheduled = df_appt.subject_identifier.nunique()\n",
298
+ "\n",
299
+ "df_appt_pivot = (\n",
300
+ " df_appt\n",
301
+ " .groupby([\"visit_code\", \"site_id\"])\n",
302
+ " .size()\n",
303
+ " .to_frame()\n",
304
+ " .reset_index()\n",
305
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
306
+ " .reset_index()\n",
307
+ " .fillna(0)\n",
308
+ ")\n",
309
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
310
+ "df_appt_pivot.columns.name = None\n",
311
+ "df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]] = df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].astype('float64')\n",
312
+ "\n",
313
+ "\n",
314
+ "# add totals row\n",
315
+ "sum_row = df_appt_pivot.select_dtypes(include='float64').sum()\n",
316
+ "sum_row['visit_code'] = 'Total'\n",
317
+ "sum_row_df = pd.DataFrame(sum_row).T\n",
318
+ "df_appt_pivot = pd.concat([df_appt_pivot, sum_row_df], axis=0).reset_index(drop=True)\n",
319
+ "\n",
320
+ "gt = df_as_great_table(\n",
321
+ " df_appt_pivot,\n",
322
+ " title=\"Table 1d: Unscheduled appointments\",\n",
323
+ " subtitle=\"Appointments with sequence>0 grouped by visit code\",\n",
324
+ ")\n",
325
+ "gt = (\n",
326
+ " gt\n",
327
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
328
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
329
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
330
+ " .data_color(\n",
331
+ " columns=[\"visit_code\"],\n",
332
+ " palette=[\"lavender\", \"thistle\"],\n",
333
+ " domain=[2000, 5000],\n",
334
+ " na_color=\"white\"\n",
335
+ " )\n",
336
+ " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
337
+ " .tab_source_note(source_note=f\"{subjects_with_unscheduled} participants had at least one unscheduled appointment.\")\n",
338
+ ")\n",
339
+ "html_data.append(gt.as_raw_html())\n",
340
+ "gt.show()"
341
+ ],
342
+ "id": "6e55569e322370a",
343
+ "outputs": [],
344
+ "execution_count": null
345
+ },
346
+ {
347
+ "metadata": {},
348
+ "cell_type": "code",
349
+ "source": [
350
+ "# Table 1e Future scheduled appointments\n",
351
+ "df_appt_pivot = (\n",
352
+ " df_appointments.query(\"@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
353
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
354
+ " .query(\"_merge=='left_only'\")\n",
355
+ " .drop(columns=[\"_merge\"])\n",
356
+ " .reset_index(drop=True)\n",
357
+ " .groupby([\"visit_code\", \"site_id\"])\n",
358
+ " .size()\n",
359
+ " .to_frame()\n",
360
+ " .reset_index()\n",
361
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
362
+ " .reset_index()\n",
363
+ " .fillna(0)\n",
364
+ ")\n",
365
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
366
+ "df_appt_pivot.columns.name = None\n",
367
+ "gt = df_as_great_table(\n",
368
+ " df_appt_pivot,\n",
369
+ " title=\"Table 1e: Future appointments\",\n",
370
+ ")\n",
371
+ "gt = (\n",
372
+ " gt\n",
373
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
374
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
375
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
376
+ " .data_color(\n",
377
+ " columns=[\"visit_code\"],\n",
378
+ " palette=[\"lavender\", \"thistle\"],\n",
379
+ " domain=[2000, 5000],\n",
380
+ " na_color=\"white\"\n",
381
+ " )\n",
382
+ " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
383
+ " .tab_source_note(source_note=f\"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.\")\n",
384
+ ")\n",
385
+ "html_data.append(gt.as_raw_html())\n",
386
+ "gt.show()"
387
+ ],
388
+ "id": "8193005de33cae6f",
389
+ "outputs": [],
390
+ "execution_count": null
391
+ },
392
+ {
393
+ "metadata": {},
394
+ "cell_type": "code",
395
+ "source": [
396
+ "# Table 2a: Visits missed to date as % of (visits attended + visits missed)\n",
397
+ "subject_count = (\n",
398
+ " df_visit\n",
399
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
400
+ " .query(\"_merge=='left_only'\")\n",
401
+ " .drop(columns=[\"_merge\"])\n",
402
+ " .reset_index(drop=True)\n",
403
+ " .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
404
+ ").subject_identifier.nunique()\n",
405
+ "df_tbl = (\n",
406
+ " df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
407
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
408
+ " .query(\"_merge=='left_only'\")\n",
409
+ " .drop(columns=[\"_merge\"])\n",
410
+ " .reset_index(drop=True)\n",
411
+ " .groupby(by=[\"visit_code\", \"site_id\"])\n",
412
+ " .size()\n",
413
+ " .to_frame()\n",
414
+ " .reset_index()\n",
415
+ ")\n",
416
+ "df_tbl.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
417
+ "df_tbl_pivot = df_tbl.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
418
+ "df_tbl_pivot.columns.name = None\n",
419
+ "df_tbl_pivot.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
420
+ "df_tbl_pivot['total'] = df_tbl_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
421
+ "df_missed = (\n",
422
+ " df_tbl_pivot\n",
423
+ " .fillna(0)\n",
424
+ " .copy()\n",
425
+ " .set_index([\"visit_code\"])\n",
426
+ ")\n",
427
+ "\n",
428
+ "df_attended_display = df_attended.copy()\n",
429
+ "df_attended_display = (\n",
430
+ " df_attended_display\n",
431
+ " .set_index([\"visit_code\"])\n",
432
+ ")\n",
433
+ "\n",
434
+ "attended_and_missed = df_attended_display + df_missed\n",
435
+ "attended_and_missed = (\n",
436
+ " attended_and_missed\n",
437
+ " .fillna(0)\n",
438
+ " .reset_index()\n",
439
+ " .set_index([\"visit_code\"])\n",
440
+ ")\n",
441
+ "\n",
442
+ "attended_and_missed_perc = df_missed/attended_and_missed\n",
443
+ "attended_and_missed_perc = (\n",
444
+ " attended_and_missed_perc\n",
445
+ " .fillna(0)\n",
446
+ " .reset_index()\n",
447
+ " .set_index([\"visit_code\"])\n",
448
+ ")\n",
449
+ "\n",
450
+ "df_result = df_missed.merge(attended_and_missed_perc, on=[\"visit_code\"], suffixes=(\"\", \"_perc\"))\n",
451
+ "for col in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
452
+ " col_perc = f\"{col}_perc\"\n",
453
+ " df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc]*100:.2f})\", axis=1)\n",
454
+ "df_result = df_result.reset_index().sort_values(by=[\"visit_code\"], ascending=True)\n",
455
+ "df_result = df_result.fillna(0.0)"
456
+ ],
457
+ "id": "c86c5f0ffe59e951",
458
+ "outputs": [],
459
+ "execution_count": null
460
+ },
461
+ {
462
+ "metadata": {},
463
+ "cell_type": "code",
464
+ "source": [
465
+ "df_table = df_result[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].copy()\n",
466
+ "gt = df_as_great_table(\n",
467
+ " df_table,\n",
468
+ " title=\"Table 2a: Visits Missed to Date\",\n",
469
+ " subtitle=\"as % of Visits Attended + Visits Missed\"\n",
470
+ ")\n",
471
+ "gt = (\n",
472
+ " gt\n",
473
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
474
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
475
+ " .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
476
+ " .tab_style(\n",
477
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
478
+ " locations=loc.body(\n",
479
+ " columns=[0],\n",
480
+ " rows=list(range(0, len(df_table))),\n",
481
+ " ),\n",
482
+ " )\n",
483
+ " .tab_source_note(source_note=f\"{subject_count} participants had at least one missed visit.\")\n",
484
+ "\n",
485
+ ")\n",
486
+ "html_data.append(gt.as_raw_html())\n",
487
+ "gt.show()\n"
488
+ ],
489
+ "id": "3cd8b1290091660c",
490
+ "outputs": [],
491
+ "execution_count": null
492
+ },
493
+ {
494
+ "metadata": {},
495
+ "cell_type": "code",
496
+ "source": [
497
+ "# Table 2b: Number of participants grouped by how many visits they missed\n",
498
+ "subject_count = (\n",
499
+ " df_visit\n",
500
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
501
+ " .query(\"_merge=='left_only'\")\n",
502
+ " .drop(columns=[\"_merge\"])\n",
503
+ " .reset_index(drop=True)\n",
504
+ " .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
505
+ ").subject_identifier.nunique()\n",
506
+ "df_tbl = (\n",
507
+ " df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
508
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
509
+ " .query(\"_merge=='left_only'\")\n",
510
+ " .drop(columns=[\"_merge\"])\n",
511
+ " .reset_index(drop=True)\n",
512
+ " .groupby(by=[\"subject_identifier\", \"site_id\"])\n",
513
+ " .size()\n",
514
+ " .to_frame()\n",
515
+ " .reset_index()\n",
516
+ ")\n",
517
+ "df_tbl.columns = [\"subject_identifier\", \"site_id\", \"missed_count\"]\n",
518
+ "df_tbl[\"category\"] = pd.cut(df_tbl[\"missed_count\"], bins=[0, 1, 3, 5, 7, 100], labels=[\"Missed exactly 1\", \"2 to 3\", \"4 to 5\", \"6 to 7\", \"missed more than 7\"])\n",
519
+ "df_tbl_pivot = df_tbl.pivot_table(index=\"category\", columns=\"site_id\", values=\"missed_count\", observed=False, aggfunc=\"count\").reset_index()\n",
520
+ "\n",
521
+ "df_tbl_pivot['total'] = df_tbl_pivot.select_dtypes(include='int').sum(axis=1, skipna=True)\n",
522
+ "\n",
523
+ "sum_row = df_tbl_pivot.select_dtypes(include='int64').sum()\n",
524
+ "sum_row['category'] = 'Total'\n",
525
+ "\n",
526
+ "\n",
527
+ "df_tbl_pivot = (\n",
528
+ " pd.concat([df_tbl_pivot, sum_row.to_frame().T], axis=0)\n",
529
+ " .rename(columns={10: \"10\", 20: \"20\", 30: \"30\", 40: \"40\", 60: \"60\"})\n",
530
+ ")\n",
531
+ "\n",
532
+ "gt = df_as_great_table(\n",
533
+ " df_tbl_pivot,\n",
534
+ " title=\"Table 2b: Number of participants who missed one or more visits\",\n",
535
+ ")\n",
536
+ "gt = (\n",
537
+ " gt\n",
538
+ " .cols_label({\"category\": \"Category\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
539
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
540
+ " .cols_align(align=\"left\", columns=[\"category\"])\n",
541
+ " .tab_style(\n",
542
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
543
+ " locations=loc.body(\n",
544
+ " columns=[0],\n",
545
+ "            rows=list(range(0, len(df_tbl_pivot))),\n",
546
+ " ),\n",
547
+ " )\n",
548
+ ")\n",
549
+ "html_data.append(gt.as_raw_html())\n",
550
+ "gt.show()\n",
551
+ "\n"
552
+ ],
553
+ "id": "b18088e16e0bf7f7",
554
+ "outputs": [],
555
+ "execution_count": null
556
+ },
557
+ {
558
+ "metadata": {},
559
+ "cell_type": "code",
560
+ "source": [
561
+ "# func for tables 3,4,5\n",
562
+ "def get_row_df(row_df:pd.DataFrame, label:str)->pd.DataFrame:\n",
563
+ " row_df = row_df.groupby(by=[\"site_id\"]).site_id.count().to_frame(name=\"n\")\n",
564
+ " row_df[\"label\"] = label\n",
565
+ " row_df = row_df.reset_index()\n",
566
+ " row_df = row_df.pivot(index=\"label\", values=\"n\", columns=\"site_id\").reset_index()\n",
567
+ " row_df.columns.name = \"\"\n",
568
+ " all_sites = [10, 20, 30, 40, 60]\n",
569
+ " for site in all_sites:\n",
570
+ " if site not in row_df.columns:\n",
571
+ " row_df[site] = None\n",
572
+ " row_df = row_df.reset_index(drop=True)\n",
573
+ " return row_df\n",
574
+ "\n",
575
+ "\n",
576
+ "def get_table_df(df_source:pd.DataFrame, visit_code:float|None=None, month_label:str|None=None)->pd.DataFrame:\n",
577
+ " if visit_code:\n",
578
+ " df_month = df_source[df_source.visit_code==visit_code].copy()\n",
579
+ " elif month_label:\n",
580
+ " df_month = df_source.copy()\n",
581
+ "\n",
582
+ " \n",
583
+ " row_df = df_month.copy()\n",
584
+ " table_df = get_row_df(row_df, \"Total (n)\")\n",
585
+ " \n",
586
+ " row_df = df_month.query(\"ogtt_value<7.8 and fbg_value<6.1\").copy()\n",
587
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT <7.8; FBG <6.1\")])\n",
588
+ " \n",
589
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
590
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT <7.8; FBG >=6.1 <7.0\")])\n",
591
+ " \n",
592
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=7.0)].copy()\n",
593
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT <7.8; FBG >=7.0\")])\n",
594
+ " \n",
595
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value<6.1)].copy()\n",
596
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥7.8 to <11.1; FBG <6.1\")])\n",
597
+ " \n",
598
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
599
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0\")])\n",
600
+ " \n",
601
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=7.0)].copy()\n",
602
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥7.8 to <11.1; FBG >=7.0\")])\n",
603
+ " \n",
604
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value<6.1)].copy()\n",
605
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥11.1; FBG <6.1\")])\n",
606
+ " \n",
607
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
608
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥11.1; FBG >=6.1 <7.0\")])\n",
609
+ " \n",
610
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=7.0)].copy()\n",
611
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"OGTT ≥11.1; FBG >=7.0\")])\n",
612
+ "\n",
613
+ " row_df = df_month[(df_month.ogtt_value.isna())].copy()\n",
614
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"Missing OGTT\")])\n",
615
+ " return table_df\n",
616
+ "\n",
617
+ "\n",
618
+ "def format_table_df(tbl_df, add_totals:bool|None=None):\n",
619
+ " add_totals = True if add_totals is None else add_totals\n",
620
+ " tbl_df = tbl_df.fillna(0.0)\n",
621
+ " tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
622
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
623
+ "\n",
624
+ " if add_totals:\n",
625
+ " df_last = tbl_df[1:].sum().to_frame()\n",
626
+ " df_last.loc[\"label\"] = np.nan\n",
627
+ " df_last = df_last.reset_index()\n",
628
+ " df_last.columns = [\"label\", \"value\"]\n",
629
+ " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
630
+ " df_last.columns.name = \"\"\n",
631
+ " df_last[\"label\"] = \"Totals\"\n",
632
+ "\n",
633
+ " tbl_df = pd.concat([tbl_df, df_last])\n",
634
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
635
+ "\n",
636
+ " tbl_df.columns = [\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]\n",
637
+ "\n",
638
+ " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
639
+ " tbl_df[f\"{site}_perc\"] = (tbl_df[site]/tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site]>0 else 0\n",
640
+ " tbl_df[f\"{site}_perc_str\"] = tbl_df[f\"{site}_perc\"].map('{:.1f}'.format)\n",
641
+ "\n",
642
+ "\n",
643
+ " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
644
+ " tbl_df[f\"{site}_str\"] = tbl_df[[f\"{site}\", f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
645
+ " tbl_df[f\"{site}_str\"] = tbl_df[f\"{site}_str\"] + \")\"\n",
646
+ "\n",
647
+ " cols = [\"label\", *[f\"{site}_str\" for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]]]\n",
648
+ " tbl_df1 = tbl_df[cols]\n",
649
+ " tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].to_list()\n",
650
+ " return tbl_df1"
651
+ ],
652
+ "id": "e1bcc6507b1e49c7",
653
+ "outputs": [],
654
+ "execution_count": null
655
+ },
656
+ {
657
+ "metadata": {},
658
+ "cell_type": "code",
659
+ "source": [
660
+ "# Table 3: OGTT and FBG at 12-month visit\n",
661
+ "df_table3 = get_table_df(df_glucose, 1120.0)\n",
662
+ "df_table3 = format_table_df(df_table3)\n",
663
+ "df_table3 = df_table3.fillna(0.0)\n",
664
+ "gt = df_as_great_table(df_table3, title=\"Table 3: OGTT and FBG at 12-month visit\")\n",
665
+ "gt = (\n",
666
+ " gt\n",
667
+ " .cols_label(column_headers_with_str)\n",
668
+ " .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
669
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
670
+ " .cols_width(cases={\"label\": \"35%\"})\n",
671
+ ")\n",
672
+ "html_data.append(gt.as_raw_html())\n",
673
+ "gt.show()\n"
674
+ ],
675
+ "id": "9a9616a118ae674d",
676
+ "outputs": [],
677
+ "execution_count": null
678
+ },
679
+ {
680
+ "metadata": {},
681
+ "cell_type": "code",
682
+ "source": [
683
+ "# Table 4: OGTT and FBG at 24-month visit\n",
684
+ "df_table4 = get_table_df(df_glucose, 1240.0)\n",
685
+ "df_table4 = format_table_df(df_table4)\n",
686
+ "df_table4 = df_table4.fillna(0.0)\n",
687
+ "gt = df_as_great_table(df_table4, title=\"Table 4: OGTT and FBG at 24-month visit\")\n",
688
+ "gt = (\n",
689
+ " gt\n",
690
+ " .cols_label(column_headers_with_str)\n",
691
+ " .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
692
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
693
+ " .cols_width(cases={\"label\": \"35%\"})\n",
694
+ ")\n",
695
+ "html_data.append(gt.as_raw_html())\n",
696
+ "gt.show()"
697
+ ],
698
+ "id": "ec0988364166e130",
699
+ "outputs": [],
700
+ "execution_count": null
701
+ },
702
+ {
703
+ "metadata": {},
704
+ "cell_type": "code",
705
+ "source": [
706
+ "# Table 5: OGTT and FBG at 36-month visit\n",
707
+ "df_table5 = get_table_df(df_glucose, 1360.0)\n",
708
+ "df_table5 = format_table_df(df_table5)\n",
709
+ "df_table5 = df_table5.fillna(0.0)\n",
710
+ "gt = df_as_great_table(df_table5, title=\"Table 5: OGTT and FBG at 36-month visit\")\n",
711
+ "gt = (\n",
712
+ " gt\n",
713
+ " .cols_label(column_headers_with_str)\n",
714
+ " .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
715
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
716
+ " .cols_width(cases={\"label\": \"35%\"})\n",
717
+ ")\n",
718
+ "html_data.append(gt.as_raw_html())\n",
719
+ "gt.show()"
720
+ ],
721
+ "id": "59be72121202df15",
722
+ "outputs": [],
723
+ "execution_count": null
724
+ },
725
+ {
726
+ "metadata": {},
727
+ "cell_type": "code",
728
+ "source": [
729
+ "# Table 6: Any OGTT>=11.1 ever\n",
730
+ "row_df = df_glucose[df_glucose.ogtt_value>=11.1].copy()\n",
731
+ "table_df = get_row_df(row_df, \"Total (n)\")\n",
732
+ "df_table6 = format_table_df(table_df)\n",
733
+ "df_table = df_table6[:1].fillna(0.0).copy().reset_index(drop=True)\n",
734
+ "gt = df_as_great_table(df_table, title=\"Table 6: Any OGTT>=11.1 ever\")\n",
735
+ "gt = (\n",
736
+ " gt\n",
737
+ " .cols_label(column_headers_with_str)\n",
738
+ " .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
739
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
740
+ " .cols_width(cases={\"label\": \"35%\"})\n",
741
+ ")\n",
742
+ "html_data.append(gt.as_raw_html())\n",
743
+ "gt.show()"
744
+ ],
745
+ "id": "f016ddbe736c2f93",
746
+ "outputs": [],
747
+ "execution_count": null
748
+ },
749
+ {
750
+ "metadata": {},
751
+ "cell_type": "code",
752
+ "source": [
753
+ "# func for table 7\n",
754
+ "def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:\n",
755
+ " df_month = df_source[(df_source.visit_code>=visit_code) & (df_source.visit_code<=visit_code + 0.9)].copy()\n",
756
+ "\n",
757
+ " row_df = df_month.copy()\n",
758
+ " table_df = get_row_df(row_df, \"Total (n)\")\n",
759
+ "\n",
760
+ " row_df = df_month[(df_month.fbg_value<6.1)].copy()\n",
761
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG <6.1\")])\n",
762
+ "\n",
763
+ " row_df = df_month[(df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
764
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG >=6.1 <7.0\")])\n",
765
+ "\n",
766
+ " row_df = df_month[(df_month.fbg_value>=7.0)].copy()\n",
767
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG >=7.0\")])\n",
768
+ " return table_df"
769
+ ],
770
+ "id": "6193907cc12f5b5c",
771
+ "outputs": [],
772
+ "execution_count": null
773
+ },
774
+ {
775
+ "metadata": {},
776
+ "cell_type": "code",
777
+ "source": [
778
+ "# Table 7: Interim FBG results\n",
779
+ "df_table7 = get_table7_df(df_glucose, 1150.0)\n",
780
+ "df_table7 = format_table_df(df_table7, add_totals=False)\n",
781
+ "df_table7[\"visit_code\"] = MONTH15\n",
782
+ "\n",
783
+ "df_table71 = get_table7_df(df_glucose, 1180.0)\n",
784
+ "df_table71 = format_table_df(df_table71, add_totals=False)\n",
785
+ "df_table71[\"visit_code\"] = MONTH18\n",
786
+ "\n",
787
+ "df_table72 = get_table7_df(df_glucose, 1210.0)\n",
788
+ "df_table72 = format_table_df(df_table72, add_totals=False)\n",
789
+ "df_table72[\"visit_code\"] = MONTH21\n",
790
+ "\n",
791
+ "df_table73 = get_table7_df(df_glucose, 1270.0)\n",
792
+ "df_table73 = format_table_df(df_table73, add_totals=False)\n",
793
+ "df_table73[\"visit_code\"] = MONTH27\n",
794
+ "\n",
795
+ "df_table74 = get_table7_df(df_glucose, 1300.0)\n",
796
+ "df_table74 = format_table_df(df_table74, add_totals=False)\n",
797
+ "df_table74[\"visit_code\"] = MONTH30\n",
798
+ "\n",
799
+ "df_table75 = get_table7_df(df_glucose, 1330.0)\n",
800
+ "df_table75 = format_table_df(df_table75, add_totals=False)\n",
801
+ "df_table75[\"visit_code\"] = MONTH33\n",
802
+ "\n",
803
+ "df_table76 = get_table7_df(df_glucose, 1390.0)\n",
804
+ "df_table76 = format_table_df(df_table76, add_totals=False)\n",
805
+ "df_table76[\"visit_code\"] = MONTH39\n",
806
+ "\n",
807
+ "df_table = pd.concat([df_table7, df_table71, df_table72, df_table73, df_table74, df_table75, df_table76])\n",
808
+ "df_table = df_table.reset_index(drop=True)\n",
809
+ "df_table = df_table.fillna(0.0)"
810
+ ],
811
+ "id": "16adb1f965081358",
812
+ "outputs": [],
813
+ "execution_count": null
814
+ },
815
+ {
816
+ "metadata": {},
817
+ "cell_type": "code",
818
+ "source": [
819
+ "column_headers_with_str = {\"visit_code\": \"Visit Code\", **column_headers_with_str}\n",
820
+ "gt = df_as_great_table2(df_table, title=\"Table 7: Interim FBG results\")\n",
821
+ "gt = (\n",
822
+ " gt\n",
823
+ " .cols_label(column_headers_with_str)\n",
824
+ " .cols_move_to_start(columns=\"visit_code\")\n",
825
+ " .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
826
+ " .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
827
+ " .cols_width(cases={\"label\": \"15%\"})\n",
828
+ " .tab_style(\n",
829
+ " style=[\n",
830
+ " style.text(color=\"black\", weight=\"bold\"),\n",
831
+ " style.fill(color=\"lightgray\")\n",
832
+ " ],\n",
833
+ " locations=loc.row_groups()\n",
834
+ " )\n",
835
+ ")\n",
836
+ "html_data.append(gt.as_raw_html())\n",
837
+ "gt.show()"
838
+ ],
839
+ "id": "81bcfe52d364b646",
840
+ "outputs": [],
841
+ "execution_count": null
842
+ },
843
+ {
844
+ "metadata": {},
845
+ "cell_type": "code",
846
+ "source": [
847
+ "# Table 8: Primary Endpoint met\n",
848
+ "df_endpoint_grp = df_endpoint.groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
849
+ "df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
850
+ "df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
851
+ "df_endpoint_pivot.columns.name = \"\"\n",
852
+ "df_endpoint_pivot.columns = ['label', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
853
+ "df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()\n",
854
+ "df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
855
+ "df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
856
+ "df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
857
+ "\n",
858
+ "gt = df_as_great_table(\n",
859
+ " df_endpoint_pivot,\n",
860
+ " title=\"Table 8: Primary Endpoint met\"\n",
861
+ ")\n",
862
+ "gt = (\n",
863
+ " gt\n",
864
+ " .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
865
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
866
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
867
+ " .cols_width(cases={\"label\": \"25%\"})\n",
868
+ ")\n",
869
+ "html_data.append(gt.as_raw_html())\n",
870
+ "gt.show()"
871
+ ],
872
+ "id": "37904c7ce49724e6",
873
+ "outputs": [],
874
+ "execution_count": null
875
+ },
876
+ {
877
+ "metadata": {},
878
+ "cell_type": "code",
879
+ "source": "#read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\"), verbose=False).rename(columns={\"site\": \"site_id\"})",
880
+ "id": "562a1cc911a71255",
881
+ "outputs": [],
882
+ "execution_count": null
883
+ },
884
+ {
885
+ "metadata": {},
886
+ "cell_type": "code",
887
+ "source": [
888
+ "from great_tables import html\n",
889
+ "\n",
890
+ "# Table 8a: Primary Endpoint no EOS or DM Referral\n",
891
+ "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\", offschedule_datetime__isnull=False), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
892
+ "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
893
+ "df_endpoint_no_off = df_endpoint.merge(df_subjecthistory[[\"subject_identifier\", \"offschedule_datetime\"]], on=[\"subject_identifier\"], how=\"left\")\n",
894
+ "df_endpoint_grp = df_endpoint_no_off.query(\"offschedule_datetime.isna()\").groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
895
+ "df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
896
+ "df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
897
+ "df_endpoint_pivot.columns.name = \"\"\n",
898
+ "df_endpoint_pivot.columns = ['label', *[str(col) for col in df_endpoint_pivot.columns if col != \"label\"]]\n",
899
+ "for col in [c for c in ['label', \"10\", \"20\", \"30\", \"40\", \"60\"] if str(c) not in df_endpoint_pivot.columns]:\n",
900
+ " df_endpoint_pivot[str(col)] = np.nan\n",
901
+ "df_endpoint_pivot = df_endpoint_pivot.reindex(columns=['label', \"10\", \"20\", \"30\", \"40\", \"60\"])\n",
902
+ "df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()\n",
903
+ "df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
904
+ "df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
905
+ "df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
906
+ "subjects = df_endpoint_no_off.query(\"offschedule_datetime.isna()\").subject_identifier.to_list()\n",
907
+ "\n",
908
+ "gt = df_as_great_table(\n",
909
+ " df_endpoint_pivot,\n",
910
+ " title=\"Table 8a: Primary Endpoint met -- participant not referred\"\n",
911
+ ")\n",
912
+ "gt = (\n",
913
+ " gt\n",
914
+ " .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
915
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
916
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
917
+ " .cols_width(cases={\"label\": \"25%\"})\n",
918
+ " .tab_source_note(source_note=html(\"<BR>\".join(subjects)))\n",
919
+ ")\n",
920
+ "html_data.append(gt.as_raw_html())\n",
921
+ "gt.show()"
922
+ ],
923
+ "id": "a74cd253ec5827f9",
924
+ "outputs": [],
925
+ "execution_count": null
926
+ },
927
+ {
928
+ "metadata": {},
929
+ "cell_type": "code",
930
+ "source": "",
931
+ "id": "4df949cb48b088d7",
932
+ "outputs": [],
933
+ "execution_count": null
934
+ },
935
+ {
936
+ "metadata": {},
937
+ "cell_type": "code",
938
+ "source": [
939
+ "# Table 9: Incidence Rate per 1000 person years\n",
940
+ "\n",
941
+ "def get_df_main(df_visit:pd.DataFrame, lower_days:float|None=None, upper_days:float|None=None):\n",
942
+ " if not lower_days:\n",
943
+ " lower_days = -1\n",
944
+ " cutoff_datetime = df_visit.query(\"@lower_days<followup_days<=@upper_days\").visit_datetime.max()\n",
945
+ " # exclude subjects for this reason\n",
946
+ " offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
947
+ "\n",
948
+ " df_eos = get_eos_df()\n",
949
+ " df_eos_excluded = (\n",
950
+ " df_eos\n",
951
+ " .query(\"followup_days>@lower_days and followup_days<=@upper_days and offstudy_reason.isin(@offstudy_reasons)\")\n",
952
+ " .copy()\n",
953
+ " .reset_index()\n",
954
+ " )\n",
955
+ " df_visit_final = (\n",
956
+ " df_visit.query(\"@lower_days<followup_days<=@upper_days and reason!='missed' and visit_code<2000.0\")\n",
957
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", suffixes=(\"\", \"_y\"), indicator=True)\n",
958
+ " .query(\"_merge=='left_only'\")\n",
959
+ " .drop(columns=[\"_merge\"])\n",
960
+ " )\n",
961
+ " df_main = (\n",
962
+ " df_visit_final\n",
963
+ " .groupby(by=[\"subject_identifier\"])[[\"baseline_datetime\", \"visit_datetime\", \"followup_days\"]]\n",
964
+ " .max()\n",
965
+ " .reset_index()\n",
966
+ " )\n",
967
+ "\n",
968
+ " df_main = (\n",
969
+ " df_main\n",
970
+ " .merge(\n",
971
+ " df_endpoint.query(\"days_to_endpoint>@lower_days\")[[\"subject_identifier\", \"endpoint_label\", \"endpoint_type\", \"days_to_endpoint\"]],\n",
972
+ " how=\"left\",\n",
973
+ " on=[\"subject_identifier\"])\n",
974
+ " .reset_index(drop=True)\n",
975
+ " )\n",
976
+ " if lower_days>=365.25:\n",
977
+ " df_main[\"followup_days\"] = df_main[\"followup_days\"] - lower_days\n",
978
+ " df_main[\"followup_years\"] = df_main[\"followup_days\"]/365.25\n",
979
+ " return df_main, len(df_main), len(df_main.query(\"@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()\"))\n",
980
+ "\n",
981
+ "def get_rate_and_ci(events, person_years_total):\n",
982
+ " lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000\n",
983
+ " upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000\n",
984
+ " return events/person_years_total*1000, lower_ci, upper_ci\n",
985
+ "\n",
986
+ "def get_incidence_data(term:str, lower_days:float, upper_days:float):\n",
987
+ " data = {}\n",
988
+ " df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)\n",
989
+ " person_years_total = df_main.followup_years.sum()\n",
990
+ " data.update({term:[person_years_total, subjects, events, *get_rate_and_ci(events, person_years_total)]})\n",
991
+ " return data"
992
+ ],
993
+ "id": "920db81ad440edab",
994
+ "outputs": [],
995
+ "execution_count": null
996
+ },
997
+ {
998
+ "metadata": {},
999
+ "cell_type": "code",
1000
+ "source": [
1001
+ "incidence_data = {}\n",
1002
+ "incidence_data.update(get_incidence_data(\"total\", lower_days=-1, upper_days=10000))\n",
1003
+ "incidence_data.update(get_incidence_data(\"0-1 years\", lower_days=-1, upper_days=365.25))\n",
1004
+ "incidence_data.update(get_incidence_data(\"1-2 years\", lower_days=365.25, upper_days=2 * 365.25))\n",
1005
+ "incidence_data.update(get_incidence_data(\"2-3 years\", lower_days=2 * 365.25, upper_days=3 * 365.25))\n",
1006
+ "incidence_data.update(get_incidence_data(\"3+ years\", lower_days=3 * 365.25, upper_days=10 * 365.25))\n",
1007
+ "data = dict(label=[], person_years=[], subjects=[], failures=[], rate=[], lower_ci=[], upper_ci=[])\n",
1008
+ "for k in incidence_data:\n",
1009
+ " data[\"label\"].append(k)\n",
1010
+ "\n",
1011
+ "for v in incidence_data.values():\n",
1012
+ " data[\"person_years\"].append(v[0])\n",
1013
+ " data[\"subjects\"].append(v[1])\n",
1014
+ " data[\"failures\"].append(v[2])\n",
1015
+ " data[\"rate\"].append(v[3])\n",
1016
+ " data[\"lower_ci\"].append(v[4])\n",
1017
+ " data[\"upper_ci\"].append(v[5])\n",
1018
+ "\n",
1019
+ "df_table9 = pd.DataFrame(data={k:v for k,v in data.items() if k!=\"subjects\"})"
1020
+ ],
1021
+ "id": "44651e865641b75d",
1022
+ "outputs": [],
1023
+ "execution_count": null
1024
+ },
1025
+ {
1026
+ "metadata": {},
1027
+ "cell_type": "code",
1028
+ "source": [
1029
+ "gt = df_as_great_table(\n",
1030
+ " df_table9,\n",
1031
+ " title=\"Table 9: Incidence Rate per 1000 person years\",\n",
1032
+ " subtitle=md(\"using randomisation to diabetes/last seen\"),\n",
1033
+ ")\n",
1034
+ "gt = gt.fmt_number(columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"], decimals=2)\n",
1035
+ "gt = (gt\n",
1036
+ " .cols_label({\"label\": \"Label\", \"person_years\": \"Person years\", \"failures\": \"Failures\", \"rate\": \"Rate\", \"lower_ci\": \"Lower\", \"upper_ci\": \"Upper\"})\n",
1037
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
1038
+ " .cols_align(align=\"center\", columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"])\n",
1039
+ " .tab_spanner(\n",
1040
+ " label=\"95%CI\",\n",
1041
+ " columns=[\"lower_ci\", \"upper_ci\"],\n",
1042
+ " )\n",
1043
+ " .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
1044
+ ")\n",
1045
+ "gt.show()\n",
1046
+ "html_data.append(gt.as_raw_html())"
1047
+ ],
1048
+ "id": "da4e67d83522768a",
1049
+ "outputs": [],
1050
+ "execution_count": null
1051
+ },
1052
+ {
1053
+ "metadata": {},
1054
+ "cell_type": "code",
1055
+ "source": [
1056
+ "# Table 10: Proportion meeting primary endpoint\n",
1057
+ "df_table10 = pd.DataFrame(data=data)\n",
1058
+ "df_table10[\"proportion\"] = df_table10[\"failures\"]/df_table10[\"subjects\"]*100\n",
1059
+ "gt = df_as_great_table(\n",
1060
+ " df_table10[[\"label\", \"subjects\", 'failures', \"proportion\"]],\n",
1061
+ " title=\"Table 10: Proportion meeting primary endpoint\",\n",
1062
+ ")\n",
1063
+ "gt = (\n",
1064
+ " gt\n",
1065
+ " .fmt_number(columns=[\"failures\", \"proportion\"], decimals=2)\n",
1066
+ " .cols_label({\"label\": \"Label\", \"subjects\": \"Participants\", \"failures\": \"Failures\", \"proportion\": \"%\"})\n",
1067
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
1068
+ " .cols_align(align=\"center\", columns=[\"subjects\", \"failures\", \"proportion\"])\n",
1069
+ " .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
1070
+ ")\n",
1071
+ "html_data.append(gt.as_raw_html())\n",
1072
+ "gt.show()\n"
1073
+ ],
1074
+ "id": "2376a30803fbc743",
1075
+ "outputs": [],
1076
+ "execution_count": null
1077
+ },
1078
+ {
1079
+ "metadata": {},
1080
+ "cell_type": "code",
1081
+ "source": "",
1082
+ "id": "afc730c0bd9d03aa",
1083
+ "outputs": [],
1084
+ "execution_count": null
1085
+ },
1086
+ {
1087
+ "metadata": {},
1088
+ "cell_type": "code",
1089
+ "source": [
1090
+ "# Table 11a: End of Study Table (for those who have completed an end of study form)\n",
1091
+ "df_eos = get_eos_df()\n",
1092
+ "offstudy_reasons = {\n",
1093
+ " \"Delivered / Completed followup from pregnancy\": \"Pregnancy\",\n",
1094
+ " \"Patient completed 36 months of follow-up\": \"Completed 36m\",\n",
1095
+ " \"Patient developed diabetes\": \"Developed diabetes\",\n",
1096
+ " \"Other reason (specify below)\": \"Other\",\n",
1097
+ " \"Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment\": \"Late exclusion\",\n",
1098
+ " \"Patient has been transferred to another health centre\": \"Transferred out\",\n",
1099
+ " \"Patient is withdrawn on CLINICAL grounds ...\": \"Withdrawal: Clinical grounds\",\n",
1100
+ " \"Patient lost to follow-up\": \"LTFU\",\n",
1101
+ " \"Patient reported/known to have died\": \"Died\",\n",
1102
+ " \"Patient withdrew consent to participate further\": \"Withdrawal: Consent\",\n",
1103
+ "}\n",
1104
+ "df_eos[\"offstudy_reason\"] = df_eos[\"offstudy_reason\"].map(offstudy_reasons)\n",
1105
+ "df_eos[\"offstudy_reason\"] = pd.Categorical(df_eos[\"offstudy_reason\"], categories=sorted(list(offstudy_reasons.values())), ordered=True)\n",
1106
+ "df_eos[\"site_id\"] = df_eos[\"site_id\"].astype(str)\n",
1107
+ "df_eos_pivot = (\n",
1108
+ " df_eos\n",
1109
+ " .groupby(by=[\"offstudy_reason\", \"site_id\"],observed=True)\n",
1110
+ " .size()\n",
1111
+ " .reset_index()\n",
1112
+ " .pivot_table(index=\"offstudy_reason\", columns=\"site_id\", values=0, observed=True)\n",
1113
+ " .fillna(0)\n",
1114
+ " .astype(int)\n",
1115
+ " .reset_index()\n",
1116
+ ")\n",
1117
+ "df_eos_pivot[\"total\"] = df_eos_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1118
+ "df_eos_pivot.columns.name=\"\"\n",
1119
+ "sum_row = df_eos_pivot.select_dtypes(include='int64').sum()\n",
1120
+ "sum_row['offstudy_reason'] = 'Total'\n",
1121
+ "sum_row_df = pd.DataFrame(sum_row).T\n",
1122
+ "enrolled_pivot[\"offstudy_reason\"] = \"Enrolled\"\n",
1123
+ "enrolled_pivot = enrolled_pivot[[*df_eos_pivot.columns]]\n",
1124
+ "df_eos_pivot = pd.concat([enrolled_pivot, df_eos_pivot, sum_row_df], ignore_index=True)\n",
1125
+ "\n",
1126
+ "gt = df_as_great_table(\n",
1127
+ " df_eos_pivot,\n",
1128
+ " title=\"Table 11a: End of study report\",\n",
1129
+ " subtitle=md(\"for those who have completed an End of study report\"),\n",
1130
+ ")\n",
1131
+ "gt = (\n",
1132
+ " gt\n",
1133
+ " .cols_label({\"offstudy_reason\": \"Reason\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1134
+ " .cols_align(align=\"left\", columns=[\"offstudy_reason\"])\n",
1135
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
1136
+ " .tab_style(\n",
1137
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1138
+ " locations=loc.body(\n",
1139
+ " columns=[0],\n",
1140
+ " rows=[len(df_eos_pivot)-1]),\n",
1141
+ " )\n",
1142
+ " .tab_style(\n",
1143
+ " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1144
+ " locations=loc.body(\n",
1145
+ " columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
1146
+ " rows=[len(df_eos_pivot)-1],\n",
1147
+ " ),\n",
1148
+ " )\n",
1149
+ " .tab_style(\n",
1150
+ " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1151
+ " locations=loc.body(\n",
1152
+ " columns=[\"total\"],\n",
1153
+ " rows=[len(df_eos_pivot)-1],\n",
1154
+ " ),\n",
1155
+ " )\n",
1156
+ " .tab_style(\n",
1157
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1158
+ " locations=loc.body(\n",
1159
+ " columns=[\"offstudy_reason\"],\n",
1160
+ " rows=[0],\n",
1161
+ " ),\n",
1162
+ " )\n",
1163
+ ")\n",
1164
+ "html_data.append(gt.as_raw_html())\n",
1165
+ "gt.show()\n"
1166
+ ],
1167
+ "id": "37dcd320411bd9c5",
1168
+ "outputs": [],
1169
+ "execution_count": null
1170
+ },
1171
+ {
1172
+ "metadata": {},
1173
+ "cell_type": "code",
1174
+ "source": "",
1175
+ "id": "9acdb7515d1490a3",
1176
+ "outputs": [],
1177
+ "execution_count": null
1178
+ },
1179
+ {
1180
+ "metadata": {},
1181
+ "cell_type": "code",
1182
+ "source": [
1183
+ "# Table 11b: Study status\n",
1184
+ "def get_schedule_df(df_subjecthistory:pd.DataFrame, onschedule_model:str, offschedule_model:str, mode:str)->pd.DataFrame:\n",
1185
+ " columns = {k:f\"{k}_{mode}\" for k in [\"10\", \"20\", \"30\", \"40\", \"60\"]}\n",
1186
+ " df_schedule = (\n",
1187
+ " df_subjecthistory\n",
1188
+ " .query(f\"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{'isna' if mode=='on' else 'notna'}()\")\n",
1189
+ " .groupby(by=[\"onschedule_model\", \"site_id\"])\n",
1190
+ " .size()\n",
1191
+ " .reset_index()\n",
1192
+ " .pivot_table(index=\"onschedule_model\", columns=\"site_id\", values=0, observed=True)\n",
1193
+ " .reset_index()\n",
1194
+ " .rename(columns={\"onschedule_model\":\"schedule\", **columns})\n",
1195
+ " .fillna(0)\n",
1196
+ " .copy()\n",
1197
+ " )\n",
1198
+ " df_schedule.columns.name = \"\"\n",
1199
+ " return df_schedule\n",
1200
+ "\n",
1201
+ "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1202
+ "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
1203
+ "\n",
1204
+ "df_on = pd.concat([\n",
1205
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedule', 'meta_prn.offschedule', \"on\"),\n",
1206
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral', \"on\"),\n",
1207
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy', \"on\"),\n",
1208
+ "])\n",
1209
+ "\n",
1210
+ "df_on = (\n",
1211
+ " df_on\n",
1212
+ " .fillna(0)\n",
1213
+ " .reset_index(drop=True)\n",
1214
+ ")\n",
1215
+ "\n",
1216
+ "df_off = pd.concat([\n",
1217
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedule', 'meta_prn.offschedule', \"off\"),\n",
1218
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral', \"off\"),\n",
1219
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy', \"off\"),\n",
1220
+ "])\n",
1221
+ "df_off = (\n",
1222
+ " df_off\n",
1223
+ " .fillna(0)\n",
1224
+ " .reset_index(drop=True)\n",
1225
+ ")\n",
1226
+ "\n",
1227
+ "df_status = pd.merge(df_on, df_off, on=[\"schedule\"], how=\"outer\")\n",
1228
+ "columns = []\n",
1229
+ "for ele in [[f\"{x}_on\", f\"{x}_off\"] for x in [\"10\", \"20\", \"30\", \"40\", \"60\"]]:\n",
1230
+ " columns.extend(ele)\n",
1231
+ "df_status = df_status[[\"schedule\", *columns]]\n",
1232
+ "df_status[\"total_on\"] = df_status[[col for col in columns if \"on\" in col]].sum(axis=1)\n",
1233
+ "df_status[\"total_off\"] = df_status[[col for col in columns if \"off\" in col]].sum(axis=1)\n",
1234
+ "df_status[\"total\"] = df_status[columns].sum(axis=1)\n",
1235
+ "df_status[\"schedule\"] = df_status.schedule.map({\"meta_prn.onschedule\": \"Main trial\", \"meta_prn.onscheduledmreferral\": \"Diabetes\", \"meta_prn.onschedulepregnancy\": \"Pregnancy\"})\n",
1236
+ "\n",
1237
+ "gt = df_as_great_table(\n",
1238
+ " df_status,\n",
1239
+ " title=\"Table 11b: Study status\",\n",
1240
+ " subtitle=md(\"Calculated from Offschedule form; not End of study report\"),\n",
1241
+ ")\n",
1242
+ "# gt = gt.fmt_number(columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"], decimals=0)\n",
1243
+ "gt = (gt\n",
1244
+ " .tab_source_note(\n",
1245
+ " source_note=(\n",
1246
+ " \"Note: Offschedule form is always submitted before the End of study report. \"\n",
1247
+ " \"When the Offschedule form is submitted, future appointments for the schedule are removed and \"\n",
1248
+ " \"the site staff are actioned to submit the End of study report.\"\n",
1249
+ " )\n",
1250
+ " )\n",
1251
+ " .cols_label({\n",
1252
+ " \"10_on\": \"On\", \"10_off\": \"Off\",\n",
1253
+ " \"20_on\": \"On\", \"20_off\": \"Off\",\n",
1254
+ " \"30_on\": \"On\", \"30_off\": \"Off\",\n",
1255
+ " \"40_on\": \"On\", \"40_off\": \"Off\",\n",
1256
+ " \"60_on\": \"On\", \"60_off\": \"Off\",\n",
1257
+ " \"total_on\": \"On\", \"total_off\": \"Off\",\n",
1258
+ " \"schedule\": \"Schedule\", \"total\": \"Total\"})\n",
1259
+ " .cols_align(align=\"center\")\n",
1260
+ " .cols_align(align=\"left\", columns=[\"schedule\"])\n",
1261
+ " .tab_spanner(\n",
1262
+ " label=\"Hindu mandal\",\n",
1263
+ " columns=[\"10_on\", \"10_off\"],\n",
1264
+ " )\n",
1265
+ " .tab_spanner(\n",
1266
+ " label=\"Amana\",\n",
1267
+ " columns=[\"20_on\", \"20_off\"],\n",
1268
+ " )\n",
1269
+ " .tab_spanner(\n",
1270
+ " label=\"Temeke\",\n",
1271
+ " columns=[\"30_on\", \"30_off\"],\n",
1272
+ " )\n",
1273
+ " .tab_spanner(\n",
1274
+ " label=\"Mwananyamala\",\n",
1275
+ " columns=[\"40_on\", \"40_off\"],\n",
1276
+ " )\n",
1277
+ " .tab_spanner(\n",
1278
+ " label=\"Mnazi Moja\",\n",
1279
+ " columns=[\"60_on\", \"60_off\"],\n",
1280
+ " )\n",
1281
+ " .tab_spanner(\n",
1282
+ " label=\"Total\",\n",
1283
+ " columns=[\"total_on\", \"total_off\"],\n",
1284
+ " )\n",
1285
+ " .tab_style(\n",
1286
+ " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1287
+ " locations=loc.body(\n",
1288
+ " columns=[\"10_off\", \"20_off\", \"30_off\", \"40_off\", \"60_off\"],\n",
1289
+ " rows=list(range(0, 1)),\n",
1290
+ " ),\n",
1291
+ " )\n",
1292
+ " .tab_style(\n",
1293
+ " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1294
+ " locations=loc.body(\n",
1295
+ " columns=[\"total_off\"],\n",
1296
+ " rows=list(range(0, 1)),\n",
1297
+ " ),\n",
1298
+ " )\n",
1299
+ " .fmt_number(columns=[*[c for c in df_status.columns if c not in [\"schedule\"]]], decimals=0)\n",
1300
+ ")\n",
1301
+ "html_data.append(gt.as_raw_html())\n",
1302
+ "gt.show()"
1303
+ ],
1304
+ "id": "35840b9a7971e4cf",
1305
+ "outputs": [],
1306
+ "execution_count": null
1307
+ },
1308
+ {
1309
+ "metadata": {},
1310
+ "cell_type": "code",
1311
+ "source": [
1312
+ "# Table 12: Loss to Follow Up\n",
1313
+ "df_ltfu = read_frame(LossToFollowup.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1314
+ "df_ltfu_pivot = (\n",
1315
+ " df_ltfu\n",
1316
+ " .groupby(by=[\"loss_category\", \"site_id\"],observed=True,dropna=False)\n",
1317
+ " .size()\n",
1318
+ " .reset_index()\n",
1319
+ " .pivot_table(index=\"loss_category\", columns=\"site_id\", values=0, observed=True,dropna=False)\n",
1320
+ " .fillna(0)\n",
1321
+ " .astype(int)\n",
1322
+ " .reset_index()\n",
1323
+ ")\n",
1324
+ "df_ltfu_pivot[\"total\"] = df_ltfu_pivot.drop(columns=[\"loss_category\"]).sum(axis=1)\n",
1325
+ "df_ltfu_pivot.columns.name=\"\"\n",
1326
+ "sum_row = df_ltfu_pivot.select_dtypes(include='int64').sum()\n",
1327
+ "sum_row['loss_category'] = 'Total'\n",
1328
+ "sum_row_df = pd.DataFrame(sum_row).T\n",
1329
+ "df_ltfu_pivot = pd.concat([df_ltfu_pivot, sum_row_df], ignore_index=True)\n",
1330
+ "df_ltfu_pivot\n"
1331
+ ],
1332
+ "id": "534c51e7321e2ef3",
1333
+ "outputs": [],
1334
+ "execution_count": null
1335
+ },
1336
+ {
1337
+ "metadata": {},
1338
+ "cell_type": "code",
1339
+ "source": [
1340
+ "# Table 11c: End of study report not submitted\n",
1341
+ "\n",
1342
+ "df1 = (\n",
1343
+ " df_status\n",
1344
+ " .query(\"schedule=='Main trial'\")[[col for col in columns if \"off\" in col]]\n",
1345
+ " .rename(columns=dict(zip([col for col in columns if \"off\" in col], [\"10\", \"20\",\"30\",\"40\",\"60\"])))\n",
1346
+ " .reset_index(drop=True)\n",
1347
+ ")\n",
1348
+ "df2 = (\n",
1349
+ " df_eos_pivot\n",
1350
+ " .query(\"offstudy_reason=='Total'\")[[\"10\", \"20\",\"30\",\"40\",\"60\"]]\n",
1351
+ " .reset_index(drop=True)\n",
1352
+ ")\n",
1353
+ "\n",
1354
+ "df_eos_not_reported = df1-df2\n",
1355
+ "df_eos_not_reported[\"schedule\"] = 'Main trial'\n",
1356
+ "df_eos_not_reported[\"total\"] = df_eos_not_reported[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1357
+ "df_eos_not_reported = df_eos_not_reported[[\"schedule\", \"10\", \"20\",\"30\",\"40\",\"60\", \"total\"]]\n",
1358
+ "\n",
1359
+ "gt = df_as_great_table(\n",
1360
+ " df_eos_not_reported,\n",
1361
+ " title=\"Table 11c: End of study report not submitted\",\n",
1362
+ " subtitle=md(\"End of study report expected based on Offschedule form\"),\n",
1363
+ ")\n",
1364
+ "gt = (\n",
1365
+ " gt\n",
1366
+ " .cols_label({\"schedule\": \"Schedule\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1367
+ " .cols_align(align=\"left\", columns=[\"schedule\"])\n",
1368
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
1369
+ " .tab_style(\n",
1370
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1371
+ " locations=loc.body(\n",
1372
+ " columns=[0],\n",
1373
+ " rows=[len(df_eos_not_reported)-1]),\n",
1374
+ " )\n",
1375
+ " .tab_style(\n",
1376
+ " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1377
+ " locations=loc.body(\n",
1378
+ " columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
1379
+ " rows=[len(df_eos_not_reported)-1],\n",
1380
+ " ),\n",
1381
+ " )\n",
1382
+ " .tab_style(\n",
1383
+ " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1384
+ " locations=loc.body(\n",
1385
+ " columns=[\"total\"],\n",
1386
+ " rows=[len(df_eos_pivot)-1],\n",
1387
+ " ),\n",
1388
+ " )\n",
1389
+ ")\n",
1390
+ "html_data.append(gt.as_raw_html())\n",
1391
+ "gt.show()\n"
1392
+ ],
1393
+ "id": "25d05831ef76f267",
1394
+ "outputs": [],
1395
+ "execution_count": null
1396
+ },
1397
+ {
1398
+ "metadata": {},
1399
+ "cell_type": "code",
1400
+ "source": "# Table 13: Baseline Sample",
1401
+ "id": "b607a436749cc3b2",
1402
+ "outputs": [],
1403
+ "execution_count": null
1404
+ },
1405
+ {
1406
+ "metadata": {},
1407
+ "cell_type": "code",
1408
+ "source": [
1409
+ "# Table 15: Consented to extended followup\n",
1410
+ "df_consented = (\n",
1411
+ " read_frame(SubjectConsentV1Ext.objects.all(), verbose=False)\n",
1412
+ " .query(\"agrees_to_extension==@YES\")\n",
1413
+ " .rename(columns={\"site\": \"site_id\"})\n",
1414
+ ")\n",
1415
+ "df_consented[\"site_id\"] = df_consented.site_id.astype(str)\n",
1416
+ "df_consented[\"month\"] = df_consented.report_datetime.dt.strftime(\"%m\")\n",
1417
+ "df_consented[\"year\"] = df_consented.report_datetime.dt.strftime(\"%Y\")\n",
1418
+ "df_consented_grp = (\n",
1419
+ " df_consented.groupby(by=[\"site_id\", \"year\", \"month\"]).\n",
1420
+ " size()\n",
1421
+ " .reset_index()\n",
1422
+ " .sort_values(by=[\"site_id\", \"year\", \"month\"], ascending=True)\n",
1423
+ " .reset_index(drop=True)\n",
1424
+ ")\n",
1425
+ "df_consented_pivot = (\n",
1426
+ " df_consented_grp\n",
1427
+ " .pivot_table(index=[\"year\", \"month\"], columns=\"site_id\", values=0, aggfunc=\"sum\")\n",
1428
+ " .reset_index()\n",
1429
+ " .fillna(0)\n",
1430
+ ")\n",
1431
+ "if \"60\" not in df_consented_pivot.columns:\n",
1432
+ " df_consented_pivot[\"60\"] = 0.0  # scalar is broadcast to every row\n",
1433
+ "df_consented_pivot.columns.name=\"\"\n",
1434
+ "df_consented_pivot[\"year\"] = df_consented_pivot[\"year\"].astype(str)\n",
1435
+ "df_consented_pivot[\"month\"] = df_consented_pivot[\"month\"].astype(str)\n",
1436
+ "\n",
1437
+ "sum_row = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum()\n",
1438
+ "sum_row['year'] = \"Total\"\n",
1439
+ "sum_row['month'] = \"\"\n",
1440
+ "df_consented_pivot = pd.concat([df_consented_pivot, sum_row.to_frame().T], ignore_index=True)\n",
1441
+ "df_consented_pivot[\"total\"] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1).astype(int)\n",
1442
+ "df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].astype(int)\n",
1443
+ "gt = df_as_great_table2(\n",
1444
+ " df_consented_pivot,\n",
1445
+ " title=\"Table 15: Consented to extended followup\",\n",
1446
+ " rowname_col=\"month\",\n",
1447
+ " groupname_col=\"year\",\n",
1448
+ ")\n",
1449
+ "gt = (\n",
1450
+ " gt\n",
1451
+ " .cols_label({\"year\": \"Year\", \"month\": \"Month\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1452
+ " .cols_align(align=\"center\")\n",
1453
+ " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
1454
+ " .tab_stubhead(label=\"Consented\")\n",
1455
+ " .tab_style(\n",
1456
+ " style=[\n",
1457
+ " style.text(color=\"black\", weight=\"bold\"),\n",
1458
+ " style.fill(color=\"lightgray\")\n",
1459
+ " ],\n",
1460
+ " locations=loc.row_groups()\n",
1461
+ " )\n",
1462
+ ")\n",
1463
+ "html_data.append(gt.as_raw_html())\n",
1464
+ "gt.show()"
1465
+ ],
1466
+ "id": "8acd2dd7e5a958e9",
1467
+ "outputs": [],
1468
+ "execution_count": null
1469
+ },
1470
+ {
1471
+ "metadata": {},
1472
+ "cell_type": "code",
1473
+ "source": "",
1474
+ "id": "45ee71bc4a06f8f7",
1475
+ "outputs": [],
1476
+ "execution_count": null
1477
+ },
1478
+ {
1479
+ "metadata": {},
1480
+ "cell_type": "code",
1481
+ "source": [
1482
+ "# gather raw html: wrap each table in a div and assemble a single HTML document\n",
1483
+ "raw_html = [f'<div class=\"page-break\">{s}</div>' for s in html_data]\n",
1484
+ "style_css = \"\"\"\n",
1485
+ "<style>\n",
1486
+ " .page-break {\n",
1487
+ " page-break-inside: avoid; /* Avoid splitting this element across pages */\n",
1488
+ " }\n",
1489
+ " .table-header {\n",
1490
+ " font-weight: bold;\n",
1491
+ " font-size: 18px;\n",
1492
+ " text-align: center;\n",
1493
+ " border-bottom: None;\n",
1494
+ " }\n",
1495
+ "</style>\n",
1496
+ "\"\"\"\n",
1497
+ "raw_html = ''.join(raw_html)\n",
1498
+ "raw_html = f'<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n<meta charset=\"utf-8\"/>\\n{style_css}\\n</head>\\n<body>\\n' + document_title + raw_html + '\\n</body>\\n</html>\\n'"
1499
+ ],
1500
+ "id": "a38e9d7ba59d063b",
1501
+ "outputs": [],
1502
+ "execution_count": null
1503
+ },
1504
+ {
1505
+ "metadata": {},
1506
+ "cell_type": "code",
1507
+ "source": [
1508
+ "# render html to PDF\n",
1509
+ "pdfkit.from_string(raw_html, str(analysis_folder / pdf_filename),\n",
1510
+ "options={\n",
1511
+ " 'footer-center': 'Page [page] of [topage]',\n",
1512
+ " 'footer-font-size': '8',\n",
1513
+ " 'footer-spacing': '5',\n",
1514
+ " 'encoding': \"UTF-8\",\n",
1515
+ " 'margin-top':'10mm',\n",
1516
+ " 'margin-right':'15mm',\n",
1517
+ " 'margin-bottom':'15mm',\n",
1518
+ " 'margin-left':'15mm',\n",
1519
+ " 'header-center': study_title,\n",
1520
+ " 'header-font-size': '6',\n",
1521
+ " 'header-spacing': '0',\n",
1522
+ " 'disable-javascript': None,\n",
1523
+ " 'no-outline': None,\n",
1524
+ "},\n",
1525
+ "verbose=True)"
1526
+ ],
1527
+ "id": "792243aad557cc86",
1528
+ "outputs": [],
1529
+ "execution_count": null
1530
+ },
1531
+ {
1532
+ "metadata": {},
1533
+ "cell_type": "code",
1534
+ "source": "",
1535
+ "id": "4cfdfa6f69c3916a",
1536
+ "outputs": [],
1537
+ "execution_count": null
1538
+ }
1539
+ ],
1540
+ "metadata": {
1541
+ "kernelspec": {
1542
+ "display_name": "Python 3 (ipykernel)",
1543
+ "language": "python",
1544
+ "name": "python3"
1545
+ },
1546
+ "language_info": {
1547
+ "codemirror_mode": {
1548
+ "name": "ipython",
1549
+ "version": 3
1550
+ },
1551
+ "file_extension": ".py",
1552
+ "mimetype": "text/x-python",
1553
+ "name": "python",
1554
+ "nbconvert_exporter": "python",
1555
+ "pygments_lexer": "ipython3",
1556
+ "version": "3.12.4"
1557
+ }
1558
+ },
1559
+ "nbformat": 4,
1560
+ "nbformat_minor": 5
1561
+ }