meta-edc 1.0.6__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. meta_ae/action_items.py +10 -2
  2. meta_ae/baker_recipes.py +1 -2
  3. meta_ae/tests/tests/test_actions.py +1 -2
  4. meta_analytics/dataframes/__init__.py +3 -0
  5. meta_analytics/dataframes/constants.py +1 -1
  6. meta_analytics/dataframes/get_eos_df.py +15 -2
  7. meta_analytics/dataframes/get_glucose_df.py +149 -0
  8. meta_analytics/dataframes/get_glucose_fbg_df.py +27 -0
  9. meta_analytics/dataframes/get_glucose_fbg_ogtt_df.py +22 -0
  10. meta_analytics/dataframes/glucose_endpoints/endpoint_by_date.py +106 -120
  11. meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +36 -227
  12. meta_analytics/dataframes/utils.py +18 -4
  13. meta_analytics/notebooks/anu.ipynb +95 -0
  14. meta_analytics/notebooks/appointment_planning.ipynb +329 -0
  15. meta_analytics/notebooks/arvs.ipynb +103 -0
  16. meta_analytics/notebooks/cleaning/consent_v1_ext.ipynb +227 -0
  17. meta_analytics/notebooks/cleaning/offschedule_eos.ipynb +353 -0
  18. meta_analytics/notebooks/dsmc/renal_dysfunction.ipynb +435 -0
  19. meta_analytics/notebooks/endpoints/meta_endpoints_by_date.ipynb +664 -0
  20. meta_analytics/notebooks/followup_examination.ipynb +141 -0
  21. meta_analytics/notebooks/hba1c.ipynb +136 -0
  22. meta_analytics/notebooks/hiv_regimens.ipynb +429 -0
  23. meta_analytics/notebooks/incidence.ipynb +232 -0
  24. meta_analytics/notebooks/liver.ipynb +389 -0
  25. meta_analytics/notebooks/magreth.ipynb +645 -0
  26. meta_analytics/notebooks/monitoring_report.ipynb +1751 -0
  27. meta_analytics/notebooks/pharmacy.ipynb +1070 -0
  28. meta_analytics/notebooks/pharmacy_stock_202410.ipynb +306 -0
  29. meta_analytics/notebooks/steering.ipynb +61 -0
  30. meta_analytics/notebooks/undiagnosed/meta3_screening_consort_chart.ipynb +1176 -0
  31. meta_analytics/notebooks/undiagnosed/meta3_screening_undiagnosed.ipynb +519 -0
  32. meta_analytics/notebooks/undiagnosed/meta_screening_table2.ipynb +964 -0
  33. meta_analytics/notebooks/undiagnosed/screen_undiagnosed_or.ipynb +296 -0
  34. meta_analytics/notebooks/undiagnosed/screening.ipynb +273 -0
  35. meta_analytics/notebooks/undiagnosed/screening2.ipynb +958 -0
  36. meta_analytics/notebooks/undiagnosed/screening_undiagnosed_20241002.ipynb +958 -0
  37. meta_analytics/notebooks/ven.ipynb +191 -0
  38. meta_analytics/notebooks/vitals.ipynb +263 -0
  39. meta_analytics/utils.py +81 -0
  40. meta_edc/settings/debug.py +3 -2
  41. meta_edc/urls.py +1 -0
  42. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/METADATA +6 -5
  43. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/RECORD +77 -36
  44. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/WHEEL +1 -1
  45. meta_edc-1.1.0.dist-info/licenses/AUTHORS.rst +8 -0
  46. meta_labs/reportables.py +14 -11
  47. meta_labs/tests/test_reportables.py +33 -12
  48. meta_pharmacy/notebooks/pharmacy.ipynb +41 -0
  49. meta_prn/migrations/0063_historicaloffstudymedication_singleton_field_and_more.py +37 -0
  50. meta_prn/migrations/0064_auto_20250602_2143.py +18 -0
  51. meta_prn/models/end_of_study.py +2 -0
  52. meta_prn/models/off_study_medication.py +2 -0
  53. meta_reports/migrations/0054_auto_20250422_2003.py +81 -0
  54. meta_reports/migrations/0055_alter_glucosesummary_table.py +17 -0
  55. meta_reports/migrations/0056_auto_20250422_2214.py +54 -0
  56. meta_reports/migrations/0057_auto_20250422_2224.py +54 -0
  57. meta_reports/migrations/0058_auto_20250422_2232.py +54 -0
  58. meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +13 -1
  59. meta_reports/models/dbviews/glucose_summary/view_definition.py +8 -5
  60. meta_screening/eligibility/eligibility_part_three/base_eligibility_part_three.py +59 -47
  61. meta_screening/form_validators/screening_part_three.py +6 -1
  62. meta_screening/tests/meta_test_case_mixin.py +3 -0
  63. meta_screening/tests/tests/test_forms.py +9 -2
  64. meta_screening/tests/tests/test_screening_part_three.py +11 -14
  65. meta_subject/action_items.py +1 -2
  66. meta_subject/choices.py +2 -1
  67. meta_subject/form_validators/glucose_form_validator.py +16 -1
  68. meta_subject/forms/blood_results/blood_results_rft_form.py +60 -3
  69. meta_subject/forms/study_medication_form.py +5 -3
  70. meta_subject/migrations/0221_auto_20250402_1913.py +42 -0
  71. meta_subject/migrations/0222_alter_historicalstudymedication_stock_codes_and_more.py +46 -0
  72. meta_subject/migrations/0223_bloodresultsfbc_errors_bloodresultsgludummy_errors_and_more.py +83 -0
  73. meta_subject/migrations/0224_bloodresultsfbc_abnormal_summary_and_more.py +153 -0
  74. meta_subject/tests/tests/test_egfr.py +5 -5
  75. meta_analytics/dataframes/enrolled/__init__.py +0 -1
  76. meta_analytics/dataframes/enrolled/get_glucose_df.py +0 -122
  77. /meta_edc-1.0.6.dist-info/AUTHORS → /meta_analytics/dataframes/glucose_endpoints/utils.py +0 -0
  78. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info/licenses}/LICENSE +0 -0
  79. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1751 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%%capture\n",
11
+ "import os\n",
12
+ "from pathlib import Path\n",
13
+ "import pandas as pd\n",
14
+ "from dj_notebook import activate\n",
15
+ "import numpy as np\n",
16
+ "from django_pandas.io import read_frame\n",
17
+ "\n",
18
+ "env_file = os.environ[\"META_ENV\"]\n",
19
+ "reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
20
+ "analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
21
+ "pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
22
+ "plus = activate(dotenv_file=env_file)\n",
23
+ "pd.set_option('future.no_silent_downcasting', True)"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "id": "1",
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "import pdfkit\n",
34
+ "from typing import Callable\n",
35
+ "from datetime import date\n",
36
+ "from edc_pdutils.dataframes import get_subject_visit\n",
37
+ "from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39\n",
38
+ "from meta_analytics.dataframes import GlucoseEndpointsByDate\n",
39
+ "from scipy.stats import chi2\n",
40
+ "from great_tables import loc, style, md\n",
41
+ "from meta_analytics.dataframes import get_eos_df\n",
42
+ "from meta_analytics.utils import df_as_great_table, df_as_great_table2\n",
43
+ "from meta_prn.models import LossToFollowup\n",
44
+ "from edc_visit_schedule.models import SubjectScheduleHistory\n",
45
+ "from edc_appointment.analytics import get_appointment_df\n",
46
+ "from edc_appointment.constants import NEW_APPT, CANCELLED_APPT, ONTIME_APPT, MISSED_APPT\n",
47
+ "from meta_consent.models import SubjectConsentV1Ext\n",
48
+ "from meta_analytics.dataframes import get_glucose_df, get_screening_df\n",
49
+ "\n",
50
+ "from edc_appointment.constants import SCHEDULED_APPT, UNSCHEDULED_APPT # noqa\n",
51
+ "from edc_constants.constants import YES # noqa"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "id": "2",
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "html_data = []\n",
62
+ "data_download_date= date(2025,6, 17)\n",
63
+ "cutoff_date = date(2025,6, 17)\n",
64
+ "end_of_trial_date= date(2026,7, 1)\n",
65
+ "document_title = f\"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2><h5>Data Download: {data_download_date.strftime('%d %B %Y')}</h5>\"\n",
66
+ "study_title = 'META3 - Metformin treatment for diabetes prevention in Africa'\n",
67
+ "pdf_filename = f\"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf\"\n"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "id": "3",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "# 105-30-0288-5 should also be late excluded based on the haemoglobin 4.8 presented at baseline\n",
78
+ "\n",
79
+ "df_visit = get_subject_visit(\"meta_subject.subjectvisit\")\n",
80
+ "late_exlusion_offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
81
+ "df_eos = get_eos_df()\n",
82
+ "df_eos_excluded = (\n",
83
+ " df_eos\n",
84
+ " .query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")\n",
85
+ " .copy()\n",
86
+ " .reset_index()\n",
87
+ ")\n",
88
+ "df_visit = (\n",
89
+ " df_visit\n",
90
+ " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
91
+ " .query(\"_merge=='left_only'\")\n",
92
+ " .drop(columns=[\"_merge\"])\n",
93
+ ")\n",
94
+ "\n",
95
+ "df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]\n",
96
+ "\n",
97
+ "df_appointments = get_appointment_df()\n",
98
+ "df_appointments[\"site_id\"] = df_appointments.site_id.astype(str)\n",
99
+ "df_appointments = (\n",
100
+ " df_appointments\n",
101
+ " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
102
+ " .query(\"_merge=='left_only'\")\n",
103
+ " .drop(columns=[\"_merge\"])\n",
104
+ ")\n",
105
+ "\n",
106
+ "\n",
107
+ "cls = GlucoseEndpointsByDate()\n",
108
+ "cls.run()\n",
109
+ "df_endpoint = cls.endpoint_only_df.copy()\n",
110
+ "df_glucose = get_glucose_df()\n",
111
+ "# df_glucose_fbg = get_glucose_fbg_df()\n",
112
+ "# df_glucose = pd.concat([df_glucose, df_glucose_fbg])\n",
113
+ "\n",
114
+ "\n",
115
+ "enrolled = df_visit.copy()\n",
116
+ "enrolled[\"site_id\"] = enrolled[\"site_id\"].astype(str)\n",
117
+ "enrolled_pivot = (\n",
118
+ " enrolled\n",
119
+ " .query(\"visit_code==1000.0\").groupby([\"site_id\"])\n",
120
+ " .size()\n",
121
+ " .reset_index()\n",
122
+ " .pivot_table(columns=\"site_id\", values=0, observed=True)\n",
123
+ ")\n",
124
+ "enrolled_pivot.columns.name=\"\"\n",
125
+ "enrolled_pivot[\"total\"] = enrolled_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": null,
131
+ "id": "4",
132
+ "metadata": {},
133
+ "outputs": [],
134
+ "source": [
135
+ "# df_eos.query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]].to_stata(\n",
136
+ "# path=analysis_folder / \"late_exclusions.dta\",\n",
137
+ "# variable_labels={},\n",
138
+ "# version=118,\n",
139
+ "# write_index=False,\n",
140
+ "# )"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": null,
146
+ "id": "5",
147
+ "metadata": {},
148
+ "outputs": [],
149
+ "source": [
150
+ "column_headers = {\"label\": \"Label\", \"visit_code\": \"Visit code\", \"10\": \"Hindu Mandal\", \"20\": \"Amana\", \"30\": \"Temeke\", \"40\": \"Mwananyamala\", \"60\": \"Mnazi Moja\", \"total\": \"Total\"}\n",
151
+ "column_headers_with_str = {\"label\": \"Label\", \"10_str\": \"Hindu Mandal\", \"20_str\": \"Amana\", \"30_str\": \"Temeke\", \"40_str\": \"Mwananyamala\", \"60_str\": \"Mnazi Moja\", \"total_str\": \"Total\"}"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "id": "6",
158
+ "metadata": {},
159
+ "outputs": [],
160
+ "source": [
161
+ "# Table 1a Visits completed to date\n",
162
+ "\n",
163
+ "df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
164
+ "\n",
165
+ "df_tbl1.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
166
+ "df1 = df_tbl1.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
167
+ "df1.columns.name = None\n",
168
+ "df1.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
169
+ "df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)\n",
170
+ "df1.fillna(0, inplace=True)\n",
171
+ "df_attended = df1.copy().reset_index(drop=True)\n",
172
+ "df_attended = df_attended.fillna(0.0)"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "id": "7",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "gt = df_as_great_table(\n",
183
+ " df_attended[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]],\n",
184
+ " title=\"Table 1a: Visits completed to date\"\n",
185
+ ")\n",
186
+ "gt = (\n",
187
+ " gt\n",
188
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
189
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
190
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
191
+ " .data_color(\n",
192
+ " columns=[\"visit_code\"],\n",
193
+ " palette=[\"lavender\", \"thistle\"],\n",
194
+ " domain=[2000, 5000],\n",
195
+ " na_color=\"white\"\n",
196
+ " )\n",
197
+ " .tab_source_note(source_note=f\"Excludes visit reports submitted for participants eventually withdrawn on late exclusion criteria.\")\n",
198
+ ")\n",
199
+ "html_data.append(gt.as_raw_html())\n",
200
+ "gt.show()"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": null,
206
+ "id": "8",
207
+ "metadata": {},
208
+ "outputs": [],
209
+ "source": [
210
+ "# Table 1b Total scheduled appointments\n",
211
+ "df_appt_pivot = (\n",
212
+ " df_appointments.query(\"appt_reason==@SCHEDULED_APPT\")\n",
213
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
214
+ " .query(\"_merge=='left_only'\")\n",
215
+ " .drop(columns=[\"_merge\"])\n",
216
+ " .reset_index(drop=True)\n",
217
+ " .groupby([\"visit_code\", \"site_id\"])\n",
218
+ " .size()\n",
219
+ " .to_frame()\n",
220
+ " .reset_index()\n",
221
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
222
+ " .reset_index()\n",
223
+ " .fillna(0)\n",
224
+ ")\n",
225
+ "\n",
226
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
227
+ "df_appt_pivot.columns.name = None\n",
228
+ "gt = df_as_great_table(\n",
229
+ " df_appt_pivot,\n",
230
+ " title=\"Table 1b: Total appointments\",\n",
231
+ " subtitle=\"Total possible appointments not including unscheduled appointments\"\n",
232
+ "\n",
233
+ ")\n",
234
+ "gt = (\n",
235
+ " gt\n",
236
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
237
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
238
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
239
+ " .data_color(\n",
240
+ " columns=[\"visit_code\"],\n",
241
+ " palette=[\"lavender\", \"thistle\"],\n",
242
+ " domain=[2000, 5000],\n",
243
+ " na_color=\"white\"\n",
244
+ " )\n",
245
+ ")\n",
246
+ "html_data.append(gt.as_raw_html())\n",
247
+ "gt.show()"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": null,
253
+ "id": "9",
254
+ "metadata": {},
255
+ "outputs": [],
256
+ "source": [
257
+ "# Table 1c Past scheduled appointments -- no information provided\n",
258
+ "df_appt_pivot = (\n",
259
+ " df_appointments.query(\"appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
260
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
261
+ " .query(\"_merge=='left_only'\")\n",
262
+ " .drop(columns=[\"_merge\"])\n",
263
+ " .reset_index(drop=True)\n",
264
+ " .groupby([\"visit_code\", \"site_id\"])\n",
265
+ " .size()\n",
266
+ " .to_frame()\n",
267
+ " .reset_index()\n",
268
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
269
+ " .reset_index()\n",
270
+ " .fillna(0)\n",
271
+ ")\n",
272
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
273
+ "df_appt_pivot.columns.name = None\n",
274
+ "gt = df_as_great_table(\n",
275
+ " df_appt_pivot,\n",
276
+ " title=\"Table 1c: Past appointments not attended/not reported\",\n",
277
+ " subtitle=\"Expected by now but no information provided by site\",\n",
278
+ ")\n",
279
+ "gt = (\n",
280
+ " gt\n",
281
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
282
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
283
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
284
+ " .data_color(\n",
285
+ " columns=[\"visit_code\"],\n",
286
+ " palette=[\"lavender\", \"thistle\"],\n",
287
+ " domain=[2000, 5000],\n",
288
+ " na_color=\"white\"\n",
289
+ " )\n",
290
+ " .tab_source_note(source_note=f\"Scheduled appointment date is before {cutoff_date.strftime('%d %B %Y')}.\")\n",
291
+ ")\n",
292
+ "html_data.append(gt.as_raw_html())\n",
293
+ "gt.show()"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "id": "10",
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": [
303
+ "# Table 1d Unscheduled appointments\n",
304
+ "df_appt = (\n",
305
+ " df_appointments.query(\"appt_reason==@UNSCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status!=@NEW_APPT\")\n",
306
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
307
+ " .query(\"_merge=='left_only'\")\n",
308
+ " .drop(columns=[\"_merge\"])\n",
309
+ " .reset_index(drop=True)\n",
310
+ " .copy()\n",
311
+ " .reset_index(drop=True)\n",
312
+ ")\n",
313
+ "df_appt['visit_code'] = df_appt['visit_code'].astype(int)\n",
314
+ "df_appt['visit_code'] = df_appt['visit_code'].astype(str)\n",
315
+ "\n",
316
+ "subjects_with_unscheduled = df_appt.subject_identifier.nunique()\n",
317
+ "\n",
318
+ "df_appt_pivot = (\n",
319
+ " df_appt\n",
320
+ " .groupby([\"visit_code\", \"site_id\"])\n",
321
+ " .size()\n",
322
+ " .to_frame()\n",
323
+ " .reset_index()\n",
324
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
325
+ " .reset_index()\n",
326
+ " .fillna(0)\n",
327
+ ")\n",
328
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
329
+ "df_appt_pivot.columns.name = None\n",
330
+ "df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]] = df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].astype('float64')\n",
331
+ "\n",
332
+ "\n",
333
+ "# add totals row\n",
334
+ "sum_row = df_appt_pivot.select_dtypes(include='float64').sum()\n",
335
+ "sum_row['visit_code'] = 'Total'\n",
336
+ "sum_row_df = pd.DataFrame(sum_row).T\n",
337
+ "df_appt_pivot = pd.concat([df_appt_pivot, sum_row_df], axis=0).reset_index(drop=True)\n",
338
+ "\n",
339
+ "gt = df_as_great_table(\n",
340
+ " df_appt_pivot,\n",
341
+ " title=\"Table 1d: Unscheduled appointments\",\n",
342
+ " subtitle=\"Appointments with sequence>0 grouped by visit code\",\n",
343
+ ")\n",
344
+ "gt = (\n",
345
+ " gt\n",
346
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
347
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
348
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
349
+ " .data_color(\n",
350
+ " columns=[\"visit_code\"],\n",
351
+ " palette=[\"lavender\", \"thistle\"],\n",
352
+ " domain=[2000, 5000],\n",
353
+ " na_color=\"white\"\n",
354
+ " )\n",
355
+ " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
356
+ " .tab_source_note(source_note=f\"{subjects_with_unscheduled} participants had at least one unscheduled appointment.\")\n",
357
+ ")\n",
358
+ "html_data.append(gt.as_raw_html())\n",
359
+ "gt.show()"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": null,
365
+ "id": "11",
366
+ "metadata": {},
367
+ "outputs": [],
368
+ "source": [
369
+ "# Table 1e Future scheduled appointments\n",
370
+ "df_appt_pivot = (\n",
371
+ " df_appointments.query(\"@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
372
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
373
+ " .query(\"_merge=='left_only'\")\n",
374
+ " .drop(columns=[\"_merge\"])\n",
375
+ " .reset_index(drop=True)\n",
376
+ " .groupby([\"visit_code\", \"site_id\"])\n",
377
+ " .size()\n",
378
+ " .to_frame()\n",
379
+ " .reset_index()\n",
380
+ " .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
381
+ " .reset_index()\n",
382
+ " .fillna(0)\n",
383
+ ")\n",
384
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
385
+ "df_appt_pivot.columns.name = None\n",
386
+ "gt = df_as_great_table(\n",
387
+ " df_appt_pivot,\n",
388
+ " title=\"Table 1e: Future appointments\",\n",
389
+ ")\n",
390
+ "gt = (\n",
391
+ " gt\n",
392
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
393
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
394
+ " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
395
+ " .data_color(\n",
396
+ " columns=[\"visit_code\"],\n",
397
+ " palette=[\"lavender\", \"thistle\"],\n",
398
+ " domain=[2000, 5000],\n",
399
+ " na_color=\"white\"\n",
400
+ " )\n",
401
+ " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
402
+ " .tab_source_note(source_note=f\"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.\")\n",
403
+ ")\n",
404
+ "html_data.append(gt.as_raw_html())\n",
405
+ "gt.show()"
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "code",
410
+ "execution_count": null,
411
+ "id": "12",
412
+ "metadata": {},
413
+ "outputs": [],
414
+ "source": [
415
+ "# Table 2a: Visits missed to date, as a % of (visits attended + visits missed)\n",
416
+ "subject_count = (\n",
417
+ " df_visit\n",
418
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
419
+ " .query(\"_merge=='left_only'\")\n",
420
+ " .drop(columns=[\"_merge\"])\n",
421
+ " .reset_index(drop=True)\n",
422
+ " .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
423
+ ").subject_identifier.nunique()\n",
424
+ "df_tbl = (\n",
425
+ " df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
426
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
427
+ " .query(\"_merge=='left_only'\")\n",
428
+ " .drop(columns=[\"_merge\"])\n",
429
+ " .reset_index(drop=True)\n",
430
+ " .groupby(by=[\"visit_code\", \"site_id\"])\n",
431
+ " .size()\n",
432
+ " .to_frame()\n",
433
+ " .reset_index()\n",
434
+ ")\n",
435
+ "df_tbl.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
436
+ "df_tbl_pivot = df_tbl.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
437
+ "df_tbl_pivot.columns.name = None\n",
438
+ "df_tbl_pivot.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
439
+ "df_tbl_pivot['total'] = df_tbl_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
440
+ "df_missed = (\n",
441
+ " df_tbl_pivot\n",
442
+ " .fillna(0)\n",
443
+ " .copy()\n",
444
+ " .set_index([\"visit_code\"])\n",
445
+ ")\n",
446
+ "\n",
447
+ "df_attended_display = df_attended.copy()\n",
448
+ "df_attended_display = (\n",
449
+ " df_attended_display\n",
450
+ " .set_index([\"visit_code\"])\n",
451
+ ")\n",
452
+ "\n",
453
+ "attended_and_missed = df_attended_display + df_missed\n",
454
+ "attended_and_missed = (\n",
455
+ " attended_and_missed\n",
456
+ " .fillna(0)\n",
457
+ " .reset_index()\n",
458
+ " .set_index([\"visit_code\"])\n",
459
+ ")\n",
460
+ "\n",
461
+ "attended_and_missed_perc = df_missed/attended_and_missed\n",
462
+ "attended_and_missed_perc = (\n",
463
+ " attended_and_missed_perc\n",
464
+ " .fillna(0)\n",
465
+ " .reset_index()\n",
466
+ " .set_index([\"visit_code\"])\n",
467
+ ")\n",
468
+ "\n",
469
+ "df_result = df_missed.merge(attended_and_missed_perc, on=[\"visit_code\"], suffixes=(\"\", \"_perc\"))\n",
470
+ "for col in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
471
+ " col_perc = f\"{col}_perc\"\n",
472
+ " df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc]*100:.2f})\", axis=1)\n",
473
+ "df_result = df_result.reset_index().sort_values(by=[\"visit_code\"], ascending=True)\n",
474
+ "df_result = df_result.fillna(0.0)"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "execution_count": null,
480
+ "id": "13",
481
+ "metadata": {},
482
+ "outputs": [],
483
+ "source": [
484
+ "df_table = df_result[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].copy()\n",
485
+ "gt = df_as_great_table(\n",
486
+ " df_table,\n",
487
+ " title=\"Table 2a: Visits Missed to Date\",\n",
488
+ " subtitle=\"as % of Visits Attended + Visits Missed\"\n",
489
+ ")\n",
490
+ "gt = (\n",
491
+ " gt\n",
492
+ " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
493
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
494
+ " .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
495
+ " .tab_style(\n",
496
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
497
+ " locations=loc.body(\n",
498
+ " columns=[0],\n",
499
+ " rows=list(range(0, len(df_table))),\n",
500
+ " ),\n",
501
+ " )\n",
502
+ " .tab_source_note(source_note=f\"{subject_count} participants had at least one missed visit.\")\n",
503
+ "\n",
504
+ ")\n",
505
+ "html_data.append(gt.as_raw_html())\n",
506
+ "gt.show()\n"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": null,
512
+ "id": "14",
513
+ "metadata": {},
514
+ "outputs": [],
515
+ "source": [
516
+ "# Table 2b: Number of participants by count of missed visits\n",
517
+ "subject_count = (\n",
518
+ " df_visit\n",
519
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
520
+ " .query(\"_merge=='left_only'\")\n",
521
+ " .drop(columns=[\"_merge\"])\n",
522
+ " .reset_index(drop=True)\n",
523
+ " .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
524
+ ").subject_identifier.nunique()\n",
525
+ "df_tbl = (\n",
526
+ " df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
527
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
528
+ " .query(\"_merge=='left_only'\")\n",
529
+ " .drop(columns=[\"_merge\"])\n",
530
+ " .reset_index(drop=True)\n",
531
+ " .groupby(by=[\"subject_identifier\", \"site_id\"])\n",
532
+ " .size()\n",
533
+ " .to_frame()\n",
534
+ " .reset_index()\n",
535
+ ")\n",
536
+ "df_tbl.columns = [\"subject_identifier\", \"site_id\", \"missed_count\"]\n",
537
+ "df_tbl[\"category\"] = pd.cut(df_tbl[\"missed_count\"], bins=[0, 1, 3, 5, 7, 100], labels=[\"Missed at least 1\", \"2 to 3\", \"4 to 5\", \"6 to 7\", \"missed more than 7\"])\n",
538
+ "df_tbl_pivot = df_tbl.pivot_table(index=\"category\", columns=\"site_id\", values=\"missed_count\", observed=False, aggfunc=\"count\").reset_index()\n",
539
+ "\n",
540
+ "df_tbl_pivot['total'] = df_tbl_pivot.select_dtypes(include='int').sum(axis=1, skipna=True)\n",
541
+ "\n",
542
+ "sum_row = df_tbl_pivot.select_dtypes(include='int64').sum()\n",
543
+ "sum_row['category'] = 'Total'\n",
544
+ "\n",
545
+ "\n",
546
+ "df_tbl_pivot = (\n",
547
+ " pd.concat([df_tbl_pivot, sum_row.to_frame().T], axis=0)\n",
548
+ " .rename(columns={10: \"10\", 20: \"20\", 30: \"30\", 40: \"40\", 60: \"60\"})\n",
549
+ ")\n",
550
+ "\n",
551
+ "gt = df_as_great_table(\n",
552
+ " df_tbl_pivot,\n",
553
+ " title=\"Table 2b: Number of participants who missed one or more visits\",\n",
554
+ ")\n",
555
+ "gt = (\n",
556
+ " gt\n",
557
+ " .cols_label({\"category\": \"Category\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
558
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
559
+ " .cols_align(align=\"left\", columns=[\"category\"])\n",
560
+ " .tab_style(\n",
561
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
562
+ " locations=loc.body(\n",
563
+ " columns=[0],\n",
564
+ " rows=list(range(0, len(df_table))),\n",
565
+ " ),\n",
566
+ " )\n",
567
+ ")\n",
568
+ "html_data.append(gt.as_raw_html())\n",
569
+ "gt.show()\n",
570
+ "\n"
571
+ ]
572
+ },
573
+ {
574
+ "cell_type": "code",
575
+ "execution_count": null,
576
+ "id": "15",
577
+ "metadata": {},
578
+ "outputs": [],
579
+ "source": [
580
+ "# Helper functions for tables 3, 4 and 5\n",
581
+ "def get_row_df(row_df:pd.DataFrame, label:str=None, **kwargs)->pd.DataFrame:\n",
582
+ " row_df = row_df.groupby(by=[\"site_id\"]).site_id.count().to_frame(name=\"n\")\n",
583
+ " row_df[\"label\"] = label\n",
584
+ " row_df = row_df.reset_index()\n",
585
+ " row_df = row_df.pivot(index=\"label\", values=\"n\", columns=\"site_id\").reset_index()\n",
586
+ " row_df.columns.name = \"\"\n",
587
+ " all_sites = [10, 20, 30, 40, 60]\n",
588
+ " for site in all_sites:\n",
589
+ " if site not in row_df.columns:\n",
590
+ " row_df[site] = None\n",
591
+ " row_df = row_df.reset_index(drop=True)\n",
592
+ " return row_df\n",
593
+ "\n",
594
+ "\n",
595
+ "def get_table_df(\n",
596
+ " df_source:pd.DataFrame,\n",
597
+ " visit_code:float|None=None,\n",
598
+ " month_label:str|None=None,\n",
599
+ " visit_codes:list[float]|None=None,\n",
600
+ " get_row_func:Callable|None=None,\n",
601
+ " category_labels:list[str]|None=None,\n",
602
+ ")->pd.DataFrame:\n",
603
+ " get_row_df_func = get_row_func or get_row_df\n",
604
+ " if visit_code:\n",
605
+ " df_month = df_source[df_source.visit_code==visit_code].copy()\n",
606
+ " elif visit_codes:\n",
607
+ " df_month = df_source[df_source.visit_code.isin(visit_codes)].copy()\n",
608
+ " elif month_label:\n",
609
+ " df_month = df_source.copy()\n",
610
+ "\n",
611
+ " \n",
612
+ " row_df = df_month.copy()\n",
613
+ " table_df = get_row_df_func(row_df, \"Total (n)\", category_labels=category_labels)\n",
614
+ " \n",
615
+ " row_df = df_month.query(\"ogtt_value<7.8 and fbg_value<6.1\").copy()\n",
616
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG <6.1\", category_labels=category_labels)])\n",
617
+ " \n",
618
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
619
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
620
+ " \n",
621
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=7.0)].copy()\n",
622
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG >=7.0\", category_labels=category_labels)])\n",
623
+ " \n",
624
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value<6.1)].copy()\n",
625
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG <6.1\", category_labels=category_labels)])\n",
626
+ " \n",
627
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
628
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
629
+ " \n",
630
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=7.0)].copy()\n",
631
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG >=7.0\", category_labels=category_labels)])\n",
632
+ " \n",
633
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value<6.1)].copy()\n",
634
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG <6.1\", category_labels=category_labels)])\n",
635
+ " \n",
636
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
637
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
638
+ " \n",
639
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=7.0)].copy()\n",
640
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG >=7.0\", category_labels=category_labels)])\n",
641
+ "\n",
642
+ " row_df = df_month[(df_month.ogtt_value.isna())].copy()\n",
643
+ " table_df = pd.concat([table_df, get_row_df_func(row_df, \"Missing OGTT\", category_labels=category_labels)])\n",
644
+ " return table_df\n",
645
+ "\n",
646
+ "\n",
647
+ "def format_table_df(tbl_df, add_totals:bool|None=None):\n",
648
+ " \"\"\"Pivot on site\"\"\"\n",
649
+ " add_totals = True if add_totals is None else add_totals\n",
650
+ " tbl_df = tbl_df.fillna(0.0)\n",
651
+ " tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
652
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
653
+ "\n",
654
+ " if add_totals:\n",
655
+ " df_last = tbl_df[1:].sum().to_frame()\n",
656
+ " df_last.loc[\"label\"] = np.nan\n",
657
+ " df_last = df_last.reset_index()\n",
658
+ " df_last.columns = [\"label\", \"value\"]\n",
659
+ " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
660
+ " df_last.columns.name = \"\"\n",
661
+ " df_last[\"label\"] = \"Totals\"\n",
662
+ "\n",
663
+ " tbl_df = pd.concat([tbl_df, df_last])\n",
664
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
665
+ "\n",
666
+ " tbl_df.columns = [\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]\n",
667
+ "\n",
668
+ " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
669
+ " tbl_df[f\"{site}_perc\"] = (tbl_df[site]/tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site]>0 else 0\n",
670
+ " tbl_df[f\"{site}_perc_str\"] = tbl_df[f\"{site}_perc\"].map('{:.1f}'.format)\n",
671
+ "\n",
672
+ "\n",
673
+ " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
674
+ " tbl_df[f\"{site}_str\"] = tbl_df[[f\"{site}\", f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
675
+ " tbl_df[f\"{site}_str\"] = tbl_df[f\"{site}_str\"] + \")\"\n",
676
+ "\n",
677
+ " cols = [\"label\", *[f\"{site}_str\" for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]]]\n",
678
+ " tbl_df1 = tbl_df[cols]\n",
679
+ " tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].to_list()\n",
680
+ " return tbl_df1\n",
681
+ "\n",
682
+ "def format_table_with_bmi_df(tbl_df, add_totals:bool|None=None, category_labels:list[str]=None):\n",
683
+ " \"\"\"Pivot on BMI categories\"\"\"\n",
684
+ "\n",
685
+ " add_totals = True if add_totals is None else add_totals\n",
686
+ " tbl_df = tbl_df.fillna(0.0)\n",
687
+ " tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
688
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
689
+ "\n",
690
+ " if add_totals:\n",
691
+ " df_last = tbl_df[1:].sum().to_frame()\n",
692
+ " df_last.loc[\"label\"] = np.nan\n",
693
+ " df_last = df_last.reset_index()\n",
694
+ " df_last.columns = [\"label\", \"value\"]\n",
695
+ " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
696
+ " df_last.columns.name = \"\"\n",
697
+ " df_last[\"label\"] = \"Totals\"\n",
698
+ "\n",
699
+ " tbl_df = pd.concat([tbl_df, df_last])\n",
700
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
701
+ "\n",
702
+ " tbl_df.columns = [\"label\", *category_labels, \"total\"]\n",
703
+ "\n",
704
+ " for label in [*category_labels, \"total\"]:\n",
705
+ " tbl_df[f\"{label}_perc\"] = (tbl_df[label]/tbl_df.iloc[0][label]) * 100 if tbl_df.iloc[0][label]>0 else 0\n",
706
+ " tbl_df[f\"{label}_perc_str\"] = tbl_df[f\"{label}_perc\"].map('{:.1f}'.format)\n",
707
+ "\n",
708
+ " for cat in [*category_labels, \"total\"]:\n",
709
+ " tbl_df[f\"{label}_str\"] = tbl_df[[f\"{label}\", f\"{label}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
710
+ " tbl_df[f\"{label}_str\"] = tbl_df[f\"{label}_str\"] + \")\"\n",
711
+ "\n",
712
+ " cols = [\"label\", *[f\"{label}_str\" for label in [*category_labels, \"total\"]]]\n",
713
+ " tbl_df1 = tbl_df[cols]\n",
714
+ " tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", *category_labels, \"total\"]].to_list()\n",
715
+ " return tbl_df1\n",
716
+ "\n",
717
+ "\n",
718
def get_row_by_df(row_df: pd.DataFrame, label: str, category_labels: list[str]) -> pd.DataFrame:
    """Return a one-row table of record counts per ``site_id`` for ``label``.

    The rows of ``row_df`` are counted per ``site_id`` and pivoted so each
    site becomes a column. Any entry of ``category_labels`` that is not
    already a column is added with ``None`` so the result always exposes
    those columns.
    """
    counts = row_df.groupby(by=["site_id"]).site_id.count().to_frame(name="n")
    counts["label"] = label
    wide = (
        counts.reset_index()
        .pivot(index="label", values="n", columns="site_id")
        .reset_index()
    )
    wide.columns.name = ""

    for category in category_labels:
        if category not in wide.columns:
            wide[category] = None
    return wide.reset_index(drop=True)
732
+ ]
733
+ },
734
+ {
735
+ "cell_type": "code",
736
+ "execution_count": null,
737
+ "id": "16",
738
+ "metadata": {},
739
+ "outputs": [],
740
+ "source": [
741
def get_fbg_value(r):
    """Prefer ``converted_fbg2_value``; fall back to ``converted_fbg_value`` when it is missing."""
    second_value = r["converted_fbg2_value"]
    if pd.isna(second_value):
        return r["converted_fbg_value"]
    return second_value
745
+ "\n",
746
def get_ogtt_value(r):
    """Prefer ``converted_ogtt2_value``; fall back to ``converted_ogtt_value`` when it is missing."""
    second_value = r["converted_ogtt2_value"]
    if pd.isna(second_value):
        return r["converted_ogtt_value"]
    return second_value
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": null,
755
+ "id": "17",
756
+ "metadata": {},
757
+ "outputs": [],
758
+ "source": [
759
# Table 3: OGTT and FBG at Enrolment
#
# Restricts the screening data to consented subjects that appear in df_visit,
# derives a single FBG and OGTT value per subject, and renders the per-site
# OGTT x FBG table.

subjects = df_visit.subject_identifier.unique()
# .copy(): columns are added below, so work on an independent frame rather
# than on the result of the query selection
df_screening = (
    get_screening_df()
    .query("consented==True and subject_identifier.isin(@subjects)")
    .copy()
)
df_screening["visit_code"] = "Enrol"
df_screening["fbg_value"] = df_screening.apply(get_fbg_value, axis=1)
df_screening["ogtt_value"] = df_screening.apply(get_ogtt_value, axis=1)
df_screening["site_id"] = df_screening.site.astype(int)
df_screening = df_screening.drop(columns=["site"])
# month_label selects the "use every row" branch of get_table_df
df_table3 = get_table_df(df_screening, month_label="enrol")
df_table3 = format_table_df(df_table3)
df_table3 = df_table3.fillna(0.0)
gt = df_as_great_table(df_table3, title="Table 3a: OGTT and FBG at Screening / Enrolment")

# Drop only the "visit_code" header. This is a key comparison; the previous
# `k not in "visit_code"` was a substring test against the text "visit_code".
column_headers_enrol = {k: v for k, v in column_headers_with_str.items() if k != "visit_code"}
gt = (
    gt
    .cols_label(column_headers_enrol)
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "35%"})
    .tab_source_note(source_note="Excluding patients eventually withdrawn for `late exclusion` criteria")
)
html_data.append(gt.as_raw_html())
gt.show()
784
+ ]
785
+ },
786
+ {
787
+ "cell_type": "code",
788
+ "execution_count": null,
789
+ "id": "18",
790
+ "metadata": {},
791
+ "outputs": [],
792
+ "source": [
793
+ "# bmi_categories:\n",
794
+ "# 1 calculated_bmi_value<25\n",
795
+ "# 2 calculated_bmi_value>=25 & calculated_bmi_value<30\n",
796
+ "# 3 calculated_bmi_value>=30\n",
797
+ "\n",
798
+ "\n",
799
+ "# subjects = df_visit.subject_identifier.unique()\n",
800
+ "# df_screening = get_screening_df().query(\"consented==True and subject_identifier.isin(@subjects)\")\n",
801
+ "# df_screening[\"visit_code\"] = \"Enrol\"\n",
802
+ "# df_screening[\"fbg_value\"] = df_screening.apply(get_fbg_value, axis=1)\n",
803
+ "# df_screening[\"ogtt_value\"] = df_screening.apply(get_ogtt_value, axis=1)\n",
804
+ "# df_screening[\"site_id\"] = df_screening.site.astype(int)\n",
805
+ "# df_screening = df_screening.drop(columns=[\"site\"])\n",
806
+ "# df_screening[\"bmi\"] = pd.NA\n",
807
+ "# df_screening.loc[df_screening[\"calculated_bmi_value\"] < 25.0, \"bmi\"] = \"bmi<25\"\n",
808
+ "# df_screening.loc[(df_screening[\"calculated_bmi_value\"]>=25.0) & (df_screening[\"calculated_bmi_value\"] < 30.0), \"bmi\"] = \"25<=bmi<30\"\n",
809
+ "# df_screening.loc[df_screening[\"calculated_bmi_value\"] > 30.0, \"bmi\"] = \"bmi>30\"\n",
810
+ "#\n",
811
+ "# category_labels = [ \"bmi<25\", \"25<=bmi<30\", \"bmi>=30\", \"Total (n)\"]\n",
812
+ "# df_table3 = get_table_df(df_screening, month_label=\"enrol\", get_row_func=get_row_by_df, category_labels=category_labels)\n",
813
+ "# df_table3 = format_table_with_bmi_df(df_table3, category_labels=category_labels)\n",
814
+ "# df_table3 = df_table3.fillna(0.0)\n",
815
+ "# gt = df_as_great_table(df_table3, title=\"Table 3b: OGTT/FBG by BMI at Screening / Enrolment\")\n",
816
+ "# column_headers_enrol = {\"bmi<25_str\":\"bmi<25\", \"25<=bmi<30_str\":\"25<=bmi<30\", \"bmi>30_str\":\"bmi>30\", \"total_str\": \"total\"}\n",
817
+ "# gt = (\n",
818
+ "# gt\n",
819
+ "# .cols_label(column_headers_enrol)\n",
820
+ "# .cols_align(align=\"center\", columns=[\"bmi<25_str\", \"25<=bmi<30_str\", \"bmi>30_str\", \"total_str\"])\n",
821
+ "# .cols_align(align=\"left\", columns=[\"label\"])\n",
822
+ "# .cols_width(cases={\"label\": \"35%\"})\n",
823
+ "# .tab_source_note(source_note=\"Excluding patients eventually withdrawn for `late exclusion` criteria\")\n",
824
+ "# )\n",
825
+ "# html_data.append(gt.as_raw_html())\n",
826
+ "# gt.show()\n",
827
+ "\n"
828
+ ]
829
+ },
830
+ {
831
+ "cell_type": "code",
832
+ "execution_count": null,
833
+ "id": "19",
834
+ "metadata": {},
835
+ "outputs": [],
836
+ "source": [
837
+ "[col for col in df_screening.columns if \"bmi\" in col]"
838
+ ]
839
+ },
840
+ {
841
+ "cell_type": "code",
842
+ "execution_count": null,
843
+ "id": "20",
844
+ "metadata": {},
845
+ "outputs": [],
846
+ "source": [
847
# Table 4: OGTT and FBG at 12-month visit
# Counts for visit code 1120.0, formatted per site and rendered.
# NOTE: the frame is stored in df_table3, the name also used by the Table 3 cell.
df_table3 = format_table_df(get_table_df(df_glucose, visit_codes=[1120.0]))
df_table3 = df_table3.fillna(0.0)
gt = df_as_great_table(df_table3, title="Table 4: OGTT and FBG at 12-month visit")
gt = gt.cols_label(column_headers_with_str)
gt = gt.cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
gt = gt.cols_align(align="left", columns=["label"])
gt = gt.cols_width(cases={"label": "35%"})
html_data.append(gt.as_raw_html())
gt.show()
861
+ ]
862
+ },
863
+ {
864
+ "cell_type": "code",
865
+ "execution_count": null,
866
+ "id": "21",
867
+ "metadata": {},
868
+ "outputs": [],
869
+ "source": [
870
# Table 5: OGTT and FBG at 24-month visit
# Counts for visit code 1240.0, formatted per site and rendered.
df_table4 = format_table_df(get_table_df(df_glucose, visit_code=1240.0))
df_table4 = df_table4.fillna(0.0)
gt = df_as_great_table(df_table4, title="Table 5: OGTT and FBG at 24-month visit")
gt = gt.cols_label(column_headers_with_str)
gt = gt.cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
gt = gt.cols_align(align="left", columns=["label"])
gt = gt.cols_width(cases={"label": "35%"})
html_data.append(gt.as_raw_html())
gt.show()
884
+ ]
885
+ },
886
+ {
887
+ "cell_type": "code",
888
+ "execution_count": null,
889
+ "id": "22",
890
+ "metadata": {},
891
+ "outputs": [],
892
+ "source": [
893
# Table 6: OGTT and FBG at 36-month visit
# Counts for visit code 1360.0, formatted per site and rendered.
df_table5 = format_table_df(get_table_df(df_glucose, visit_code=1360.0))
df_table5 = df_table5.fillna(0.0)
gt = df_as_great_table(df_table5, title="Table 6: OGTT and FBG at 36-month visit")
gt = gt.cols_label(column_headers_with_str)
gt = gt.cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
gt = gt.cols_align(align="left", columns=["label"])
gt = gt.cols_width(cases={"label": "35%"})
html_data.append(gt.as_raw_html())
gt.show()
907
+ ]
908
+ },
909
+ {
910
+ "cell_type": "code",
911
+ "execution_count": null,
912
+ "id": "23",
913
+ "metadata": {},
914
+ "outputs": [],
915
+ "source": [
916
# Table 7: Any OGTT>=11.1 ever
# Counts, per site, the glucose records with an OGTT value of 11.1 or more.
# The filter is inclusive (>=), matching the "OGTT ≥11.1" band used in the
# other tables, so the title says ≥ rather than >.
row_df = df_glucose[df_glucose.ogtt_value>=11.1].copy()
table_df = get_row_df(row_df, "Total (n)")
df_table6 = format_table_df(table_df)
# only the first ("Total (n)") row is reported
df_table = df_table6[:1].fillna(0.0).copy().reset_index(drop=True)
gt = df_as_great_table(df_table, title="Table 7: Any OGTT≥11.1 ever")
gt = (
    gt
    .cols_label(column_headers_with_str)
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "35%"})
)
html_data.append(gt.as_raw_html())
gt.show()
931
+ ]
932
+ },
933
+ {
934
+ "cell_type": "code",
935
+ "execution_count": null,
936
+ "id": "24",
937
+ "metadata": {},
938
+ "outputs": [],
939
+ "source": [
940
+ "# func for table 7\n",
941
def get_table7_df(df_source: pd.DataFrame, visit_code: float) -> pd.DataFrame:
    """Build the FBG band table for one visit window.

    Keeps the rows whose ``visit_code`` lies in
    ``[visit_code, visit_code + 0.9]`` and returns one ``get_row_df`` row for
    the total followed by one per FBG band (<6.1, >=6.1 to <7.0, >=7.0).
    """
    in_window = (df_source.visit_code >= visit_code) & (df_source.visit_code <= visit_code + 0.9)
    df_month = df_source[in_window].copy()
    fbg = df_month.fbg_value

    bands = [
        ("FBG <6.1", fbg < 6.1),
        ("FBG >=6.1 <7.0", (fbg >= 6.1) & (fbg < 7.0)),
        ("FBG >=7.0", fbg >= 7.0),
    ]

    table_df = get_row_df(df_month.copy(), "Total (n)")
    for band_label, band_mask in bands:
        band_row = get_row_df(df_month[band_mask].copy(), band_label)
        table_df = pd.concat([table_df, band_row])
    return table_df
956
+ ]
957
+ },
958
+ {
959
+ "cell_type": "code",
960
+ "execution_count": null,
961
+ "id": "25",
962
+ "metadata": {},
963
+ "outputs": [],
964
+ "source": [
965
# Table 8: Interim FBG results
# One formatted FBG band table per interim visit window, each tagged with its
# month label in "visit_code", then stacked into df_table.
def _interim_fbg_table(visit_code, month_label):
    """Return the formatted FBG table for one visit window, tagged with its month label."""
    formatted = format_table_df(get_table7_df(df_glucose, visit_code), add_totals=False)
    formatted["visit_code"] = month_label
    return formatted


df_table7 = _interim_fbg_table(1150.0, MONTH15)
df_table71 = _interim_fbg_table(1180.0, MONTH18)
df_table72 = _interim_fbg_table(1210.0, MONTH21)
df_table73 = _interim_fbg_table(1270.0, MONTH27)
df_table74 = _interim_fbg_table(1300.0, MONTH30)
df_table75 = _interim_fbg_table(1330.0, MONTH33)
df_table76 = _interim_fbg_table(1390.0, MONTH39)

df_table = (
    pd.concat([df_table7, df_table71, df_table72, df_table73, df_table74, df_table75, df_table76])
    .reset_index(drop=True)
    .fillna(0.0)
)
997
+ ]
998
+ },
999
+ {
1000
+ "cell_type": "code",
1001
+ "execution_count": null,
1002
+ "id": "26",
1003
+ "metadata": {},
1004
+ "outputs": [],
1005
+ "source": [
1006
# Render Table 8. The "visit_code" header is put first in the shared header
# mapping (this rebinds column_headers_with_str for later cells too).
column_headers_with_str = {"visit_code": "Visit Code", **column_headers_with_str}
gt = df_as_great_table2(df_table, title="Table 8: Interim FBG results")
gt = gt.cols_label(column_headers_with_str)
gt = gt.cols_move_to_start(columns="visit_code")
gt = gt.cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
gt = gt.cols_align(align="left", columns=["visit_code", "label"])
gt = gt.cols_width(cases={"label": "15%"})
# bold black text on a light gray fill for the row-group headers
gt = gt.tab_style(
    style=[
        style.text(color="black", weight="bold"),
        style.fill(color="lightgray"),
    ],
    locations=loc.row_groups(),
)
html_data.append(gt.as_raw_html())
gt.show()
1025
+ ]
1026
+ },
1027
+ {
1028
+ "cell_type": "code",
1029
+ "execution_count": null,
1030
+ "id": "27",
1031
+ "metadata": {},
1032
+ "outputs": [],
1033
+ "source": [
1034
# Table 9: Primary Endpoint met
# Endpoint counts per endpoint label (rows) and site (columns), with a
# "Total" row and a "total" column.
df_endpoint_grp = df_endpoint.groupby(by=["site_id", "endpoint_label"]).size().to_frame().reset_index()
df_endpoint_grp.columns = ["site_id", "label", "endpoints"]
df_endpoint_pivot = df_endpoint_grp.pivot_table(index="label", columns="site_id", values="endpoints").reset_index()
df_endpoint_pivot.columns.name = ""
# Name the site columns by their own id and align on the fixed site list.
# A site with no endpoints becomes an all-NaN column instead of breaking (or
# shifting) a positional rename.
df_endpoint_pivot.columns = [str(col) for col in df_endpoint_pivot.columns]
df_endpoint_pivot = df_endpoint_pivot.reindex(columns=['label', "10", "20", "30", "40", "60"])
# append the column sums as a new last row, then label it
df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()
df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'
df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)
df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)

gt = df_as_great_table(
    df_endpoint_pivot,
    title="Table 9a: Primary Endpoint met"
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k not in ["visit_code"]})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "25%"})
)
html_data.append(gt.as_raw_html())
gt.show()
1058
+ ]
1059
+ },
1060
+ {
1061
+ "cell_type": "code",
1062
+ "execution_count": null,
1063
+ "id": "28",
1064
+ "metadata": {},
1065
+ "outputs": [],
1066
+ "source": [
1067
+ "#read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\"), verbose=False).rename(columns={\"site\": \"site_id\"})"
1068
+ ]
1069
+ },
1070
+ {
1071
+ "cell_type": "code",
1072
+ "execution_count": null,
1073
+ "id": "29",
1074
+ "metadata": {},
1075
+ "outputs": [],
1076
+ "source": [
1077
from great_tables import html

# Table 9b: Primary Endpoint no EOS or DM Referral
# Same layout as Table 9a, restricted to endpoint subjects that have no
# off-schedule record with an offschedule_datetime. Their identifiers are
# listed in the source note.
df_subjecthistory = read_frame(SubjectScheduleHistory.objects.filter(offschedule_model="meta_prn.offschedule", offschedule_datetime__isnull=False), verbose=False).rename(columns={"site": "site_id"})
df_subjecthistory["site_id"] = df_subjecthistory["site_id"].astype(str)
df_endpoint_no_off = df_endpoint.merge(df_subjecthistory[["subject_identifier", "offschedule_datetime"]], on=["subject_identifier"], how="left")
# after the left merge, a missing offschedule_datetime means "not taken off schedule"
df_endpoint_grp = df_endpoint_no_off.query("offschedule_datetime.isna()").groupby(by=["site_id", "endpoint_label"]).size().to_frame().reset_index()
df_endpoint_grp.columns = ["site_id", "label", "endpoints"]
df_endpoint_pivot = df_endpoint_grp.pivot_table(index="label", columns="site_id", values="endpoints").reset_index()
df_endpoint_pivot.columns.name = ""
# Name the site columns by their own id and align on the fixed site list.
# Appending the missing sites at the end and then renaming by position would
# attach the wrong site id to the existing columns.
df_endpoint_pivot.columns = [str(col) for col in df_endpoint_pivot.columns]
df_endpoint_pivot = df_endpoint_pivot.reindex(columns=['label', "10", "20", "30", "40", "60"])
# append the column sums as a new last row, then label it
df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()
df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'
df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)
df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)
subjects = df_endpoint_no_off.query("offschedule_datetime.isna()").subject_identifier.to_list()

gt = df_as_great_table(
    df_endpoint_pivot,
    title="Table 9b: Primary Endpoint met -- participant not referred"
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k not in ["visit_code"]})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "25%"})
    .tab_source_note(source_note=html("<BR>".join(subjects)))
)
html_data.append(gt.as_raw_html())
gt.show()
1111
+ ]
1112
+ },
1113
+ {
1114
+ "cell_type": "code",
1115
+ "execution_count": null,
1116
+ "id": "30",
1117
+ "metadata": {},
1118
+ "outputs": [],
1119
+ "source": []
1120
+ },
1121
+ {
1122
+ "cell_type": "code",
1123
+ "execution_count": null,
1124
+ "id": "31",
1125
+ "metadata": {},
1126
+ "outputs": [],
1127
+ "source": [
1128
+ "# Table 10: Incident Rate per 1000 person years\n",
1129
+ "\n",
1130
def get_df_main(df_visit: pd.DataFrame, lower_days: float | None = None, upper_days: float | None = None):
    """Build the per-subject follow-up frame for one follow-up window.

    Visits are kept when ``lower_days < followup_days <= upper_days``, the
    visit reason is not 'missed' and ``visit_code < 2000.0``. Subjects whose
    end-of-study record falls in the same window with the late-exclusion
    reason are removed. The remaining visits are reduced to one row per
    subject (column-wise maximum) and left-joined to the module-level
    ``df_endpoint`` rows with ``days_to_endpoint > lower_days``.

    Args:
        df_visit: visit frame with ``subject_identifier``, ``followup_days``,
            ``reason``, ``visit_code``, ``baseline_datetime`` and
            ``visit_datetime`` columns.
        lower_days: exclusive lower bound of the window; a falsy value
            (``None`` or 0) is replaced by -1.
        upper_days: inclusive upper bound of the window; ``None`` means no
            upper bound.

    Returns:
        ``(df_main, n_rows, n_events)`` where ``n_rows`` is ``len(df_main)``
        and ``n_events`` counts the rows with an endpoint label whose
        ``days_to_endpoint`` lies inside the window.
    """
    if not lower_days:
        lower_days = -1
    if upper_days is None:
        # the default used to reach the query() comparisons as None
        upper_days = float("inf")
    # exclude subjects for this reason
    offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']

    df_eos = get_eos_df()
    df_eos_excluded = (
        df_eos
        .query("followup_days>@lower_days and followup_days<=@upper_days and offstudy_reason.isin(@offstudy_reasons)")
        .copy()
        .reset_index()
    )
    # anti-join: keep only the visits of subjects NOT in df_eos_excluded
    df_visit_final = (
        df_visit.query("@lower_days<followup_days<=@upper_days and reason!='missed' and visit_code<2000.0")
        .merge(df_eos_excluded[["subject_identifier"]], on="subject_identifier", how="left", suffixes=("", "_y"), indicator=True)
        .query("_merge=='left_only'")
        .drop(columns=["_merge"])
    )
    df_main = (
        df_visit_final
        .groupby(by=["subject_identifier"])[["baseline_datetime", "visit_datetime", "followup_days"]]
        .max()
        .reset_index()
    )

    df_main = (
        df_main
        .merge(
            df_endpoint.query("days_to_endpoint>@lower_days")[["subject_identifier", "endpoint_label", "endpoint_type", "days_to_endpoint"]],
            how="left",
            on=["subject_identifier"])
        .reset_index(drop=True)
    )
    if lower_days>=365.25:
        # measure follow-up from the start of the window rather than from baseline
        df_main["followup_days"] = df_main["followup_days"] - lower_days
    df_main["followup_years"] = df_main["followup_days"]/365.25
    return df_main, len(df_main), len(df_main.query("@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()"))
1169
+ "\n",
1170
def get_rate_and_ci(events, person_years_total):
    """Return the event rate per 1000 person-years with its 95% interval.

    The bounds are the chi-square based limits for a Poisson count:
    ``chi2.ppf(0.025, 2 * events) / 2`` and
    ``chi2.ppf(0.975, 2 * (events + 1)) / 2``, each divided by the person-years
    and scaled to 1000.

    Args:
        events: number of events observed.
        person_years_total: total person-years of follow-up.

    Returns:
        ``(rate, lower_ci, upper_ci)``, all per 1000 person-years. With zero
        events the lower bound is 0.0.
    """
    if events > 0:
        lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000
    else:
        # chi2 with 0 degrees of freedom is undefined (ppf returns nan);
        # the lower limit for a zero count is 0
        lower_ci = 0.0
    upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000
    return events/person_years_total*1000, lower_ci, upper_ci
1174
+ "\n",
1175
def get_incidence_data(term: str, lower_days: float, upper_days: float):
    """Return ``{term: [person_years, subjects, events, rate, lower_ci, upper_ci]}``.

    The figures come from ``get_df_main`` applied to the module-level
    ``df_visit`` for the given follow-up window, and from ``get_rate_and_ci``.
    """
    df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)
    person_years_total = df_main.followup_years.sum()
    rate, lower_ci, upper_ci = get_rate_and_ci(events, person_years_total)
    return {term: [person_years_total, subjects, events, rate, lower_ci, upper_ci]}
1181
+ ]
1182
+ },
1183
+ {
1184
+ "cell_type": "code",
1185
+ "execution_count": null,
1186
+ "id": "32",
1187
+ "metadata": {},
1188
+ "outputs": [],
1189
+ "source": [
1190
# Incidence figures for the whole follow-up and for each yearly window,
# reshaped into the column-oriented `data` dict. df_table9 holds every
# column except "subjects".
incidence_data = {}
for term, lower_days, upper_days in (
    ("total", -1, 10000),
    ("0-1 years", -1, 365.25),
    ("1-2 years", 365.25, 2 * 365.25),
    ("2-3 years", 2 * 365.25, 3 * 365.25),
    ("3+ years", 3 * 365.25, 10 * 365.25),
):
    incidence_data.update(get_incidence_data(term, lower_days=lower_days, upper_days=upper_days))

data = dict(label=[], person_years=[], subjects=[], failures=[], rate=[], lower_ci=[], upper_ci=[])
value_columns = ("person_years", "subjects", "failures", "rate", "lower_ci", "upper_ci")
for k, v in incidence_data.items():
    data["label"].append(k)
    # v is [person_years, subjects, events, rate, lower_ci, upper_ci]
    for position, column in enumerate(value_columns):
        data[column].append(v[position])

df_table9 = pd.DataFrame(data={k:v for k,v in data.items() if k!="subjects"})
1209
+ ]
1210
+ },
1211
+ {
1212
+ "cell_type": "code",
1213
+ "execution_count": null,
1214
+ "id": "33",
1215
+ "metadata": {},
1216
+ "outputs": [],
1217
+ "source": [
1218
# Render Table 10 from df_table9: two-decimal numbers, lower/upper bounds
# grouped under a "95%CI" spanner.
numeric_columns = ["person_years", "failures", "rate", "lower_ci", "upper_ci"]
gt = df_as_great_table(
    df_table9,
    title="Table 10: Incident Rate per 1000 person years",
    subtitle=md("using randomisation to diabetes/last seen"),
)
gt = gt.fmt_number(columns=["person_years", "failures", "rate", "lower_ci", "upper_ci"], decimals=2)
gt = gt.cols_label({"label": "Label", "person_years": "Person years", "failures": "Failures", "rate": "Rate", "lower_ci": "Lower", "upper_ci": "Upper"})
gt = gt.cols_align(align="left", columns=["label"])
gt = gt.cols_align(align="center", columns=numeric_columns)
gt = gt.tab_spanner(label="95%CI", columns=["lower_ci", "upper_ci"])
gt = gt.tab_source_note(source_note="Excluding patients withdrawn for `late exclusion` criteria")
gt.show()
html_data.append(gt.as_raw_html())
1236
+ ]
1237
+ },
1238
+ {
1239
+ "cell_type": "code",
1240
+ "execution_count": null,
1241
+ "id": "34",
1242
+ "metadata": {},
1243
+ "outputs": [],
1244
+ "source": [
1245
# Table 11: Proportion meeting primary endpoint
# Uses the full `data` dict (including "subjects"); proportion is failures as
# a percentage of subjects.
df_table10 = pd.DataFrame(data=data)
df_table10["proportion"] = df_table10["failures"]/df_table10["subjects"]*100
gt = df_as_great_table(
    df_table10[["label", "subjects", 'failures', "proportion"]],
    title="Table 11: Proportion meeting primary endpoint",
)
gt = gt.fmt_number(columns=["failures", "proportion"], decimals=2)
gt = gt.cols_label({"label": "Label", "subjects": "Participants", "failures": "Failures", "proportion": "%"})
gt = gt.cols_align(align="left", columns=["label"])
gt = gt.cols_align(align="center", columns=["subjects", "failures", "proportion"])
gt = gt.tab_source_note(source_note="Excluding patients withdrawn for `late exclusion` criteria")
html_data.append(gt.as_raw_html())
gt.show()
1262
+ ]
1263
+ },
1264
+ {
1265
+ "cell_type": "code",
1266
+ "execution_count": null,
1267
+ "id": "35",
1268
+ "metadata": {},
1269
+ "outputs": [],
1270
+ "source": []
1271
+ },
1272
+ {
1273
+ "cell_type": "code",
1274
+ "execution_count": null,
1275
+ "id": "36",
1276
+ "metadata": {},
1277
+ "outputs": [],
1278
+ "source": [
1279
+ "# TODO: EoS should be for all 1691, not 1631\n",
1280
+ "\n",
1281
+ "# Table 11a: End of Study Table (for those who have completed an end of study form)\n",
1282
+ "df_eos = get_eos_df()\n",
1283
+ "offstudy_reasons = {\n",
1284
+ " \"Delivered / Completed followup from pregnancy\": \"Pregnancy\",\n",
1285
+ " \"Patient completed 36 months of follow-up\": \"Completed 36m\",\n",
1286
+ " \"Patient developed diabetes\": \"Developed diabetes\",\n",
1287
+ " \"Other reason (specify below)\": \"Other\",\n",
1288
+ " \"Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment\": \"Late exclusion\",\n",
1289
+ " \"Patient has been transferred to another health centre\": \"Transferred out\",\n",
1290
+ " \"Patient is withdrawn on CLINICAL grounds ...\": \"Withdrawal: Clinical grounds\",\n",
1291
+ " \"Patient lost to follow-up\": \"LTFU\",\n",
1292
+ " \"Patient reported/known to have died\": \"Died\",\n",
1293
+ " \"Patient withdrew consent to participate further\": \"Withdrawal: Consent\",\n",
1294
+ "}\n",
1295
+ "df_eos[\"offstudy_reason\"] = df_eos[\"offstudy_reason\"].map(offstudy_reasons)\n",
1296
+ "df_eos[\"offstudy_reason\"] = pd.Categorical(df_eos[\"offstudy_reason\"], categories=sorted(list(offstudy_reasons.values())), ordered=True)\n",
1297
+ "df_eos[\"site_id\"] = df_eos[\"site_id\"].astype(str)\n",
1298
+ "df_eos_pivot = (\n",
1299
+ " df_eos\n",
1300
+ " .groupby(by=[\"offstudy_reason\", \"site_id\"],observed=True)\n",
1301
+ " .size()\n",
1302
+ " .reset_index()\n",
1303
+ " .pivot_table(index=\"offstudy_reason\", columns=\"site_id\", values=0, observed=True)\n",
1304
+ " .fillna(0)\n",
1305
+ " .astype(int)\n",
1306
+ " .reset_index()\n",
1307
+ ")\n",
1308
+ "df_eos_pivot[\"total\"] = df_eos_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1309
+ "df_eos_pivot.columns.name=\"\"\n",
1310
+ "sum_row = df_eos_pivot.select_dtypes(include='int64').sum()\n",
1311
+ "sum_row['offstudy_reason'] = 'Total'\n",
1312
+ "sum_row_df = pd.DataFrame(sum_row).T\n",
1313
+ "enrolled_pivot[\"offstudy_reason\"] = \"Enrolled\"\n",
1314
+ "enrolled_pivot = enrolled_pivot[[*df_eos_pivot.columns]]\n",
1315
+ "df_eos_pivot = pd.concat([enrolled_pivot, df_eos_pivot, sum_row_df], ignore_index=True)\n",
1316
+ "\n",
1317
+ "gt = df_as_great_table(\n",
1318
+ " df_eos_pivot,\n",
1319
+ " title=\"Table 12a: End of study report\",\n",
1320
+ " subtitle=md(\"for those who have completed an End of study report\"),\n",
1321
+ ")\n",
1322
+ "gt = (\n",
1323
+ " gt\n",
1324
+ " .cols_label({\"offstudy_reason\": \"Reason\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1325
+ " .cols_align(align=\"left\", columns=[\"offstudy_reason\"])\n",
1326
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
1327
+ " .tab_style(\n",
1328
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1329
+ " locations=loc.body(\n",
1330
+ " columns=[0],\n",
1331
+ " rows=[len(df_eos_pivot)-1]),\n",
1332
+ " )\n",
1333
+ " .tab_style(\n",
1334
+ " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1335
+ " locations=loc.body(\n",
1336
+ " columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
1337
+ " rows=[len(df_eos_pivot)-1],\n",
1338
+ " ),\n",
1339
+ " )\n",
1340
+ " .tab_style(\n",
1341
+ " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1342
+ " locations=loc.body(\n",
1343
+ " columns=[\"total\"],\n",
1344
+ " rows=[len(df_eos_pivot)-1],\n",
1345
+ " ),\n",
1346
+ " )\n",
1347
+ " .tab_style(\n",
1348
+ " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1349
+ " locations=loc.body(\n",
1350
+ " columns=[\"offstudy_reason\"],\n",
1351
+ " rows=[0],\n",
1352
+ " ),\n",
1353
+ " )\n",
1354
+ ")\n",
1355
+ "html_data.append(gt.as_raw_html())\n",
1356
+ "gt.show()\n"
1357
+ ]
1358
+ },
1359
+ {
1360
+ "cell_type": "code",
1361
+ "execution_count": null,
1362
+ "id": "37",
1363
+ "metadata": {},
1364
+ "outputs": [],
1365
+ "source": []
1366
+ },
1367
+ {
1368
+ "cell_type": "code",
1369
+ "execution_count": null,
1370
+ "id": "38",
1371
+ "metadata": {},
1372
+ "outputs": [],
1373
+ "source": [
1374
+ "# Table 12b: Study status\n",
1375
+ "def get_schedule_df(df_subjecthistory:pd.DataFrame, onschedule_model:str, offschedule_model:str, mode:str)->pd.DataFrame:\n",
1376
+ " columns = {k:f\"{k}_{mode}\" for k in [\"10\", \"20\", \"30\", \"40\", \"60\"]}\n",
1377
+ " df_schedule = (\n",
1378
+ " df_subjecthistory\n",
1379
+ " .query(f\"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{'isna' if mode=='on' else 'notna'}()\")\n",
1380
+ " .groupby(by=[\"onschedule_model\", \"site_id\"])\n",
1381
+ " .size()\n",
1382
+ " .reset_index()\n",
1383
+ " .pivot_table(index=\"onschedule_model\", columns=\"site_id\", values=0, observed=True)\n",
1384
+ " .reset_index()\n",
1385
+ " .rename(columns={\"onschedule_model\":\"schedule\", **columns})\n",
1386
+ " .fillna(0)\n",
1387
+ " .copy()\n",
1388
+ " )\n",
1389
+ " df_schedule.columns.name = \"\"\n",
1390
+ " return df_schedule\n",
1391
+ "\n",
1392
+ "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1393
+ "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
1394
+ "\n",
1395
+ "df_on = pd.concat([\n",
1396
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedule', 'meta_prn.offschedule', \"on\"),\n",
1397
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral', \"on\"),\n",
1398
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy', \"on\"),\n",
1399
+ "])\n",
1400
+ "\n",
1401
+ "df_on = (\n",
1402
+ " df_on\n",
1403
+ " .fillna(0)\n",
1404
+ " .reset_index(drop=True)\n",
1405
+ ")\n",
1406
+ "\n",
1407
+ "df_off = pd.concat([\n",
1408
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedule', 'meta_prn.offschedule', \"off\"),\n",
1409
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral', \"off\"),\n",
1410
+ " get_schedule_df(df_subjecthistory, 'meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy', \"off\"),\n",
1411
+ "])\n",
1412
+ "df_off = (\n",
1413
+ " df_off\n",
1414
+ " .fillna(0)\n",
1415
+ " .reset_index(drop=True)\n",
1416
+ ")\n",
1417
+ "\n",
1418
+ "df_status = pd.merge(df_on, df_off, on=[\"schedule\"], how=\"outer\")\n",
1419
+ "columns = []\n",
1420
+ "for ele in [[f\"{x}_on\", f\"{x}_off\"] for x in [\"10\", \"20\", \"30\", \"40\", \"60\"]]:\n",
1421
+ " columns.extend(ele)\n",
1422
+ "df_status = df_status[[\"schedule\", *columns]]\n",
1423
+ "df_status[\"total_on\"] = df_status[[col for col in columns if \"on\" in col]].sum(axis=1)\n",
1424
+ "df_status[\"total_off\"] = df_status[[col for col in columns if \"off\" in col]].sum(axis=1)\n",
1425
+ "df_status[\"total\"] = df_status[columns].sum(axis=1)\n",
1426
+ "df_status[\"schedule\"] = df_status.schedule.map({\"meta_prn.onschedule\": \"Main trial\", \"meta_prn.onscheduledmreferral\": \"Diabetes\", \"meta_prn.onschedulepregnancy\": \"Pregnancy\"})\n",
1427
+ "\n",
1428
+ "gt = df_as_great_table(\n",
1429
+ " df_status,\n",
1430
+ " title=\"Table 12b: Study status\",\n",
1431
+ " subtitle=md(\"Calculated from Offschedule form; not End of study report\"),\n",
1432
+ ")\n",
1433
+ "# gt = gt.fmt_number(columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"], decimals=0)\n",
1434
+ "gt = (gt\n",
1435
+ " .tab_source_note(\n",
1436
+ " source_note=(\n",
1437
+ " \"Note: Offschedule form is always submitted before the End of study report. \"\n",
1438
+ " \"When the Offschedule form is submitted, future appointments for the schedule are removed and \"\n",
1439
+ " \"the site staff are actioned to submit the End of study report.\"\n",
1440
+ " )\n",
1441
+ " )\n",
1442
+ " .cols_label({\n",
1443
+ " \"10_on\": \"On\", \"10_off\": \"Off\",\n",
1444
+ " \"20_on\": \"On\", \"20_off\": \"Off\",\n",
1445
+ " \"30_on\": \"On\", \"30_off\": \"Off\",\n",
1446
+ " \"40_on\": \"On\", \"40_off\": \"Off\",\n",
1447
+ " \"60_on\": \"On\", \"60_off\": \"Off\",\n",
1448
+ " \"total_on\": \"On\", \"total_off\": \"Off\",\n",
1449
+ " \"schedule\": \"Schedule\", \"total\": \"Total\"})\n",
1450
+ " .cols_align(align=\"center\")\n",
1451
+ "    .cols_align(align=\"left\", columns=[\"schedule\"])  # df_status has no \"label\" column; its row-label column is \"schedule\"\n",
1452
+ " .tab_spanner(\n",
1453
+ " label=\"Hindu mandal\",\n",
1454
+ " columns=[\"10_on\", \"10_off\"],\n",
1455
+ " )\n",
1456
+ " .tab_spanner(\n",
1457
+ " label=\"Amana\",\n",
1458
+ " columns=[\"20_on\", \"20_off\"],\n",
1459
+ " )\n",
1460
+ " .tab_spanner(\n",
1461
+ " label=\"Temeke\",\n",
1462
+ " columns=[\"30_on\", \"30_off\"],\n",
1463
+ " )\n",
1464
+ " .tab_spanner(\n",
1465
+ " label=\"Mwananyamala\",\n",
1466
+ " columns=[\"40_on\", \"40_off\"],\n",
1467
+ " )\n",
1468
+ " .tab_spanner(\n",
1469
+ " label=\"Mnazi Moja\",\n",
1470
+ " columns=[\"60_on\", \"60_off\"],\n",
1471
+ " )\n",
1472
+ " .tab_spanner(\n",
1473
+ " label=\"Total\",\n",
1474
+ " columns=[\"total_on\", \"total_off\"],\n",
1475
+ " )\n",
1476
+ " .tab_style(\n",
1477
+ " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1478
+ " locations=loc.body(\n",
1479
+ " columns=[\"10_off\", \"20_off\", \"30_off\", \"40_off\", \"60_off\"],\n",
1480
+ " rows=list(range(0, 1)),\n",
1481
+ " ),\n",
1482
+ " )\n",
1483
+ " .tab_style(\n",
1484
+ " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1485
+ " locations=loc.body(\n",
1486
+ " columns=[\"total_off\"],\n",
1487
+ " rows=list(range(0, 1)),\n",
1488
+ " ),\n",
1489
+ " )\n",
1490
+ " .fmt_number(columns=[*[c for c in df_status.columns if c not in [\"schedule\"]]], decimals=0)\n",
1491
+ ")\n",
1492
+ "html_data.append(gt.as_raw_html())\n",
1493
+ "gt.show()"
1494
+ ]
1495
+ },
1496
+ {
1497
+ "cell_type": "code",
1498
+ "execution_count": null,
1499
+ "id": "39",
1500
+ "metadata": {},
1501
+ "outputs": [],
1502
+ "source": [
1503
+ "# Table 13: Loss to Follow Up\n",
1504
+ "df_ltfu = read_frame(LossToFollowup.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1505
+ "df_ltfu_pivot = (\n",
1506
+ " df_ltfu\n",
1507
+ " .groupby(by=[\"loss_category\", \"site_id\"],observed=True,dropna=False)\n",
1508
+ " .size()\n",
1509
+ " .reset_index()\n",
1510
+ " .pivot_table(index=\"loss_category\", columns=\"site_id\", values=0, observed=True,dropna=False)\n",
1511
+ " .fillna(0)\n",
1512
+ " .astype(int)\n",
1513
+ " .reset_index()\n",
1514
+ ")\n",
1515
+ "df_ltfu_pivot[\"total\"] = df_ltfu_pivot.drop(columns=[\"loss_category\"]).sum(axis=1)  # row totals over this table's own site columns (was summing df_eos_pivot)\n",
1516
+ "df_ltfu_pivot.columns.name=\"\"\n",
1517
+ "sum_row = df_ltfu_pivot.select_dtypes(include='int64').sum()\n",
1518
+ "sum_row['loss_category'] = 'Total'\n",
1519
+ "sum_row_df = pd.DataFrame(sum_row).T\n",
1520
+ "df_ltfu_pivot = pd.concat([df_ltfu_pivot, sum_row_df], ignore_index=True)\n",
1521
+ "df_ltfu_pivot\n"
1522
+ ]
1523
+ },
1524
+ {
1525
+ "cell_type": "code",
1526
+ "execution_count": null,
1527
+ "id": "40",
1528
+ "metadata": {},
1529
+ "outputs": [],
1530
+ "source": [
1531
+ "# Table 13c: End of study report not submitted\n",
1532
+ "\n",
1533
+ "df1 = (\n",
1534
+ " df_status\n",
1535
+ " .query(\"schedule=='Main trial'\")[[col for col in columns if \"off\" in col]]\n",
1536
+ " .rename(columns=dict(zip([col for col in columns if \"off\" in col], [\"10\", \"20\",\"30\",\"40\",\"60\"])))\n",
1537
+ " .reset_index(drop=True)\n",
1538
+ ")\n",
1539
+ "df2 = (\n",
1540
+ " df_eos_pivot\n",
1541
+ " .query(\"offstudy_reason=='Total'\")[[\"10\", \"20\",\"30\",\"40\",\"60\"]]\n",
1542
+ " .reset_index(drop=True)\n",
1543
+ ")\n",
1544
+ "\n",
1545
+ "df_eos_not_reported = df1-df2\n",
1546
+ "df_eos_not_reported[\"schedule\"] = 'Main trial'\n",
1547
+ "df_eos_not_reported[\"total\"] = df_eos_not_reported[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1548
+ "df_eos_not_reported = df_eos_not_reported[[\"schedule\", \"10\", \"20\",\"30\",\"40\",\"60\", \"total\"]]\n",
1549
+ "\n",
1550
+ "gt = df_as_great_table(\n",
1551
+ " df_eos_not_reported,\n",
1552
+ " title=\"Table 13c: End of study report not submitted\",\n",
1553
+ " subtitle=md(\"End of study report expected based on Offschedule form\"),\n",
1554
+ ")\n",
1555
+ "gt = (\n",
1556
+ "    gt\n",
1557
+ "    .cols_label({\"schedule\": \"Schedule\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1558
+ "    .cols_align(align=\"left\", columns=[\"schedule\"])\n",
1559
+ "    .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
1560
+ "    .tab_style(\n",
1561
+ "        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1562
+ "        locations=loc.body(\n",
1563
+ "            columns=[0],\n",
1564
+ "            rows=[len(df_eos_not_reported)-1]),  # last row of the table being rendered, not of df_eos_pivot\n",
1565
+ "    )\n",
1566
+ "    .tab_style(\n",
1567
+ "        style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1568
+ "        locations=loc.body(\n",
1569
+ "            columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
1570
+ "            rows=[len(df_eos_not_reported)-1],\n",
1571
+ "        ),\n",
1572
+ "    )\n",
1573
+ "    .tab_style(\n",
1574
+ "        style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1575
+ "        locations=loc.body(\n",
1576
+ "            columns=[\"total\"],\n",
1577
+ "            rows=[len(df_eos_not_reported)-1],\n",
1578
+ "        ),\n",
1579
+ "    )\n",
1580
+ ")\n",
1581
+ "html_data.append(gt.as_raw_html())\n",
1582
+ "gt.show()\n"
1583
+ ]
1584
+ },
1585
+ {
1586
+ "cell_type": "code",
1587
+ "execution_count": null,
1588
+ "id": "41",
1589
+ "metadata": {},
1590
+ "outputs": [],
1591
+ "source": [
1592
+ "# Table 14: Baseline Sample"
1593
+ ]
1594
+ },
1595
+ {
1596
+ "cell_type": "code",
1597
+ "execution_count": null,
1598
+ "id": "42",
1599
+ "metadata": {},
1600
+ "outputs": [],
1601
+ "source": [
1602
+ "# Table 15: Consented to extended followup\n",
1603
+ "df_consented = (\n",
1604
+ " read_frame(SubjectConsentV1Ext.objects.all(), verbose=False)\n",
1605
+ " .query(\"agrees_to_extension==@YES\")\n",
1606
+ " .rename(columns={\"site\": \"site_id\"})\n",
1607
+ ")\n",
1608
+ "df_consented[\"site_id\"] = df_consented.site_id.astype(str)\n",
1609
+ "df_consented[\"month\"] = df_consented.report_datetime.dt.strftime(\"%m\")\n",
1610
+ "df_consented[\"year\"] = df_consented.report_datetime.dt.strftime(\"%Y\")\n",
1611
+ "df_consented_grp = (\n",
1612
+ " df_consented.groupby(by=[\"site_id\", \"year\", \"month\"]).\n",
1613
+ " size()\n",
1614
+ " .reset_index()\n",
1615
+ " .sort_values(by=[\"site_id\", \"year\", \"month\"], ascending=True)\n",
1616
+ " .reset_index(drop=True)\n",
1617
+ ")\n",
1618
+ "df_consented_pivot = (\n",
1619
+ " df_consented_grp\n",
1620
+ " .pivot_table(index=[\"year\", \"month\"], columns=\"site_id\", values=0, aggfunc=\"sum\")\n",
1621
+ " .reset_index()\n",
1622
+ " .fillna(0)\n",
1623
+ ")\n",
1624
+ "if \"60\" not in df_consented_pivot.columns:\n",
1625
+ "    df_consented_pivot[\"60\"] = 0.0  # add an all-zero \"60\" column so the fixed site list below resolves; the scalar fills every row\n",
1626
+ "df_consented_pivot.columns.name=\"\"\n",
1627
+ "df_consented_pivot[\"year\"] = df_consented_pivot[\"year\"].astype(str)\n",
1628
+ "df_consented_pivot[\"month\"] = df_consented_pivot[\"month\"].astype(str)\n",
1629
+ "\n",
1630
+ "sum_row = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum()\n",
1631
+ "sum_row['year'] = \"Total\"\n",
1632
+ "sum_row['month'] = \"\"\n",
1633
+ "df_consented_pivot = pd.concat([df_consented_pivot, sum_row.to_frame().T], ignore_index=True)\n",
1634
+ "df_consented_pivot[\"total\"] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1).astype(int)\n",
1635
+ "df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].astype(int)\n",
1636
+ "gt = df_as_great_table2(\n",
1637
+ " df_consented_pivot,\n",
1638
+ " title=\"Table 15: Consented to extended followup\",\n",
1639
+ " rowname_col=\"month\",\n",
1640
+ " groupname_col=\"year\",\n",
1641
+ ")\n",
1642
+ "gt = (\n",
1643
+ " gt\n",
1644
+ " .cols_label({\"year\": \"Year\", \"month\": \"Month\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1645
+ " .cols_align(align=\"center\")\n",
1646
+ " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
1647
+ " .tab_stubhead(label=\"Consented\")\n",
1648
+ " .tab_style(\n",
1649
+ " style=[\n",
1650
+ " style.text(color=\"black\", weight=\"bold\"),\n",
1651
+ " style.fill(color=\"lightgray\")\n",
1652
+ " ],\n",
1653
+ " locations=loc.row_groups()\n",
1654
+ " )\n",
1655
+ ")\n",
1656
+ "html_data.append(gt.as_raw_html())\n",
1657
+ "gt.show()"
1658
+ ]
1659
+ },
1660
+ {
1661
+ "cell_type": "code",
1662
+ "execution_count": null,
1663
+ "id": "43",
1664
+ "metadata": {},
1665
+ "outputs": [],
1666
+ "source": []
1667
+ },
1668
+ {
1669
+ "cell_type": "code",
1670
+ "execution_count": null,
1671
+ "id": "44",
1672
+ "metadata": {},
1673
+ "outputs": [],
1674
+ "source": [
1675
+ "# gather raw html\n",
1676
+ "raw_html = [f'<div class=\"page-break\">{s}</div>' for s in html_data]\n",
1677
+ "style_css = \"\"\"\n",
1678
+ "<style>\n",
1679
+ " .page-break {\n",
1680
+ " page-break-inside: avoid; /* Always add page break before this element */\n",
1681
+ " }\n",
1682
+ " .table-header {\n",
1683
+ " font-weight: bold;\n",
1684
+ " font-size: 18px;\n",
1685
+ " text-align: center;\n",
1686
+ " border-bottom: None;\n",
1687
+ " }\n",
1688
+ "</style>\n",
1689
+ "\"\"\"\n",
1690
+ "raw_html = ''.join(raw_html)\n",
1691
+ "raw_html = f'<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n<meta charset=\"utf-8\"/>\\n{style_css}\\n</head>\\n<body>\\n' + document_title + raw_html + '\\n</body>\\n</html>\\n'  # <style> belongs inside <head>\n"
1692
+ ]
1693
+ },
1694
+ {
1695
+ "cell_type": "code",
1696
+ "execution_count": null,
1697
+ "id": "45",
1698
+ "metadata": {},
1699
+ "outputs": [],
1700
+ "source": [
1701
+ "# render html to PDF\n",
1702
+ "pdfkit.from_string(raw_html, str(analysis_folder / pdf_filename),\n",
1703
+ "options={\n",
1704
+ " 'footer-center': 'Page [page] of [topage]',\n",
1705
+ " 'footer-font-size': '8',\n",
1706
+ " 'footer-spacing': '5',\n",
1707
+ " 'encoding': \"UTF-8\",\n",
1708
+ " 'margin-top':'10mm',\n",
1709
+ " 'margin-right':'15mm',\n",
1710
+ " 'margin-bottom':'15mm',\n",
1711
+ " 'margin-left':'15mm',\n",
1712
+ " 'header-center': study_title,\n",
1713
+ " 'header-font-size': '6',\n",
1714
+ " 'header-spacing': '0',\n",
1715
+ " 'disable-javascript': None,\n",
1716
+ " 'no-outline': None,\n",
1717
+ "},\n",
1718
+ "verbose=True)"
1719
+ ]
1720
+ },
1721
+ {
1722
+ "cell_type": "code",
1723
+ "execution_count": null,
1724
+ "id": "46",
1725
+ "metadata": {},
1726
+ "outputs": [],
1727
+ "source": []
1728
+ }
1729
+ ],
1730
+ "metadata": {
1731
+ "kernelspec": {
1732
+ "display_name": "Python 3 (ipykernel)",
1733
+ "language": "python",
1734
+ "name": "python3"
1735
+ },
1736
+ "language_info": {
1737
+ "codemirror_mode": {
1738
+ "name": "ipython",
1739
+ "version": 3
1740
+ },
1741
+ "file_extension": ".py",
1742
+ "mimetype": "text/x-python",
1743
+ "name": "python",
1744
+ "nbconvert_exporter": "python",
1745
+ "pygments_lexer": "ipython3",
1746
+ "version": "3.12.4"
1747
+ }
1748
+ },
1749
+ "nbformat": 4,
1750
+ "nbformat_minor": 5
1751
+ }