meta-edc: meta_edc-1.1.0-py3-none-any.whl → meta_edc-1.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. meta_analytics/README.rst +1 -2
  2. meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +8 -0
  3. meta_analytics/notebooks/endpoints/meta_endpoints_by_date.ipynb +38 -46
  4. meta_analytics/notebooks/monitoring_report.ipynb +329 -246
  5. meta_analytics/notebooks/qa.ipynb +265 -0
  6. meta_consent/admin/actions/create_missing_prescriptions.py +1 -1
  7. meta_consent/admin/modeladmin_mixins.py +1 -1
  8. meta_consent/management/commands/create_missing_prescriptions.py +1 -1
  9. meta_consent/models/signals.py +1 -1
  10. meta_edc/settings/debug.py +2 -2
  11. {meta_edc-1.1.0.dist-info → meta_edc-1.1.2.dist-info}/METADATA +4 -4
  12. {meta_edc-1.1.0.dist-info → meta_edc-1.1.2.dist-info}/RECORD +30 -29
  13. meta_labs/tests/test_reportables.py +1 -1
  14. meta_prn/admin/offschedule_pregnancy_admin.py +3 -3
  15. meta_prn/admin/onschedule_dm_referral_admin.py +5 -5
  16. meta_prn/form_validators/end_of_study.py +2 -2
  17. meta_reports/admin/last_imp_refill_admin.py +3 -2
  18. meta_reports/models/endpoints.py +3 -0
  19. meta_reports/tasks.py +3 -2
  20. meta_subject/action_items.py +1 -1
  21. meta_subject/form_validators/delivery_form_validator.py +1 -0
  22. meta_subject/forms/delivery_form.py +2 -0
  23. meta_subject/management/commands/create_missing_rx.py +1 -1
  24. meta_subject/migrations/0107_auto_20220415_0043.py +1 -1
  25. meta_subject/models/signals.py +1 -1
  26. meta_subject/tests/tests/test_study_medication.py +1 -2
  27. {meta_edc-1.1.0.dist-info → meta_edc-1.1.2.dist-info}/WHEEL +0 -0
  28. {meta_edc-1.1.0.dist-info → meta_edc-1.1.2.dist-info}/licenses/AUTHORS.rst +0 -0
  29. {meta_edc-1.1.0.dist-info → meta_edc-1.1.2.dist-info}/licenses/LICENSE +0 -0
  30. {meta_edc-1.1.0.dist-info → meta_edc-1.1.2.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,7 @@
10
10
  "%%capture\n",
11
11
  "import os\n",
12
12
  "from pathlib import Path\n",
13
+ "\n",
13
14
  "import pandas as pd\n",
14
15
  "from dj_notebook import activate\n",
15
16
  "import numpy as np\n",
@@ -47,8 +48,8 @@
47
48
  "from meta_consent.models import SubjectConsentV1Ext\n",
48
49
  "from meta_analytics.dataframes import get_glucose_df, get_screening_df\n",
49
50
  "\n",
50
- "from edc_appointment.constants import SCHEDULED_APPT, UNSCHEDULED_APPT # noqa\n",
51
- "from edc_constants.constants import YES # noqa"
51
+ "from edc_appointment.constants import SCHEDULED_APPT, UNSCHEDULED_APPT # noqa\n",
52
+ "from edc_constants.constants import YES # noqa"
52
53
  ]
53
54
  },
54
55
  {
@@ -59,9 +60,9 @@
59
60
  "outputs": [],
60
61
  "source": [
61
62
  "html_data = []\n",
62
- "data_download_date= date(2025,6, 17)\n",
63
- "cutoff_date = date(2025,6, 17)\n",
64
- "end_of_trial_date= date(2026,7, 1)\n",
63
+ "data_download_date = date(2025, 6, 30)\n",
64
+ "cutoff_date = date(2025, 6, 30)\n",
65
+ "end_of_trial_date = date(2026, 7, 1)\n",
65
66
  "document_title = f\"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2><h5>Data Download: {data_download_date.strftime('%d %B %Y')}</h5>\"\n",
66
67
  "study_title = 'META3 - Metformin treatment for diabetes prevention in Africa'\n",
67
68
  "pdf_filename = f\"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf\"\n"
@@ -77,8 +78,12 @@
77
78
  "# 105-30-0288-5 should also be late excluded based on the haemoglobin 4.8 presented at baseline\n",
78
79
  "\n",
79
80
  "df_visit = get_subject_visit(\"meta_subject.subjectvisit\")\n",
80
- "late_exlusion_offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
81
+ "df_visit_1691 = df_visit.copy()\n",
82
+ "\n",
83
+ "late_exlusion_offstudy_reasons = [\n",
84
+ " 'Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
81
85
  "df_eos = get_eos_df()\n",
86
+ "df_eos_1691 = df_eos.copy()\n",
82
87
  "df_eos_excluded = (\n",
83
88
  " df_eos\n",
84
89
  " .query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")\n",
@@ -87,23 +92,25 @@
87
92
  ")\n",
88
93
  "df_visit = (\n",
89
94
  " df_visit\n",
90
- " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
95
+ " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\",\n",
96
+ " how=\"left\", indicator=True)\n",
91
97
  " .query(\"_merge=='left_only'\")\n",
92
98
  " .drop(columns=[\"_merge\"])\n",
93
99
  ")\n",
94
100
  "\n",
95
- "df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]\n",
101
+ "df_visit = df_visit[df_visit.appt_datetime.dt.date <= cutoff_date]\n",
96
102
  "\n",
97
103
  "df_appointments = get_appointment_df()\n",
98
104
  "df_appointments[\"site_id\"] = df_appointments.site_id.astype(str)\n",
105
+ "df_appointments_1691 = df_appointments.copy()\n",
99
106
  "df_appointments = (\n",
100
107
  " df_appointments\n",
101
- " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
108
+ " .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\",\n",
109
+ " how=\"left\", indicator=True)\n",
102
110
  " .query(\"_merge=='left_only'\")\n",
103
111
  " .drop(columns=[\"_merge\"])\n",
104
112
  ")\n",
105
113
  "\n",
106
- "\n",
107
114
  "cls = GlucoseEndpointsByDate()\n",
108
115
  "cls.run()\n",
109
116
  "df_endpoint = cls.endpoint_only_df.copy()\n",
@@ -121,8 +128,8 @@
121
128
  " .reset_index()\n",
122
129
  " .pivot_table(columns=\"site_id\", values=0, observed=True)\n",
123
130
  ")\n",
124
- "enrolled_pivot.columns.name=\"\"\n",
125
- "enrolled_pivot[\"total\"] = enrolled_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)"
131
+ "enrolled_pivot.columns.name = \"\"\n",
132
+ "enrolled_pivot[\"total\"] = enrolled_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum(axis=1)"
126
133
  ]
127
134
  },
128
135
  {
@@ -132,12 +139,23 @@
132
139
  "metadata": {},
133
140
  "outputs": [],
134
141
  "source": [
135
- "# df_eos.query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]].to_stata(\n",
136
- "# path=analysis_folder / \"late_exclusions.dta\",\n",
137
- "# variable_labels={},\n",
138
- "# version=118,\n",
139
- "# write_index=False,\n",
140
- "# )"
142
+ "# before late exclusion\n",
143
+ "df_visit_orig = df_visit_1691[df_visit_1691.appt_datetime.dt.date <= cutoff_date]\n",
144
+ "enrolled_1691 = df_visit_1691.copy()\n",
145
+ "enrolled_1691[\"site_id\"] = enrolled_1691[\"site_id\"].astype(str)\n",
146
+ "enrolled_1691_pivot = (\n",
147
+ " enrolled_1691\n",
148
+ " .query(\"visit_code==1000.0\").groupby([\"site_id\"])\n",
149
+ " .size()\n",
150
+ " .reset_index()\n",
151
+ " .pivot_table(columns=\"site_id\", values=0, observed=True)\n",
152
+ ")\n",
153
+ "enrolled_1691_pivot.columns.name = \"\"\n",
154
+ "enrolled_1691_pivot[\"total\"] = enrolled_1691_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum(axis=1)\n",
155
+ "\n",
156
+ "# df_eos_1691\n",
157
+ "# df_appointments_1691\n",
158
+ "\n"
141
159
  ]
142
160
  },
143
161
  {
@@ -147,8 +165,10 @@
147
165
  "metadata": {},
148
166
  "outputs": [],
149
167
  "source": [
150
- "column_headers = {\"label\": \"Label\", \"visit_code\": \"Visit code\", \"10\": \"Hindu Mandal\", \"20\": \"Amana\", \"30\": \"Temeke\", \"40\": \"Mwananyamala\", \"60\": \"Mnazi Moja\", \"total\": \"Total\"}\n",
151
- "column_headers_with_str = {\"label\": \"Label\", \"10_str\": \"Hindu Mandal\", \"20_str\": \"Amana\", \"30_str\": \"Temeke\", \"40_str\": \"Mwananyamala\", \"60_str\": \"Mnazi Moja\", \"total_str\": \"Total\"}"
168
+ "column_headers = {\"label\": \"Label\", \"visit_code\": \"Visit code\", \"10\": \"Hindu Mandal\", \"20\": \"Amana\", \"30\": \"Temeke\",\n",
169
+ " \"40\": \"Mwananyamala\", \"60\": \"Mnazi Moja\", \"total\": \"Total\"}\n",
170
+ "column_headers_with_str = {\"label\": \"Label\", \"10_str\": \"Hindu Mandal\", \"20_str\": \"Amana\", \"30_str\": \"Temeke\",\n",
171
+ " \"40_str\": \"Mwananyamala\", \"60_str\": \"Mnazi Moja\", \"total_str\": \"Total\"}"
152
172
  ]
153
173
  },
154
174
  {
@@ -160,7 +180,9 @@
160
180
  "source": [
161
181
  "# Table 1a Visits completed to date\n",
162
182
  "\n",
163
- "df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
183
+ "df_tbl1 = df_visit[(df_visit.visit_code_sequence == 0) & (df_visit.appt_timing == ONTIME_APPT) & ~(\n",
184
+ " df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(\n",
185
+ " by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
164
186
  "\n",
165
187
  "df_tbl1.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
166
188
  "df1 = df_tbl1.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
@@ -185,7 +207,7 @@
185
207
  ")\n",
186
208
  "gt = (\n",
187
209
  " gt\n",
188
- " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
210
+ " .cols_label({k: v for k, v in column_headers.items() if k != \"label\"})\n",
189
211
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
190
212
  " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
191
213
  " .data_color(\n",
@@ -194,7 +216,8 @@
194
216
  " domain=[2000, 5000],\n",
195
217
  " na_color=\"white\"\n",
196
218
  " )\n",
197
- " .tab_source_note(source_note=f\"Excludes visit reports submitted for participants eventually withdrawn on late exclusion criteria.\")\n",
219
+ " .tab_source_note(\n",
220
+ " source_note=f\"Excludes visit reports submitted for participants eventually withdrawn on late exclusion criteria.\")\n",
198
221
  ")\n",
199
222
  "html_data.append(gt.as_raw_html())\n",
200
223
  "gt.show()"
@@ -223,7 +246,7 @@
223
246
  " .fillna(0)\n",
224
247
  ")\n",
225
248
  "\n",
226
- "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
249
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:, 1:].sum(axis=1)\n",
227
250
  "df_appt_pivot.columns.name = None\n",
228
251
  "gt = df_as_great_table(\n",
229
252
  " df_appt_pivot,\n",
@@ -233,7 +256,7 @@
233
256
  ")\n",
234
257
  "gt = (\n",
235
258
  " gt\n",
236
- " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
259
+ " .cols_label({k: v for k, v in column_headers.items() if k != \"label\"})\n",
237
260
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
238
261
  " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
239
262
  " .data_color(\n",
@@ -256,7 +279,8 @@
256
279
  "source": [
257
280
  "# Table 1c Past scheduled appointments -- no information provided\n",
258
281
  "df_appt_pivot = (\n",
259
- " df_appointments.query(\"appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
282
+ " df_appointments.query(\n",
283
+ " \"appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
260
284
  " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
261
285
  " .query(\"_merge=='left_only'\")\n",
262
286
  " .drop(columns=[\"_merge\"])\n",
@@ -269,7 +293,7 @@
269
293
  " .reset_index()\n",
270
294
  " .fillna(0)\n",
271
295
  ")\n",
272
- "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
296
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:, 1:].sum(axis=1)\n",
273
297
  "df_appt_pivot.columns.name = None\n",
274
298
  "gt = df_as_great_table(\n",
275
299
  " df_appt_pivot,\n",
@@ -278,7 +302,7 @@
278
302
  ")\n",
279
303
  "gt = (\n",
280
304
  " gt\n",
281
- " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
305
+ " .cols_label({k: v for k, v in column_headers.items() if k != \"label\"})\n",
282
306
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
283
307
  " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
284
308
  " .data_color(\n",
@@ -303,11 +327,11 @@
303
327
  "# Table 1d Unscheduled appointments\n",
304
328
  "df_appt = (\n",
305
329
  " df_appointments.query(\"appt_reason==@UNSCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status!=@NEW_APPT\")\n",
306
- " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
330
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
307
331
  " .query(\"_merge=='left_only'\")\n",
308
332
  " .drop(columns=[\"_merge\"])\n",
309
333
  " .reset_index(drop=True)\n",
310
- " .copy()\n",
334
+ " .copy()\n",
311
335
  " .reset_index(drop=True)\n",
312
336
  ")\n",
313
337
  "df_appt['visit_code'] = df_appt['visit_code'].astype(int)\n",
@@ -325,10 +349,10 @@
325
349
  " .reset_index()\n",
326
350
  " .fillna(0)\n",
327
351
  ")\n",
328
- "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
352
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:, 1:].sum(axis=1)\n",
329
353
  "df_appt_pivot.columns.name = None\n",
330
- "df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]] = df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].astype('float64')\n",
331
- "\n",
354
+ "df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]] = df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].astype(\n",
355
+ " 'float64')\n",
332
356
  "\n",
333
357
  "# add totals row\n",
334
358
  "sum_row = df_appt_pivot.select_dtypes(include='float64').sum()\n",
@@ -343,7 +367,7 @@
343
367
  ")\n",
344
368
  "gt = (\n",
345
369
  " gt\n",
346
- " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
370
+ " .cols_label({k: v for k, v in column_headers.items() if k != \"label\"})\n",
347
371
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
348
372
  " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
349
373
  " .data_color(\n",
@@ -368,7 +392,8 @@
368
392
  "source": [
369
393
  "# Table 1e Future scheduled appointments\n",
370
394
  "df_appt_pivot = (\n",
371
- " df_appointments.query(\"@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
395
+ " df_appointments.query(\n",
396
+ " \"@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
372
397
  " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
373
398
  " .query(\"_merge=='left_only'\")\n",
374
399
  " .drop(columns=[\"_merge\"])\n",
@@ -381,7 +406,7 @@
381
406
  " .reset_index()\n",
382
407
  " .fillna(0)\n",
383
408
  ")\n",
384
- "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
409
+ "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:, 1:].sum(axis=1)\n",
385
410
  "df_appt_pivot.columns.name = None\n",
386
411
  "gt = df_as_great_table(\n",
387
412
  " df_appt_pivot,\n",
@@ -389,7 +414,7 @@
389
414
  ")\n",
390
415
  "gt = (\n",
391
416
  " gt\n",
392
- " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
417
+ " .cols_label({k: v for k, v in column_headers.items() if k != \"label\"})\n",
393
418
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
394
419
  " .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
395
420
  " .data_color(\n",
@@ -399,7 +424,8 @@
399
424
  " na_color=\"white\"\n",
400
425
  " )\n",
401
426
  " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
402
- " .tab_source_note(source_note=f\"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.\")\n",
427
+ " .tab_source_note(\n",
428
+ " source_note=f\"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.\")\n",
403
429
  ")\n",
404
430
  "html_data.append(gt.as_raw_html())\n",
405
431
  "gt.show()"
@@ -422,7 +448,8 @@
422
448
  " .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
423
449
  ").subject_identifier.nunique()\n",
424
450
  "df_tbl = (\n",
425
- " df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
451
+ " df_visit[(df_visit.visit_code_sequence == 0) & (df_visit.appt_timing == MISSED_APPT) & ~(\n",
452
+ " df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
426
453
  " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
427
454
  " .query(\"_merge=='left_only'\")\n",
428
455
  " .drop(columns=[\"_merge\"])\n",
@@ -458,7 +485,7 @@
458
485
  " .set_index([\"visit_code\"])\n",
459
486
  ")\n",
460
487
  "\n",
461
- "attended_and_missed_perc = df_missed/attended_and_missed\n",
488
+ "attended_and_missed_perc = df_missed / attended_and_missed\n",
462
489
  "attended_and_missed_perc = (\n",
463
490
  " attended_and_missed_perc\n",
464
491
  " .fillna(0)\n",
@@ -467,9 +494,9 @@
467
494
  ")\n",
468
495
  "\n",
469
496
  "df_result = df_missed.merge(attended_and_missed_perc, on=[\"visit_code\"], suffixes=(\"\", \"_perc\"))\n",
470
- "for col in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
497
+ "for col in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
471
498
  " col_perc = f\"{col}_perc\"\n",
472
- " df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc]*100:.2f})\", axis=1)\n",
499
+ " df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc] * 100:.2f})\", axis=1)\n",
473
500
  "df_result = df_result.reset_index().sort_values(by=[\"visit_code\"], ascending=True)\n",
474
501
  "df_result = df_result.fillna(0.0)"
475
502
  ]
@@ -489,7 +516,7 @@
489
516
  ")\n",
490
517
  "gt = (\n",
491
518
  " gt\n",
492
- " .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
519
+ " .cols_label({k: v for k, v in column_headers.items() if k != \"label\"})\n",
493
520
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
494
521
  " .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
495
522
  " .tab_style(\n",
@@ -523,7 +550,8 @@
523
550
  " .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
524
551
  ").subject_identifier.nunique()\n",
525
552
  "df_tbl = (\n",
526
- " df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
553
+ " df_visit[(df_visit.visit_code_sequence == 0) & (df_visit.appt_timing == MISSED_APPT) & ~(\n",
554
+ " df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
527
555
  " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
528
556
  " .query(\"_merge=='left_only'\")\n",
529
557
  " .drop(columns=[\"_merge\"])\n",
@@ -534,15 +562,16 @@
534
562
  " .reset_index()\n",
535
563
  ")\n",
536
564
  "df_tbl.columns = [\"subject_identifier\", \"site_id\", \"missed_count\"]\n",
537
- "df_tbl[\"category\"] = pd.cut(df_tbl[\"missed_count\"], bins=[0, 1, 3, 5, 7, 100], labels=[\"Missed at least 1\", \"2 to 3\", \"4 to 5\", \"6 to 7\", \"missed more than 7\"])\n",
538
- "df_tbl_pivot = df_tbl.pivot_table(index=\"category\", columns=\"site_id\", values=\"missed_count\", observed=False, aggfunc=\"count\").reset_index()\n",
565
+ "df_tbl[\"category\"] = pd.cut(df_tbl[\"missed_count\"], bins=[0, 1, 3, 5, 7, 100],\n",
566
+ " labels=[\"Missed at least 1\", \"2 to 3\", \"4 to 5\", \"6 to 7\", \"missed more than 7\"])\n",
567
+ "df_tbl_pivot = df_tbl.pivot_table(index=\"category\", columns=\"site_id\", values=\"missed_count\", observed=False,\n",
568
+ " aggfunc=\"count\").reset_index()\n",
539
569
  "\n",
540
570
  "df_tbl_pivot['total'] = df_tbl_pivot.select_dtypes(include='int').sum(axis=1, skipna=True)\n",
541
571
  "\n",
542
572
  "sum_row = df_tbl_pivot.select_dtypes(include='int64').sum()\n",
543
573
  "sum_row['category'] = 'Total'\n",
544
574
  "\n",
545
- "\n",
546
575
  "df_tbl_pivot = (\n",
547
576
  " pd.concat([df_tbl_pivot, sum_row.to_frame().T], axis=0)\n",
548
577
  " .rename(columns={10: \"10\", 20: \"20\", 30: \"30\", 40: \"40\", 60: \"60\"})\n",
@@ -554,7 +583,8 @@
554
583
  ")\n",
555
584
  "gt = (\n",
556
585
  " gt\n",
557
- " .cols_label({\"category\": \"Category\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
586
+ " .cols_label(\n",
587
+ " {\"category\": \"Category\", **{k: v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
558
588
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
559
589
  " .cols_align(align=\"left\", columns=[\"category\"])\n",
560
590
  " .tab_style(\n",
@@ -578,7 +608,7 @@
578
608
  "outputs": [],
579
609
  "source": [
580
610
  "# func for tables 3,4,5\n",
581
- "def get_row_df(row_df:pd.DataFrame, label:str=None, **kwargs)->pd.DataFrame:\n",
611
+ "def get_row_df(row_df: pd.DataFrame, label: str = None, **kwargs) -> pd.DataFrame:\n",
582
612
  " row_df = row_df.groupby(by=[\"site_id\"]).site_id.count().to_frame(name=\"n\")\n",
583
613
  " row_df[\"label\"] = label\n",
584
614
  " row_df = row_df.reset_index()\n",
@@ -593,50 +623,55 @@
593
623
  "\n",
594
624
  "\n",
595
625
  "def get_table_df(\n",
596
- " df_source:pd.DataFrame,\n",
597
- " visit_code:float|None=None,\n",
598
- " month_label:str|None=None,\n",
599
- " visit_codes:list[float]|None=None,\n",
600
- " get_row_func:Callable|None=None,\n",
601
- " category_labels:list[str]|None=None,\n",
602
- ")->pd.DataFrame:\n",
626
+ " df_source: pd.DataFrame,\n",
627
+ " visit_code: float | None = None,\n",
628
+ " month_label: str | None = None,\n",
629
+ " visit_codes: list[float] | None = None,\n",
630
+ " get_row_func: Callable | None = None,\n",
631
+ " category_labels: list[str] | None = None,\n",
632
+ ") -> pd.DataFrame:\n",
603
633
  " get_row_df_func = get_row_func or get_row_df\n",
604
634
  " if visit_code:\n",
605
- " df_month = df_source[df_source.visit_code==visit_code].copy()\n",
635
+ " df_month = df_source[df_source.visit_code == visit_code].copy()\n",
606
636
  " elif visit_codes:\n",
607
637
  " df_month = df_source[df_source.visit_code.isin(visit_codes)].copy()\n",
608
638
  " elif month_label:\n",
609
639
  " df_month = df_source.copy()\n",
610
640
  "\n",
611
- " \n",
612
641
  " row_df = df_month.copy()\n",
613
642
  " table_df = get_row_df_func(row_df, \"Total (n)\", category_labels=category_labels)\n",
614
- " \n",
643
+ "\n",
615
644
  " row_df = df_month.query(\"ogtt_value<7.8 and fbg_value<6.1\").copy()\n",
616
645
  " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG <6.1\", category_labels=category_labels)])\n",
617
- " \n",
618
- " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
619
- " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
620
- " \n",
621
- " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=7.0)].copy()\n",
646
+ "\n",
647
+ " row_df = df_month[(df_month.ogtt_value < 7.8) & (df_month.fbg_value >= 6.1) & (df_month.fbg_value < 7.0)].copy()\n",
648
+ " table_df = pd.concat(\n",
649
+ " [table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
650
+ "\n",
651
+ " row_df = df_month[(df_month.ogtt_value < 7.8) & (df_month.fbg_value >= 7.0)].copy()\n",
622
652
  " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT <7.8; FBG >=7.0\", category_labels=category_labels)])\n",
623
- " \n",
624
- " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value<6.1)].copy()\n",
625
- " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG <6.1\", category_labels=category_labels)])\n",
626
- " \n",
627
- " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
628
- " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT 7.8 to <11.1; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
629
- " \n",
630
- " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=7.0)].copy()\n",
631
- " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG >=7.0\", category_labels=category_labels)])\n",
632
- " \n",
633
- " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value<6.1)].copy()\n",
653
+ "\n",
654
+ " row_df = df_month[(df_month.ogtt_value >= 7.8) & (df_month.ogtt_value < 11.1) & (df_month.fbg_value < 6.1)].copy()\n",
655
+ " table_df = pd.concat(\n",
656
+ " [table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG <6.1\", category_labels=category_labels)])\n",
657
+ "\n",
658
+ " row_df = df_month[(df_month.ogtt_value >= 7.8) & (df_month.ogtt_value < 11.1) & (df_month.fbg_value >= 6.1) & (\n",
659
+ " df_month.fbg_value < 7.0)].copy()\n",
660
+ " table_df = pd.concat(\n",
661
+ " [table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
662
+ "\n",
663
+ " row_df = df_month[(df_month.ogtt_value >= 7.8) & (df_month.ogtt_value < 11.1) & (df_month.fbg_value >= 7.0)].copy()\n",
664
+ " table_df = pd.concat(\n",
665
+ " [table_df, get_row_df_func(row_df, \"OGTT ≥7.8 to <11.1; FBG >=7.0\", category_labels=category_labels)])\n",
666
+ "\n",
667
+ " row_df = df_month[(df_month.ogtt_value >= 11.1) & (df_month.fbg_value < 6.1)].copy()\n",
634
668
  " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG <6.1\", category_labels=category_labels)])\n",
635
- " \n",
636
- " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
637
- " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
638
- " \n",
639
- " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=7.0)].copy()\n",
669
+ "\n",
670
+ " row_df = df_month[(df_month.ogtt_value >= 11.1) & (df_month.fbg_value >= 6.1) & (df_month.fbg_value < 7.0)].copy()\n",
671
+ " table_df = pd.concat(\n",
672
+ " [table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG >=6.1 <7.0\", category_labels=category_labels)])\n",
673
+ "\n",
674
+ " row_df = df_month[(df_month.ogtt_value >= 11.1) & (df_month.fbg_value >= 7.0)].copy()\n",
640
675
  " table_df = pd.concat([table_df, get_row_df_func(row_df, \"OGTT ≥11.1; FBG >=7.0\", category_labels=category_labels)])\n",
641
676
  "\n",
642
677
  " row_df = df_month[(df_month.ogtt_value.isna())].copy()\n",
@@ -644,11 +679,11 @@
644
679
  " return table_df\n",
645
680
  "\n",
646
681
  "\n",
647
- "def format_table_df(tbl_df, add_totals:bool|None=None):\n",
682
+ "def format_table_df(tbl_df, add_totals: bool | None = None):\n",
648
683
  " \"\"\"Pivot on site\"\"\"\n",
649
684
  " add_totals = True if add_totals is None else add_totals\n",
650
685
  " tbl_df = tbl_df.fillna(0.0)\n",
651
- " tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
686
+ " tbl_df[\"total\"] = tbl_df.iloc[:, 1:].sum(axis=1)\n",
652
687
  " tbl_df = tbl_df.reset_index(drop=True)\n",
653
688
  "\n",
654
689
  " if add_totals:\n",
@@ -656,7 +691,7 @@
656
691
  " df_last.loc[\"label\"] = np.nan\n",
657
692
  " df_last = df_last.reset_index()\n",
658
693
  " df_last.columns = [\"label\", \"value\"]\n",
659
- " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
694
+ " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
660
695
  " df_last.columns.name = \"\"\n",
661
696
  " df_last[\"label\"] = \"Totals\"\n",
662
697
  "\n",
@@ -666,25 +701,27 @@
666
701
  " tbl_df.columns = [\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]\n",
667
702
  "\n",
668
703
  " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
669
- " tbl_df[f\"{site}_perc\"] = (tbl_df[site]/tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site]>0 else 0\n",
704
+ " tbl_df[f\"{site}_perc\"] = (tbl_df[site] / tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site] > 0 else 0\n",
670
705
  " tbl_df[f\"{site}_perc_str\"] = tbl_df[f\"{site}_perc\"].map('{:.1f}'.format)\n",
671
706
  "\n",
672
- "\n",
673
707
  " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
674
- " tbl_df[f\"{site}_str\"] = tbl_df[[f\"{site}\", f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
708
+ " tbl_df[f\"{site}_str\"] = tbl_df[[f\"{site}\", f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)),\n",
709
+ " axis=1)\n",
675
710
  " tbl_df[f\"{site}_str\"] = tbl_df[f\"{site}_str\"] + \")\"\n",
676
711
  "\n",
677
712
  " cols = [\"label\", *[f\"{site}_str\" for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]]]\n",
678
713
  " tbl_df1 = tbl_df[cols]\n",
679
- " tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].to_list()\n",
714
+ " tbl_df1.loc[tbl_df.label == \"Total (n)\"] = tbl_df.iloc[0][\n",
715
+ " [\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].to_list()\n",
680
716
  " return tbl_df1\n",
681
717
  "\n",
682
- "def format_table_with_bmi_df(tbl_df, add_totals:bool|None=None, category_labels:list[str]=None):\n",
718
+ "\n",
719
+ "def format_table_with_bmi_df(tbl_df, add_totals: bool | None = None, category_labels: list[str] = None):\n",
683
720
  " \"\"\"Pivot on BMI categories\"\"\"\n",
684
721
  "\n",
685
722
  " add_totals = True if add_totals is None else add_totals\n",
686
723
  " tbl_df = tbl_df.fillna(0.0)\n",
687
- " tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
724
+ " tbl_df[\"total\"] = tbl_df.iloc[:, 1:].sum(axis=1)\n",
688
725
  " tbl_df = tbl_df.reset_index(drop=True)\n",
689
726
  "\n",
690
727
  " if add_totals:\n",
@@ -692,7 +729,7 @@
692
729
  " df_last.loc[\"label\"] = np.nan\n",
693
730
  " df_last = df_last.reset_index()\n",
694
731
  " df_last.columns = [\"label\", \"value\"]\n",
695
- " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
732
+ " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
696
733
  " df_last.columns.name = \"\"\n",
697
734
  " df_last[\"label\"] = \"Totals\"\n",
698
735
  "\n",
@@ -702,20 +739,21 @@
702
739
  " tbl_df.columns = [\"label\", *category_labels, \"total\"]\n",
703
740
  "\n",
704
741
  " for label in [*category_labels, \"total\"]:\n",
705
- " tbl_df[f\"{label}_perc\"] = (tbl_df[label]/tbl_df.iloc[0][label]) * 100 if tbl_df.iloc[0][label]>0 else 0\n",
742
+ " tbl_df[f\"{label}_perc\"] = (tbl_df[label] / tbl_df.iloc[0][label]) * 100 if tbl_df.iloc[0][label] > 0 else 0\n",
706
743
  " tbl_df[f\"{label}_perc_str\"] = tbl_df[f\"{label}_perc\"].map('{:.1f}'.format)\n",
707
744
  "\n",
708
745
  " for cat in [*category_labels, \"total\"]:\n",
709
- " tbl_df[f\"{label}_str\"] = tbl_df[[f\"{label}\", f\"{label}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
746
+ " tbl_df[f\"{label}_str\"] = tbl_df[[f\"{label}\", f\"{label}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)),\n",
747
+ " axis=1)\n",
710
748
  " tbl_df[f\"{label}_str\"] = tbl_df[f\"{label}_str\"] + \")\"\n",
711
749
  "\n",
712
750
  " cols = [\"label\", *[f\"{label}_str\" for label in [*category_labels, \"total\"]]]\n",
713
751
  " tbl_df1 = tbl_df[cols]\n",
714
- " tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", *category_labels, \"total\"]].to_list()\n",
752
+ " tbl_df1.loc[tbl_df.label == \"Total (n)\"] = tbl_df.iloc[0][[\"label\", *category_labels, \"total\"]].to_list()\n",
715
753
  " return tbl_df1\n",
716
754
  "\n",
717
755
  "\n",
718
- "def get_row_by_df(row_df:pd.DataFrame, label:str, category_labels:list[str])->pd.DataFrame:\n",
756
+ "def get_row_by_df(row_df: pd.DataFrame, label: str, category_labels: list[str]) -> pd.DataFrame:\n",
719
757
  " # if label not in category_labels:\n",
720
758
  " # raise ValueError(f\"Invalid label. Expected one of {category_labels}. Got {label}.\")\n",
721
759
  " row_df = row_df.groupby(by=[\"site_id\"]).site_id.count().to_frame(name=\"n\")\n",
@@ -743,6 +781,7 @@
743
781
  " return r[\"converted_fbg2_value\"]\n",
744
782
  " return r[\"converted_fbg_value\"]\n",
745
783
  "\n",
784
+ "\n",
746
785
  "def get_ogtt_value(r):\n",
747
786
  " if not pd.isna(r[\"converted_ogtt2_value\"]):\n",
748
787
  " return r[\"converted_ogtt2_value\"]\n",
@@ -770,7 +809,7 @@
770
809
  "df_table3 = df_table3.fillna(0.0)\n",
771
810
  "gt = df_as_great_table(df_table3, title=\"Table 3a: OGTT and FBG at Screening / Enrolment\")\n",
772
811
  "\n",
773
- "column_headers_enrol = {k:v for k,v in column_headers_with_str.items() if k not in \"visit_code\"}\n",
812
+ "column_headers_enrol = {k: v for k, v in column_headers_with_str.items() if k not in \"visit_code\"}\n",
774
813
  "gt = (\n",
775
814
  " gt\n",
776
815
  " .cols_label(column_headers_enrol)\n",
@@ -914,7 +953,7 @@
914
953
  "outputs": [],
915
954
  "source": [
916
955
  "# Table 7: Any OGTT>11.1 ever\n",
917
- "row_df = df_glucose[df_glucose.ogtt_value>=11.1].copy()\n",
956
+ "row_df = df_glucose[df_glucose.ogtt_value >= 11.1].copy()\n",
918
957
  "table_df = get_row_df(row_df, \"Total (n)\")\n",
919
958
  "df_table6 = format_table_df(table_df)\n",
920
959
  "df_table = df_table6[:1].fillna(0.0).copy().reset_index(drop=True)\n",
@@ -938,19 +977,19 @@
938
977
  "outputs": [],
939
978
  "source": [
940
979
  "# func for table 7\n",
941
- "def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:\n",
942
- " df_month = df_source[(df_source.visit_code>=visit_code) & (df_source.visit_code<=visit_code + 0.9)].copy()\n",
980
+ "def get_table7_df(df_source: pd.DataFrame, visit_code: float) -> pd.DataFrame:\n",
981
+ " df_month = df_source[(df_source.visit_code >= visit_code) & (df_source.visit_code <= visit_code + 0.9)].copy()\n",
943
982
  "\n",
944
983
  " row_df = df_month.copy()\n",
945
984
  " table_df = get_row_df(row_df, \"Total (n)\")\n",
946
985
  "\n",
947
- " row_df = df_month[(df_month.fbg_value<6.1)].copy()\n",
986
+ " row_df = df_month[(df_month.fbg_value < 6.1)].copy()\n",
948
987
  " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG <6.1\")])\n",
949
988
  "\n",
950
- " row_df = df_month[(df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
989
+ " row_df = df_month[(df_month.fbg_value >= 6.1) & (df_month.fbg_value < 7.0)].copy()\n",
951
990
  " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG >=6.1 <7.0\")])\n",
952
991
  "\n",
953
- " row_df = df_month[(df_month.fbg_value>=7.0)].copy()\n",
992
+ " row_df = df_month[(df_month.fbg_value >= 7.0)].copy()\n",
954
993
  " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG >=7.0\")])\n",
955
994
  " return table_df"
956
995
  ]
@@ -1038,7 +1077,7 @@
1038
1077
  "df_endpoint_pivot.columns.name = \"\"\n",
1039
1078
  "df_endpoint_pivot.columns = ['label', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
1040
1079
  "df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()\n",
1041
- "df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
1080
+ "df_endpoint_pivot.at[len(df_endpoint_pivot) - 1, 'label'] = 'Total'\n",
1042
1081
  "df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
1043
1082
  "df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
1044
1083
  "\n",
@@ -1048,7 +1087,7 @@
1048
1087
  ")\n",
1049
1088
  "gt = (\n",
1050
1089
  " gt\n",
1051
- " .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
1090
+ " .cols_label({k: v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
1052
1091
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
1053
1092
  " .cols_align(align=\"left\", columns=[\"label\"])\n",
1054
1093
  " .cols_width(cases={\"label\": \"25%\"})\n",
@@ -1077,10 +1116,14 @@
1077
1116
  "from great_tables import html\n",
1078
1117
  "\n",
1079
1118
  "# Table 9b: Primary Endpoint no EOS or DM Referral\n",
1080
- "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\", offschedule_datetime__isnull=False), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1119
+ "df_subjecthistory = read_frame(\n",
1120
+ " SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\", offschedule_datetime__isnull=False),\n",
1121
+ " verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1081
1122
  "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
1082
- "df_endpoint_no_off = df_endpoint.merge(df_subjecthistory[[\"subject_identifier\", \"offschedule_datetime\"]], on=[\"subject_identifier\"], how=\"left\")\n",
1083
- "df_endpoint_grp = df_endpoint_no_off.query(\"offschedule_datetime.isna()\").groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
1123
+ "df_endpoint_no_off = df_endpoint.merge(df_subjecthistory[[\"subject_identifier\", \"offschedule_datetime\"]],\n",
1124
+ " on=[\"subject_identifier\"], how=\"left\")\n",
1125
+ "df_endpoint_grp = df_endpoint_no_off.query(\"offschedule_datetime.isna()\").groupby(\n",
1126
+ " by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
1084
1127
  "df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
1085
1128
  "df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
1086
1129
  "df_endpoint_pivot.columns.name = \"\"\n",
@@ -1089,7 +1132,7 @@
1089
1132
  " df_endpoint_pivot[str(col)] = np.nan\n",
1090
1133
  "df_endpoint_pivot.columns = ['label', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
1091
1134
  "df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()\n",
1092
- "df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
1135
+ "df_endpoint_pivot.at[len(df_endpoint_pivot) - 1, 'label'] = 'Total'\n",
1093
1136
  "df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
1094
1137
  "df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
1095
1138
  "subjects = df_endpoint_no_off.query(\"offschedule_datetime.isna()\").subject_identifier.to_list()\n",
@@ -1100,7 +1143,7 @@
1100
1143
  ")\n",
1101
1144
  "gt = (\n",
1102
1145
  " gt\n",
1103
- " .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
1146
+ " .cols_label({k: v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
1104
1147
  " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
1105
1148
  " .cols_align(align=\"left\", columns=[\"label\"])\n",
1106
1149
  " .cols_width(cases={\"label\": \"25%\"})\n",
@@ -1127,12 +1170,13 @@
1127
1170
  "source": [
1128
1171
  "# Table 10: Incident Rate per 1000 person years\n",
1129
1172
  "\n",
1130
- "def get_df_main(df_visit:pd.DataFrame, lower_days:float|None=None, upper_days:float|None=None):\n",
1173
+ "def get_df_main(df_visit: pd.DataFrame, lower_days: float | None = None, upper_days: float | None = None):\n",
1131
1174
  " if not lower_days:\n",
1132
1175
  " lower_days = -1\n",
1133
1176
  " cutoff_datetime = df_visit.query(\"@lower_days<followup_days<=@upper_days\").visit_datetime.max()\n",
1134
1177
  " # exclude subjects for this reason\n",
1135
- " offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
1178
+ " offstudy_reasons = [\n",
1179
+ " 'Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
1136
1180
  "\n",
1137
1181
  " df_eos = get_eos_df()\n",
1138
1182
  " df_eos_excluded = (\n",
@@ -1143,7 +1187,8 @@
1143
1187
  " )\n",
1144
1188
  " df_visit_final = (\n",
1145
1189
  " df_visit.query(\"@lower_days<followup_days<=@upper_days and reason!='missed' and visit_code<2000.0\")\n",
1146
- " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", suffixes=(\"\", \"_y\"), indicator=True)\n",
1190
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", suffixes=(\"\", \"_y\"),\n",
1191
+ " indicator=True)\n",
1147
1192
  " .query(\"_merge=='left_only'\")\n",
1148
1193
  " .drop(columns=[\"_merge\"])\n",
1149
1194
  " )\n",
@@ -1157,26 +1202,30 @@
1157
1202
  " df_main = (\n",
1158
1203
  " df_main\n",
1159
1204
  " .merge(\n",
1160
- " df_endpoint.query(\"days_to_endpoint>@lower_days\")[[\"subject_identifier\", \"endpoint_label\", \"endpoint_type\", \"days_to_endpoint\"]],\n",
1205
+ " df_endpoint.query(\"days_to_endpoint>@lower_days\")[\n",
1206
+ " [\"subject_identifier\", \"endpoint_label\", \"endpoint_type\", \"days_to_endpoint\"]],\n",
1161
1207
  " how=\"left\",\n",
1162
1208
  " on=[\"subject_identifier\"])\n",
1163
1209
  " .reset_index(drop=True)\n",
1164
1210
  " )\n",
1165
- " if lower_days>=365.25:\n",
1211
+ " if lower_days >= 365.25:\n",
1166
1212
  " df_main[\"followup_days\"] = df_main[\"followup_days\"] - lower_days\n",
1167
- " df_main[\"followup_years\"] = df_main[\"followup_days\"]/365.25\n",
1168
- " return df_main, len(df_main), len(df_main.query(\"@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()\"))\n",
1213
+ " df_main[\"followup_years\"] = df_main[\"followup_days\"] / 365.25\n",
1214
+ " return df_main, len(df_main), len(\n",
1215
+ " df_main.query(\"@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()\"))\n",
1216
+ "\n",
1169
1217
  "\n",
1170
1218
  "def get_rate_and_ci(events, person_years_total):\n",
1171
1219
  " lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000\n",
1172
1220
  " upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000\n",
1173
- " return events/person_years_total*1000, lower_ci, upper_ci\n",
1221
+ " return events / person_years_total * 1000, lower_ci, upper_ci\n",
1222
+ "\n",
1174
1223
  "\n",
1175
- "def get_incidence_data(term:str, lower_days:float, upper_days:float):\n",
1224
+ "def get_incidence_data(term: str, lower_days: float, upper_days: float):\n",
1176
1225
  " data = {}\n",
1177
1226
  " df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)\n",
1178
1227
  " person_years_total = df_main.followup_years.sum()\n",
1179
- " data.update({term:[person_years_total, subjects, events, *get_rate_and_ci(events, person_years_total)]})\n",
1228
+ " data.update({term: [person_years_total, subjects, events, *get_rate_and_ci(events, person_years_total)]})\n",
1180
1229
  " return data"
1181
1230
  ]
1182
1231
  },
@@ -1205,7 +1254,7 @@
1205
1254
  " data[\"lower_ci\"].append(v[4])\n",
1206
1255
  " data[\"upper_ci\"].append(v[5])\n",
1207
1256
  "\n",
1208
- "df_table9 = pd.DataFrame(data={k:v for k,v in data.items() if k!=\"subjects\"})"
1257
+ "df_table9 = pd.DataFrame(data={k: v for k, v in data.items() if k != \"subjects\"})"
1209
1258
  ]
1210
1259
  },
1211
1260
  {
@@ -1222,15 +1271,17 @@
1222
1271
  ")\n",
1223
1272
  "gt = gt.fmt_number(columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"], decimals=2)\n",
1224
1273
  "gt = (gt\n",
1225
- " .cols_label({\"label\": \"Label\", \"person_years\": \"Person years\", \"failures\": \"Failures\", \"rate\": \"Rate\", \"lower_ci\": \"Lower\", \"upper_ci\": \"Upper\"})\n",
1226
- " .cols_align(align=\"left\", columns=[\"label\"])\n",
1227
- " .cols_align(align=\"center\", columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"])\n",
1228
- " .tab_spanner(\n",
1229
- " label=\"95%CI\",\n",
1230
- " columns=[\"lower_ci\", \"upper_ci\"],\n",
1231
- " )\n",
1232
- " .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
1274
+ " .cols_label(\n",
1275
+ " {\"label\": \"Label\", \"person_years\": \"Person years\", \"failures\": \"Failures\", \"rate\": \"Rate\", \"lower_ci\": \"Lower\",\n",
1276
+ " \"upper_ci\": \"Upper\"})\n",
1277
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
1278
+ " .cols_align(align=\"center\", columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"])\n",
1279
+ " .tab_spanner(\n",
1280
+ " label=\"95%CI\",\n",
1281
+ " columns=[\"lower_ci\", \"upper_ci\"],\n",
1233
1282
  ")\n",
1283
+ " .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
1284
+ " )\n",
1234
1285
  "gt.show()\n",
1235
1286
  "html_data.append(gt.as_raw_html())"
1236
1287
  ]
@@ -1244,7 +1295,7 @@
1244
1295
  "source": [
1245
1296
  "# Table 11: Proportion meeting primary endpoint\n",
1246
1297
  "df_table10 = pd.DataFrame(data=data)\n",
1247
- "df_table10[\"proportion\"] = df_table10[\"failures\"]/df_table10[\"subjects\"]*100\n",
1298
+ "df_table10[\"proportion\"] = df_table10[\"failures\"] / df_table10[\"subjects\"] * 100\n",
1248
1299
  "gt = df_as_great_table(\n",
1249
1300
  " df_table10[[\"label\", \"subjects\", 'failures', \"proportion\"]],\n",
1250
1301
  " title=\"Table 11: Proportion meeting primary endpoint\",\n",
@@ -1276,8 +1327,6 @@
1276
1327
  "metadata": {},
1277
1328
  "outputs": [],
1278
1329
  "source": [
1279
- "# TODO: EoS should be for all 1691, not 1631\n",
1280
- "\n",
1281
1330
  "# Table 11a: End of Study Table (for those who have completed an end of study form)\n",
1282
1331
  "df_eos = get_eos_df()\n",
1283
1332
  "offstudy_reasons = {\n",
@@ -1293,11 +1342,12 @@
1293
1342
  " \"Patient withdrew consent to participate further\": \"Withdrawal: Consent\",\n",
1294
1343
  "}\n",
1295
1344
  "df_eos[\"offstudy_reason\"] = df_eos[\"offstudy_reason\"].map(offstudy_reasons)\n",
1296
- "df_eos[\"offstudy_reason\"] = pd.Categorical(df_eos[\"offstudy_reason\"], categories=sorted(list(offstudy_reasons.values())), ordered=True)\n",
1345
+ "df_eos[\"offstudy_reason\"] = pd.Categorical(df_eos[\"offstudy_reason\"],\n",
1346
+ " categories=sorted(list(offstudy_reasons.values())), ordered=True)\n",
1297
1347
  "df_eos[\"site_id\"] = df_eos[\"site_id\"].astype(str)\n",
1298
1348
  "df_eos_pivot = (\n",
1299
1349
  " df_eos\n",
1300
- " .groupby(by=[\"offstudy_reason\", \"site_id\"],observed=True)\n",
1350
+ " .groupby(by=[\"offstudy_reason\", \"site_id\"], observed=True)\n",
1301
1351
  " .size()\n",
1302
1352
  " .reset_index()\n",
1303
1353
  " .pivot_table(index=\"offstudy_reason\", columns=\"site_id\", values=0, observed=True)\n",
@@ -1305,14 +1355,14 @@
1305
1355
  " .astype(int)\n",
1306
1356
  " .reset_index()\n",
1307
1357
  ")\n",
1308
- "df_eos_pivot[\"total\"] = df_eos_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1309
- "df_eos_pivot.columns.name=\"\"\n",
1358
+ "df_eos_pivot[\"total\"] = df_eos_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum(axis=1)\n",
1359
+ "df_eos_pivot.columns.name = \"\"\n",
1310
1360
  "sum_row = df_eos_pivot.select_dtypes(include='int64').sum()\n",
1311
1361
  "sum_row['offstudy_reason'] = 'Total'\n",
1312
1362
  "sum_row_df = pd.DataFrame(sum_row).T\n",
1313
- "enrolled_pivot[\"offstudy_reason\"] = \"Enrolled\"\n",
1314
- "enrolled_pivot = enrolled_pivot[[*df_eos_pivot.columns]]\n",
1315
- "df_eos_pivot = pd.concat([enrolled_pivot, df_eos_pivot, sum_row_df], ignore_index=True)\n",
1363
+ "enrolled_1691_pivot[\"offstudy_reason\"] = \"Enrolled\"\n",
1364
+ "enrolled_1691_pivot = enrolled_1691_pivot[[*df_eos_pivot.columns]]\n",
1365
+ "df_eos_pivot = pd.concat([enrolled_1691_pivot, df_eos_pivot, sum_row_df], ignore_index=True)\n",
1316
1366
  "\n",
1317
1367
  "gt = df_as_great_table(\n",
1318
1368
  " df_eos_pivot,\n",
@@ -1321,27 +1371,28 @@
1321
1371
  ")\n",
1322
1372
  "gt = (\n",
1323
1373
  " gt\n",
1324
- " .cols_label({\"offstudy_reason\": \"Reason\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1374
+ " .cols_label(\n",
1375
+ " {\"offstudy_reason\": \"Reason\", **{k: v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1325
1376
  " .cols_align(align=\"left\", columns=[\"offstudy_reason\"])\n",
1326
- " .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
1377
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
1327
1378
  " .tab_style(\n",
1328
1379
  " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1329
1380
  " locations=loc.body(\n",
1330
1381
  " columns=[0],\n",
1331
- " rows=[len(df_eos_pivot)-1]),\n",
1332
- " )\n",
1382
+ " rows=[len(df_eos_pivot) - 1]),\n",
1383
+ " )\n",
1333
1384
  " .tab_style(\n",
1334
1385
  " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1335
1386
  " locations=loc.body(\n",
1336
1387
  " columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
1337
- " rows=[len(df_eos_pivot)-1],\n",
1388
+ " rows=[len(df_eos_pivot) - 1],\n",
1338
1389
  " ),\n",
1339
1390
  " )\n",
1340
1391
  " .tab_style(\n",
1341
1392
  " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1342
1393
  " locations=loc.body(\n",
1343
1394
  " columns=[\"total\"],\n",
1344
- " rows=[len(df_eos_pivot)-1],\n",
1395
+ " rows=[len(df_eos_pivot) - 1],\n",
1345
1396
  " ),\n",
1346
1397
  " )\n",
1347
1398
  " .tab_style(\n",
@@ -1372,23 +1423,26 @@
1372
1423
  "outputs": [],
1373
1424
  "source": [
1374
1425
  "# Table 12b: Study status\n",
1375
- "def get_schedule_df(df_subjecthistory:pd.DataFrame, onschedule_model:str, offschedule_model:str, mode:str)->pd.DataFrame:\n",
1376
- " columns = {k:f\"{k}_{mode}\" for k in [\"10\", \"20\", \"30\", \"40\", \"60\"]}\n",
1426
+ "def get_schedule_df(df_subjecthistory: pd.DataFrame, onschedule_model: str, offschedule_model: str,\n",
1427
+ " mode: str) -> pd.DataFrame:\n",
1428
+ " columns = {k: f\"{k}_{mode}\" for k in [\"10\", \"20\", \"30\", \"40\", \"60\"]}\n",
1377
1429
  " df_schedule = (\n",
1378
1430
  " df_subjecthistory\n",
1379
- " .query(f\"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{'isna' if mode=='on' else 'notna'}()\")\n",
1431
+ " .query(\n",
1432
+ " f\"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{'isna' if mode == 'on' else 'notna'}()\")\n",
1380
1433
  " .groupby(by=[\"onschedule_model\", \"site_id\"])\n",
1381
1434
  " .size()\n",
1382
1435
  " .reset_index()\n",
1383
1436
  " .pivot_table(index=\"onschedule_model\", columns=\"site_id\", values=0, observed=True)\n",
1384
1437
  " .reset_index()\n",
1385
- " .rename(columns={\"onschedule_model\":\"schedule\", **columns})\n",
1438
+ " .rename(columns={\"onschedule_model\": \"schedule\", **columns})\n",
1386
1439
  " .fillna(0)\n",
1387
1440
  " .copy()\n",
1388
1441
  " )\n",
1389
1442
  " df_schedule.columns.name = \"\"\n",
1390
1443
  " return df_schedule\n",
1391
1444
  "\n",
1445
+ "\n",
1392
1446
  "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1393
1447
  "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
1394
1448
  "\n",
@@ -1423,7 +1477,9 @@
1423
1477
  "df_status[\"total_on\"] = df_status[[col for col in columns if \"on\" in col]].sum(axis=1)\n",
1424
1478
  "df_status[\"total_off\"] = df_status[[col for col in columns if \"off\" in col]].sum(axis=1)\n",
1425
1479
  "df_status[\"total\"] = df_status[columns].sum(axis=1)\n",
1426
- "df_status[\"schedule\"] = df_status.schedule.map({\"meta_prn.onschedule\": \"Main trial\", \"meta_prn.onscheduledmreferral\": \"Diabetes\", \"meta_prn.onschedulepregnancy\": \"Pregnancy\"})\n",
1480
+ "df_status[\"schedule\"] = df_status.schedule.map(\n",
1481
+ " {\"meta_prn.onschedule\": \"Main trial\", \"meta_prn.onscheduledmreferral\": \"Diabetes\",\n",
1482
+ " \"meta_prn.onschedulepregnancy\": \"Pregnancy\"})\n",
1427
1483
  "\n",
1428
1484
  "gt = df_as_great_table(\n",
1429
1485
  " df_status,\n",
@@ -1432,63 +1488,63 @@
1432
1488
  ")\n",
1433
1489
  "# gt = gt.fmt_number(columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"], decimals=0)\n",
1434
1490
  "gt = (gt\n",
1435
- " .tab_source_note(\n",
1436
- " source_note=(\n",
1437
- " \"Note: Offschedule form is always submitted before the End of study report. \"\n",
1438
- " \"When the Offschedule form is submitted, future appointments for the schedule are removed and \"\n",
1439
- " \"the site staff are actioned to submit the End of study report.\"\n",
1440
- " )\n",
1441
- " )\n",
1442
- " .cols_label({\n",
1443
- " \"10_on\": \"On\", \"10_off\": \"Off\",\n",
1444
- " \"20_on\": \"On\", \"20_off\": \"Off\",\n",
1445
- " \"30_on\": \"On\", \"30_off\": \"Off\",\n",
1446
- " \"40_on\": \"On\", \"40_off\": \"Off\",\n",
1447
- " \"60_on\": \"On\", \"60_off\": \"Off\",\n",
1448
- " \"total_on\": \"On\", \"total_off\": \"Off\",\n",
1449
- " \"schedule\": \"Schedule\", \"total\": \"Total\"})\n",
1450
- " .cols_align(align=\"center\")\n",
1451
- " .cols_align(align=\"left\", columns=[\"label\"])\n",
1452
- " .tab_spanner(\n",
1453
- " label=\"Hindu mandal\",\n",
1454
- " columns=[\"10_on\", \"10_off\"],\n",
1455
- " )\n",
1456
- " .tab_spanner(\n",
1457
- " label=\"Amana\",\n",
1458
- " columns=[\"20_on\", \"20_off\"],\n",
1459
- " )\n",
1460
- " .tab_spanner(\n",
1461
- " label=\"Temeke\",\n",
1462
- " columns=[\"30_on\", \"30_off\"],\n",
1491
+ " .tab_source_note(\n",
1492
+ " source_note=(\n",
1493
+ " \"Note: Offschedule form is always submitted before the End of study report. \"\n",
1494
+ " \"When the Offschedule form is submitted, future appointments for the schedule are removed and \"\n",
1495
+ " \"the site staff are actioned to submit the End of study report.\"\n",
1463
1496
  " )\n",
1464
- " .tab_spanner(\n",
1465
- " label=\"Mwananyamala\",\n",
1466
- " columns=[\"40_on\", \"40_off\"],\n",
1467
- " )\n",
1468
- " .tab_spanner(\n",
1469
- " label=\"Mnazi Moja\",\n",
1470
- " columns=[\"60_on\", \"60_off\"],\n",
1471
- " )\n",
1472
- " .tab_spanner(\n",
1473
- " label=\"Total\",\n",
1474
- " columns=[\"total_on\", \"total_off\"],\n",
1475
- " )\n",
1476
- " .tab_style(\n",
1477
- " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1478
- " locations=loc.body(\n",
1479
- " columns=[\"10_off\", \"20_off\", \"30_off\", \"40_off\", \"60_off\"],\n",
1480
- " rows=list(range(0, 1)),\n",
1481
- " ),\n",
1482
- " )\n",
1483
- " .tab_style(\n",
1484
- " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1485
- " locations=loc.body(\n",
1486
- " columns=[\"total_off\"],\n",
1487
- " rows=list(range(0, 1)),\n",
1488
- " ),\n",
1489
- " )\n",
1490
- " .fmt_number(columns=[*[c for c in df_status.columns if c not in [\"schedule\"]]], decimals=0)\n",
1491
1497
  ")\n",
1498
+ " .cols_label({\n",
1499
+ " \"10_on\": \"On\", \"10_off\": \"Off\",\n",
1500
+ " \"20_on\": \"On\", \"20_off\": \"Off\",\n",
1501
+ " \"30_on\": \"On\", \"30_off\": \"Off\",\n",
1502
+ " \"40_on\": \"On\", \"40_off\": \"Off\",\n",
1503
+ " \"60_on\": \"On\", \"60_off\": \"Off\",\n",
1504
+ " \"total_on\": \"On\", \"total_off\": \"Off\",\n",
1505
+ " \"schedule\": \"Schedule\", \"total\": \"Total\"})\n",
1506
+ " .cols_align(align=\"center\")\n",
1507
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
1508
+ " .tab_spanner(\n",
1509
+ " label=\"Hindu mandal\",\n",
1510
+ " columns=[\"10_on\", \"10_off\"],\n",
1511
+ ")\n",
1512
+ " .tab_spanner(\n",
1513
+ " label=\"Amana\",\n",
1514
+ " columns=[\"20_on\", \"20_off\"],\n",
1515
+ ")\n",
1516
+ " .tab_spanner(\n",
1517
+ " label=\"Temeke\",\n",
1518
+ " columns=[\"30_on\", \"30_off\"],\n",
1519
+ ")\n",
1520
+ " .tab_spanner(\n",
1521
+ " label=\"Mwananyamala\",\n",
1522
+ " columns=[\"40_on\", \"40_off\"],\n",
1523
+ ")\n",
1524
+ " .tab_spanner(\n",
1525
+ " label=\"Mnazi Moja\",\n",
1526
+ " columns=[\"60_on\", \"60_off\"],\n",
1527
+ ")\n",
1528
+ " .tab_spanner(\n",
1529
+ " label=\"Total\",\n",
1530
+ " columns=[\"total_on\", \"total_off\"],\n",
1531
+ ")\n",
1532
+ " .tab_style(\n",
1533
+ " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1534
+ " locations=loc.body(\n",
1535
+ " columns=[\"10_off\", \"20_off\", \"30_off\", \"40_off\", \"60_off\"],\n",
1536
+ " rows=list(range(0, 1)),\n",
1537
+ " ),\n",
1538
+ ")\n",
1539
+ " .tab_style(\n",
1540
+ " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1541
+ " locations=loc.body(\n",
1542
+ " columns=[\"total_off\"],\n",
1543
+ " rows=list(range(0, 1)),\n",
1544
+ " ),\n",
1545
+ ")\n",
1546
+ " .fmt_number(columns=[*[c for c in df_status.columns if c not in [\"schedule\"]]], decimals=0)\n",
1547
+ " )\n",
1492
1548
  "html_data.append(gt.as_raw_html())\n",
1493
1549
  "gt.show()"
1494
1550
  ]
@@ -1499,21 +1555,46 @@
1499
1555
  "id": "39",
1500
1556
  "metadata": {},
1501
1557
  "outputs": [],
1558
+ "source": [
1559
+ "# off schedule no eos\n",
1560
+ "\n",
1561
+ "subjects_preg_dm = df_subjecthistory[~(df_subjecthistory.offschedule_datetime.isna()) & (\n",
1562
+ " df_subjecthistory.schedule_name != \"schedule\")].subject_identifier\n",
1563
+ "\n",
1564
+ "df_subjecthistory[\n",
1565
+ " ~(df_subjecthistory.subject_identifier.isin(df_eos_1691.subject_identifier))].sort_values(\n",
1566
+ " by=[\"subject_identifier\", \"onschedule_datetime\"])"
1567
+ ]
1568
+ },
1569
+ {
1570
+ "cell_type": "code",
1571
+ "execution_count": null,
1572
+ "id": "40",
1573
+ "metadata": {},
1574
+ "outputs": [],
1575
+ "source": []
1576
+ },
1577
+ {
1578
+ "cell_type": "code",
1579
+ "execution_count": null,
1580
+ "id": "41",
1581
+ "metadata": {},
1582
+ "outputs": [],
1502
1583
  "source": [
1503
1584
  "# Table 13: Loss to Follow Up\n",
1504
1585
  "df_ltfu = read_frame(LossToFollowup.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
1505
1586
  "df_ltfu_pivot = (\n",
1506
1587
  " df_ltfu\n",
1507
- " .groupby(by=[\"loss_category\", \"site_id\"],observed=True,dropna=False)\n",
1588
+ " .groupby(by=[\"loss_category\", \"site_id\"], observed=True, dropna=False)\n",
1508
1589
  " .size()\n",
1509
1590
  " .reset_index()\n",
1510
- " .pivot_table(index=\"loss_category\", columns=\"site_id\", values=0, observed=True,dropna=False)\n",
1591
+ " .pivot_table(index=\"loss_category\", columns=\"site_id\", values=0, observed=True, dropna=False)\n",
1511
1592
  " .fillna(0)\n",
1512
1593
  " .astype(int)\n",
1513
1594
  " .reset_index()\n",
1514
1595
  ")\n",
1515
- "df_ltfu_pivot[\"total\"] = df_eos_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1516
- "df_ltfu_pivot.columns.name=\"\"\n",
1596
+ "df_ltfu_pivot[\"total\"] = df_eos_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum(axis=1)\n",
1597
+ "df_ltfu_pivot.columns.name = \"\"\n",
1517
1598
  "sum_row = df_ltfu_pivot.select_dtypes(include='int64').sum()\n",
1518
1599
  "sum_row['loss_category'] = 'Total'\n",
1519
1600
  "sum_row_df = pd.DataFrame(sum_row).T\n",
@@ -1524,7 +1605,7 @@
1524
1605
  {
1525
1606
  "cell_type": "code",
1526
1607
  "execution_count": null,
1527
- "id": "40",
1608
+ "id": "42",
1528
1609
  "metadata": {},
1529
1610
  "outputs": [],
1530
1611
  "source": [
@@ -1533,19 +1614,19 @@
1533
1614
  "df1 = (\n",
1534
1615
  " df_status\n",
1535
1616
  " .query(\"schedule=='Main trial'\")[[col for col in columns if \"off\" in col]]\n",
1536
- " .rename(columns=dict(zip([col for col in columns if \"off\" in col], [\"10\", \"20\",\"30\",\"40\",\"60\"])))\n",
1617
+ " .rename(columns=dict(zip([col for col in columns if \"off\" in col], [\"10\", \"20\", \"30\", \"40\", \"60\"])))\n",
1537
1618
  " .reset_index(drop=True)\n",
1538
1619
  ")\n",
1539
1620
  "df2 = (\n",
1540
1621
  " df_eos_pivot\n",
1541
- " .query(\"offstudy_reason=='Total'\")[[\"10\", \"20\",\"30\",\"40\",\"60\"]]\n",
1622
+ " .query(\"offstudy_reason=='Total'\")[[\"10\", \"20\", \"30\", \"40\", \"60\"]]\n",
1542
1623
  " .reset_index(drop=True)\n",
1543
1624
  ")\n",
1544
1625
  "\n",
1545
- "df_eos_not_reported = df1-df2\n",
1626
+ "df_eos_not_reported = df1 - df2\n",
1546
1627
  "df_eos_not_reported[\"schedule\"] = 'Main trial'\n",
1547
- "df_eos_not_reported[\"total\"] = df_eos_not_reported[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
1548
- "df_eos_not_reported = df_eos_not_reported[[\"schedule\", \"10\", \"20\",\"30\",\"40\",\"60\", \"total\"]]\n",
1628
+ "df_eos_not_reported[\"total\"] = df_eos_not_reported[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum(axis=1)\n",
1629
+ "df_eos_not_reported = df_eos_not_reported[[\"schedule\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]]\n",
1549
1630
  "\n",
1550
1631
  "gt = df_as_great_table(\n",
1551
1632
  " df_eos_not_reported,\n",
@@ -1554,27 +1635,28 @@
1554
1635
  ")\n",
1555
1636
  "gt = (\n",
1556
1637
  " gt\n",
1557
- " .cols_label({\"schedule\": \"Schedule\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1638
+ " .cols_label(\n",
1639
+ " {\"schedule\": \"Schedule\", **{k: v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1558
1640
  " .cols_align(align=\"left\", columns=[\"schedule\"])\n",
1559
- " .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
1641
+ " .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
1560
1642
  " .tab_style(\n",
1561
1643
  " style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
1562
1644
  " locations=loc.body(\n",
1563
1645
  " columns=[0],\n",
1564
- " rows=[len(df_eos_pivot)-1]),\n",
1565
- " )\n",
1646
+ " rows=[len(df_eos_pivot) - 1]),\n",
1647
+ " )\n",
1566
1648
  " .tab_style(\n",
1567
1649
  " style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
1568
1650
  " locations=loc.body(\n",
1569
1651
  " columns=[\"10\", \"20\", \"30\", \"40\", \"60\"],\n",
1570
- " rows=[len(df_eos_pivot)-1],\n",
1652
+ " rows=[len(df_eos_pivot) - 1],\n",
1571
1653
  " ),\n",
1572
1654
  " )\n",
1573
1655
  " .tab_style(\n",
1574
1656
  " style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
1575
1657
  " locations=loc.body(\n",
1576
1658
  " columns=[\"total\"],\n",
1577
- " rows=[len(df_eos_pivot)-1],\n",
1659
+ " rows=[len(df_eos_pivot) - 1],\n",
1578
1660
  " ),\n",
1579
1661
  " )\n",
1580
1662
  ")\n",
@@ -1585,7 +1667,7 @@
1585
1667
  {
1586
1668
  "cell_type": "code",
1587
1669
  "execution_count": null,
1588
- "id": "41",
1670
+ "id": "43",
1589
1671
  "metadata": {},
1590
1672
  "outputs": [],
1591
1673
  "source": [
@@ -1595,7 +1677,7 @@
1595
1677
  {
1596
1678
  "cell_type": "code",
1597
1679
  "execution_count": null,
1598
- "id": "42",
1680
+ "id": "44",
1599
1681
  "metadata": {},
1600
1682
  "outputs": [],
1601
1683
  "source": [
@@ -1623,16 +1705,16 @@
1623
1705
  ")\n",
1624
1706
  "if \"60\" not in df_consented_pivot.columns:\n",
1625
1707
  " df_consented_pivot[\"60\"] = 0.0 * len(df_consented_pivot)\n",
1626
- "df_consented_pivot.columns.name=\"\"\n",
1708
+ "df_consented_pivot.columns.name = \"\"\n",
1627
1709
  "df_consented_pivot[\"year\"] = df_consented_pivot[\"year\"].astype(str)\n",
1628
1710
  "df_consented_pivot[\"month\"] = df_consented_pivot[\"month\"].astype(str)\n",
1629
1711
  "\n",
1630
- "sum_row = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum()\n",
1712
+ "sum_row = df_consented_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum()\n",
1631
1713
  "sum_row['year'] = \"Total\"\n",
1632
1714
  "sum_row['month'] = \"\"\n",
1633
1715
  "df_consented_pivot = pd.concat([df_consented_pivot, sum_row.to_frame().T], ignore_index=True)\n",
1634
- "df_consented_pivot[\"total\"] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1).astype(int)\n",
1635
- "df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]] = df_consented_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].astype(int)\n",
1716
+ "df_consented_pivot[\"total\"] = df_consented_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].sum(axis=1).astype(int)\n",
1717
+ "df_consented_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]] = df_consented_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\"]].astype(int)\n",
1636
1718
  "gt = df_as_great_table2(\n",
1637
1719
  " df_consented_pivot,\n",
1638
1720
  " title=\"Table 15: Consented to extended followup\",\n",
@@ -1641,7 +1723,8 @@
1641
1723
  ")\n",
1642
1724
  "gt = (\n",
1643
1725
  " gt\n",
1644
- " .cols_label({\"year\": \"Year\", \"month\": \"Month\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1726
+ " .cols_label({\"year\": \"Year\", \"month\": \"Month\",\n",
1727
+ " **{k: v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
1645
1728
  " .cols_align(align=\"center\")\n",
1646
1729
  " .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
1647
1730
  " .tab_stubhead(label=\"Consented\")\n",
@@ -1660,7 +1743,7 @@
1660
1743
  {
1661
1744
  "cell_type": "code",
1662
1745
  "execution_count": null,
1663
- "id": "43",
1746
+ "id": "45",
1664
1747
  "metadata": {},
1665
1748
  "outputs": [],
1666
1749
  "source": []
@@ -1668,7 +1751,7 @@
1668
1751
  {
1669
1752
  "cell_type": "code",
1670
1753
  "execution_count": null,
1671
- "id": "44",
1754
+ "id": "46",
1672
1755
  "metadata": {},
1673
1756
  "outputs": [],
1674
1757
  "source": [
@@ -1694,34 +1777,34 @@
1694
1777
  {
1695
1778
  "cell_type": "code",
1696
1779
  "execution_count": null,
1697
- "id": "45",
1780
+ "id": "47",
1698
1781
  "metadata": {},
1699
1782
  "outputs": [],
1700
1783
  "source": [
1701
1784
  "# render html to PDF\n",
1702
1785
  "pdfkit.from_string(raw_html, str(analysis_folder / pdf_filename),\n",
1703
- "options={\n",
1704
- " 'footer-center': 'Page [page] of [topage]',\n",
1705
- " 'footer-font-size': '8',\n",
1706
- " 'footer-spacing': '5',\n",
1707
- " 'encoding': \"UTF-8\",\n",
1708
- " 'margin-top':'10mm',\n",
1709
- " 'margin-right':'15mm',\n",
1710
- " 'margin-bottom':'15mm',\n",
1711
- " 'margin-left':'15mm',\n",
1712
- " 'header-center': study_title,\n",
1713
- " 'header-font-size': '6',\n",
1714
- " 'header-spacing': '0',\n",
1715
- " 'disable-javascript': None,\n",
1716
- " 'no-outline': None,\n",
1717
- "},\n",
1718
- "verbose=True)"
1786
+ " options={\n",
1787
+ " 'footer-center': 'Page [page] of [topage]',\n",
1788
+ " 'footer-font-size': '8',\n",
1789
+ " 'footer-spacing': '5',\n",
1790
+ " 'encoding': \"UTF-8\",\n",
1791
+ " 'margin-top': '10mm',\n",
1792
+ " 'margin-right': '15mm',\n",
1793
+ " 'margin-bottom': '15mm',\n",
1794
+ " 'margin-left': '15mm',\n",
1795
+ " 'header-center': study_title,\n",
1796
+ " 'header-font-size': '6',\n",
1797
+ " 'header-spacing': '0',\n",
1798
+ " 'disable-javascript': None,\n",
1799
+ " 'no-outline': None,\n",
1800
+ " },\n",
1801
+ " verbose=True)"
1719
1802
  ]
1720
1803
  },
1721
1804
  {
1722
1805
  "cell_type": "code",
1723
1806
  "execution_count": null,
1724
- "id": "46",
1807
+ "id": "48",
1725
1808
  "metadata": {},
1726
1809
  "outputs": [],
1727
1810
  "source": []