meta-edc 1.0.6__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. meta_ae/action_items.py +10 -2
  2. meta_ae/baker_recipes.py +1 -2
  3. meta_ae/tests/tests/test_actions.py +1 -2
  4. meta_analytics/dataframes/__init__.py +3 -0
  5. meta_analytics/dataframes/constants.py +1 -1
  6. meta_analytics/dataframes/get_eos_df.py +15 -2
  7. meta_analytics/dataframes/get_glucose_df.py +149 -0
  8. meta_analytics/dataframes/get_glucose_fbg_df.py +27 -0
  9. meta_analytics/dataframes/get_glucose_fbg_ogtt_df.py +22 -0
  10. meta_analytics/dataframes/glucose_endpoints/endpoint_by_date.py +106 -120
  11. meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +36 -227
  12. meta_analytics/dataframes/utils.py +18 -4
  13. meta_analytics/notebooks/anu.ipynb +95 -0
  14. meta_analytics/notebooks/appointment_planning.ipynb +329 -0
  15. meta_analytics/notebooks/arvs.ipynb +103 -0
  16. meta_analytics/notebooks/cleaning/consent_v1_ext.ipynb +227 -0
  17. meta_analytics/notebooks/cleaning/offschedule_eos.ipynb +353 -0
  18. meta_analytics/notebooks/dsmc/renal_dysfunction.ipynb +435 -0
  19. meta_analytics/notebooks/endpoints/meta_endpoints_by_date.ipynb +664 -0
  20. meta_analytics/notebooks/followup_examination.ipynb +141 -0
  21. meta_analytics/notebooks/hba1c.ipynb +136 -0
  22. meta_analytics/notebooks/hiv_regimens.ipynb +429 -0
  23. meta_analytics/notebooks/incidence.ipynb +232 -0
  24. meta_analytics/notebooks/liver.ipynb +389 -0
  25. meta_analytics/notebooks/magreth.ipynb +645 -0
  26. meta_analytics/notebooks/monitoring_report.ipynb +1751 -0
  27. meta_analytics/notebooks/pharmacy.ipynb +1070 -0
  28. meta_analytics/notebooks/pharmacy_stock_202410.ipynb +306 -0
  29. meta_analytics/notebooks/steering.ipynb +61 -0
  30. meta_analytics/notebooks/undiagnosed/meta3_screening_consort_chart.ipynb +1176 -0
  31. meta_analytics/notebooks/undiagnosed/meta3_screening_undiagnosed.ipynb +519 -0
  32. meta_analytics/notebooks/undiagnosed/meta_screening_table2.ipynb +964 -0
  33. meta_analytics/notebooks/undiagnosed/screen_undiagnosed_or.ipynb +296 -0
  34. meta_analytics/notebooks/undiagnosed/screening.ipynb +273 -0
  35. meta_analytics/notebooks/undiagnosed/screening2.ipynb +958 -0
  36. meta_analytics/notebooks/undiagnosed/screening_undiagnosed_20241002.ipynb +958 -0
  37. meta_analytics/notebooks/ven.ipynb +191 -0
  38. meta_analytics/notebooks/vitals.ipynb +263 -0
  39. meta_analytics/utils.py +81 -0
  40. meta_edc/settings/debug.py +3 -2
  41. meta_edc/urls.py +1 -0
  42. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/METADATA +6 -5
  43. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/RECORD +77 -36
  44. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/WHEEL +1 -1
  45. meta_edc-1.1.0.dist-info/licenses/AUTHORS.rst +8 -0
  46. meta_labs/reportables.py +14 -11
  47. meta_labs/tests/test_reportables.py +33 -12
  48. meta_pharmacy/notebooks/pharmacy.ipynb +41 -0
  49. meta_prn/migrations/0063_historicaloffstudymedication_singleton_field_and_more.py +37 -0
  50. meta_prn/migrations/0064_auto_20250602_2143.py +18 -0
  51. meta_prn/models/end_of_study.py +2 -0
  52. meta_prn/models/off_study_medication.py +2 -0
  53. meta_reports/migrations/0054_auto_20250422_2003.py +81 -0
  54. meta_reports/migrations/0055_alter_glucosesummary_table.py +17 -0
  55. meta_reports/migrations/0056_auto_20250422_2214.py +54 -0
  56. meta_reports/migrations/0057_auto_20250422_2224.py +54 -0
  57. meta_reports/migrations/0058_auto_20250422_2232.py +54 -0
  58. meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +13 -1
  59. meta_reports/models/dbviews/glucose_summary/view_definition.py +8 -5
  60. meta_screening/eligibility/eligibility_part_three/base_eligibility_part_three.py +59 -47
  61. meta_screening/form_validators/screening_part_three.py +6 -1
  62. meta_screening/tests/meta_test_case_mixin.py +3 -0
  63. meta_screening/tests/tests/test_forms.py +9 -2
  64. meta_screening/tests/tests/test_screening_part_three.py +11 -14
  65. meta_subject/action_items.py +1 -2
  66. meta_subject/choices.py +2 -1
  67. meta_subject/form_validators/glucose_form_validator.py +16 -1
  68. meta_subject/forms/blood_results/blood_results_rft_form.py +60 -3
  69. meta_subject/forms/study_medication_form.py +5 -3
  70. meta_subject/migrations/0221_auto_20250402_1913.py +42 -0
  71. meta_subject/migrations/0222_alter_historicalstudymedication_stock_codes_and_more.py +46 -0
  72. meta_subject/migrations/0223_bloodresultsfbc_errors_bloodresultsgludummy_errors_and_more.py +83 -0
  73. meta_subject/migrations/0224_bloodresultsfbc_abnormal_summary_and_more.py +153 -0
  74. meta_subject/tests/tests/test_egfr.py +5 -5
  75. meta_analytics/dataframes/enrolled/__init__.py +0 -1
  76. meta_analytics/dataframes/enrolled/get_glucose_df.py +0 -122
  77. /meta_edc-1.0.6.dist-info/AUTHORS → /meta_analytics/dataframes/glucose_endpoints/utils.py +0 -0
  78. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info/licenses}/LICENSE +0 -0
  79. {meta_edc-1.0.6.dist-info → meta_edc-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,429 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%%capture\n",
11
+ "import os\n",
12
+ "from pathlib import Path\n",
13
+ "import pandas as pd\n",
14
+ "from dj_notebook import activate\n",
15
+ "\n",
16
+ "env_file = os.environ[\"META_ENV\"]\n",
17
+ "reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
18
+ "analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
19
+ "pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
20
+ "plus = activate(dotenv_file=env_file)\n",
21
+ "pd.set_option('future.no_silent_downcasting', True)"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": null,
27
+ "id": "1",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "from edc_pdutils.dataframes import get_crf, get_subject_visit\n",
32
+ "from edc_constants.constants import YES\n",
33
+ "from edc_appointment.analytics import get_appointment_df\n",
34
+ "from datetime import datetime"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "id": "2",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "cutoff_datetime = datetime(2026,3,1)\n",
45
+ "df_patienthistory = get_crf(\"meta_subject.patienthistory\", subject_visit_model=\"meta_subject.subjectvisit\")\n",
46
+ "df_followup_examination = get_crf(\"meta_subject.FollowupExamination\", subject_visit_model=\"meta_subject.subjectvisit\")"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": null,
52
+ "id": "3",
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "replacements = {\n",
57
+ " \"ABC+ 3TC+ DTG\": \"ABC + 3TC + DTG\",\n",
58
+ " \"ABC+3TC+DTG\": \"ABC + 3TC + DTG\",\n",
59
+ " \"ABC, 3TC, DTG\":\"ABC + 3TC + DTG\",\n",
60
+ " \"ABC, EFV, DTG\": \"ABC + EFV + DTG\",\n",
61
+ " \"TDF+FTC+DTG\": \"TDF + FTC + DTG\",\n",
62
+ " \"AZT+3TC+DTG\": \"AZT + 3TC + DTG\",\n",
63
+ " \"AZT + 3 TC + DTG\":\"AZT + 3TC + DTG\",\n",
64
+ " \"TDF+3TC+DTG\": \"TDF + 3TC + DTG\",\n",
65
+ "}\n",
66
+ "\n",
67
+ "df_patienthistory[\"other_current_arv_regimen\"] = (\n",
68
+ " df_patienthistory[\"other_current_arv_regimen\"]\n",
69
+ " .replace(replacements)\n",
70
+ ")"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "4",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "df_patienthistory['regimen'] = df_patienthistory[\"current_arv_regimen\"]\n",
81
+ "df_patienthistory.loc[df_patienthistory[\"current_arv_regimen\"]==\"Other, specify ...\", \"regimen\"] = df_patienthistory[\"other_current_arv_regimen\"]"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "id": "5",
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "df_followup_examination[\"art_new_regimen_other\"] = (\n",
92
+ " df_followup_examination[\"art_new_regimen_other\"]\n",
93
+ " .replace(replacements)\n",
94
+ ")"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "id": "6",
101
+ "metadata": {},
102
+ "outputs": [],
103
+ "source": [
104
+ "df_followup_examination['regimen'] = pd.NA\n",
105
+ "df_followup_examination.loc[(df_followup_examination[\"art_change\"]==YES) & (df_followup_examination.art_new_regimen_other.notna()), \"regimen\"] = df_followup_examination[\"art_new_regimen_other\"]\n",
106
+ "df_followup_examination.loc[(df_followup_examination[\"art_change\"]==YES) & (df_followup_examination.art_new_regimen_other.isna()), \"regimen\"] = \"CHANGE_NOT_REPORTED\""
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "id": "7",
113
+ "metadata": {},
114
+ "outputs": [],
115
+ "source": [
116
+ "df_regimen = pd.concat([df_patienthistory[[\"subject_identifier\", \"visit_datetime\", \"regimen\"]], df_followup_examination[[\"subject_identifier\", \"visit_datetime\", \"regimen\"]]])\n",
117
+ "df_regimen[\"regimen\"] = df_regimen[\"regimen\"].replace({\"Other second line\": \"CHANGE_NOT_REPORTED\"})\n",
118
+ "df_regimen[\"regimen\"] = pd.Categorical(df_regimen[\"regimen\"], categories=list(df_regimen.query(\"regimen.notna()\").regimen.unique()), ordered=False)\n",
119
+ "df_regimen = df_regimen.sort_values([\"subject_identifier\", \"visit_datetime\"])\n",
120
+ "df_regimen = df_regimen.reset_index(drop=True)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "id": "8",
127
+ "metadata": {},
128
+ "outputs": [],
129
+ "source": [
130
+ "df_pivot = df_regimen.pivot_table(values=\"visit_datetime\", columns=\"regimen\", index=\"subject_identifier\", observed=True)\n",
131
+ "df_pivot = df_pivot.reset_index()"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": null,
137
+ "id": "9",
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "subject_identifier = \"105-20-0050-0\"\n",
142
+ "df_pivot[df_pivot.subject_identifier==subject_identifier].melt().query(\"value.notna() and regimen!='subject_identifier'\").sort_values(\"value\", ascending=False)"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "id": "10",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "df_melt = df_pivot.melt(id_vars=[\"subject_identifier\"]).query(\"value.notna()\")"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": null,
158
+ "id": "11",
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "df_melt[\"max_date\"] = df_melt.groupby(\"subject_identifier\")[\"value\"].transform(\"max\")\n",
163
+ "df_melt[\"current_regimen\"] = df_melt[df_melt.value==df_melt.max_date][\"regimen\"]"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": null,
169
+ "id": "12",
170
+ "metadata": {},
171
+ "outputs": [],
172
+ "source": [
173
+ "df_current_regimens = df_melt.query(\"current_regimen.notna()\")[[\"subject_identifier\", \"max_date\", \"current_regimen\"]].copy()"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": null,
179
+ "id": "13",
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "df_visit = get_subject_visit(model=\"meta_subject.subjectvisit\")\n",
184
+ "df_visit = df_visit[df_visit.visit_code==1000.0].copy()"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": null,
190
+ "id": "14",
191
+ "metadata": {},
192
+ "outputs": [],
193
+ "source": [
194
+ "df_appointment = get_appointment_df()\n",
195
+ "df_appointment_next = (\n",
196
+ " df_appointment\n",
197
+ " .groupby(by=[\"subject_identifier\", \"next_appt_datetime\", \"next_visit_code\"])\n",
198
+ " .size()\n",
199
+ " .copy()\n",
200
+ " .reset_index()\n",
201
+ ")"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "id": "15",
208
+ "metadata": {},
209
+ "outputs": [],
210
+ "source": [
211
+ "df_appointment_last = (\n",
212
+ " df_appointment[df_appointment.appt_datetime<cutoff_datetime][[\"subject_identifier\", \"appt_datetime\", \"visit_code\"]]\n",
213
+ " .sort_values([\"subject_identifier\", \"appt_datetime\", \"visit_code\"])\n",
214
+ " .groupby(by=[\"subject_identifier\"])\n",
215
+ " .agg([\"last\"])\n",
216
+ " .reset_index() )\n",
217
+ "df_appointment_last.columns = [\"_\".join(col).strip() for col in df_appointment_last.columns.values]\n",
218
+ "df_appointment_last = (\n",
219
+ " df_appointment_last\n",
220
+ " .rename(columns={\n",
221
+ " \"subject_identifier_\":\"subject_identifier\",\n",
222
+ " \"appt_datetime_last\":\"last_appt_datetime\",\n",
223
+ " \"visit_code_last\":\"last_visit_code\"\n",
224
+ " }\n",
225
+ " )\n",
226
+ ")"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "id": "16",
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "df = df_current_regimens.merge(df_visit[[ \"subject_identifier\", \"baseline_datetime\", \"endline_visit_datetime\", \"endline_visit_code\"]], on=\"subject_identifier\", how=\"left\")\n",
237
+ "df = df.reset_index(drop=True)\n",
238
+ "df[\"changed\"] = False\n",
239
+ "df.loc[df.max_date != df.baseline_datetime, \"changed\"] = True"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "id": "17",
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "df = df.merge(df_appointment_next[[\"subject_identifier\", \"next_appt_datetime\", \"next_visit_code\"]], on=\"subject_identifier\", how=\"left\")\n",
250
+ "df = df.merge(df_appointment_last[[\"subject_identifier\", \"last_appt_datetime\", \"last_visit_code\"]], on=\"subject_identifier\", how=\"left\")"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "id": "18",
257
+ "metadata": {},
258
+ "outputs": [],
259
+ "source": [
260
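+ "# time from the last attended visit (endline_visit_datetime) to the last scheduled appointment before cutoff_datetime (last_appt_datetime); negative deltas are floored to 0\n",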
261
+ "df[\"remaining_delta_from_last_seen\"] = df.last_appt_datetime - df.endline_visit_datetime\n",
262
+ "df[\"remaining_delta_from_last_seen\"] = df[\"remaining_delta_from_last_seen\"].apply(lambda x: 0 if x.total_seconds()<0 else x)\n",
263
+ "df[\"remaining_delta_from_last_seen\"] = pd.to_timedelta(df[\"remaining_delta_from_last_seen\"])\n",
264
+ "df[\"remaining_days_last_seen_to_final\"] = df[\"remaining_delta_from_last_seen\"].dt.days\n",
265
+ "\n",
266
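+ "# time from now to the last scheduled appointment before cutoff_datetime; only computed for rows where remaining_days_last_seen_to_final > 0, negative deltas are floored to 0\n",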
267
+ "df[\"remaining_delta_from_now\"] = 0.0\n",
268
+ "df[\"remaining_delta_from_now\"] = df[df.remaining_days_last_seen_to_final>0].last_appt_datetime - datetime.now()\n",
269
+ "df[\"remaining_delta_from_now\"] = df[\"remaining_delta_from_now\"].apply(lambda x: 0 if x.total_seconds()<0 else x)\n",
270
+ "df[\"remaining_delta_from_now\"] = pd.to_timedelta(df[\"remaining_delta_from_now\"])\n",
271
+ "df[\"remaining_days_now_to_final\"] = df[\"remaining_delta_from_now\"].dt.days\n",
272
+ "\n",
273
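+ "# time from the next appointment to the last scheduled appointment before cutoff_datetime; only computed for rows where remaining_days_last_seen_to_final > 0, negative deltas are floored to 0\n",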
274
+ "df[\"remaining_delta_from_next\"] = 0.0\n",
275
+ "df[\"remaining_delta_from_next\"] = df[df.remaining_days_last_seen_to_final>0].last_appt_datetime - df[df.remaining_days_last_seen_to_final>0].next_appt_datetime\n",
276
+ "df[\"remaining_delta_from_next\"] = df[\"remaining_delta_from_next\"].apply(lambda x: 0 if x.total_seconds()<0 else x)\n",
277
+ "df[\"remaining_delta_from_next\"] = pd.to_timedelta(df[\"remaining_delta_from_next\"])\n",
278
+ "df[\"remaining_days_next_to_final\"] = df[\"remaining_delta_from_next\"].dt.days"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": null,
284
+ "id": "19",
285
+ "metadata": {},
286
+ "outputs": [],
287
+ "source": [
288
+ "df_final = (\n",
289
+ " df\n",
290
+ " .rename(columns={\n",
291
+ " \"max_date\": \"current_regimen_date\",\n",
292
+ " \"endline_visit_code\": \"last_attended_visit_code\",\n",
293
+ " \"endline_visit_datetime\": \"last_attended_visit_datetime\",\n",
294
+ " })\n",
295
+ " .copy()\n",
296
+ ")\n",
297
+ "df_final = df_final[[\n",
298
+ " \"subject_identifier\",\n",
299
+ " \"current_regimen\",\n",
300
+ " \"current_regimen_date\",\n",
301
+ " \"changed\",\n",
302
+ " \"baseline_datetime\",\n",
303
+ " \"last_attended_visit_code\",\n",
304
+ " \"last_attended_visit_datetime\",\n",
305
+ " \"next_visit_code\",\n",
306
+ " \"next_appt_datetime\",\n",
307
+ " \"last_visit_code\",\n",
308
+ " \"last_appt_datetime\",\n",
309
+ " \"remaining_days_last_seen_to_final\",\n",
310
+ " \"remaining_days_now_to_final\",\n",
311
+ " \"remaining_days_next_to_final\",\n",
312
+ "]].copy()\n",
313
+ "\n",
314
+ "df_final = (\n",
315
+ " df_final\n",
316
+ " .sort_values(\"subject_identifier\")\n",
317
+ " .reset_index(drop=True)\n",
318
+ ")\n",
319
+ "df_final[\"remaining_days_last_seen_to_final\"] = df_final[\"remaining_days_last_seen_to_final\"].astype(\"float64\").fillna(0)\n",
320
+ "df_final[\"remaining_days_now_to_final\"] = df_final[\"remaining_days_now_to_final\"].astype(\"float64\").fillna(0)\n",
321
+ "df_final[\"remaining_days_next_to_final\"] = df_final[\"remaining_days_next_to_final\"].astype(\"float64\").fillna(0)\n",
322
+ "df_final"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "id": "20",
329
+ "metadata": {},
330
+ "outputs": [],
331
+ "source": [
332
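+ "# days of medication needed from now until the final scheduled appointment, summed per current regimen, with the number of subjects on each regimen\n",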
333
+ "df_summary1 = (pd.merge(\n",
334
+ " df_final.groupby(by=[\"current_regimen\"]).remaining_days_now_to_final.sum(),\n",
335
+ " df_final.groupby(by=[\"current_regimen\"]).subject_identifier.count(), on=\"current_regimen\")\n",
336
+ " .rename(columns={\n",
337
+ " \"remaining_days_now_to_final\": \"days_medication_needed\",\n",
338
+ " \"subject_identifier\": \"subjects\"\n",
339
+ " })\n",
340
+ " .sort_values(\"days_medication_needed\", ascending=False)\n",
341
+ " .reset_index()\n",
342
+ ")\n",
343
+ "df_summary1"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "id": "21",
350
+ "metadata": {},
351
+ "outputs": [],
352
+ "source": [
353
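+ "# days of medication needed from the last attended visit until the final scheduled appointment, summed per current regimen, with the number of subjects on each regimen\n",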
354
+ "df_summary2 = (pd.merge(\n",
355
+ " df_final.groupby(by=[\"current_regimen\"]).remaining_days_last_seen_to_final.sum(),\n",
356
+ " df_final.groupby(by=[\"current_regimen\"]).subject_identifier.count(), on=\"current_regimen\")\n",
357
+ " .rename(columns={\n",
358
+ " \"remaining_days_last_seen_to_final\": \"days_medication_needed\",\n",
359
+ " \"subject_identifier\": \"subjects\"\n",
360
+ " })\n",
361
+ " .sort_values(\"days_medication_needed\", ascending=False)\n",
362
+ " .reset_index()\n",
363
+ ")\n",
364
+ "df_summary2"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": null,
370
+ "id": "22",
371
+ "metadata": {},
372
+ "outputs": [],
373
+ "source": [
374
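+ "# days of medication needed from the next appointment until the final scheduled appointment, summed per current regimen, with the number of subjects on each regimen\n",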
375
+ "df_summary3 = (pd.merge(\n",
376
+ " df_final.groupby(by=[\"current_regimen\"]).remaining_days_next_to_final.sum(),\n",
377
+ " df_final.groupby(by=[\"current_regimen\"]).subject_identifier.count(), on=\"current_regimen\")\n",
378
+ " .rename(columns={\n",
379
+ " \"remaining_days_next_to_final\": \"days_medication_needed\",\n",
380
+ " \"subject_identifier\": \"subjects\"\n",
381
+ " })\n",
382
+ " .sort_values(\"days_medication_needed\", ascending=False)\n",
383
+ " .reset_index()\n",
384
+ ")\n",
385
+ "\n",
386
+ "df_summary3"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": null,
392
+ "id": "23",
393
+ "metadata": {},
394
+ "outputs": [],
395
+ "source": [
396
+ "with pd.ExcelWriter(\n",
397
+ " analysis_folder / \"hiv_medication.xlsx\",\n",
398
+ " date_format=\"YYYY-MM-DD\",\n",
399
+ " datetime_format=\"YYYY-MM-DD HH:MM:SS\"\n",
400
+ ") as writer:\n",
401
+ " df_final.to_excel(writer, sheet_name=\"subjects\", index=False)\n",
402
+ " df_summary1.to_excel(writer, sheet_name=\"now to final\", index=False)\n",
403
+ " df_summary2.to_excel(writer, sheet_name=\"last seen to final\", index=False)\n",
404
+ " df_summary3.to_excel(writer, sheet_name=\"next to final\", index=False)\n"
405
+ ]
406
+ }
407
+ ],
408
+ "metadata": {
409
+ "kernelspec": {
410
+ "display_name": "Python 3",
411
+ "language": "python",
412
+ "name": "python3"
413
+ },
414
+ "language_info": {
415
+ "codemirror_mode": {
416
+ "name": "ipython",
417
+ "version": 2
418
+ },
419
+ "file_extension": ".py",
420
+ "mimetype": "text/x-python",
421
+ "name": "python",
422
+ "nbconvert_exporter": "python",
423
+ "pygments_lexer": "ipython2",
424
+ "version": "2.7.6"
425
+ }
426
+ },
427
+ "nbformat": 4,
428
+ "nbformat_minor": 5
429
+ }
@@ -0,0 +1,232 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%%capture\n",
11
+ "import os\n",
12
+ "from pathlib import Path\n",
13
+ "import pandas as pd\n",
14
+ "from dj_notebook import activate\n",
15
+ "\n",
16
+ "env_file = os.environ[\"META_ENV\"]\n",
17
+ "reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
18
+ "analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
19
+ "pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
20
+ "plus = activate(dotenv_file=env_file)\n",
21
+ "pd.set_option('future.no_silent_downcasting', True)"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": null,
27
+ "id": "1",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "\n",
32
+ "import pdfkit\n",
33
+ "from datetime import date\n",
34
+ "from edc_pdutils.dataframes import get_subject_visit\n",
35
+ "from meta_analytics.dataframes import get_eos_df\n",
36
+ "from meta_analytics.dataframes import GlucoseEndpointsByDate\n",
37
+ "from scipy.stats import chi2\n",
38
+ "from meta_analytics.utils import df_as_great_table\n",
39
+ "from great_tables import md\n"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": null,
45
+ "id": "2",
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "html_data = []\n",
50
+ "cutoff_date = date(2025,3, 31)\n"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "id": "3",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "df_visit = get_subject_visit(\"meta_subject.subjectvisit\")\n",
61
+ "df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]\n"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": null,
67
+ "id": "4",
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "cls = GlucoseEndpointsByDate()\n",
72
+ "cls.run()\n",
73
+ "df_endpoint = cls.endpoint_only_df.copy()"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "5",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "def get_df_main(df_visit:pd.DataFrame, lower_days:float|None=None, upper_days:float|None=None):\n",
84
+ " if not lower_days:\n",
85
+ " lower_days = -1\n",
86
+ " cutoff_datetime = df_visit.query(\"@lower_days<followup_days<=@upper_days\").visit_datetime.max()\n",
87
+ " # subjects whose off-study reason is in this list are excluded\n",
88
+ " offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
89
+ "\n",
90
+ " df_eos = get_eos_df()\n",
91
+ " df_eos_excluded = (\n",
92
+ " df_eos\n",
93
+ " .query(\"followup_days<@lower_days and followup_days<=@upper_days and offstudy_reason.isin(@offstudy_reasons)\")\n",
94
+ " .copy()\n",
95
+ " .reset_index()\n",
96
+ " )\n",
97
+ " df_visit_final = (\n",
98
+ " df_visit.query(\"@lower_days<followup_days<=@upper_days and reason!='missed'\")\n",
99
+ " .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", suffixes=(\"\", \"_y\"), indicator=True)\n",
100
+ " .query(\"_merge=='left_only'\")\n",
101
+ " .drop(columns=[\"_merge\"])\n",
102
+ " )\n",
103
+ " df_main = (\n",
104
+ " df_visit_final\n",
105
+ " .groupby(by=[\"subject_identifier\"])[[\"baseline_datetime\", \"visit_datetime\", \"followup_days\"]]\n",
106
+ " .max()\n",
107
+ " .reset_index()\n",
108
+ " )\n",
109
+ "\n",
110
+ " df_main = (\n",
111
+ " df_main\n",
112
+ " .merge(\n",
113
+ " df_endpoint.query(\"days_to_endpoint>@lower_days\")[[\"subject_identifier\", \"endpoint_label\", \"endpoint_type\", \"days_to_endpoint\"]],\n",
114
+ " how=\"left\",\n",
115
+ " on=[\"subject_identifier\"])\n",
116
+ " .reset_index(drop=True)\n",
117
+ " )\n",
118
+ " if lower_days>=365.25:\n",
119
+ " df_main[\"followup_days\"] = df_main[\"followup_days\"] - lower_days\n",
120
+ " df_main[\"followup_years\"] = df_main[\"followup_days\"]/365.25\n",
121
+ " return df_main, len(df_main), len(df_main.query(\"@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()\"))\n",
122
+ "\n",
123
+ "def get_rate_and_ci(events, person_years_total):\n",
124
+ " lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000\n",
125
+ " upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000\n",
126
+ " return events/person_years_total*1000, lower_ci, upper_ci\n",
127
+ "\n",
128
+ "def get_incidence_data(term:str, lower_days:float, upper_days:float):\n",
129
+ " data = {}\n",
130
+ " df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)\n",
131
+ " person_years_total = df_main.followup_years.sum()\n",
132
+ " data.update({term:[person_years_total, subjects, events, *get_rate_and_ci(events, person_years_total)]})\n",
133
+ " return data"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": null,
139
+ "id": "6",
140
+ "metadata": {},
141
+ "outputs": [],
142
+ "source": [
143
+ "incidence_data = {}\n",
144
+ "incidence_data.update(get_incidence_data(\"total\", lower_days=0, upper_days=10000))\n",
145
+ "incidence_data.update(get_incidence_data(\"0-1 years\", lower_days=0, upper_days=365.25))\n",
146
+ "incidence_data.update(get_incidence_data(\"1-2 years\", lower_days=365.25, upper_days=2*365.25))\n",
147
+ "incidence_data.update(get_incidence_data(\"2-3 years\", lower_days=2*365.25, upper_days=3*365.25))\n",
148
+ "incidence_data.update(get_incidence_data(\"3+ years\", lower_days=3*365.25, upper_days=10*365.25))"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": null,
154
+ "id": "7",
155
+ "metadata": {},
156
+ "outputs": [],
157
+ "source": [
158
+ "data = dict(label=[], person_years=[], failures=[], rate=[], lower_ci=[], upper_ci=[])\n",
159
+ "for k in incidence_data:\n",
160
+ " data[\"label\"].append(k)\n",
161
+ "\n",
162
+ "for v in incidence_data.values():\n",
163
+ " data[\"person_years\"].append(v[0])\n",
164
+ " data[\"failures\"].append(v[2])\n",
165
+ " data[\"rate\"].append(v[3])\n",
166
+ " data[\"lower_ci\"].append(v[4])\n",
167
+ " data[\"upper_ci\"].append(v[5])\n",
168
+ "\n",
169
+ "df = pd.DataFrame(data=data)"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": null,
175
+ "id": "8",
176
+ "metadata": {},
177
+ "outputs": [],
178
+ "source": [
179
+ "gt = df_as_great_table(\n",
180
+ " df,\n",
181
+ " title=\"Table 9: Incident Rate per 1000 person years\",\n",
182
+ " subtitle=md(\"using randomisation to diabetes/last seen\"),\n",
183
+ ")\n",
184
+ "gt = gt.fmt_number(columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"], decimals=2)\n",
185
+ "gt = (gt\n",
186
+ " .cols_label({\"label\": \"Label\", \"person_years\": \"Person years\", \"failures\": \"Failures\", \"rate\": \"Rate\", \"lower_ci\": \"Lower\", \"upper_ci\": \"Upper\"})\n",
187
+ " .cols_align(align=\"left\", columns=[\"label\"])\n",
188
+ " .cols_align(align=\"center\", columns=[\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"])\n",
189
+ " .tab_spanner(\n",
190
+ " label=\"95%CI\",\n",
191
+ " columns=[\"lower_ci\", \"upper_ci\"],\n",
192
+ " )\n",
193
+ ")\n",
194
+ "gt.show()\n",
195
+ "html_data.append(gt.as_raw_html())\n"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": null,
201
+ "id": "9",
202
+ "metadata": {},
203
+ "outputs": [],
204
+ "source": [
205
+ "raw_html = \"</BR>\".join(html_data)\n",
206
+ "raw_html = '<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n<meta charset=\"utf-8\"/>\\n</head>\\n<body>\\n' + raw_html + '\\n</body>\\n</html>\\n'\n",
207
+ "pdfkit.from_string(raw_html, str(analysis_folder / \"incident_rate.pdf\"))\n"
208
+ ]
209
+ }
210
+ ],
211
+ "metadata": {
212
+ "kernelspec": {
213
+ "display_name": "Python 3 (ipykernel)",
214
+ "language": "python",
215
+ "name": "python3"
216
+ },
217
+ "language_info": {
218
+ "codemirror_mode": {
219
+ "name": "ipython",
220
+ "version": 3
221
+ },
222
+ "file_extension": ".py",
223
+ "mimetype": "text/x-python",
224
+ "name": "python",
225
+ "nbconvert_exporter": "python",
226
+ "pygments_lexer": "ipython3",
227
+ "version": "3.12.4"
228
+ }
229
+ },
230
+ "nbformat": 4,
231
+ "nbformat_minor": 5
232
+ }