meta-edc 1.0.7__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. meta_ae/action_items.py +10 -2
  2. meta_ae/baker_recipes.py +1 -2
  3. meta_ae/tests/tests/test_actions.py +1 -2
  4. meta_analytics/README.rst +1 -2
  5. meta_analytics/notebooks/anu.ipynb +95 -0
  6. meta_analytics/notebooks/appointment_planning.ipynb +329 -0
  7. meta_analytics/notebooks/arvs.ipynb +103 -0
  8. meta_analytics/notebooks/cleaning/consent_v1_ext.ipynb +227 -0
  9. meta_analytics/notebooks/cleaning/offschedule_eos.ipynb +353 -0
  10. meta_analytics/notebooks/dsmc/renal_dysfunction.ipynb +435 -0
  11. meta_analytics/notebooks/endpoints/meta_endpoints_by_date.ipynb +664 -0
  12. meta_analytics/notebooks/followup_examination.ipynb +141 -0
  13. meta_analytics/notebooks/hba1c.ipynb +136 -0
  14. meta_analytics/notebooks/hiv_regimens.ipynb +122 -118
  15. meta_analytics/notebooks/incidence.ipynb +232 -0
  16. meta_analytics/notebooks/liver.ipynb +389 -0
  17. meta_analytics/notebooks/magreth.ipynb +645 -0
  18. meta_analytics/notebooks/monitoring_report.ipynb +721 -448
  19. meta_analytics/notebooks/pharmacy.ipynb +405 -306
  20. meta_analytics/notebooks/pharmacy_stock_202410.ipynb +306 -0
  21. meta_analytics/notebooks/steering.ipynb +61 -0
  22. meta_analytics/notebooks/undiagnosed/meta3_screening_consort_chart.ipynb +1176 -0
  23. meta_analytics/notebooks/undiagnosed/meta3_screening_undiagnosed.ipynb +519 -0
  24. meta_analytics/notebooks/undiagnosed/meta_screening_table2.ipynb +964 -0
  25. meta_analytics/notebooks/undiagnosed/screen_undiagnosed_or.ipynb +296 -0
  26. meta_analytics/notebooks/undiagnosed/screening.ipynb +273 -0
  27. meta_analytics/notebooks/undiagnosed/screening2.ipynb +958 -0
  28. meta_analytics/notebooks/undiagnosed/screening_undiagnosed_20241002.ipynb +958 -0
  29. meta_analytics/notebooks/ven.ipynb +191 -0
  30. meta_analytics/notebooks/vitals.ipynb +263 -0
  31. meta_edc/settings/debug.py +3 -2
  32. meta_edc/urls.py +1 -0
  33. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/METADATA +3 -3
  34. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/RECORD +62 -35
  35. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/WHEEL +1 -1
  36. meta_labs/reportables.py +14 -11
  37. meta_labs/tests/test_reportables.py +33 -12
  38. meta_pharmacy/notebooks/pharmacy.ipynb +41 -0
  39. meta_prn/admin/offschedule_pregnancy_admin.py +3 -3
  40. meta_prn/admin/onschedule_dm_referral_admin.py +5 -5
  41. meta_prn/form_validators/end_of_study.py +2 -2
  42. meta_prn/migrations/0063_historicaloffstudymedication_singleton_field_and_more.py +37 -0
  43. meta_prn/migrations/0064_auto_20250602_2143.py +18 -0
  44. meta_prn/models/end_of_study.py +2 -0
  45. meta_prn/models/off_study_medication.py +2 -0
  46. meta_reports/admin/last_imp_refill_admin.py +3 -2
  47. meta_screening/eligibility/eligibility_part_three/base_eligibility_part_three.py +59 -47
  48. meta_screening/form_validators/screening_part_three.py +6 -1
  49. meta_screening/tests/meta_test_case_mixin.py +3 -0
  50. meta_screening/tests/tests/test_forms.py +9 -2
  51. meta_screening/tests/tests/test_screening_part_three.py +11 -14
  52. meta_subject/action_items.py +2 -3
  53. meta_subject/choices.py +2 -1
  54. meta_subject/form_validators/delivery_form_validator.py +1 -0
  55. meta_subject/forms/blood_results/blood_results_rft_form.py +60 -3
  56. meta_subject/forms/delivery_form.py +2 -0
  57. meta_subject/migrations/0223_bloodresultsfbc_errors_bloodresultsgludummy_errors_and_more.py +83 -0
  58. meta_subject/migrations/0224_bloodresultsfbc_abnormal_summary_and_more.py +153 -0
  59. meta_subject/tests/tests/test_egfr.py +5 -5
  60. meta_analytics/dataframes/enrolled/__init__.py +0 -0
  61. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/licenses/AUTHORS.rst +0 -0
  62. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/licenses/LICENSE +0 -0
  63. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,664 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%%capture\n",
11
+ "import os\n",
12
+ "from pathlib import Path\n",
13
+ "import pandas as pd\n",
14
+ "from dj_notebook import activate\n",
15
+ "env_file = os.environ[\"META_ENV\"]\n",
16
+ "reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
17
+ "analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
18
+ "pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
19
+ "plus = activate(dotenv_file=env_file)\n",
20
+ "pd.set_option('future.no_silent_downcasting', True)"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "1",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "import numpy as np\n",
31
+ "import io\n",
32
+ "import msoffcrypto\n",
33
+ "import mempass\n",
34
+ "\n",
35
+ "from datetime import datetime\n",
36
+ "from edc_appointment.constants import ONTIME_APPT, NEW_APPT, CANCELLED_APPT, MISSED_APPT\n",
37
+ "from edc_pdutils.dataframes import get_crf, get_subject_visit\n",
38
+ "from tabulate import tabulate\n",
39
+ "from meta_analytics.dataframes import get_glucose_fbg_ogtt_df, get_glucose_fbg_df\n",
40
+ "from meta_analytics.dataframes import GlucoseEndpointsByDate\n",
41
+ "from meta_analytics.dataframes import get_glucose_df\n",
42
+ "from meta_analytics.dataframes import EndpointByDate"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": null,
48
+ "id": "2",
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "cls = GlucoseEndpointsByDate()"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "id": "3",
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "cls.run()"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "4",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "cls.endpoint_only_df.endpoint_label.value_counts()"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "id": "5",
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "cls.endpoint_only_df"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": null,
88
+ "id": "6",
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
+ "df_glu = get_glucose_df()\n",
93
+ "df_glu.query(\"subject_identifier=='105-40-0379-1'\")\n",
94
+ "\n",
95
+ "ep = EndpointByDate(subject_df=df_glu.query(\"subject_identifier=='105-40-0379-1'\").copy().sort_values(by=[\"visit_code\"]).reset_index(drop=True), fbg_threshhold=7.0, ogtt_threshhold=11.1)\n",
96
+ "ep.evaluate()\n",
97
+ "ep.subject_df"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": null,
103
+ "id": "7",
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "df_katie = pd.read_csv(analysis_folder / \"katie_endpoint_subjects.csv\")"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": null,
113
+ "id": "8",
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "cls.endpoint_only_df[~cls.endpoint_only_df.subject_identifier.isin(df_katie.subject_identifier)]"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": null,
123
+ "id": "9",
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": [
127
+ "df_katie[~df_katie.subject_identifier.isin(cls.endpoint_only_df.subject_identifier)]\n"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": null,
133
+ "id": "10",
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "df_katie"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "id": "11",
144
+ "metadata": {},
145
+ "outputs": [],
146
+ "source": [
147
+ "df_glu = get_glucose_df()\n",
148
+ "df_glu.query(\"subject_identifier=='105-40-0370-0'\")\n"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": null,
154
+ "id": "12",
155
+ "metadata": {},
156
+ "outputs": [],
157
+ "source": [
158
+ "df_visit = get_subject_visit(model=\"meta_subject.subjectvisit\")"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "id": "13",
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "# Table 1: Visits completed to date\n",
169
+ "df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
170
+ "df_tbl1.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
171
+ "df1 = df_tbl1.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
172
+ "df1.columns.name = None\n",
173
+ "df1.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
174
+ "df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)\n",
175
+ "df1.fillna(0, inplace=True)\n",
176
+ "df_attended = df1.copy()\n",
177
+ "df_attended"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "id": "14",
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
+ "# Table 2: Visits missed to date, as % of (visits attended + visits missed)\n",
188
+ "df_tbl12 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
189
+ "df_tbl12.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
190
+ "df1 = df_tbl12.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
191
+ "df1.columns.name = None\n",
192
+ "df1.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
193
+ "df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)\n",
194
+ "df1.fillna(0, inplace=True)\n",
195
+ "df_missed = df1.copy()\n",
196
+ "\n",
197
+ "df_attended.set_index([\"visit_code\"], inplace=True)\n",
198
+ "df_missed.set_index([\"visit_code\"], inplace=True)\n",
199
+ "\n",
200
+ "attended_and_missed = df_attended + df_missed\n",
201
+ "attended_and_missed.fillna(0, inplace=True)\n",
202
+ "attended_and_missed.reset_index(inplace=True)\n",
203
+ "attended_and_missed.set_index([\"visit_code\"], inplace=True)\n",
204
+ "attended_and_missed_perc = df_missed/attended_and_missed\n",
205
+ "attended_and_missed_perc.fillna(0, inplace=True)\n",
206
+ "attended_and_missed_perc.reset_index(inplace=True)\n",
207
+ "attended_and_missed_perc.set_index([\"visit_code\"], inplace=True)\n",
208
+ "\n",
209
+ "df_result = df_missed.merge(attended_and_missed_perc, on=[\"visit_code\"], suffixes=(\"\", \"_perc\"))\n",
210
+ "for col in [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
211
+ " col_perc = f\"{col}_perc\"\n",
212
+ " df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc]*100:.2f})\", axis=1)\n",
213
+ "df_result.reset_index(inplace=True)\n",
214
+ "df_result.sort_values(by=[\"visit_code\"], ascending=True, inplace=True)\n",
215
+ "df_result[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]]"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": null,
221
+ "id": "15",
222
+ "metadata": {},
223
+ "outputs": [],
224
+ "source": [
225
+ "\n",
226
+ "# Table 3: OGTT and FBG at 12-month visit"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "id": "16",
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "def get_row_df(row_df:pd.DataFrame, label:str)->pd.DataFrame:\n",
237
+ " row_df = row_df.groupby(by=[\"site_id\"]).site_id.count().to_frame(name=\"n\")\n",
238
+ " row_df[\"label\"] = label\n",
239
+ " row_df = row_df.reset_index()\n",
240
+ " row_df = row_df.pivot(index=\"label\", values=\"n\", columns=\"site_id\").reset_index()\n",
241
+ " row_df.columns.name = \"\"\n",
242
+ " all_sites = [10, 20, 30, 40, 60]\n",
243
+ " for site in all_sites:\n",
244
+ " if site not in row_df.columns:\n",
245
+ " row_df[site] = None\n",
246
+ " row_df = row_df.reset_index(drop=True)\n",
247
+ " return row_df\n",
248
+ "\n",
249
+ "\n",
250
+ "def get_table_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:\n",
251
+ " df_month = df_source[df_source.visit_code==visit_code].copy()\n",
252
+ " \n",
253
+ " row_df = df_month.copy()\n",
254
+ " table_df = get_row_df(row_df, \"Total (n)\")\n",
255
+ " \n",
256
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value<6.1)].copy()\n",
257
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT <7.8; FBG <6.1\")])\n",
258
+ " \n",
259
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
260
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT <7.8; FBG >=6.1 <7.0\")])\n",
261
+ " \n",
262
+ " row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=7.0)].copy()\n",
263
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT <7.8; FBG >=7.0\")])\n",
264
+ " \n",
265
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value<6.1)].copy()\n",
266
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT ≥7.8 to <11.1; FBG <6.1\")])\n",
267
+ " \n",
268
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
269
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0\")])\n",
270
+ " \n",
271
+ " row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=7.0)].copy()\n",
272
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT ≥7.8 to <11.1; FBG >=7.0\")])\n",
273
+ " \n",
274
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value<6.1)].copy()\n",
275
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT ≥11.1; FBG <6.1\")])\n",
276
+ " \n",
277
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
278
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT ≥11.1; FBG >=6.1 <7.0\")])\n",
279
+ " \n",
280
+ " row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=7.0)].copy()\n",
281
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"2-hour OGTT ≥11.1; FBG >=7.0\")])\n",
282
+ "\n",
283
+ " row_df = df_month[(df_month.ogtt_value.isna())].copy()\n",
284
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"Missing OGTT\")])\n",
285
+ " return table_df\n",
286
+ "\n"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "id": "17",
293
+ "metadata": {},
294
+ "outputs": [],
295
+ "source": [
296
+ "def format_table_df(tbl_df, rename_columns:bool|None=None, add_totals:bool|None=None):\n",
297
+ " add_totals = True if add_totals is None else add_totals\n",
298
+ " tbl_df = tbl_df.fillna(0.0)\n",
299
+ " tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
300
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
301
+ "\n",
302
+ " if add_totals:\n",
303
+ " df_last = tbl_df[1:].sum().to_frame()\n",
304
+ " df_last.loc[\"label\"] = np.nan\n",
305
+ " df_last = df_last.reset_index()\n",
306
+ " df_last.columns = [\"label\", \"value\"]\n",
307
+ " df_last = df_last.pivot_table(columns=\"label\", values=\"value\").reset_index(drop=True)\n",
308
+ " df_last.columns.name = \"\"\n",
309
+ " df_last[\"label\"] = \"totals\"\n",
310
+ "\n",
311
+ " tbl_df = pd.concat([tbl_df, df_last])\n",
312
+ " tbl_df = tbl_df.reset_index(drop=True)\n",
313
+ "\n",
314
+ " tbl_df.columns = [\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"Total\"]\n",
315
+ "\n",
316
+ " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"Total\"]:\n",
317
+ " tbl_df[f\"{site}_perc\"] = (tbl_df[site]/tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site]>0 else 0\n",
318
+ " tbl_df[f\"{site}_perc_str\"] = tbl_df[f\"{site}_perc\"].map('{:.1f}'.format)\n",
319
+ "\n",
320
+ "\n",
321
+ " for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"Total\"]:\n",
322
+ " tbl_df[f\"{site}_str\"] = tbl_df[[f\"{site}\", f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
323
+ " tbl_df[f\"{site}_str\"] = tbl_df[f\"{site}_str\"] + \")\"\n",
324
+ "\n",
325
+ " cols = [\"label\", *[f\"{site}_str\" for site in [\"10\", \"20\", \"30\", \"40\", \"60\", \"Total\"]]]\n",
326
+ " tbl_df1 = tbl_df[cols]\n",
327
+ " tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", \"10\", \"20\", \"30\", \"40\", \"60\", \"Total\"]].to_list()\n",
328
+ " if rename_columns:\n",
329
+ " tbl_df1 = tbl_df1.rename(columns={\"10_str\": \"Hindu Mandal\", \"20_str\": \"Amana\", \"30_str\": \"Temeke\", \"40_str\": \"Mwananyamala\", \"60_str\": \"Mnazi Moja\", \"Total_str\": \"Total\"})\n",
330
+ " return tbl_df1\n"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": null,
336
+ "id": "18",
337
+ "metadata": {},
338
+ "outputs": [],
339
+ "source": [
340
+ "df_glucose = get_glucose_fbg_ogtt_df()\n",
341
+ "df_glucose_fbg = get_glucose_fbg_df()\n",
342
+ "df_glucose = pd.concat([df_glucose, df_glucose_fbg])"
343
+ ]
344
+ },
345
+ {
346
+ "cell_type": "code",
347
+ "execution_count": null,
348
+ "id": "19",
349
+ "metadata": {},
350
+ "outputs": [],
351
+ "source": [
352
+ "df_table3 = get_table_df(df_glucose, 1120.0)\n",
353
+ "df_table3 = format_table_df(df_table3, rename_columns=True)\n",
354
+ "df_table3"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": null,
360
+ "id": "20",
361
+ "metadata": {},
362
+ "outputs": [],
363
+ "source": [
364
+ "df_table4 = get_table_df(df_glucose, 1240.0)\n",
365
+ "df_table4 = format_table_df(df_table4, rename_columns=True)\n",
366
+ "df_table4"
367
+ ]
368
+ },
369
+ {
370
+ "cell_type": "code",
371
+ "execution_count": null,
372
+ "id": "21",
373
+ "metadata": {},
374
+ "outputs": [],
375
+ "source": [
376
+ "df_table5 = get_table_df(df_glucose, 1360.0)\n",
377
+ "df_table5 = format_table_df(df_table5, rename_columns=True)\n",
378
+ "df_table5\n"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "id": "22",
385
+ "metadata": {},
386
+ "outputs": [],
387
+ "source": [
388
+ "# df_table6 = format_table_df(get_table_df(df_glucose, 1480.0), rename_columns=True)\n",
389
+ "# df_table6"
390
+ ]
391
+ },
392
+ {
393
+ "cell_type": "code",
394
+ "execution_count": null,
395
+ "id": "23",
396
+ "metadata": {},
397
+ "outputs": [],
398
+ "source": [
399
+ "row_df = df_glucose[df_glucose.ogtt_value>=11.1].copy()\n",
400
+ "table_df = get_row_df(row_df, \"Total (n)\")\n",
401
+ "df_table6 = format_table_df(table_df, rename_columns=True)\n",
402
+ "df_table6[:1]"
403
+ ]
404
+ },
405
+ {
406
+ "cell_type": "code",
407
+ "execution_count": null,
408
+ "id": "24",
409
+ "metadata": {},
410
+ "outputs": [],
411
+ "source": [
412
+ "def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:\n",
413
+ " df_month = df_source[(df_source.visit_code>=visit_code) & (df_source.visit_code<=visit_code + 0.9)].copy()\n",
414
+ "\n",
415
+ " row_df = df_month.copy()\n",
416
+ " table_df = get_row_df(row_df, \"Total (n)\")\n",
417
+ "\n",
418
+ " row_df = df_month[(df_month.fbg_value<6.1)].copy()\n",
419
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG <6.1\")])\n",
420
+ "\n",
421
+ " row_df = df_month[(df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()\n",
422
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG >=6.1 <7.0\")])\n",
423
+ "\n",
424
+ " row_df = df_month[(df_month.fbg_value>=7.0)].copy()\n",
425
+ " table_df = pd.concat([table_df, get_row_df(row_df, \"FBG >=7.0\")])\n",
426
+ " return table_df\n"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": null,
432
+ "id": "25",
433
+ "metadata": {},
434
+ "outputs": [],
435
+ "source": [
436
+ "from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39\n",
437
+ "\n",
438
+ "df_table7 = get_table7_df(df_glucose, 1150.0)\n",
439
+ "df_table7 = format_table_df(df_table7, rename_columns=True, add_totals=False)\n",
440
+ "df_table7[\"visit_code\"] = MONTH15\n",
441
+ "\n",
442
+ "df_table71 = get_table7_df(df_glucose, 1180.0)\n",
443
+ "df_table71 = format_table_df(df_table71, rename_columns=True, add_totals=False)\n",
444
+ "df_table71[\"visit_code\"] = MONTH18\n",
445
+ "\n",
446
+ "df_table72 = get_table7_df(df_glucose, 1210.0)\n",
447
+ "df_table72 = format_table_df(df_table72, rename_columns=True, add_totals=False)\n",
448
+ "df_table72[\"visit_code\"] = MONTH21\n",
449
+ "\n",
450
+ "df_table73 = get_table7_df(df_glucose, 1270.0)\n",
451
+ "df_table73 = format_table_df(df_table73, rename_columns=True, add_totals=False)\n",
452
+ "df_table73[\"visit_code\"] = MONTH27\n",
453
+ "\n",
454
+ "df_table74 = get_table7_df(df_glucose, 1300.0)\n",
455
+ "df_table74 = format_table_df(df_table74, rename_columns=True, add_totals=False)\n",
456
+ "df_table74[\"visit_code\"] = MONTH30\n",
457
+ "\n",
458
+ "df_table75 = get_table7_df(df_glucose, 1330.0)\n",
459
+ "df_table75 = format_table_df(df_table75, rename_columns=True, add_totals=False)\n",
460
+ "df_table75[\"visit_code\"] = MONTH33\n",
461
+ "\n",
462
+ "df_table76 = get_table7_df(df_glucose, 1390.0)\n",
463
+ "df_table76 = format_table_df(df_table76, rename_columns=True, add_totals=False)\n",
464
+ "df_table76[\"visit_code\"] = MONTH39\n",
465
+ "\n",
466
+ "df_table = pd.concat([df_table7, df_table71, df_table72, df_table73, df_table74, df_table75, df_table76])\n",
467
+ "df_table[[\"visit_code\", \"label\", \"Hindu Mandal\", \"Amana\", \"Temeke\", \"Mwananyamala\", \"Mnazi Moja\", \"Total\"]]\n"
468
+ ]
469
+ },
470
+ {
471
+ "cell_type": "code",
472
+ "execution_count": null,
473
+ "id": "26",
474
+ "metadata": {},
475
+ "outputs": [],
476
+ "source": [
477
+ "cls = GlucoseEndpointsByDate()\n",
478
+ "cls.run()\n",
479
+ "# cls.endpoint_only_df.endpoint_type.value_counts()\n",
480
+ "# cls.endpoint_only_df.endpoint_label.value_counts(dropna=False)"
481
+ ]
482
+ },
483
+ {
484
+ "cell_type": "code",
485
+ "execution_count": null,
486
+ "id": "27",
487
+ "metadata": {},
488
+ "outputs": [],
489
+ "source": [
490
+ "df = cls.endpoint_only_df.groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
491
+ "df.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
492
+ "df = df.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
493
+ "df.columns.name = \"\"\n",
494
+ "df.columns = ['label', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
495
+ "df.loc[len(df)] = df[['10', '20', '30', '40', '60']].sum().to_dict()\n",
496
+ "df.at[len(df)-1, 'label'] = 'Total'\n",
497
+ "df['Total'] = df[['10', '20', '30', '40', '60']].sum(axis=1)\n",
498
+ "df.fillna(0, inplace=True)\n",
499
+ "df\n",
500
+ "print(tabulate(df[['label', '10', '20', '30', '40', '60', 'Total']], showindex=False, headers=\"keys\", tablefmt=\"simple_grid\"))\n"
501
+ ]
502
+ },
503
+ {
504
+ "cell_type": "code",
505
+ "execution_count": null,
506
+ "id": "28",
507
+ "metadata": {},
508
+ "outputs": [],
509
+ "source": [
510
+ "df"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": null,
516
+ "id": "29",
517
+ "metadata": {},
518
+ "outputs": [],
519
+ "source": [
520
+ "len(cls.endpoint_df[\"subject_identifier\"].unique())"
521
+ ]
522
+ },
523
+ {
524
+ "cell_type": "code",
525
+ "execution_count": null,
526
+ "id": "30",
527
+ "metadata": {},
528
+ "outputs": [],
529
+ "source": [
530
+ "cls.endpoint_df[(cls.endpoint_df[\"endpoint\"]==1)][\"interval_in_days\"].describe()"
531
+ ]
532
+ },
533
+ {
534
+ "cell_type": "code",
535
+ "execution_count": null,
536
+ "id": "31",
537
+ "metadata": {},
538
+ "outputs": [],
539
+ "source": [
540
+ "cls.endpoint_only_df[(cls.endpoint_only_df[\"endpoint\"]==1)][\"days_to_endpoint\"].describe()"
541
+ ]
542
+ },
543
+ {
544
+ "cell_type": "code",
545
+ "execution_count": null,
546
+ "id": "32",
547
+ "metadata": {},
548
+ "outputs": [],
549
+ "source": [
550
+ "len(cls.endpoint_df)"
551
+ ]
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "execution_count": null,
556
+ "id": "33",
557
+ "metadata": {},
558
+ "outputs": [],
559
+ "source": [
560
+ "len(cls.endpoint_only_df)"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": null,
566
+ "id": "34",
567
+ "metadata": {},
568
+ "outputs": [],
569
+ "source": [
570
+ "cls.endpoint_only_df[\"subject_identifier\"].nunique()"
571
+ ]
572
+ },
573
+ {
574
+ "cell_type": "code",
575
+ "execution_count": null,
576
+ "id": "35",
577
+ "metadata": {},
578
+ "outputs": [],
579
+ "source": [
580
+ "\n",
581
+ "fname = \"cross_check_end_fbgdate_pivot.csv\"\n",
582
+ "df_pivot = cls.endpoint_df.sort_values(by=[\"subject_identifier\"]).set_index(\"subject_identifier\").pivot_table(columns=[\"visit_code\"], values=[\"fbg_value\",\"ogtt_value\"], index=[\"subject_identifier\"])\n",
583
+ "df_pivot.sort_values(('visit_code'), axis=1).sort_values(\"subject_identifier\").to_csv(analysis_folder / fname, sep=\"|\", encoding=\"utf8\", index=True)"
584
+ ]
585
+ },
586
+ {
587
+ "cell_type": "code",
588
+ "execution_count": null,
589
+ "id": "36",
590
+ "metadata": {},
591
+ "outputs": [],
592
+ "source": [
593
+ "\n",
594
+ "fname = f\"glucose-{datetime.now().strftime(\"%Y-%m-%d-%H%M\")}.csv\"\n",
595
+ "get_crf(\"meta_subject.glucose\", subject_visit_model=\"meta_subject.subjectvisit\", drop_columns=[\"consent_model\"]).to_csv(analysis_folder / fname, sep=\"|\", encoding=\"utf8\", index=False)\n"
596
+ ]
597
+ },
598
+ {
599
+ "cell_type": "code",
600
+ "execution_count": null,
601
+ "id": "37",
602
+ "metadata": {},
603
+ "outputs": [],
604
+ "source": [
605
+ "fname = f\"glucosefbg-{datetime.now().strftime(\"%Y-%m-%d-%H%M\")}.csv\"\n",
606
+ "get_crf(\"meta_subject.glucosefbg\", subject_visit_model=\"meta_subject.subjectvisit\", drop_columns=[\"consent_model\"]).to_csv(analysis_folder / fname, sep=\"|\", encoding=\"utf8\", index=False)\n"
607
+ ]
608
+ },
609
+ {
610
+ "cell_type": "code",
611
+ "execution_count": null,
612
+ "id": "38",
613
+ "metadata": {},
614
+ "outputs": [],
615
+ "source": [
616
+ "\n",
617
+ "fname = f\"glucose-merged-{datetime.now().strftime(\"%Y-%m-%d-%H%M\")}.csv\"\n",
618
+ "cls.df.to_csv(analysis_folder / fname, sep=\"|\", encoding=\"utf8\", index=False)\n"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "code",
623
+ "execution_count": null,
624
+ "id": "39",
625
+ "metadata": {},
626
+ "outputs": [],
627
+ "source": [
628
+ "passwd = mempass.mkpassword(2)\n",
629
+ "fname = \"KBs_latest_enders_26072024.xlsx\"\n",
630
+ "decrypted_workbook = io.BytesIO()\n",
631
+ "with open(analysis_folder / fname, 'rb') as file:\n",
632
+ " office_file = msoffcrypto.OfficeFile(file)\n",
633
+ " office_file.load_key(password=passwd)\n",
634
+ " office_file.decrypt(decrypted_workbook)\n",
635
+ " \n",
636
+ "df_katie2 = pd.read_excel(decrypted_workbook, index_col=0)\n",
637
+ "df_katie2 = df_katie2.copy()\n",
638
+ "df_katie2 = df_katie2.reset_index()\n",
639
+ "print(passwd)"
640
+ ]
641
+ }
642
+ ],
643
+ "metadata": {
644
+ "kernelspec": {
645
+ "display_name": "Python 3 (ipykernel)",
646
+ "language": "python",
647
+ "name": "python3"
648
+ },
649
+ "language_info": {
650
+ "codemirror_mode": {
651
+ "name": "ipython",
652
+ "version": 3
653
+ },
654
+ "file_extension": ".py",
655
+ "mimetype": "text/x-python",
656
+ "name": "python",
657
+ "nbconvert_exporter": "python",
658
+ "pygments_lexer": "ipython3",
659
+ "version": "3.12.4"
660
+ }
661
+ },
662
+ "nbformat": 4,
663
+ "nbformat_minor": 5
664
+ }