meta-edc 1.0.7__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. meta_ae/action_items.py +10 -2
  2. meta_ae/baker_recipes.py +1 -2
  3. meta_ae/tests/tests/test_actions.py +1 -2
  4. meta_analytics/README.rst +1 -2
  5. meta_analytics/notebooks/anu.ipynb +95 -0
  6. meta_analytics/notebooks/appointment_planning.ipynb +329 -0
  7. meta_analytics/notebooks/arvs.ipynb +103 -0
  8. meta_analytics/notebooks/cleaning/consent_v1_ext.ipynb +227 -0
  9. meta_analytics/notebooks/cleaning/offschedule_eos.ipynb +353 -0
  10. meta_analytics/notebooks/dsmc/renal_dysfunction.ipynb +435 -0
  11. meta_analytics/notebooks/endpoints/meta_endpoints_by_date.ipynb +664 -0
  12. meta_analytics/notebooks/followup_examination.ipynb +141 -0
  13. meta_analytics/notebooks/hba1c.ipynb +136 -0
  14. meta_analytics/notebooks/hiv_regimens.ipynb +122 -118
  15. meta_analytics/notebooks/incidence.ipynb +232 -0
  16. meta_analytics/notebooks/liver.ipynb +389 -0
  17. meta_analytics/notebooks/magreth.ipynb +645 -0
  18. meta_analytics/notebooks/monitoring_report.ipynb +721 -448
  19. meta_analytics/notebooks/pharmacy.ipynb +405 -306
  20. meta_analytics/notebooks/pharmacy_stock_202410.ipynb +306 -0
  21. meta_analytics/notebooks/steering.ipynb +61 -0
  22. meta_analytics/notebooks/undiagnosed/meta3_screening_consort_chart.ipynb +1176 -0
  23. meta_analytics/notebooks/undiagnosed/meta3_screening_undiagnosed.ipynb +519 -0
  24. meta_analytics/notebooks/undiagnosed/meta_screening_table2.ipynb +964 -0
  25. meta_analytics/notebooks/undiagnosed/screen_undiagnosed_or.ipynb +296 -0
  26. meta_analytics/notebooks/undiagnosed/screening.ipynb +273 -0
  27. meta_analytics/notebooks/undiagnosed/screening2.ipynb +958 -0
  28. meta_analytics/notebooks/undiagnosed/screening_undiagnosed_20241002.ipynb +958 -0
  29. meta_analytics/notebooks/ven.ipynb +191 -0
  30. meta_analytics/notebooks/vitals.ipynb +263 -0
  31. meta_edc/settings/debug.py +3 -2
  32. meta_edc/urls.py +1 -0
  33. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/METADATA +3 -3
  34. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/RECORD +62 -35
  35. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/WHEEL +1 -1
  36. meta_labs/reportables.py +14 -11
  37. meta_labs/tests/test_reportables.py +33 -12
  38. meta_pharmacy/notebooks/pharmacy.ipynb +41 -0
  39. meta_prn/admin/offschedule_pregnancy_admin.py +3 -3
  40. meta_prn/admin/onschedule_dm_referral_admin.py +5 -5
  41. meta_prn/form_validators/end_of_study.py +2 -2
  42. meta_prn/migrations/0063_historicaloffstudymedication_singleton_field_and_more.py +37 -0
  43. meta_prn/migrations/0064_auto_20250602_2143.py +18 -0
  44. meta_prn/models/end_of_study.py +2 -0
  45. meta_prn/models/off_study_medication.py +2 -0
  46. meta_reports/admin/last_imp_refill_admin.py +3 -2
  47. meta_screening/eligibility/eligibility_part_three/base_eligibility_part_three.py +59 -47
  48. meta_screening/form_validators/screening_part_three.py +6 -1
  49. meta_screening/tests/meta_test_case_mixin.py +3 -0
  50. meta_screening/tests/tests/test_forms.py +9 -2
  51. meta_screening/tests/tests/test_screening_part_three.py +11 -14
  52. meta_subject/action_items.py +2 -3
  53. meta_subject/choices.py +2 -1
  54. meta_subject/form_validators/delivery_form_validator.py +1 -0
  55. meta_subject/forms/blood_results/blood_results_rft_form.py +60 -3
  56. meta_subject/forms/delivery_form.py +2 -0
  57. meta_subject/migrations/0223_bloodresultsfbc_errors_bloodresultsgludummy_errors_and_more.py +83 -0
  58. meta_subject/migrations/0224_bloodresultsfbc_abnormal_summary_and_more.py +153 -0
  59. meta_subject/tests/tests/test_egfr.py +5 -5
  60. meta_analytics/dataframes/enrolled/__init__.py +0 -0
  61. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/licenses/AUTHORS.rst +0 -0
  62. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/licenses/LICENSE +0 -0
  63. {meta_edc-1.0.7.dist-info → meta_edc-1.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,964 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%%capture\n",
11
+ "import pandas as pd\n",
12
+ "import numpy as np\n",
13
+ "import math\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "import scipy.stats as stats\n",
16
+ "\n",
17
+ "from dj_notebook import activate\n",
18
+ "\n",
19
+ "plus = activate(dotenv_file=\"/Users/erikvw/source/edc_source/meta-edc/.env\")\n",
20
+ "# output is suppressed but would normally spew out all the edc loading messages\n"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "1",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "# This notebook is incomplete / not working"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "id": "2",
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "from edc_analytics.custom_tables import BpTable\n",
41
+ "from edc_analytics.table import Table\n",
42
+ "from meta_screening.models import SubjectScreening\n",
43
+ "from meta_subject.models import PhysicalExam, SubjectVisit\n",
44
+ "from django_pandas.io import read_frame"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": null,
50
+ "id": "3",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "default_columns = [\"id\", \"subject_identifier\", \"report_datetime\", \"visit_code\"]\n",
55
+ "\n",
56
+ "title_row = [] # ???????????????"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": null,
62
+ "id": "4",
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "# this step is slow, maybe because it is the first call to the DB\n",
67
+ "qs_screening = SubjectScreening.objects.all()\n",
68
+ "df = read_frame(qs_screening)\n"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "id": "5",
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": [
78
+ "# backup the df\n",
79
+ "df_screen = df.copy()\n",
80
+ "# df = df_screen.copy()\n"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "6",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "# convert all to float\n",
91
+ "cols = [\"fbg_value\", \"fbg2_value\", \"ogtt_value\", \"ogtt2_value\", \"converted_fbg_value\", \n",
92
+ " \"converted_fbg2_value\", \"converted_ogtt_value\", \"converted_ogtt2_value\",\n",
93
+ " \"sys_blood_pressure_avg\", \"dia_blood_pressure_avg\",\n",
94
+ " \"waist_circumference\"]\n",
95
+ "df[cols] = df[cols].apply(pd.to_numeric)"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": null,
101
+ "id": "7",
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "\n",
106
+ "# condition to include any glucose test\n",
107
+ "cond_glu = (\n",
108
+ " (df['fbg_value'].notna()) | \n",
109
+ " (df['ogtt_value'].notna()) | \n",
110
+ " (df['fbg2_value'].notna()) |\n",
111
+ " (df['ogtt2_value'].notna())\n",
112
+ ")\n",
113
+ "\n",
114
+ "# conditions for Male/Female\n",
115
+ "male = (df[\"gender\"]==\"Male\")\n",
116
+ "female = (df[\"gender\"]==\"Female\")\n",
117
+ "\n",
118
+ "# condition for art stable\n",
119
+ "cond_art_stable = (df['on_rx_stable']==\"Yes\") & (df['vl_undetectable']==\"Yes\") & (df['art_six_months']==\"Yes\") \n"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": null,
125
+ "id": "8",
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": []
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": null,
133
+ "id": "9",
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "# let's fix some columns\n",
138
+ "# has_dm fillna with unk\n",
139
+ "df[\"has_dm\"] = df[\"has_dm\"].apply(lambda x: \"unk\" if not x else x)\n",
140
+ "# let's create a column that summarizes lives_nearby and staying_nearby_12\n",
141
+ "df[\"in_catchment\"] = (df[\"lives_nearby\"] == \"Yes\") & (df[\"staying_nearby_12\"] == \"Yes\")\n",
142
+ "\n",
143
+ "\n",
144
+ "# glucose\n",
145
+ "# are all glucose fields filled? YES\n",
146
+ "# for prefix in [\"fbg\", \"ogtt\", \"fbg2\", \"ogtt2\"]:\n",
147
+ "# print(df[(df[f\"{prefix}_value\"].isna()) & (df[f\"converted_{prefix}_value\"].notna())][\"gender\"].count())\n",
148
+ "# print(df[(df[f\"{prefix}_value\"].notna()) & (df[f\"converted_{prefix}_value\"].isna())][\"gender\"].count())\n",
149
+ "\n",
150
+ "# create fbg column\n",
151
+ "df[\"fbg\"] = df[\"converted_fbg_value\"]\n",
152
+ "df.loc[df[\"fbg\"].notna() & df[\"converted_fbg2_value\"].notna(), \"fbg\"] = df[\"converted_fbg2_value\"]\n",
153
+ "\n",
154
+ "# create ogtt column\n",
155
+ "df[\"ogtt\"] = df[\"converted_ogtt_value\"]\n",
156
+ "df.loc[df[\"ogtt\"].notna() & df[\"converted_ogtt2_value\"].notna(), \"ogtt\"] = df[\"converted_ogtt2_value\"]\n"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": null,
162
+ "id": "10",
163
+ "metadata": {},
164
+ "outputs": [],
165
+ "source": [
166
+ "# subject SR9E8B4D has eligible part two == No but subject has a glucose value\n",
167
+ "df.loc[(df[\"screening_identifier\"]==\"SR9E8B4D\"), \"eligible_part_two\"] = \"Yes\"\n"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": null,
173
+ "id": "11",
174
+ "metadata": {},
175
+ "outputs": [],
176
+ "source": [
177
+ "\n",
178
+ "# condition where subject is eligible P1/P2 and has any type of glucose test\n",
179
+ "cond = ((df[\"eligible_part_one\"]==\"Yes\") & (df[\"eligible_part_two\"]==\"Yes\") & cond_glu)\n"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": null,
185
+ "id": "12",
186
+ "metadata": {},
187
+ "outputs": [],
188
+ "source": [
189
+ "# filter dataframe\n",
190
+ "df = df[cond]\n"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": null,
196
+ "id": "13",
197
+ "metadata": {},
198
+ "outputs": [],
199
+ "source": [
200
+ "print(len(df))"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": null,
206
+ "id": "14",
207
+ "metadata": {},
208
+ "outputs": [],
209
+ "source": [
210
+ "wc_describe = df[\"waist_circumference\"].describe()\n",
211
+ "\n",
212
+ "# merge with physical exam to get waist circumference if taken at baseline\n",
213
+ "subject_identifiers = list(df[\"subject_identifier\"])\n",
214
+ "\n",
215
+ "qs_subject_visit = SubjectVisit.objects.filter(subject_identifier__in=subject_identifiers)\n",
216
+ "df_subject_visit = read_frame(qs_subject_visit)\n",
217
+ "df_subject_visit.rename(columns={\"id\": \"subject_visit\"}, inplace=True)\n",
218
+ "\n",
219
+ "qs_physical_exam = PhysicalExam.objects.filter(subject_visit__subject_identifier__in=subject_identifiers)\n",
220
+ "df_physical_exam = read_frame(qs_physical_exam)\n",
221
+ "\n",
222
+ "# merge w/ subject visit to get subject_identifier\n",
223
+ "df_physical_exam = pd.merge(df_physical_exam, df_subject_visit[[\"subject_visit\", \"subject_identifier\", \"visit_code\", \"visit_code_sequence\"]], on=\"subject_visit\", how=\"left\")\n",
224
+ "df_physical_exam = df_physical_exam[[\"subject_identifier\", \"visit_code\", \"visit_code_sequence\", \"waist_circumference\"]]\n",
225
+ "\n",
226
+ "df_physical_exam[[\"waist_circumference\"]] = df[[\"waist_circumference\"]].apply(pd.to_numeric)\n",
227
+ "\n",
228
+ "# rename column to waist_circumference_baseline\n",
229
+ "df_physical_exam[\"waist_circumference_baseline\"] = df_physical_exam[\"waist_circumference\"]\n",
230
+ "df_physical_exam.drop(columns=[\"waist_circumference\"])\n",
231
+ "\n",
232
+ "df_physical_exam[[\"waist_circumference_baseline\"]] = df_physical_exam[[\"waist_circumference_baseline\"]].apply(pd.to_numeric)\n",
233
+ "wc_baseline_describe = df_physical_exam[\"waist_circumference_baseline\"].describe()\n",
234
+ "\n",
235
+ "# merge on subject_identifier with main DF\n",
236
+ "df = pd.merge(df, df_physical_exam[[\"subject_identifier\", \"waist_circumference_baseline\"]], on=\"subject_identifier\", how=\"left\")\n",
237
+ "\n",
238
+ "# set waist_circumference=waist_circumference_baseline if `waist_circumference` is none and `waist_circumference_baseline` is not\n",
239
+ "df.loc[(df[\"waist_circumference\"].isna()) & (df[\"waist_circumference_baseline\"].notna()), \"waist_circumference\"] = df[\"waist_circumference_baseline\"]\n",
240
+ "\n",
241
+ "# drop waist_circumference_baseline\n",
242
+ "df.drop(columns=[\"waist_circumference_baseline\"], inplace=True)\n"
243
+ ]
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": null,
248
+ "id": "15",
249
+ "metadata": {},
250
+ "outputs": [],
251
+ "source": [
252
+ "# gender\n",
253
+ "def cell(gender, all=None):\n",
254
+ " cnt = df.loc[gender][\"gender\"].count()\n",
255
+ " if not all:\n",
256
+ " tot = df[\"gender\"].count()\n",
257
+ " return f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
258
+ " return f\"{cnt}\"\n",
259
+ "\n",
260
+ "df_gender = pd.DataFrame(columns=default_columns)\n",
261
+ "class GenderTable(Table):\n",
262
+ " def build_table_df(self):\n",
263
+ " pass\n",
264
+ "\n",
265
+ "tbl = Table(df, label=\"Gender\", columns=default_columns, show_ncol_perc=True)\n",
266
+ "# df_gender.loc[0] = [\"Gender\", \"n\", cell(female), cell(male), cell((male | female), all=True)]\n",
267
+ "tbl.table_df\n"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": null,
273
+ "id": "16",
274
+ "metadata": {},
275
+ "outputs": [],
276
+ "source": []
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": null,
281
+ "id": "17",
282
+ "metadata": {},
283
+ "outputs": [],
284
+ "source": [
285
+ "# age\n",
286
+ "agef = df.loc[female][\"age_in_years\"]\n",
287
+ "agem = df.loc[male][\"age_in_years\"]\n",
288
+ "age = df[\"age_in_years\"]\n",
289
+ "# bins\n",
290
+ "bin1 = (df[\"age_in_years\"]>=18) & (df[\"age_in_years\"]<35)\n",
291
+ "bin2 = (df[\"age_in_years\"]>=35) & (df[\"age_in_years\"]<50)\n",
292
+ "bin3 = (df[\"age_in_years\"]>=50) & (df[\"age_in_years\"]<65)\n",
293
+ "bin4 = (df[\"age_in_years\"]>=65)\n",
294
+ "\n",
295
+ "def cell(cond, gender, all=None):\n",
296
+ " cnt = df.loc[gender & cond][\"age_in_years\"].count()\n",
297
+ " if not all:\n",
298
+ " tot = df.loc[cond][\"age_in_years\"].count()\n",
299
+ " return f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
300
+ " tot = df[\"age_in_years\"].count()\n",
301
+ " return f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
302
+ "\n",
303
+ "df_age = pd.DataFrame(columns=title_row)\n",
304
+ "\n",
305
+ "df_age.loc[0] = [\"Age (years)\", \"n\", agef.count(), agem.count(), age.count()]\n",
306
+ "df_age.loc[1] = [\n",
307
+ " \"\", \"Median (IQR)\",\n",
308
+ " f\"{agef.quantile().astype(int)} ({agef.quantile(0.25).astype(int)}, {agef.quantile(0.75).astype(int)})\",\n",
309
+ " f\"{agem.quantile().astype(int)} ({agem.quantile(0.25).astype(int)}, {agem.quantile(0.75).astype(int)})\",\n",
310
+ " f\"{age.quantile().astype(int)} ({age.quantile(0.25).astype(int)}, {age.quantile(0.75).astype(int)})\",] \n",
311
+ "df_age.loc[2] = [\"\", \"18-34\", cell(female, bin1), cell(male, bin1), cell(bin1, (male | female), all=True)]\n",
312
+ "df_age.loc[3] = [\"\", \"35-49\", cell(female, bin2), cell(male, bin2), cell(bin2, (male | female), all=True)]\n",
313
+ "df_age.loc[4] = [\"\", \"50-64\", cell(female, bin3), cell(male, bin3), cell(bin3, (male | female), all=True)]\n",
314
+ "df_age.loc[5] = [\"\", \"65 and older\", cell(female, bin4), cell(male, bin4), cell(bin4, (male | female), all=True)]\n"
315
+ ]
316
+ },
317
+ {
318
+ "cell_type": "code",
319
+ "execution_count": null,
320
+ "id": "18",
321
+ "metadata": {},
322
+ "outputs": [],
323
+ "source": []
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "id": "19",
329
+ "metadata": {},
330
+ "outputs": [],
331
+ "source": [
332
+ "# waist_circumference\n",
333
+ "desc = df[[\"waist_circumference\"]].describe()\n",
334
+ "descf = df[df[\"gender\"]==\"Female\"][[\"waist_circumference\"]].describe()\n",
335
+ "descm = df[df[\"gender\"]==\"Male\"][[\"waist_circumference\"]].describe()\n",
336
+ "\n",
337
+ "f = f\"{descf.loc[\"50%\"].values[0]} ({descf.loc[\"25%\"].values[0]}, {descf.loc[\"75%\"].values[0]})\"\n",
338
+ "m = f\"{descm.loc[\"50%\"].values[0]} ({descm.loc[\"25%\"].values[0]}, {descm.loc[\"75%\"].values[0]})\"\n",
339
+ "all = f\"{desc.loc[\"50%\"].values[0]} ({desc.loc[\"25%\"].values[0]}, {desc.loc[\"75%\"].values[0]})\"\n",
340
+ "\n",
341
+ "df_waist = pd.DataFrame(columns=title_row)\n",
342
+ "\n",
343
+ "df_waist.loc[0] = [\"Waist circumference (cm)\", \"n\", descf.loc[\"count\"].values[0].astype(\"int64\"), descm.loc[\"count\"].values[0].astype(\"int64\"), desc.loc[\"count\"].values[0].astype(\"int64\")]\n",
344
+ "df_waist.loc[1] = [\"\", \"Median (IQR)\", f, m, all]\n"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": null,
350
+ "id": "20",
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "# waist_circumference (cont)\n",
355
+ "# Women 88 / Men 102\n",
356
+ "cond_lt_102 = ((df[\"waist_circumference\"]<102.0) & (df[\"gender\"]==\"Male\")) | ((df[\"waist_circumference\"]<88.0) & (df[\"gender\"]==\"Female\"))\n",
357
+ "cond_gte_102 = ((df[\"waist_circumference\"]>=102.0) & (df[\"gender\"]==\"Male\")) | ((df[\"waist_circumference\"]>=88.0) & (df[\"gender\"]==\"Female\"))\n",
358
+ "\n",
359
+ "tot = df[\"waist_circumference\"].count()\n",
360
+ "\n",
361
+ "f_cnt = df[cond_lt_102 & female][\"waist_circumference\"].count()\n",
362
+ "f = f\"{round(f_cnt, 0)} ({round(f_cnt/tot * 100, 1)}%)\"\n",
363
+ "m_cnt = df[cond_lt_102 & male][\"waist_circumference\"].count()\n",
364
+ "m = f\"{round(m_cnt, 0)} ({round(m_cnt / tot * 100, 1) }%)\"\n",
365
+ "value = f\"{round(df[cond_lt_102][\"waist_circumference\"].count(), 3)} ({round(df[cond_lt_102][\"waist_circumference\"].count() / df[\"waist_circumference\"].count(), 3) * 100}%)\"\n",
366
+ "\n",
367
+ "\n",
368
+ "df_waist.loc[2] = [\"\", \"Women<88 / Men<102\", f, m, value]\n",
369
+ "\n",
370
+ "\n",
371
+ "f_cnt = df[cond_gte_102 & female][\"waist_circumference\"].count()\n",
372
+ "f = f\"{round(f_cnt, 0)} ({round(f_cnt/tot * 100, 1)}%)\"\n",
373
+ "m_cnt = df[cond_gte_102 & male][\"waist_circumference\"].count()\n",
374
+ "m = f\"{round(m_cnt, 0)} ({round(m_cnt / tot * 100, 1) }%)\"\n",
375
+ "value = f\"{round(df[cond_gte_102][\"waist_circumference\"].count(), 3)} ({round(df[cond_gte_102][\"waist_circumference\"].count() / df[\"waist_circumference\"].count(), 3) * 100}%)\"\n",
376
+ "\n",
377
+ "df_waist.loc[3] = [\"\", \"Women>=88 / Men>=102\", f, m, value]\n",
378
+ " "
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "id": "21",
385
+ "metadata": {},
386
+ "outputs": [],
387
+ "source": [
388
+ "# cond_art\n",
389
+ "\n",
390
+ "def cell(gender, all=None):\n",
391
+ " cnt = df.loc[gender & cond_art_stable][\"gender\"].count()\n",
392
+ " if not all:\n",
393
+ " tot = df.loc[cond_art_stable][\"gender\"].count()\n",
394
+ " return f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
395
+ " tot = df[\"gender\"].count()\n",
396
+ " return f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
397
+ " \n",
398
+ "df_art = pd.DataFrame(columns=title_row)\n",
399
+ "df_art.loc[0] = [\"Stable on ART\", \"\", cell(female), cell(male), cell((male | female), all=True)]\n"
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": null,
405
+ "id": "22",
406
+ "metadata": {},
407
+ "outputs": [],
408
+ "source": [
409
+ "# blood pressure\n",
410
+ "# print(len(df[(df[\"sys_blood_pressure_one\"].notna()) & (df[\"dia_blood_pressure_one\"].notna())]))\n",
411
+ "# print(len(df[(df[\"sys_blood_pressure_two\"].notna()) & (df[\"dia_blood_pressure_two\"].notna())]))\n",
412
+ "# print(len(df[(df[\"sys_blood_pressure_avg\"].notna()) & (df[\"dia_blood_pressure_avg\"].notna())]))"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "execution_count": null,
418
+ "id": "23",
419
+ "metadata": {},
420
+ "outputs": [],
421
+ "source": [
422
+ "# blood pressure\n",
423
+ "\n",
424
+ "# df_tmp = df.copy()\n",
425
+ "# tot = len(df_tmp)\n",
426
+ "# print(f\"tot={tot}\")\n",
427
+ "# len(df_tmp[(df_tmp[\"sys_blood_pressure_avg\"].notna()) & (df_tmp[\"dia_blood_pressure_avg\"].notna())])\n",
428
+ "# severe_htn_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=180) | (df_tmp[\"dia_blood_pressure_avg\"]>=110)\n",
429
+ "# severe_htn_df = df_tmp[severe_htn_cond]\n",
430
+ "# print(f\"severe_htn={len(severe_htn_df)}\")\n",
431
+ "# df_tmp.drop(severe_htn_df.index, inplace=True)\n",
432
+ "\n",
433
+ "# htn_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=140) | (df_tmp[\"dia_blood_pressure_avg\"]>=90)\n",
434
+ "# htn_df = df_tmp[htn_cond]\n",
435
+ "# print(f\"htn={len(htn_df)}\")\n",
436
+ "# df_tmp.drop(htn_df.index, inplace=True)\n",
437
+ "\n",
438
+ "# pre_htn_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=120) | (df_tmp[\"dia_blood_pressure_avg\"]>=80)\n",
439
+ "# pre_htn_df = df_tmp[pre_htn_cond]\n",
440
+ "# print(f\"pre_htn={len(pre_htn_df)}\")\n",
441
+ "# df_tmp.drop(pre_htn_df.index, inplace=True)\n",
442
+ "\n",
443
+ "# normal_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=90) | (df_tmp[\"dia_blood_pressure_avg\"]>=60)\n",
444
+ "# normal_df = df_tmp[normal_cond]\n",
445
+ "# print(f\"normal={len(normal_df)}\")\n",
446
+ "# df_tmp.drop(normal_df.index, inplace=True)\n",
447
+ "\n",
448
+ "# low_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=0) | (df_tmp[\"dia_blood_pressure_avg\"]>=0)\n",
449
+ "# low_df = df_tmp[low_cond]\n",
450
+ "# print(f\"low={len(low_df)}\")\n",
451
+ "# df_tmp.drop(low_df.index, inplace=True)\n",
452
+ "\n",
453
+ " \n",
454
+ "# def cell(dfx, gender, all=None, perc=True):\n",
455
+ "# cnt = dfx.loc[gender][\"gender\"].count()\n",
456
+ "# if not all:\n",
457
+ "# tot = df.loc[gender][\"gender\"].count()\n",
458
+ "# return f\"{cnt}\" if not perc else f\"{cnt} ({round(cnt/tot *100, 1)}%)\" \n",
459
+ "# tot = df[\"gender\"].count()\n",
460
+ "# return f\"{cnt}\" if not perc else f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
461
+ "\n",
462
+ "# def quantile(gender, colname):\n",
463
+ "# q50 = df.loc[gender][colname].quantile()\n",
464
+ "# q25 = df.loc[gender][colname].quantile(0.25)\n",
465
+ "# q75 = df.loc[gender][colname].quantile(0.75)\n",
466
+ "# return f\"{q50} ({q25}, {q75})\"\n",
467
+ "\n",
468
+ "# df_bp = pd.DataFrame(columns=title_row)\n",
469
+ "# df_bp.loc[0] = [\"Blood pressure at baseline (mmHg)\", \"n\", cell(df_tmp, female), cell(df_tmp, male), cell(df_tmp, (male | female), all=True, perc=False)]\n",
470
+ "# df_bp.loc[1] = [\"\", \"Low (<90/60)\", cell(low_df, female), cell(low_df, male), cell(low_df, (male | female), all=True)]\n",
471
+ "# df_bp.loc[2] = [\"\", \"Normal (<120/80)\", cell(normal_df, female), cell(normal_df, male), cell(normal_df, (male | female), all=True)]\n",
472
+ "# df_bp.loc[3] = [\"\", \"Pre-hypertension (<140/90)\", cell(pre_htn_df, female), cell(pre_htn_df, male), cell(pre_htn_df, (male | female), all=True)]\n",
473
+ "# df_bp.loc[4] = [\"\", \"Hypertension (>=140/90)\", cell(htn_df, female), cell(htn_df, male), cell(htn_df, (male | female), all=True)]\n",
474
+ "# df_bp.loc[5] = [\"\", \"Severe hypertension (>=180/110)\", cell(severe_htn_df, female), cell(severe_htn_df, male), cell(severe_htn_df, (male | female), all=True)]\n",
475
+ "# df_bp.loc[6] = [\"\", \"Systolic - median (IQR)\", quantile(female, \"sys_blood_pressure_avg\"), quantile(male, \"sys_blood_pressure_avg\"), quantile((female | male), \"sys_blood_pressure_avg\")]\n",
476
+ "# df_bp.loc[7] = [\"\", \"Diastolic - median (IQR)\", quantile(female, \"dia_blood_pressure_avg\"), quantile(male, \"dia_blood_pressure_avg\"), quantile((female | male), \"dia_blood_pressure_avg\")]\n",
477
+ "\n"
478
+ ]
479
+ },
480
+ {
481
+ "cell_type": "code",
482
+ "execution_count": null,
483
+ "id": "24",
484
+ "metadata": {},
485
+ "outputs": [],
486
+ "source": []
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": null,
491
+ "id": "25",
492
+ "metadata": {},
493
+ "outputs": [],
494
+ "source": [
495
+ "# columns = [\n",
496
+ "# 'Characteristics', 'Statistics', \"F\", \"M\", 'All', \n",
497
+ "# \"fnum\",\"f_prop\",\"fq25\",\"fq50\",\"fq75\",\n",
498
+ "# \"mnum\",\"m_prop\",\"mq25\",\"mq50\",\"mq75\",\n",
499
+ "# \"q25\",\"q50\",\"q75\",\"tot\"]\n",
500
+ "\n",
501
+ "# class SubjectRow:\n",
502
+ "# def __init__(self, gender, dfx, main_df, iqr_col=None):\n",
503
+ "# self.num = dfx.loc[gender][\"gender\"].count()\n",
504
+ "# self.total = len(main_df.loc[gender])\n",
505
+ "# self.perc = self.num/self.total\n",
506
+ "# if iqr_col:\n",
507
+ "# self.q25, self.q50, self.q75 = dfx.loc[gender][iqr_col].quantile([0.25, 0.50, 0.75])\n",
508
+ "# else:\n",
509
+ "# self.q25, self.q50, self.q75 = np.nan,np.nan,np.nan\n",
510
+ "\n",
511
+ "# class MaleRow(SubjectRow):\n",
512
+ "# def __init__(self, dfx, main_df, iqr_col=None):\n",
513
+ "# super().__init__(male, dfx, main_df, iqr_col)\n",
514
+ "\n",
515
+ "# class FemaleRow(SubjectRow):\n",
516
+ "# def __init__(self, dfx, main_df, iqr_col=None):\n",
517
+ "# super().__init__(female, dfx, main_df, iqr_col)\n",
518
+ "\n",
519
+ "# class Row:\n",
520
+ "# def __init__(self, dfx, main_df, label=None, statistic=None, iqr_col=None, columns=None):\n",
521
+ "# self.m = MaleRow(dfx, main_df, iqr_col)\n",
522
+ "# self.f =FemaleRow(dfx, main_df, iqr_col)\n",
523
+ "# self.total = len(main_df)\n",
524
+ "# self.subtotal = len(dfx)\n",
525
+ "# if iqr_col:\n",
526
+ "# self.q25, self.q50, self.q75 = main_df[iqr_col].quantile([0.25, 0.50, 0.75])\n",
527
+ "# else:\n",
528
+ "# self.q25, self.q50, self.q75 = np.nan,np.nan,np.nan \n",
529
+ "# self.label = label or \"\"\n",
530
+ "# self.statistic = statistic\n",
531
+ "# self.df = pd.DataFrame(columns=columns)\n",
532
+ "\n",
533
+ "# def with_perc(total=None):\n",
534
+ "# if total:\n",
535
+ "# return f\"{self.num} ({round(self.num/self.total *100, 1)}%)\"\n",
536
+ "# return f\"{self.num} ({round(self.num/self.subtotal *100, 1)}%)\"\n",
537
+ " \n",
538
+ "# def values(self):\n",
539
+ "# if self.statistic==\"n\":\n",
540
+ "# return [\n",
541
+ "# self.label, self.statistic, \"\", \"\", \"\",\n",
542
+ "# self.f.num, self.f.perc, self.f.q25, self.f.q50, self.f.q75,\n",
543
+ "# self.m.num, self.m.perc, self.m.q25, self.m.q50, self.m.q75,\n",
544
+ "# self.q25, self.q50, self.q75, \n",
545
+ "# self.total]\n",
546
+ "# return [\n",
547
+ "# self.label, self.statistic, \"\", \"\", \"\", \n",
548
+ "# self.f.num, self.f.perc, self.f.q25, self.f.q50, self.f.q75, \n",
549
+ "# self.m.num, self.m.perc,self.m.q25, self.m.q50, self.m.q75, \n",
550
+ "# self.q25, self.q50, self.q75, \n",
551
+ "# self.subtotal]\n",
552
+ "\n",
553
+ "# class Table:\n",
554
+ "\n",
555
+ "# statistic_col = \"Statistics\"\n",
556
+ "# female_col = \"F\"\n",
557
+ "# male_col = \"M\"\n",
558
+ "# all_col = \"All\"\n",
559
+ "# n_sublabel = \"n\"\n",
560
+ "# grand_total_col = \"tot\"\n",
561
+ " \n",
562
+ "# def __init__(self, main_df, label=None, columns=None):\n",
563
+ "# self.main_df = main_df\n",
564
+ "# self.table_df = pd.DataFrame(columns=columns)\n",
565
+ "# self.row_zero = Row(main_df, main_df, label=label, statistic=self.n_sublabel, columns=columns)\n",
566
+ "\n",
567
+ "# self.build_table_df()\n",
568
+ " \n",
569
+ "# # format string cols\n",
570
+ "# self.table_df[self.female_col] = self.table_df.apply(lambda x: self.format_f_col(x), axis=1)\n",
571
+ "# self.table_df[self.male_col] = self.table_df.apply(lambda x: self.format_m_col(x), axis=1)\n",
572
+ "# self.table_df[self.all_col] = self.table_df.apply(lambda x: self.format_all_col(x), axis=1)\n",
573
+ "\n",
574
+ "# def build_table_df(self):\n",
575
+ "# self.table_df.loc[0] = self.row_zero.values()\n",
576
+ "\n",
577
+ "# @property\n",
578
+ "# def formatted_df(self):\n",
579
+ "# return self.table_df[['Characteristics', 'Statistics', \"F\", \"M\", 'All']]\n",
580
+ "\n",
581
+ "# def format_f_col(self, x):\n",
582
+ "# if x[self.statistic_col] == self.n_sublabel:\n",
583
+ "# return f\"{x.fnum}\"\n",
584
+ "# elif pd.notna(x.q25):\n",
585
+ "# return f\"{x.fq50} ({x.fq25},{x.fq75})\"\n",
586
+ "# return f\"{x.fnum} ({round(x.fnum/self.row_zero.f.total *100, 1)}%)\" \n",
587
+ "\n",
588
+ "# def format_m_col(self, x):\n",
589
+ "# if x[self.statistic_col] == self.n_sublabel:\n",
590
+ "# return f\"{x.mnum}\"\n",
591
+ "# elif pd.notna(x.q25):\n",
592
+ "# return f\"{x.mq50} ({x.mq25},{x.mq75})\"\n",
593
+ "# return f\"{x.mnum} ({round(x.mnum/self.row_zero.m.total *100, 1)}%)\" \n",
594
+ "\n",
595
+ "# def format_all_col(self, x):\n",
596
+ "# if x[self.statistic_col] == self.n_sublabel:\n",
597
+ "# return f\"{x.tot}\"\n",
598
+ "# elif pd.notna(x.q25):\n",
599
+ "# return f\"{x.q50} ({x.q25},{x.q75})\"\n",
600
+ "# return f\"{x.tot} ({round(x.tot/self.table_df.loc[0][self.grand_total_col] *100, 1)}%)\" \n",
601
+ "\n",
602
+ "# class BpTable(Table):\n",
603
+ "\n",
604
+ "# sys_col = \"sys_blood_pressure_avg\"\n",
605
+ "# dia_col = \"dia_blood_pressure_avg\"\n",
606
+ "\n",
607
+ "# def build_table_df(self):\n",
608
+ "# self.table_df.loc[0] = self.row_zero.values()\n",
609
+ "# i = 1\n",
610
+ "# for key, dfx in self.get_dfs(self.main_df).items():\n",
611
+ "# self.table_df.loc[i] = Row(dfx, self.main_df, label=\"\", statistic=key, columns=columns).values()\n",
612
+ "# i += 1\n",
613
+ "# self.table_df.loc[i+1] = Row(self.main_df, self.main_df, label=\"\", statistic=\"Systolic - median (IQR)\", iqr_col=\"sys_blood_pressure_avg\", columns=columns).values()\n",
614
+ "# self.table_df.loc[i+2] = Row(self.main_df, self.main_df, label=\"\", statistic=\"Diastolic - median (IQR)\", iqr_col=\"dia_blood_pressure_avg\", columns=columns).values() \n",
615
+ " \n",
616
+ "# def get_dfs(self, main_df):\n",
617
+ "# dfs = {}\n",
618
+ "# df_tmp = main_df.copy()\n",
619
+ "# tot = len(df_tmp)\n",
620
+ "# severe_htn_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=180) | (df_tmp[\"dia_blood_pressure_avg\"]>=110)\n",
621
+ "# severe_htn_df = df_tmp[severe_htn_cond]\n",
622
+ "# dfs.update({\"Severe hypertension (>=180/110)\": severe_htn_df})\n",
623
+ "# df_tmp.drop(severe_htn_df.index, inplace=True)\n",
624
+ " \n",
625
+ "# htn_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=140) | (df_tmp[\"dia_blood_pressure_avg\"]>=90)\n",
626
+ "# htn_df = df_tmp[htn_cond]\n",
627
+ "# dfs.update({\"Hypertension (>=140/90)\": htn_df})\n",
628
+ "# df_tmp.drop(htn_df.index, inplace=True)\n",
629
+ " \n",
630
+ "# pre_htn_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=120) | (df_tmp[\"dia_blood_pressure_avg\"]>=80)\n",
631
+ "# pre_htn_df = df_tmp[pre_htn_cond]\n",
632
+ "# dfs.update({\"Pre-hypertension (<140/90)\": pre_htn_df})\n",
633
+ "# df_tmp.drop(pre_htn_df.index, inplace=True)\n",
634
+ " \n",
635
+ "# normal_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=90) | (df_tmp[\"dia_blood_pressure_avg\"]>=60)\n",
636
+ "# normal_df = df_tmp[normal_cond]\n",
637
+ "# dfs.update({\"Normal (<120/80)\": normal_df})\n",
638
+ "# df_tmp.drop(normal_df.index, inplace=True)\n",
639
+ " \n",
640
+ "# low_cond = (df_tmp[\"sys_blood_pressure_avg\"]>=0) | (df_tmp[\"dia_blood_pressure_avg\"]>=0)\n",
641
+ "# low_df = df_tmp[low_cond]\n",
642
+ "# dfs.update({\"Low (<90/60)\": low_df})\n",
643
+ "# df_tmp.drop(low_df.index, inplace=True)\n",
644
+ "# dfs = dict(reversed(list(dfs.items())))\n",
645
+ "# return dfs\n",
646
+ "\n",
647
+ " \n",
648
+ "\n",
649
+ "tbl = BpTable(df, label=\"Blood pressure at baseline (mmHg)\", columns=columns)\n",
650
+ "tbl.formatted_df\n",
651
+ " "
652
+ ]
653
+ },
654
+ {
655
+ "cell_type": "code",
656
+ "execution_count": null,
657
+ "id": "26",
658
+ "metadata": {},
659
+ "outputs": [],
660
+ "source": [
661
+ "tbl.table_df"
662
+ ]
663
+ },
664
+ {
665
+ "cell_type": "code",
666
+ "execution_count": null,
667
+ "id": "27",
668
+ "metadata": {},
669
+ "outputs": [],
670
+ "source": [
671
+ "df_bp2"
672
+ ]
673
+ },
674
+ {
675
+ "cell_type": "code",
676
+ "execution_count": null,
677
+ "id": "28",
678
+ "metadata": {},
679
+ "outputs": [],
680
+ "source": [
681
+ "# fbg\n",
682
+ "\n",
683
+ "def cell(measure, gender, all=None):\n",
684
+ " if measure == \"<6.1\":\n",
685
+ " cond = (df[\"fbg\"]<6.1)\n",
686
+ " elif measure == \"6.1-6.9\":\n",
687
+ " cond = (df[\"fbg\"]>=6.1) & (df[\"fbg\"]<7.0)\n",
688
+ " elif measure == \">=7.0\":\n",
689
+ " cond = (df[\"fbg\"]>=7.0)\n",
690
+ " else:\n",
691
+ " cond = (df[\"fbg\"].notna())\n",
692
+ " cnt = df.loc[gender & cond][\"gender\"].count()\n",
693
+ " if not all:\n",
694
+ " tot = df.loc[gender][\"gender\"].count()\n",
695
+ " return f\"{cnt}\" if measure==\"n\" else f\"{cnt} ({round(cnt/tot *100, 1)}%)\" \n",
696
+ " tot = df[\"gender\"].count()\n",
697
+ " return f\"{cnt}\" if measure==\"n\" else f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
698
+ "\n",
699
+ "df_fbg = pd.DataFrame(columns=title_row)\n",
700
+ "df_fbg.loc[0] = [\"FBG (mmol/L) categories\", \"n\", cell(\"n\", female), cell(\"n\", male), cell(\"n\", (male | female), all=True)]\n",
701
+ "df_fbg.loc[1] = [\"\", \"<6.1\", cell(\"<6.1\", female), cell(\"<6.1\", male), cell(\"<6.1\", (male | female), all=True)]\n",
702
+ "df_fbg.loc[2] = [\"\", \"6.1-6.9\", cell(\"6.1-6.9\", female), cell(\"6.1-6.9\", male), cell(\"6.1-6.9\", (male | female), all=True)]\n",
703
+ "df_fbg.loc[3] = [\"\", \"7.0 and above\", cell(\">=7.0\", female), cell(\">=7.0\", male), cell(\">=7.0\", (male | female), all=True)]\n",
704
+ "\n"
705
+ ]
706
+ },
707
+ {
708
+ "cell_type": "code",
709
+ "execution_count": null,
710
+ "id": "29",
711
+ "metadata": {},
712
+ "outputs": [],
713
+ "source": [
714
+ "# ogtt\n",
715
+ "\n",
716
+ "def cell(measure, gender, all=None):\n",
717
+ " if measure == \"<7.7\":\n",
718
+ " cond = (df[\"ogtt\"]<7.8)\n",
719
+ " elif measure == \"7.8-11.1\":\n",
720
+ " cond = (df[\"ogtt\"]>=7.8) & (df[\"ogtt\"]<11.1)\n",
721
+ " elif measure == \">=11.1\":\n",
722
+ " cond = (df[\"ogtt\"]>=11.1)\n",
723
+ " elif measure == \"missing\":\n",
724
+ " cond = (df[\"ogtt\"].isna())\n",
725
+ " else:\n",
726
+ " cond = (df[\"ogtt\"].notna() | df[\"ogtt\"].isna())\n",
727
+ " cnt = df.loc[gender & cond][\"gender\"].count()\n",
728
+ " if not all:\n",
729
+ " tot = df.loc[gender][\"gender\"].count()\n",
730
+ " return f\"{cnt}\" if measure==\"n\" else f\"{cnt} ({round(cnt/tot *100, 1)}%)\" \n",
731
+ " tot = df[\"gender\"].count()\n",
732
+ " return f\"{cnt}\" if measure==\"n\" else f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
733
+ "\n",
734
+ "df_ogtt = pd.DataFrame(columns=title_row)\n",
735
+ "df_ogtt.loc[0] = [\"OGTT (mmol/L) categories\", \"n\", cell(\"n\", female), cell(\"n\", male), cell(\"n\", (male | female), all=True)]\n",
736
+ "df_ogtt.loc[1] = [\"\", \"<7.8\", cell(\"<7.7\", female), cell(\"<7.7\", male), cell(\"<7.7\", (male | female), all=True)]\n",
737
+ "df_ogtt.loc[2] = [\"\", \"7.8-11.0\", cell(\"7.8-11.1\", female), cell(\"7.8-11.1\", male), cell(\"7.8-11.1\", (male | female), all=True)]\n",
738
+ "df_ogtt.loc[3] = [\"\", \"11.1 and above\", cell(\">=11.1\", female), cell(\">=11.1\", male), cell(\">=11.1\", (male | female), all=True)]\n",
739
+ "df_ogtt.loc[4] = [\"\", \"not done\", cell(\"missing\", female), cell(\"missing\", male), cell(\"missing\", (male | female), all=True)]\n"
740
+ ]
741
+ },
742
+ {
743
+ "cell_type": "code",
744
+ "execution_count": null,
745
+ "id": "30",
746
+ "metadata": {},
747
+ "outputs": [],
748
+ "source": [
749
+ "# fbg and ogtt\n",
750
+ "\n",
751
+ "def cell(measure, gender, all=None):\n",
752
+ " if measure == \"dm1\":\n",
753
+ " cond = (df[\"ogtt\"]>=11.1) | (df[\"fbg\"]>=7.0) & (df[\"ogtt\"].notna())\n",
754
+ " elif measure == \"other\":\n",
755
+ " cond = ~((df[\"ogtt\"]>=11.1) | (df[\"fbg\"]>=7.0)) & (df[\"ogtt\"].notna())\n",
756
+ " elif measure == \"ogtt\":\n",
757
+ " cond = (df[\"fbg\"].notna()) & (df[\"ogtt\"].isna())\n",
758
+ " else:\n",
759
+ " cond = (df[\"fbg\"].notna())\n",
760
+ " cnt = df.loc[gender & cond][\"gender\"].count()\n",
761
+ " if not all:\n",
762
+ " tot = df.loc[gender][\"gender\"].count()\n",
763
+ " return f\"{cnt}\" if measure==\"n\" else f\"{cnt} ({round(cnt/tot *100, 1)}%)\" \n",
764
+ " tot = df[\"gender\"].count()\n",
765
+ " return f\"{cnt}\" if measure==\"n\" else f\"{cnt} ({round(cnt/tot *100, 1)}%)\"\n",
766
+ "\n",
767
+ "df_fbg_ogtt = pd.DataFrame(columns=title_row)\n",
768
+ "df_fbg_ogtt.loc[0] = [\"OGTT & FBG (mmol/L) categories\", \"n\", cell(\"n\", female), cell(\"n\", male), cell(\"n\", (male | female), all=True)]\n",
769
+ "df_fbg_ogtt.loc[1] = [\"\", \"OGTT>=11.1 or FBG>=7.0\", cell(\"dm1\", female), cell(\"dm1\", male), cell(\"dm1\", (male | female), all=True)]\n",
770
+ "df_fbg_ogtt.loc[2] = [\"\", \"other\", cell(\"other\", female), cell(\"other\", male), cell(\"other\", (male | female), all=True)]\n",
771
+ "df_fbg_ogtt.loc[3] = [\"\", \"OGTT not done\", cell(\"ogtt\", female), cell(\"ogtt\", male), cell(\"ogtt\", (male | female), all=True)]\n"
772
+ ]
773
+ },
774
+ {
775
+ "cell_type": "code",
776
+ "execution_count": null,
777
+ "id": "31",
778
+ "metadata": {},
779
+ "outputs": [],
780
+ "source": [
781
+ "df_table2 = pd.concat([df_gender, df_age, df_waist, df_art, df_bp, df_fbg, df_ogtt, df_fbg_ogtt], ignore_index=True)\n",
782
+ "df_table2"
783
+ ]
784
+ },
785
+ {
786
+ "cell_type": "code",
787
+ "execution_count": null,
788
+ "id": "32",
789
+ "metadata": {},
790
+ "outputs": [],
791
+ "source": [
792
+ "# blood pressure\n",
793
+ "# Blood pressure: the statistics of interest are the 25th percentile, the median (50th percentile) and the 75th percentile\n",
794
+ "df[[\"sys_blood_pressure_avg\", \"dia_blood_pressure_avg\"]].describe()"
795
+ ]
796
+ },
797
+ {
798
+ "cell_type": "code",
799
+ "execution_count": null,
800
+ "id": "33",
801
+ "metadata": {},
802
+ "outputs": [],
803
+ "source": [
804
+ "df_table"
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "code",
809
+ "execution_count": null,
810
+ "id": "34",
811
+ "metadata": {},
812
+ "outputs": [],
813
+ "source": [
814
+ "# blood pressure\n"
815
+ ]
816
+ },
817
+ {
818
+ "cell_type": "code",
819
+ "execution_count": null,
820
+ "id": "35",
821
+ "metadata": {},
822
+ "outputs": [],
823
+ "source": []
824
+ },
825
+ {
826
+ "cell_type": "code",
827
+ "execution_count": null,
828
+ "id": "36",
829
+ "metadata": {},
830
+ "outputs": [],
831
+ "source": []
832
+ },
833
+ {
834
+ "cell_type": "code",
835
+ "execution_count": null,
836
+ "id": "37",
837
+ "metadata": {},
838
+ "outputs": [],
839
+ "source": []
840
+ },
841
+ {
842
+ "cell_type": "code",
843
+ "execution_count": null,
844
+ "id": "38",
845
+ "metadata": {},
846
+ "outputs": [],
847
+ "source": []
848
+ },
849
+ {
850
+ "cell_type": "code",
851
+ "execution_count": null,
852
+ "id": "39",
853
+ "metadata": {},
854
+ "outputs": [],
855
+ "source": []
856
+ },
857
+ {
858
+ "cell_type": "code",
859
+ "execution_count": null,
860
+ "id": "40",
861
+ "metadata": {},
862
+ "outputs": [],
863
+ "source": [
864
+ "import matplotlib.pyplot as plt\n",
865
+ "import numpy as np\n",
866
+ "import scipy.stats as stats\n",
867
+ "import math\n",
868
+ "import seaborn as sns\n"
869
+ ]
870
+ },
871
+ {
872
+ "cell_type": "code",
873
+ "execution_count": null,
874
+ "id": "41",
875
+ "metadata": {},
876
+ "outputs": [],
877
+ "source": [
878
+ "sns.boxplot(x=\"age_in_years\",y=\"gender\", data=df)"
879
+ ]
880
+ },
881
+ {
882
+ "cell_type": "code",
883
+ "execution_count": null,
884
+ "id": "42",
885
+ "metadata": {},
886
+ "outputs": [],
887
+ "source": [
888
+ "sns.boxplot(x=\"fbg\",y=\"gender\", data=df)\n"
889
+ ]
890
+ },
891
+ {
892
+ "cell_type": "code",
893
+ "execution_count": null,
894
+ "id": "43",
895
+ "metadata": {},
896
+ "outputs": [],
897
+ "source": [
898
+ "sns.boxplot(x=\"ogtt\",y=\"gender\", data=df)\n"
899
+ ]
900
+ },
901
+ {
902
+ "cell_type": "code",
903
+ "execution_count": null,
904
+ "id": "44",
905
+ "metadata": {},
906
+ "outputs": [],
907
+ "source": [
908
+ "df[[\"age_in_years\", \"fbg\", \"ogtt\"]].hist()"
909
+ ]
910
+ },
911
+ {
912
+ "cell_type": "code",
913
+ "execution_count": null,
914
+ "id": "45",
915
+ "metadata": {},
916
+ "outputs": [],
917
+ "source": [
918
+ "sns.pairplot(df[[\"calculated_bmi_value\", \"fbg\"]])"
919
+ ]
920
+ },
921
+ {
922
+ "cell_type": "code",
923
+ "execution_count": null,
924
+ "id": "46",
925
+ "metadata": {},
926
+ "outputs": [],
927
+ "source": [
928
+ "cond = (df[\"fbg\"]>=7.0) & (df[\"fbg\"]<=10.0)\n",
929
+ "sns.displot(df[cond], x=\"fbg\", hue=\"gender\")"
930
+ ]
931
+ },
932
+ {
933
+ "cell_type": "code",
934
+ "execution_count": null,
935
+ "id": "47",
936
+ "metadata": {},
937
+ "outputs": [],
938
+ "source": [
939
+ "sns.displot(df, x=\"sys_blood_pressure_avg\", hue=\"gender\")"
940
+ ]
941
+ }
942
+ ],
943
+ "metadata": {
944
+ "kernelspec": {
945
+ "display_name": "Python 3 (ipykernel)",
946
+ "language": "python",
947
+ "name": "python3"
948
+ },
949
+ "language_info": {
950
+ "codemirror_mode": {
951
+ "name": "ipython",
952
+ "version": 3
953
+ },
954
+ "file_extension": ".py",
955
+ "mimetype": "text/x-python",
956
+ "name": "python",
957
+ "nbconvert_exporter": "python",
958
+ "pygments_lexer": "ipython3",
959
+ "version": "3.12.4"
960
+ }
961
+ },
962
+ "nbformat": 4,
963
+ "nbformat_minor": 5
964
+ }