meta-edc 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meta_analytics/dataframes/__init__.py +3 -0
- meta_analytics/dataframes/constants.py +1 -1
- meta_analytics/dataframes/enrolled/__init__.py +0 -1
- meta_analytics/dataframes/get_eos_df.py +15 -2
- meta_analytics/dataframes/get_glucose_df.py +149 -0
- meta_analytics/dataframes/get_glucose_fbg_df.py +27 -0
- meta_analytics/dataframes/get_glucose_fbg_ogtt_df.py +22 -0
- meta_analytics/dataframes/glucose_endpoints/endpoint_by_date.py +106 -120
- meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +36 -227
- meta_analytics/dataframes/utils.py +18 -4
- meta_analytics/notebooks/hiv_regimens.ipynb +425 -0
- meta_analytics/notebooks/monitoring_report.ipynb +1561 -0
- meta_analytics/notebooks/pharmacy.ipynb +971 -0
- meta_analytics/utils.py +81 -0
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/METADATA +4 -3
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/RECORD +32 -18
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/WHEEL +1 -1
- meta_edc-1.0.7.dist-info/licenses/AUTHORS.rst +8 -0
- meta_reports/migrations/0054_auto_20250422_2003.py +81 -0
- meta_reports/migrations/0055_alter_glucosesummary_table.py +17 -0
- meta_reports/migrations/0056_auto_20250422_2214.py +54 -0
- meta_reports/migrations/0057_auto_20250422_2224.py +54 -0
- meta_reports/migrations/0058_auto_20250422_2232.py +54 -0
- meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +13 -1
- meta_reports/models/dbviews/glucose_summary/view_definition.py +8 -5
- meta_subject/form_validators/glucose_form_validator.py +16 -1
- meta_subject/forms/study_medication_form.py +5 -3
- meta_subject/migrations/0221_auto_20250402_1913.py +42 -0
- meta_subject/migrations/0222_alter_historicalstudymedication_stock_codes_and_more.py +46 -0
- meta_analytics/dataframes/enrolled/get_glucose_df.py +0 -122
- /meta_edc-1.0.6.dist-info/AUTHORS → /meta_analytics/dataframes/glucose_endpoints/utils.py +0 -0
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info/licenses}/LICENSE +0 -0
- {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/top_level.txt +0 -0
@@ -1,44 +1,26 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import pandas as pd
|
3
3
|
from django.apps import apps as django_apps
|
4
|
-
from edc_constants.constants import
|
5
|
-
from edc_pdutils.dataframes import (
|
6
|
-
get_crf,
|
7
|
-
get_eos,
|
8
|
-
get_subject_consent,
|
9
|
-
get_subject_visit,
|
10
|
-
)
|
4
|
+
from edc_constants.constants import YES
|
11
5
|
from edc_utils import get_utcnow
|
12
6
|
|
13
7
|
from ..constants import (
|
14
8
|
CASE_EOS,
|
15
|
-
CASE_FBG_ONLY,
|
16
9
|
CASE_FBGS_WITH_FIRST_OGTT,
|
17
10
|
CASE_FBGS_WITH_SECOND_OGTT,
|
18
11
|
CASE_OGTT,
|
19
12
|
endpoint_cases,
|
20
13
|
endpoint_columns,
|
21
14
|
)
|
15
|
+
from ..get_glucose_df import get_glucose_df
|
22
16
|
from ..utils import (
|
23
17
|
get_empty_endpoint_df,
|
24
18
|
get_test_string,
|
25
19
|
get_unique_subject_identifiers,
|
26
|
-
get_unique_visit_codes,
|
27
20
|
)
|
28
21
|
from .endpoint_by_date import EndpointByDate
|
29
22
|
|
30
23
|
|
31
|
-
def calculate_fasting_hrs(df: pd.DataFrame):
|
32
|
-
df.loc[(df["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
|
33
|
-
if df.empty:
|
34
|
-
df["fasting_hrs"] = np.nan
|
35
|
-
else:
|
36
|
-
df["fasting_hrs"] = df["fasting_duration_delta"].apply(
|
37
|
-
lambda s: np.nan if pd.isna(s) else s.total_seconds() / 3600
|
38
|
-
)
|
39
|
-
return df
|
40
|
-
|
41
|
-
|
42
24
|
class GlucoseEndpointsByDate:
|
43
25
|
"""
|
44
26
|
Usage:
|
@@ -61,7 +43,7 @@ class GlucoseEndpointsByDate:
|
|
61
43
|
ogtt_threshhold = 11.1
|
62
44
|
endpoint_cls = EndpointByDate
|
63
45
|
keep_cols = [
|
64
|
-
"
|
46
|
+
"fasted",
|
65
47
|
"fasting_hrs",
|
66
48
|
"fbg_value",
|
67
49
|
"fbg_units",
|
@@ -82,6 +64,7 @@ class GlucoseEndpointsByDate:
|
|
82
64
|
def __init__(
|
83
65
|
self, subject_identifiers: list[str] | None = None, case_list: list[int] | None = None
|
84
66
|
):
|
67
|
+
|
85
68
|
self._glucose_fbg_df = pd.DataFrame()
|
86
69
|
self._glucose_fbg_ogtt_df = pd.DataFrame()
|
87
70
|
self.endpoint_only_df = pd.DataFrame()
|
@@ -95,213 +78,35 @@ class GlucoseEndpointsByDate:
|
|
95
78
|
]
|
96
79
|
self.endpoint_cases = {k: v for k, v in endpoint_cases.items() if k in self.case_list}
|
97
80
|
|
98
|
-
|
99
|
-
if self.glucose_fbg_ogtt_df.empty:
|
100
|
-
self.df = self.glucose_fbg_df.copy()
|
101
|
-
self.df[["ogtt_value", "ogtt_units"]] = np.nan
|
102
|
-
self.df[["ogtt_datetime"]] = pd.NaT
|
103
|
-
elif self.glucose_fbg_df.empty:
|
104
|
-
self.df = self.glucose_fbg_ogtt_df.copy()
|
105
|
-
else:
|
106
|
-
self.df = self.glucose_fbg_ogtt_df.merge(
|
107
|
-
self.glucose_fbg_df,
|
108
|
-
on=["subject_visit_id"],
|
109
|
-
how="outer",
|
110
|
-
indicator=True,
|
111
|
-
suffixes=("", "_y"),
|
112
|
-
)
|
113
|
-
# cast as ...
|
114
|
-
for col in ["fasting_hrs", "fbg_value"]:
|
115
|
-
self.df[col] = self.df[col].astype("float64")
|
116
|
-
if f"{col}_y" in self.df.columns:
|
117
|
-
self.df[f"{col}_y"] = self.df[f"{col}_y"].astype("float64")
|
118
|
-
for col in ["fasting", "fbg_units", "source"]:
|
119
|
-
self.df[col] = self.df[col].astype("object")
|
120
|
-
if f"{col}_y" in self.df.columns:
|
121
|
-
self.df[f"{col}_y"] = self.df[f"{col}_y"].astype("object")
|
122
|
-
self.df = self.df.drop(
|
123
|
-
columns=[col for col in self.df.columns if col.endswith("_y") or col == "_merge"]
|
124
|
-
)
|
125
|
-
self.df = self.df.reset_index(drop=True)
|
126
|
-
|
127
|
-
# merge w/ subject_visit
|
128
|
-
subject_visit_df = get_subject_visit(
|
129
|
-
"meta_subject.subjectvisit", subject_identifiers=self.subject_identifiers
|
130
|
-
)
|
131
|
-
self.df = subject_visit_df.merge(
|
132
|
-
self.df, on=["subject_visit_id"], how="left", suffixes=("", "_y")
|
133
|
-
)
|
134
|
-
self.df = self.df[[col for col in self.keep_cols]]
|
135
|
-
self.df = self.df.reset_index(drop=True)
|
136
|
-
|
137
|
-
# pivot right_only cols
|
138
|
-
cols = [
|
139
|
-
"fasting",
|
140
|
-
"fasting_hrs",
|
141
|
-
"fbg_value",
|
142
|
-
"fbg_units",
|
143
|
-
"fbg_datetime",
|
144
|
-
"source",
|
145
|
-
"report_datetime",
|
146
|
-
]
|
147
|
-
for col in cols:
|
148
|
-
if f"{col}_y" in self.df.columns and not self.df[f"{col}_y"].isnull().all():
|
149
|
-
self.df.loc[
|
150
|
-
(self.df["_merge"].isin(["both", "right_only"])) & (self.df[col].isna()),
|
151
|
-
col,
|
152
|
-
] = self.df[f"{col}_y"]
|
153
|
-
# if fbg_datetime still null, use visit datetime
|
154
|
-
if self.df["fbg_datetime"].isnull().all():
|
155
|
-
self["fbg_datetime"] = self.df["visit_datetime"]
|
156
|
-
else:
|
157
|
-
self.df.loc[(self.df["fbg_datetime"].isna()), "fbg_datetime"] = self.df[
|
158
|
-
"visit_datetime"
|
159
|
-
]
|
160
|
-
self.df = self.df.drop(
|
161
|
-
columns=[col for col in self.df.columns if col.endswith("_y") or col == "_merge"]
|
162
|
-
)
|
163
|
-
self.df = self.df.reset_index(drop=True)
|
164
|
-
|
165
|
-
self.merge_with_consent()
|
166
|
-
self.merge_with_eos()
|
167
|
-
|
168
|
-
self.add_calculated_days_from_baseline_to_event_columns()
|
81
|
+
self.df = get_glucose_df().copy()
|
169
82
|
|
170
83
|
# label rows by type of glu tests (ones with value)
|
171
84
|
self.df["test"] = self.df.apply(get_test_string, axis=1)
|
172
|
-
self.df = self.df.reset_index(drop=True)
|
173
|
-
|
174
|
-
self.visit_codes_df = get_unique_visit_codes(self.df)
|
175
|
-
self.subject_identifiers_df = get_unique_subject_identifiers(self.df)
|
176
|
-
|
177
|
-
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
178
|
-
self.df = self.df.reset_index(drop=True)
|
179
85
|
|
86
|
+
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"]).reset_index(
|
87
|
+
drop=True
|
88
|
+
)
|
180
89
|
self.working_df = self.df.copy()
|
181
90
|
self.working_df["endpoint"] = 0
|
182
91
|
self.endpoint_df = get_empty_endpoint_df()
|
183
92
|
|
184
93
|
def run(self):
|
185
|
-
self.
|
186
|
-
|
94
|
+
self.process_by_ogtt_only()
|
95
|
+
subject_identifiers_df = get_unique_subject_identifiers(self.df)
|
96
|
+
for index, row in subject_identifiers_df.iterrows():
|
187
97
|
subject_df = self.get_subject_df(row["subject_identifier"])
|
188
|
-
subject_df = self.
|
189
|
-
subject_df
|
190
|
-
|
98
|
+
subject_df = self.endpoint_cls(
|
99
|
+
subject_df=subject_df,
|
100
|
+
fbg_threshhold=self.fbg_threshhold,
|
101
|
+
ogtt_threshhold=self.ogtt_threshhold,
|
102
|
+
).subject_df
|
191
103
|
if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
|
192
104
|
self.append_subject_to_endpoint_df(subject_df)
|
193
105
|
self.remove_subject_from_working_df(row)
|
194
|
-
|
195
|
-
if CASE_FBG_ONLY in self.endpoint_cases:
|
196
|
-
for index, row in self.subject_identifiers_df.iterrows():
|
197
|
-
subject_df = self.get_subject_df(row["subject_identifier"])
|
198
|
-
subject_df = self.check_endpoint_by_fbg_for_subject(
|
199
|
-
subject_df, case_list=[CASE_FBG_ONLY]
|
200
|
-
)
|
201
|
-
if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
|
202
|
-
self.append_subject_to_endpoint_df(subject_df)
|
203
|
-
self.remove_subject_from_working_df(row)
|
204
|
-
|
205
106
|
self.post_check_endpoint()
|
206
107
|
self.merge_with_final_endpoints()
|
207
108
|
|
208
|
-
|
209
|
-
def glucose_fbg_df(self) -> pd.DataFrame:
|
210
|
-
"""Returns a prepared Dataframe of CRF
|
211
|
-
meta_subject.glucosefbg.
|
212
|
-
|
213
|
-
Note: meta_subject.glucosefbg has only FBG measures.
|
214
|
-
"""
|
215
|
-
if self._glucose_fbg_df.empty:
|
216
|
-
df = get_crf(
|
217
|
-
model="meta_subject.glucosefbg",
|
218
|
-
subject_identifiers=self.subject_identifiers,
|
219
|
-
# subject_visit_model="meta_subject.subjectvisit",
|
220
|
-
)
|
221
|
-
df["source"] = "meta_subject.glucosefbg"
|
222
|
-
df.rename(columns={"fbg_fasting": "fasting"}, inplace=True)
|
223
|
-
df.loc[(df["fasting"] == "fasting"), "fasting"] = YES
|
224
|
-
df.loc[(df["fasting"] == "non_fasting"), "fasting"] = NO
|
225
|
-
df = calculate_fasting_hrs(df)
|
226
|
-
# df = df[[col for col in self.keep_cols if not col.startswith("ogtt")]]
|
227
|
-
df = df.reset_index(drop=True)
|
228
|
-
self._glucose_fbg_df = df
|
229
|
-
return self._glucose_fbg_df
|
230
|
-
|
231
|
-
@property
|
232
|
-
def glucose_fbg_ogtt_df(self):
|
233
|
-
"""Returns a prepared Dataframe of CRF meta_subject.glucose.
|
234
|
-
|
235
|
-
Note: meta_subject.glucose has FBG and OGTT measures.
|
236
|
-
"""
|
237
|
-
if self._glucose_fbg_ogtt_df.empty:
|
238
|
-
df = get_crf(
|
239
|
-
model="meta_subject.glucose",
|
240
|
-
subject_identifiers=self.subject_identifiers,
|
241
|
-
# subject_visit_model="meta_subject.subjectvisit",
|
242
|
-
)
|
243
|
-
df["source"] = "meta_subject.glucose"
|
244
|
-
df = calculate_fasting_hrs(df)
|
245
|
-
# df = df[self.keep_cols]
|
246
|
-
df = df.reset_index(drop=True)
|
247
|
-
self._glucose_fbg_ogtt_df = df
|
248
|
-
return self._glucose_fbg_ogtt_df
|
249
|
-
|
250
|
-
def merge_with_consent(self):
|
251
|
-
"""Merge in consent DF."""
|
252
|
-
df_consent = get_subject_consent(
|
253
|
-
"meta_consent.subjectconsent", subject_identifiers=self.subject_identifiers
|
254
|
-
)
|
255
|
-
self.df = pd.merge(
|
256
|
-
self.df, df_consent, on="subject_identifier", how="left", suffixes=("", "_y")
|
257
|
-
)
|
258
|
-
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
259
|
-
self.df = self.df.reset_index(drop=True)
|
260
|
-
|
261
|
-
def merge_with_eos(self):
|
262
|
-
"""Merge in EoS DF.
|
263
|
-
|
264
|
-
Drops patients who were taken off study by late exclusion.
|
265
|
-
"""
|
266
|
-
df_eos = get_eos("meta_prn.endofstudy", subject_identifiers=self.subject_identifiers)
|
267
|
-
df_eos = df_eos[
|
268
|
-
df_eos["offstudy_reason"]
|
269
|
-
!= (
|
270
|
-
"Patient fulfilled late exclusion criteria (due to abnormal blood "
|
271
|
-
"values or raised blood pressure at enrolment"
|
272
|
-
)
|
273
|
-
]
|
274
|
-
self.df = pd.merge(
|
275
|
-
self.df, df_eos, on="subject_identifier", how="left", suffixes=("", "_y")
|
276
|
-
)
|
277
|
-
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
278
|
-
self.df = self.df.reset_index(drop=True)
|
279
|
-
|
280
|
-
def add_calculated_days_from_baseline_to_event_columns(self):
|
281
|
-
"""Add columns that calculate number of days from
|
282
|
-
baseline to visit, fbg, and ogtt.
|
283
|
-
"""
|
284
|
-
self.df["visit_days"] = np.nan
|
285
|
-
self.df["fbg_days"] = np.nan
|
286
|
-
self.df["ogtt_days"] = np.nan
|
287
|
-
self.df["test"] = np.nan
|
288
|
-
self.df["visit_days"] = (
|
289
|
-
self.df["visit_datetime"] - self.df["baseline_datetime"]
|
290
|
-
).dt.days
|
291
|
-
if not self.df["fbg_datetime"].isnull().all():
|
292
|
-
self.df["fbg_days"] = (
|
293
|
-
self.df["fbg_datetime"] - self.df["baseline_datetime"]
|
294
|
-
).dt.days
|
295
|
-
if not self.df["ogtt_datetime"].isnull().all():
|
296
|
-
self.df["ogtt_days"] = (
|
297
|
-
self.df["ogtt_datetime"] - self.df["baseline_datetime"]
|
298
|
-
).dt.days
|
299
|
-
self.df["visit_days"] = pd.to_numeric(self.df["visit_days"], downcast="integer")
|
300
|
-
self.df["fbg_days"] = pd.to_numeric(self.df["fbg_days"], downcast="integer")
|
301
|
-
self.df["ogtt_days"] = pd.to_numeric(self.df["ogtt_days"], downcast="integer")
|
302
|
-
self.df = self.df.reset_index(drop=True)
|
303
|
-
|
304
|
-
def pre_check_endpoint(self):
|
109
|
+
def process_by_ogtt_only(self):
|
305
110
|
"""Flag subjects that met endpoint by hitting the OGTT
|
306
111
|
threshold.
|
307
112
|
|
@@ -322,15 +127,17 @@ class GlucoseEndpointsByDate:
|
|
322
127
|
"""
|
323
128
|
subject_endpoint_df = self.working_df.loc[
|
324
129
|
(self.working_df["ogtt_value"] >= self.ogtt_threshhold)
|
325
|
-
& (self.working_df["
|
130
|
+
& (self.working_df["ogtt_value"] < 9999.99)
|
131
|
+
& (self.working_df["fasted"] == YES)
|
326
132
|
& (self.working_df["fbg_value"].notna())
|
327
133
|
].copy()
|
328
|
-
|
329
|
-
subject_endpoint_df =
|
330
|
-
|
331
|
-
|
134
|
+
|
135
|
+
subject_endpoint_df = (
|
136
|
+
subject_endpoint_df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
137
|
+
.reset_index(drop=True)
|
138
|
+
.drop_duplicates(subset=["subject_identifier"], keep="first")
|
139
|
+
.reset_index(drop=True)
|
332
140
|
)
|
333
|
-
subject_endpoint_df = subject_endpoint_df.reset_index(drop=True)
|
334
141
|
if not subject_endpoint_df.empty:
|
335
142
|
# flag the selected endpoint rows as endpoints
|
336
143
|
subject_endpoint_df["endpoint"] = 1
|
@@ -372,8 +179,7 @@ class GlucoseEndpointsByDate:
|
|
372
179
|
self.endpoint_df = pd.concat([self.endpoint_df, subject_df])
|
373
180
|
self.endpoint_df = self.endpoint_df.sort_values(
|
374
181
|
by=["subject_identifier", "visit_code"]
|
375
|
-
)
|
376
|
-
self.endpoint_df = self.endpoint_df.reset_index(drop=True)
|
182
|
+
).reset_index(drop=True)
|
377
183
|
|
378
184
|
def remove_subject_from_working_df(self, row: pd.Series) -> None:
|
379
185
|
"""Removes one subject from the working DF given a Series with
|
@@ -383,8 +189,7 @@ class GlucoseEndpointsByDate:
|
|
383
189
|
index=self.working_df[
|
384
190
|
self.working_df["subject_identifier"] == row["subject_identifier"]
|
385
191
|
].index
|
386
|
-
)
|
387
|
-
self.working_df = self.working_df.reset_index(drop=True)
|
192
|
+
).reset_index(drop=True)
|
388
193
|
|
389
194
|
def remove_subjects_from_working_df(self, rows: pd.DataFrame) -> None:
|
390
195
|
"""Removes subjects from the working DF given a DF with
|
@@ -394,8 +199,7 @@ class GlucoseEndpointsByDate:
|
|
394
199
|
index=self.working_df.loc[
|
395
200
|
self.working_df["subject_identifier"].isin(rows["subject_identifier"])
|
396
201
|
].index
|
397
|
-
)
|
398
|
-
self.working_df = self.working_df.reset_index(drop=True)
|
202
|
+
).reset_index(drop=True)
|
399
203
|
|
400
204
|
def get_subject_df(self, subject_identifier: str) -> pd.DataFrame:
|
401
205
|
subject_df = self.working_df.loc[
|
@@ -407,22 +211,27 @@ class GlucoseEndpointsByDate:
|
|
407
211
|
subject_df["endpoint"] = 0
|
408
212
|
subject_df = subject_df[endpoint_columns]
|
409
213
|
subject_df = subject_df.sort_values(["subject_identifier", "fbg_datetime"])
|
214
|
+
subject_df[[col for col in subject_df if "value" in col]] = subject_df[
|
215
|
+
[col for col in subject_df if "value" in col]
|
216
|
+
].fillna(0.0)
|
217
|
+
|
410
218
|
subject_df = subject_df.reset_index(drop=True)
|
411
219
|
return subject_df
|
412
220
|
|
413
221
|
def check_endpoint_by_fbg_for_subject(
|
414
222
|
self, subject_df: pd.DataFrame, case_list: list[int] | None = None
|
415
223
|
) -> pd.DataFrame:
|
416
|
-
case_list = case_list or [2, 3]
|
417
224
|
endpoint = self.endpoint_cls(
|
418
225
|
subject_df=subject_df,
|
419
226
|
fbg_threshhold=self.fbg_threshhold,
|
420
227
|
ogtt_threshhold=self.ogtt_threshhold,
|
421
|
-
case_list=case_list,
|
422
228
|
)
|
423
229
|
return endpoint.subject_df
|
424
230
|
|
425
231
|
def post_check_endpoint(self):
|
232
|
+
"""Add any who were taken off study before endpoint guidelines
|
233
|
+
were clearly defined.
|
234
|
+
"""
|
426
235
|
df_eos = self.working_df.loc[
|
427
236
|
self.working_df["offstudy_reason"] == "Patient developed diabetes"
|
428
237
|
].copy()
|
@@ -512,7 +321,7 @@ class GlucoseEndpointsByDate:
|
|
512
321
|
fbg_value=(None if pd.isna(row["fbg_value"]) else row["fbg_value"]),
|
513
322
|
ogtt_value=None if pd.isna(row["ogtt_value"]) else row["ogtt_value"],
|
514
323
|
fbg_date=(None if pd.isna(row["fbg_datetime"]) else row["fbg_datetime"]),
|
515
|
-
fasting=(None if pd.isna(row["
|
324
|
+
fasting=(None if pd.isna(row["fasted"]) else row["fasted"]),
|
516
325
|
endpoint_label=(
|
517
326
|
None if pd.isna(row["endpoint_label"]) else row["endpoint_label"]
|
518
327
|
),
|
@@ -1,4 +1,6 @@
|
|
1
|
+
import numpy as np
|
1
2
|
import pandas as pd
|
3
|
+
from edc_constants.constants import NO
|
2
4
|
|
3
5
|
from .constants import endpoint_columns
|
4
6
|
|
@@ -59,7 +61,19 @@ def get_unique_visit_codes(df: pd.DataFrame) -> pd.DataFrame:
|
|
59
61
|
|
60
62
|
|
61
63
|
def get_unique_subject_identifiers(df: pd.DataFrame) -> pd.DataFrame:
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
64
|
+
return (
|
65
|
+
pd.DataFrame(df["subject_identifier"].unique(), columns=["subject_identifier"])
|
66
|
+
.sort_values(["subject_identifier"])
|
67
|
+
.reset_index()
|
68
|
+
)
|
69
|
+
|
70
|
+
|
71
|
+
def calculate_fasting_hrs(df: pd.DataFrame) -> pd.DataFrame:
|
72
|
+
df.loc[(df["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
|
73
|
+
if df.empty:
|
74
|
+
df["fasting_hrs"] = np.nan
|
75
|
+
else:
|
76
|
+
df["fasting_hrs"] = df["fasting_duration_delta"].apply(
|
77
|
+
lambda s: np.nan if pd.isna(s) else s.total_seconds() / 3600
|
78
|
+
)
|
79
|
+
return df
|