glucose360 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glucose360/__init__.py +1 -0
- glucose360/events.py +680 -0
- glucose360/features.py +1042 -0
- glucose360/plots.py +494 -0
- glucose360/preprocessing.py +558 -0
- glucose360-0.0.1.dist-info/LICENSE +674 -0
- glucose360-0.0.1.dist-info/METADATA +34 -0
- glucose360-0.0.1.dist-info/RECORD +10 -0
- glucose360-0.0.1.dist-info/WHEEL +5 -0
- glucose360-0.0.1.dist-info/top_level.txt +1 -0
glucose360/features.py
ADDED
@@ -0,0 +1,1042 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
import configparser
|
4
|
+
from multiprocessing import Pool
|
5
|
+
import os
|
6
|
+
from scipy.integrate import trapezoid
|
7
|
+
|
8
|
+
# Load the package configuration that sits next to this module; it stores the
# column names used for the patient id, glucose values and timestamps.
dir_path = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(dir_path, "config.ini")

config = configparser.ConfigParser()
config.read(config_path)

# Column names shared by every metric function below.
ID, GLUCOSE, TIME = (config['variables'][key] for key in ('id', 'glucose', 'time'))
|
15
|
+
|
16
|
+
"""
|
17
|
+
All of the metric-calculating functions are designed for DataFrames that contain only one patient's data.
|
18
|
+
For example, if 'df' is the outputted DataFrame from 'import_data()', 'LBGI(df)' would not be accurate.
|
19
|
+
Instead, do 'LBGI(df.loc[PATIENT_ID])'.
|
20
|
+
"""
|
21
|
+
|
22
|
+
def mean(df: pd.DataFrame) -> float:
    """Computes the average glucose value of a CGM trace.

    :param df: a Pandas DataFrame holding one patient's preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the arithmetic mean of the glucose column
    :rtype: float
    """
    glucose_values = df[GLUCOSE]
    return glucose_values.mean()
|
31
|
+
|
32
|
+
def summary_stats(df: pd.DataFrame) -> list[float]:
    """Computes the five-number summary of a CGM trace.

    :param df: a Pandas DataFrame holding one patient's preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: [minimum, first quartile, median, third quartile, maximum] of the glucose column
    :rtype: list[float]
    """
    glucose_values = df[GLUCOSE]
    lower_quartile = glucose_values.quantile(0.25)
    upper_quartile = glucose_values.quantile(0.75)

    return [
        glucose_values.min(),
        lower_quartile,
        glucose_values.median(),
        upper_quartile,
        glucose_values.max(),
    ]
|
47
|
+
|
48
|
+
def SD(df: pd.DataFrame) -> float:
    """Computes the standard deviation of the glucose values in a CGM trace.

    :param df: a Pandas DataFrame holding one patient's preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the standard deviation of the glucose column (pandas default ``ddof``)
    :rtype: float
    """
    glucose_values = df[GLUCOSE]
    return glucose_values.std()
|
57
|
+
|
58
|
+
def CV(df: pd.DataFrame) -> float:
    """Computes the coefficient of variation (CV) of a CGM trace, as a percentage.

    :param df: a Pandas DataFrame holding one patient's preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: standard deviation divided by mean glucose, multiplied by 100
    :rtype: float
    """
    spread = SD(df)
    center = mean(df)
    return spread / center * 100
|
67
|
+
|
68
|
+
def eA1c(df: pd.DataFrame) -> float:
    """Computes the estimated A1c (eA1c) of a CGM trace.

    :param df: a Pandas DataFrame holding one patient's preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: (46.7 + mean glucose) / 28.7
    :rtype: float
    """
    average_glucose = mean(df)
    return (46.7 + average_glucose) / 28.7
|
77
|
+
|
78
|
+
def GMI(df: pd.DataFrame) -> float:
    """Computes the Glucose Management Indicator (GMI) of a CGM trace.

    :param df: a Pandas DataFrame holding one patient's preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: 0.02392 * mean glucose + 3.31
    :rtype: float
    """
    average_glucose = mean(df)
    return (0.02392 * average_glucose) + 3.31
|
87
|
+
|
88
|
+
def percent_time_in_range(df: pd.DataFrame, low: int = 70, high: int = 180) -> float:
    """Computes the share of non-missing readings that lie within [low, high].

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param low: inclusive lower bound of the range, defaults to 70
    :type low: int, optional
    :param high: inclusive upper bound of the range, defaults to 180
    :type high: int, optional
    :return: percentage (0-100) of non-missing glucose readings inside the bounds,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    total_count = len(readings)
    if total_count == 0:
        return np.nan

    # Both bounds are inclusive, matching `low <= value <= high`.
    inside_count = int(readings.between(low, high).sum())
    return 100 * inside_count / total_count
|
105
|
+
|
106
|
+
def percent_time_in_tight_range(df: pd.DataFrame):
    """Computes the share of readings inside the tight target range of 70-140 mg/dL (inclusive).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: percentage of non-missing readings within 70-140 mg/dL (inclusive)
    :rtype: float
    """
    tight_low, tight_high = 70, 140
    return percent_time_in_range(df, low=tight_low, high=tight_high)
|
115
|
+
|
116
|
+
def percent_time_above_range(df: pd.DataFrame, limit: int = 180) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were at or above a given threshold

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: the (inclusive) threshold for calculating the percent time above, defaults to 180
    :type limit: int, optional
    :return: the percentage of non-missing readings that are greater than or equal to the threshold,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    # No finite upper cap: a fixed ceiling (previously 400) would drop very high
    # readings from the numerator while still counting them in the denominator.
    return percent_time_in_range(df, low=limit, high=np.inf)
|
127
|
+
|
128
|
+
def percent_time_below_range(df: pd.DataFrame, limit: int = 70) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were at or below a given threshold

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: the (inclusive) threshold for calculating the percent time below, defaults to 70
    :type limit: int, optional
    :return: the percentage of non-missing readings that are less than or equal to the threshold,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    # No finite lower floor: a fixed floor (previously 40) would drop very low
    # readings from the numerator while still counting them in the denominator.
    return percent_time_in_range(df, low=-np.inf, high=limit)
|
139
|
+
|
140
|
+
def percent_time_in_hypoglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate hypoglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly below 70 mg/dL,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    if readings.empty:
        return np.nan
    # Strict comparison: a reading of exactly 70 belongs to the 70-180 target
    # range and must not be counted as hypoglycemia as well.
    return 100 * int((readings < 70).sum()) / len(readings)
|
150
|
+
|
151
|
+
def percent_time_in_level_1_hypoglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate level 1 hypoglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings with 54 <= glucose < 70 mg/dL,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    if readings.empty:
        return np.nan
    # Half-open interval so non-integer readings between 69 and 70 are not
    # left unclassified; identical to 54..69 for integer-valued data.
    in_level = (readings >= 54) & (readings < 70)
    return 100 * int(in_level.sum()) / len(readings)
|
161
|
+
|
162
|
+
def percent_time_in_level_2_hypoglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate level 2 hypoglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly below 54 mg/dL,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    if readings.empty:
        return np.nan
    # Strict `< 54` (rather than `40 <= x <= 53`) also counts non-integer
    # readings between 53 and 54 and any reading below 40.
    return 100 * int((readings < 54).sum()) / len(readings)
|
172
|
+
|
173
|
+
def percent_time_in_hyperglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate hyperglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly above 180 mg/dL,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    if readings.empty:
        return np.nan
    # Strict comparison: a reading of exactly 180 belongs to the 70-180 target
    # range; there is deliberately no upper cap on the readings counted.
    return 100 * int((readings > 180).sum()) / len(readings)
|
183
|
+
|
184
|
+
def percent_time_in_level_0_hyperglycemia(df: pd.DataFrame) -> float:
    """Computes the share of readings in the level 0 hyperglycemia band (140-180 mg/dL, inclusive).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: percentage of non-missing readings within 140-180 mg/dL (inclusive)
    :rtype: float
    """
    band_low, band_high = 140, 180
    return percent_time_in_range(df, band_low, band_high)
|
194
|
+
|
195
|
+
def percent_time_in_level_1_hyperglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate level 1 hyperglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings with 180 < glucose <= 250 mg/dL,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    if readings.empty:
        return np.nan
    # 250 belongs to level 1 (level 2 is strictly above 250), which also matches
    # the 181-250 band that GRI() uses for "high".
    in_level = (readings > 180) & (readings <= 250)
    return 100 * int(in_level.sum()) / len(readings)


def percent_time_in_level_2_hyperglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate level 2 hyperglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly above 250 mg/dL,
        or NaN when the trace has no non-missing readings
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    if readings.empty:
        return np.nan
    # Strictly above 250 with no upper cap, so level 1 + level 2 always equals
    # the total time above 180 mg/dL.
    return 100 * int((readings > 250).sum()) / len(readings)
|
216
|
+
|
217
|
+
def ADRR(df: pd.DataFrame) -> float:
    """Calculates the Average Daily Risk Range (ADRR) for the given CGM trace.

    Every glucose value is mapped to a symmetrized risk value with BG_formula();
    the low-side risk is 10 * min(risk, 0)^2 and the high-side risk is
    10 * max(risk, 0)^2. For each calendar day the maximum low-side and maximum
    high-side risks are added, and those daily sums are averaged.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the ADRR for the given CGM trace
    :rtype: float
    """
    data = df.dropna(subset=[GLUCOSE]).copy()

    # Group by calendar date of each reading
    data['date'] = pd.to_datetime(data[TIME]).dt.date

    # Use the same risk transform and factor of 10 as LBGI()/HBGI(); the
    # combined constant is 10 * 1.509^2 (about 22.77), which the previously
    # hard-coded 22.7 only approximated.
    risk = BG_formula(data[GLUCOSE])
    data['left'] = 10 * np.minimum(risk, 0) ** 2
    data['right'] = 10 * np.maximum(risk, 0) ** 2

    daily_max = data.groupby('date')[['left', 'right']].max()
    return (daily_max['left'] + daily_max['right']).mean()
|
236
|
+
|
237
|
+
def BG_formula(ser: pd.Series) -> pd.Series:
    """Applies the blood-glucose risk transform used by LBGI() and HBGI() to each value.

    Each value x is mapped to 1.509 * (ln(x)^1.084 - 5.381).

    :param ser: a Pandas Series of glucose values
    :type ser: 'pandas.Series'
    :return: a Pandas Series holding the transformed value for every element of 'ser'
    :rtype: 'pandas.Series'
    """
    log_glucose = np.log(ser)
    return 1.509 * (log_glucose ** 1.084 - 5.381)
|
246
|
+
|
247
|
+
def LBGI(df: pd.DataFrame) -> float:
    """Computes the Low Blood Glucose Index (LBGI) of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: mean of 10 * risk^2 over the trace, where only negative (low-side)
        BG_formula() values contribute
    :rtype: float
    """
    risk = BG_formula(df[GLUCOSE])
    low_side_risk = np.minimum(0, risk)  # positive risk values are clamped to 0
    return np.mean(10 * (low_side_risk ** 2))
|
257
|
+
|
258
|
+
def HBGI(df: pd.DataFrame) -> float:
    """Computes the High Blood Glucose Index (HBGI) of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: mean of 10 * risk^2 over the trace, where only positive (high-side)
        BG_formula() values contribute
    :rtype: float
    """
    risk = BG_formula(df[GLUCOSE])
    high_side_risk = np.maximum(0, risk)  # negative risk values are clamped to 0
    return np.mean(10 * (high_side_risk ** 2))
|
268
|
+
|
269
|
+
def COGI(df: pd.DataFrame) -> float:
    """Computes the Continuous Glucose Monitoring Index (COGI) of a CGM trace.

    The index is the sum of three weighted components: time in range (weight 0.5),
    time in 0-70 mg/dL (weight 0.35, scored 100 at 0% down to 0 at 15% or more),
    and standard deviation (weight 0.15, scored 100 at 18 or less down to 0 at 108 or more).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the COGI for the given CGM trace
    :rtype: float
    """
    # Time-in-range component
    tir_component = 0.5 * percent_time_in_range(df)

    # Time-below-range component, saturating at 15%
    tbr = percent_time_in_range(df, 0, 70)
    tbr_component = 0.35 * ((1 - (np.minimum(tbr, 15) / 15)) * 100)

    # Variability component, linear between SD = 18 and SD = 108
    sd = SD(df)
    if sd >= 108:
        sd_points = 0
    elif sd > 18:
        sd_points = (1 - ((sd-18) / 90)) * 100
    else:
        sd_points = 100
    sd_component = sd_points * 0.15

    return tir_component + tbr_component + sd_component
|
293
|
+
|
294
|
+
def GRADE_formula(df: pd.DataFrame) -> pd.DataFrame:
    """Builds the per-reading scores needed for the Glycaemic Risk Assessment
    Diabetes Equation (GRADE) metrics.

    Each glucose value g is scored as 425 * (log10(log10(g / 18)) + 0.16)^2.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: a DataFrame with a copy of the glucose column and a "GRADE" column of per-reading scores
    :rtype: 'pandas.DataFrame'
    """
    glucose_values = df[GLUCOSE]
    scores = ((np.log10(np.log10(glucose_values / 18)) + 0.16) ** 2) * 425
    return pd.DataFrame({GLUCOSE: glucose_values.copy(), "GRADE": scores})
|
307
|
+
|
308
|
+
def GRADE_eugly(df: pd.DataFrame) -> float:
    """Computes the percentage of the total GRADE score contributed by readings
    in the target range (70-140 mg/dL, inclusive).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the euglycemic GRADE for the given CGM trace
    :rtype: float
    """
    grades = GRADE_formula(df)
    in_target = (grades[GLUCOSE] >= 70) & (grades[GLUCOSE] <= 140)
    target_total = np.sum(grades[in_target]["GRADE"])
    return target_total / np.sum(grades["GRADE"]) * 100
|
319
|
+
|
320
|
+
def GRADE_hypo(df: pd.DataFrame) -> float:
    """Computes the percentage of the total GRADE score contributed by readings
    in the hypoglycemic range (<70 mg/dL).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the hypoglycemic GRADE for the given CGM trace
    :rtype: float
    """
    grades = GRADE_formula(df)
    is_low = grades[GLUCOSE] < 70
    low_total = np.sum(grades[is_low]["GRADE"])
    return low_total / np.sum(grades["GRADE"]) * 100
|
331
|
+
|
332
|
+
def GRADE_hyper(df: pd.DataFrame) -> float:
    """Computes the percentage of the total GRADE score contributed by readings
    in the hyperglycemic range (>140 mg/dL).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the hyperglycemic GRADE for the given CGM trace
    :rtype: float
    """
    grades = GRADE_formula(df)
    is_high = grades[GLUCOSE] > 140
    high_total = np.sum(grades[is_high]["GRADE"])
    return high_total / np.sum(grades["GRADE"]) * 100
|
343
|
+
|
344
|
+
def GRADE(df: pd.DataFrame) -> float:
    """Computes the Glycaemic Risk Assessment Diabetes Equation (GRADE) of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of the per-reading GRADE scores produced by GRADE_formula()
    :rtype: float
    """
    per_reading_scores = GRADE_formula(df)["GRADE"]
    return per_reading_scores.mean()
|
354
|
+
|
355
|
+
def GRI(df: pd.DataFrame) -> float:
    """Computes the Glycemia Risk Index (GRI) of a CGM trace.

    The index weights the percent time in four bands (0-53, 54-69, 181-250 and
    251-500 mg/dL, all inclusive) by 3, 2.4, 0.8 and 1.6 respectively, and is capped at 100.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the GRI for the given CGM trace
    :rtype: float
    """
    very_low_pct = percent_time_in_range(df, 0, 53)
    low_pct = percent_time_in_range(df, 54, 69)
    high_pct = percent_time_in_range(df, 181, 250)
    very_high_pct = percent_time_in_range(df, 251, 500)

    weighted_sum = (3 * very_low_pct) + (2.4 * low_pct) + (0.8 * high_pct) + (1.6 * very_high_pct)
    return min(weighted_sum, 100)
|
369
|
+
|
370
|
+
def GVP(df: pd.DataFrame) -> float:
    """Calculates the Glucose Variability Percentage (GVP) for the given CGM trace.

    The trace is treated as a polyline with time (minutes) on the x-axis and
    glucose on the y-axis. L is the length of that polyline over consecutive
    non-missing readings and L_0 is the length of a flat line over the same
    number of steps; GVP is the percentage by which L exceeds L_0.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: (L / L_0 - 1) * 100, or NaN when fewer than two non-missing readings exist
    :rtype: float
    """
    # Spacing between consecutive samples comes from the configured resampling
    # interval instead of a hard-coded 5 minutes.
    config.read(config_path)
    delta_x = float(config["variables"]["interval"])

    # Use the configured glucose column name rather than the literal "Glucose".
    glucose = df[GLUCOSE].dropna().reset_index(drop=True)
    delta_y = glucose.diff().dropna()  # n readings -> n - 1 steps
    if delta_y.empty:
        return np.nan

    # L and L_0 must cover the same n - 1 steps; otherwise a perfectly flat
    # trace would not score 0.
    L = np.sqrt((delta_x ** 2) + (delta_y ** 2)).sum()
    L_0 = delta_x * len(delta_y)
    return (L / L_0 - 1) * 100
|
384
|
+
|
385
|
+
def hyper_index(df: pd.DataFrame, limit: int = 140, a: float = 1.1, c: float = 30) -> float:
    """Computes the Hyperglycemia Index of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: upper limit of the target range (readings above it count as hyperglycemia), defaults to 140 mg/dL
    :type limit: int, optional
    :param a: exponent applied to each excess above 'limit', defaults to 1.1
    :type a: float, optional
    :param c: scaling constant in the denominator, defaults to 30
    :type c: float, optional
    :return: sum((glucose - limit)^a over readings above 'limit') / (number of non-missing readings * c)
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    excess = readings[readings > limit] - limit
    return np.sum(np.power(excess, a)) / (readings.size * c)
|
401
|
+
|
402
|
+
def hypo_index(df: pd.DataFrame, limit: int = 80, b: float = 2, d: float = 30) -> float:
    """Computes the Hypoglycemia Index of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: lower limit of the target range (readings below it count as hypoglycemia), defaults to 80 mg/dL
    :type limit: int, optional
    :param b: exponent applied to each shortfall below 'limit', defaults to 2
    :type b: float, optional
    :param d: scaling constant in the denominator, defaults to 30
    :type d: float, optional
    :return: sum((limit - glucose)^b over readings below 'limit') / (number of non-missing readings * d)
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    shortfall = limit - readings[readings < limit]
    return np.sum(np.power(shortfall, b)) / (readings.size * d)
|
418
|
+
|
419
|
+
def IGC(df: pd.DataFrame) -> float:
    """Computes the Index of Glycemic Control (IGC) of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the sum of hyper_index() and hypo_index(), both with their default parameters
    :rtype: float
    """
    hyper_component = hyper_index(df)
    hypo_component = hypo_index(df)
    return hyper_component + hypo_component
|
428
|
+
|
429
|
+
def j_index(df: pd.DataFrame) -> float:
    """Computes the J-Index of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: 0.001 * (mean glucose + standard deviation)^2
    :rtype: float
    """
    mean_plus_sd = mean(df) + SD(df)
    return 0.001 * (mean_plus_sd ** 2)
|
438
|
+
|
439
|
+
def CONGA(df: pd.DataFrame, n: int = 24) -> float:
    """Calculates the Continuous Overall Net Glycemic Action (CONGA) for the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param n: the difference in time (in hours) between observations used to calculate CONGA, defaults to 24
    :type n: int, optional
    :return: the standard deviation of the differences between each reading and the reading 'n' hours earlier
    :rtype: float
    """
    # Re-read the package config by absolute path: a bare 'config.ini' is
    # resolved against the current working directory and silently ignored
    # when it does not exist there, leaving a stale interval.
    config.read(config_path)
    interval = int(config["variables"]["interval"])

    # diff() needs an integer number of rows; round so intervals that do not
    # divide 60 evenly still map to the nearest whole offset.
    period = int(round(n * 60 / interval))
    return np.std(df[GLUCOSE].diff(periods=period))
|
453
|
+
|
454
|
+
# lag is in days
|
455
|
+
def MODD(df: pd.DataFrame, lag: int = 1) -> float:
    """Calculates the Mean Difference Between Glucose Values Obtained at the Same Time of Day (MODD) for the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param lag: the difference in time (in days) between observations used to calculate MODD, defaults to 1
    :type lag: int, optional
    :return: the mean absolute difference between each reading and the reading 'lag' days earlier
    :rtype: float
    """
    # Re-read the package config by absolute path: a bare 'config.ini' is
    # resolved against the current working directory and silently ignored
    # when it does not exist there, leaving a stale interval.
    config.read(config_path)
    interval = int(config["variables"]["interval"])

    # diff() needs an integer number of rows; round so intervals that do not
    # divide 60 evenly still map to the nearest whole offset.
    period = int(round(lag * 24 * 60 / interval))

    return np.mean(np.abs(df[GLUCOSE].diff(periods=period)))
|
470
|
+
|
471
|
+
def mean_absolute_differences(df: pd.DataFrame) -> float:
    """Computes the mean absolute difference between consecutive glucose readings.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of |glucose[i] - glucose[i-1]| over the trace
    :rtype: float
    """
    step_changes = df[GLUCOSE].diff()
    return np.mean(np.abs(step_changes))
|
480
|
+
|
481
|
+
def median_absolute_deviation(df: pd.DataFrame, constant: float = 1.4826) -> float:
    """Computes the scaled Median Absolute Deviation of a CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param constant: factor the median absolute deviation is multiplied by, defaults to 1.4826
    :type constant: float, optional
    :return: constant * median(|glucose - median(glucose)|), with missing values ignored
    :rtype: float
    """
    glucose_values = df[GLUCOSE]
    center = np.nanmedian(glucose_values)
    deviations = np.abs(glucose_values - center)
    return constant * np.nanmedian(deviations)
|
492
|
+
|
493
|
+
def MAG(df: pd.DataFrame) -> float:
    """Calculates the Mean Absolute Glucose (MAG) for the given CGM trace.

    Only readings whose minute and second match those of the first non-missing
    reading are used (one reading per hour); the summed absolute change between
    those readings is divided by how many of them there are.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the MAG for the given CGM trace, or NaN when the trace has no non-missing readings
    :rtype: float
    """
    # Work on a filtered copy: dropping rows in place would silently modify
    # the caller's DataFrame.
    valid = df.dropna(subset=[GLUCOSE])
    if valid.empty:
        return np.nan

    minutes = valid[TIME].dt.minute
    seconds = valid[TIME].dt.second
    same_offset = (minutes == minutes.iloc[0]) & (seconds == seconds.iloc[0])
    data = valid.loc[same_offset, GLUCOSE]

    return np.sum(data.diff().abs()) / data.size
|
504
|
+
|
505
|
+
def MAGE(df: pd.DataFrame, short_ma: int = 5, long_ma: int = 32, max_gap: int = 180) -> float:
    """Calculates the Mean Amplitude of Glycemic Excursions (MAGE) for the given CGM trace.

    When the trace contains runs of missing readings longer than 'max_gap'
    minutes, it is split at those runs, MAGE_helper() is evaluated on each
    segment, and the results are averaged weighted by segment duration.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param short_ma: number of data points utilized to calculate short moving average values, defaults to 5
    :type short_ma: int, optional
    :param long_ma: number of data points utilized to calculate long moving average values, defaults to 32
    :type long_ma: int, optional
    :param max_gap: number of minutes a gap between CGM data points can be without having to split the MAGE calculation into multiple segments, defaults to 180
    :type max_gap: int, optional
    :return: the MAGE for the given CGM trace (NaN if the segments have zero total duration)
    :rtype: float
    """
    data = df.reset_index(drop=True)

    # Re-read the package config by absolute path: a bare 'config.ini' is
    # resolved against the current working directory and silently ignored
    # when it does not exist there, leaving a stale interval.
    config.read(config_path)
    interval = int(config["variables"]["interval"])

    missing = data[GLUCOSE].isnull()
    # label each run of consecutive missing / non-missing values
    groups = missing.ne(missing.shift()).cumsum()
    # length of the run each row belongs to, kept only for missing rows (0 elsewhere)
    size_of_groups = data.groupby([groups, missing])[GLUCOSE].transform('size').where(missing, 0)
    # positions where a missing run longer than max_gap begins
    indexes = size_of_groups[size_of_groups.diff() > (max_gap / interval)].index.tolist()

    if not indexes:  # no gaps in data larger than max_gap
        return MAGE_helper(df, short_ma, long_ma)

    # calculate MAGE per segment and combine them weighted by duration
    boundaries = [0, *indexes, None]
    mage = 0
    total_duration = 0
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        segment = data.iloc[start:stop]
        # drop the leading missing run so the segment starts on a real reading
        segment = segment.loc[segment[GLUCOSE].first_valid_index():].reset_index(drop=True)
        segment_duration = (segment.iloc[-1][TIME] - segment.iloc[0][TIME]).total_seconds()
        total_duration += segment_duration
        # NOTE: MAGE_helper() returns NaN for segments shorter than long_ma,
        # and that NaN propagates into the combined result.
        mage += segment_duration * MAGE_helper(segment, short_ma, long_ma)

    if total_duration == 0:
        return np.nan
    return mage / total_duration
|
544
|
+
|
545
|
+
def MAGE_helper(df: pd.DataFrame, short_ma: int = 5, long_ma: int = 32) -> float:
    """Calculates the Mean Amplitude of Glycemic Excursions (MAGE) for a specific segment of a CGM trace.
    Algorithm for calculating MAGE is based on iglu's implementation, and this method is a helper for the MAGE() function.

    The computation has four stages: (1) short and long trailing moving averages
    of the glucose column are computed; (2) sign changes of (short - long) are
    recorded as crossing points; (3) between consecutive crossings the glucose
    minimum or maximum is taken as a turning point; (4) turning-point differences
    of at least one standard deviation are collected as ascending ("MAGE+") and
    descending ("MAGE-") excursions, and the mean amplitude of one of the two
    sets is returned.

    :param df: a Pandas DataFrame containing preprocessed CGM data without significant gaps (as defined in the MAGE() function)
    :type df: 'pandas.DataFrame'
    :param short_ma: number of data points utilized to calculate short moving average values, defaults to 5
    :type short_ma: int, optional
    :param long_ma: number of data points utilized to calculate long moving average values, defaults to 32
    :type long_ma: int, optional
    :raises Exception: if either span is smaller than 1, or if short_ma is not smaller than long_ma
    :return: the MAGE for the given segment of a CGM trace (NaN when the segment has fewer than long_ma rows)
    :rtype: float
    """
    # Work on a private frame with a 0..n-1 index so positions and labels coincide.
    averages = pd.DataFrame()
    averages[GLUCOSE] = df[GLUCOSE]
    averages.reset_index(drop=True, inplace=True)

    if short_ma < 1 or long_ma < 1:
        raise Exception("Moving average spans must be positive, non-zero integers.")

    if short_ma >= long_ma:
        raise Exception("Short moving average span must be smaller than the long moving average span.")

    # Not enough rows to fill even one long window.
    if averages.shape[0] < long_ma:
        return np.nan

    # calculate rolling means, iglu does right align instead of center
    averages["MA_Short"] = averages[GLUCOSE].rolling(window=short_ma, min_periods=1).mean()
    averages["MA_Long"] = averages[GLUCOSE].rolling(window=long_ma, min_periods=1).mean()

    # Overwrite the first (window - 1) entries of each average with the first
    # full-window value.
    # NOTE(review): this assigns through a chained `averages[col].iloc[...]`;
    # whether the write reaches `averages` depends on pandas' copy-on-write
    # behavior — confirm on the pandas version in use.
    averages["MA_Short"].iloc[:short_ma-1] = averages["MA_Short"].iloc[short_ma-1]
    averages["MA_Long"].iloc[:long_ma-1] = averages["MA_Long"].iloc[long_ma-1]
    averages["DELTA_SL"] = averages["MA_Short"] - averages["MA_Long"]

    # get crossing points
    glu = lambda i: averages[GLUCOSE].iloc[i]  # glucose at position i
    average = lambda i: averages["DELTA_SL"].iloc[i]  # short-minus-long average at position i
    # The first row always opens the list; its type comes from the sign of DELTA_SL there.
    crosses_list = [{"location": 0, "type": np.where(average(0) > 0, "peak", "nadir")}]

    for index in range(1, averages.shape[0]):
        current_actual = glu(index)
        current_average = average(index)
        previous_actual = glu(index-1)
        previous_average = average(index-1)

        if not (np.isnan(current_actual) or np.isnan(previous_actual) or np.isnan(current_average) or np.isnan(previous_average)):
            # DELTA_SL changed sign between two consecutive valid rows
            if current_average * previous_average < 0:
                type = np.where(current_average < previous_average, "nadir", "peak")
                crosses_list.append({"location": index, "type": type})
        # NOTE(review): this branch is read as pairing with the NaN check above
        # (sign change relative to the last recorded crossing when a neighbor
        # is missing); the nesting should be confirmed against upstream.
        elif (not np.isnan(current_average) and (current_average * average(crosses_list[-1]["location"]) < 0)):
            prev_delta = average(crosses_list[-1]["location"])
            type = np.where(current_average < prev_delta, "nadir", "peak")
            crosses_list.append({"location": index, "type": type})

    # Closing sentinel with no location, typed by the sign of the last DELTA_SL value.
    crosses_list.append({"location": None, "type": np.where(average(-1) > 0, "peak", "nadir")})
    crosses = pd.DataFrame(crosses_list)

    # One turning point per interval between consecutive crossings.
    num_extrema = crosses.shape[0] - 1
    minmax = np.tile(np.nan, num_extrema)  # glucose value at each turning point
    indexes = pd.Series(np.nan, index=range(num_extrema))  # row label of each turning point

    for index in range(num_extrema):
        # Search starts at the first crossing, afterwards at the previous turning point.
        s1 = int(np.where(index == 0, crosses["location"].iloc[index], indexes.iloc[index-1]))
        s2 = crosses["location"].iloc[index+1]

        # Label-based slice of the glucose column from s1 to s2.
        values = averages[GLUCOSE].loc[s1:s2]
        if crosses["type"].iloc[index] == "nadir":
            minmax[index] = np.min(values)
            indexes.iloc[index] = values.idxmin()
        else:
            minmax[index] = np.max(values)
            indexes.iloc[index] = values.idxmax()

    # differences[a, b] == minmax[b] - minmax[a]
    differences = np.transpose(minmax[:, np.newaxis] - minmax)
    # Excursion threshold: standard deviation of the segment's non-missing glucose values.
    sd = np.std(df[GLUCOSE].dropna())
    N = len(minmax)

    # MAGE+
    # Collect ascending excursions: rises of at least sd that are ended by a
    # fall of more than sd or by the last turning point.
    mage_plus_heights = []
    mage_plus_tp_pairs = []
    j = 0; prev_j = 0
    while j < N:
        # Rise from each turning point in prev_j..j up to turning point j.
        delta = differences[prev_j:j+1,j]

        max_v = np.max(delta)
        i = np.argmax(delta) + prev_j  # turning point the largest rise starts from

        if max_v >= sd:
            for k in range(j, N):
                # Track the highest turning point reached so far.
                if minmax[k] > minmax[j]:
                    j = k
                if (differences[j, k] < (-1 * sd)) or (k == N - 1):
                    max_v = minmax[j] - minmax[i]
                    mage_plus_heights.append(max_v)
                    mage_plus_tp_pairs.append([i, j])

                    prev_j = k
                    j = k
                    break
        else:
            j += 1

    # MAGE-
    # Mirror image of the loop above for descending excursions.
    mage_minus_heights = []
    mage_minus_tp_pairs = []
    j = 0; prev_j = 0
    while j < N:
        # Change from each turning point in prev_j..j to turning point j.
        delta = differences[prev_j:j+1,j]
        min_v = np.min(delta)
        i = np.argmin(delta) + prev_j  # turning point the largest fall starts from

        if min_v <= (-1 * sd):
            for k in range(j, N):
                # Track the lowest turning point reached so far.
                if minmax[k] < minmax[j]:
                    j = k
                if (differences[j, k] > sd) or (k == N - 1):
                    min_v = minmax[j] - minmax[i]
                    mage_minus_heights.append(min_v)
                    mage_minus_tp_pairs.append([i, j, k])

                    prev_j = k
                    j = k
                    break
        else:
            j += 1

    # Use the ascending set when it exists and either no descending excursion
    # exists or the first ascending one ends no later than the first descending
    # one starts; otherwise use the absolute descending amplitudes.
    plus_first = len(mage_plus_heights) > 0 and ((len(mage_minus_heights) == 0) or (mage_plus_tp_pairs[0][1] <= mage_minus_tp_pairs[0][0]))
    return float(np.where(plus_first, np.mean(mage_plus_heights), np.mean(np.absolute(mage_minus_heights))))
|
674
|
+
|
675
|
+
def m_value(df: pd.DataFrame, r: int = 100) -> float:
    """Calculates the M-value for the given CGM trace.

    Every glucose reading is scored as ``|10 * log10(glucose / r)| ** 3`` and the
    scores are averaged over the whole trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param r: a reference value utilized for calculating the M-value, defaults to 100
    :type r: int, optional
    :return: the M-value for the given CGM trace
    :rtype: float
    """
    scaled_log_ratio = 10 * np.log10(df[GLUCOSE] / r)
    cubed_deviation = scaled_log_ratio.abs() ** 3
    return cubed_deviation.mean()
|
686
|
+
|
687
|
+
def ROC(df: pd.DataFrame, timedelta: int = 15) -> pd.Series:
    """Returns a Pandas Series with the rate of change in glucose values at every data point

    The requested timedelta is converted into a whole number of resampled rows.
    The glucose difference across those rows is divided by the time the rows
    actually span, so a timedelta that is not an exact multiple of the resampling
    interval still produces a correct per-minute rate.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param timedelta: difference in time (in minutes) to utilize when calculating differences between data points, defaults to 15
    :type timedelta: int, optional
    :raises ValueError: if 'timedelta' is smaller than the resampling interval
    :return: a Pandas Series with the rate of change in glucose values at every data point
    :rtype: 'pandas.Series'
    """
    # The path is relative to the current working directory; configparser skips
    # files it cannot find, in which case the already-loaded values are used.
    config.read('config.ini')
    interval = int(config["variables"]["interval"])
    if timedelta < interval:
        raise ValueError("Given timedelta must be greater than or equal to the resampling interval.")

    positiondelta = round(timedelta / interval)
    # Divide by the minutes actually covered by 'positiondelta' rows rather than
    # by the requested timedelta, which may not be a multiple of the interval.
    elapsed_minutes = positiondelta * interval
    return df[GLUCOSE].diff(periods=positiondelta) / elapsed_minutes
|
704
|
+
|
705
|
+
def number_readings(df: pd.DataFrame):
    """Counts the non-missing glucose readings in the given CGM trace

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the number of non-null values in the glucose column of 'df'
    """
    glucose_column = df[GLUCOSE]
    return glucose_column.count()
|
707
|
+
|
708
|
+
def FBG(df: pd.DataFrame) -> float:
    """Calculates the fasting blood glucose (FBG) for the given CGM trace.

    For every calendar day, the first six non-missing readings whose timestamp
    has hour 6 (06:00-06:59) are averaged; days with fewer than six such
    readings are skipped. The daily values are then averaged.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of the daily early-morning averages, or NaN if no day qualifies
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    per_day_means = []
    for _, day_rows in readings.groupby('date'):
        # Only the 06:00-06:59 hour is considered, in chronological order
        in_morning_hour = day_rows[TIME].dt.hour == 6
        morning_rows = day_rows[in_morning_hour].sort_values(by=TIME)
        if len(morning_rows) < 6:
            continue
        per_day_means.append(morning_rows.head(6)[GLUCOSE].mean())

    if per_day_means:
        return np.mean(per_day_means)
    return np.nan
|
725
|
+
|
726
|
+
def LSBG(df: pd.DataFrame) -> float:
    """Calculates the Lowest Sleeping Blood Glucose (LSBG).

    For each date d present in the data, the night runs from d 23:30 (inclusive)
    to (d+1) 06:30 (exclusive). Within a night, the mean of every run of six
    consecutive non-missing readings is computed and the smallest of those means
    is kept. Nights with fewer than six readings are skipped. The nightly values
    are averaged over all nights.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the LSBG for the given CGM trace, or NaN if no night qualifies
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    nightly_minima = []
    for night in sorted(readings['date'].unique()):
        midnight = pd.to_datetime(night)
        window_start = midnight + pd.Timedelta(hours=23, minutes=30)
        window_end = midnight + pd.Timedelta(days=1, hours=6, minutes=30)

        in_window = (readings[TIME] >= window_start) & (readings[TIME] < window_end)
        night_rows = readings[in_window].sort_values(by=TIME)
        if len(night_rows) < 6:
            continue

        # Smallest mean over any six consecutive readings of this night
        lowest_six_mean = night_rows[GLUCOSE].rolling(window=6).mean().min()
        if not np.isnan(lowest_six_mean):
            nightly_minima.append(lowest_six_mean)

    if nightly_minima:
        return np.mean(nightly_minima)
    return np.nan
|
760
|
+
|
761
|
+
def mean_24h(df: pd.DataFrame) -> float:
    """Calculates the mean 24-hour glucose, with each 24-hour period running from 23:30 to 23:30.

    For each date d present in the data, the period is d 23:30 (inclusive) to
    (d+1) 23:30 (exclusive). The mean glucose inside every non-empty period is
    computed, and those period means are averaged.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean 24-h BG value for the given CGM trace, or NaN if every period is empty
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    period_offset = pd.Timedelta(hours=23, minutes=30)
    period_length = pd.Timedelta(days=1)

    period_means = []
    for day in sorted(readings['date'].unique()):
        period_start = pd.to_datetime(day) + period_offset
        period_end = period_start + period_length  # next day's 23:30

        in_period = (readings[TIME] >= period_start) & (readings[TIME] < period_end)
        period_rows = readings[in_period]
        if period_rows.empty:
            continue
        period_means.append(period_rows[GLUCOSE].mean())

    if period_means:
        return np.mean(period_means)
    return np.nan
|
790
|
+
|
791
|
+
def mean_24h_auc(df: pd.DataFrame) -> float:
    """Calculates the mean 24-hour AUC (Area Under the Curve) using the trapezoidal rule,
    with each 24-hour period running from 23:30 of one day to 23:30 of the next day.

    For each date d present in the data, the period is d 23:30 (inclusive) to
    (d+1) 23:30 (exclusive). Glucose is integrated over time (in hours, measured
    from the period start) with scipy.integrate.trapezoid. Periods with fewer
    than two readings are skipped. The per-period AUCs are then averaged.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean 24-hour AUC for the given CGM trace (23:30–23:30), or NaN if no period qualifies
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    period_offset = pd.Timedelta(hours=23, minutes=30)
    period_length = pd.Timedelta(days=1)

    period_aucs = []
    for day in sorted(readings['date'].unique()):
        period_start = pd.to_datetime(day) + period_offset
        period_end = period_start + period_length

        in_period = (readings[TIME] >= period_start) & (readings[TIME] < period_end)
        period_rows = readings[in_period].sort_values(by=TIME)

        # A trapezoid needs at least two points
        if len(period_rows) < 2:
            continue

        hours_since_start = (period_rows[TIME] - period_start).dt.total_seconds() / 3600.0
        period_aucs.append(trapezoid(period_rows[GLUCOSE].values, x=hours_since_start))

    if period_aucs:
        return np.mean(period_aucs)
    return np.nan
|
832
|
+
|
833
|
+
def mean_daytime(df: pd.DataFrame) -> float:
    """
    Calculates the mean daytime glucose: for each calendar day, the mean of all
    non-missing readings from 06:30 (inclusive) to 23:30 (exclusive), averaged
    across all days that have at least one such reading.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean daytime glucose level, or NaN if no day has daytime readings
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    per_day_means = []
    for day, day_rows in readings.groupby('date'):
        midnight = pd.to_datetime(day)
        window_start = midnight + pd.Timedelta(hours=6, minutes=30)
        window_end = midnight + pd.Timedelta(hours=23, minutes=30)

        stamps = day_rows[TIME]
        daytime_rows = day_rows[(stamps >= window_start) & (stamps < window_end)]
        if daytime_rows.empty:
            continue
        per_day_means.append(daytime_rows[GLUCOSE].mean())

    if per_day_means:
        return np.mean(per_day_means)
    return np.nan
|
858
|
+
|
859
|
+
def mean_nocturnal(df: pd.DataFrame) -> float:
    """
    Calculates the mean nocturnal glucose: for each date d present in the data,
    the mean of all non-missing readings from d 23:30 (inclusive) to (d+1) 06:30
    (exclusive), averaged across all nights that have at least one reading.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean nocturnal glucose level, or NaN if no night has readings
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    nightly_means = []
    for d in sorted(readings['date'].unique()):
        # Define the nighttime window
        start_period = pd.to_datetime(d) + pd.Timedelta(hours=23, minutes=30)
        end_period = pd.to_datetime(d) + pd.Timedelta(days=1, hours=6, minutes=30)

        # The window crosses midnight, so it must be selected from the whole
        # trace: restricting to rows dated d would drop everything after 00:00.
        in_window = (readings[TIME] >= start_period) & (readings[TIME] < end_period)
        night_df = readings[in_window]

        # If no readings in that interval for this particular night, skip it
        if night_df.empty:
            continue
        nightly_means.append(night_df[GLUCOSE].mean())

    return np.nan if not nightly_means else np.mean(nightly_means)
|
886
|
+
|
887
|
+
def auc_daytime(df: pd.DataFrame) -> float:
    """
    Calculates the mean daytime AUC (Area Under the Curve) of glucose: for each
    calendar day, glucose is integrated with the trapezoidal rule over the
    readings from 06:30 (inclusive) to 23:30 (exclusive), with time measured in
    hours from 06:30. Days with fewer than two daytime readings are skipped.
    The daily AUCs are then averaged.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean daytime AUC, or NaN if no day qualifies
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    per_day_aucs = []
    for day, day_rows in readings.groupby('date'):
        midnight = pd.to_datetime(day)
        window_start = midnight + pd.Timedelta(hours=6, minutes=30)
        window_end = midnight + pd.Timedelta(hours=23, minutes=30)

        stamps = day_rows[TIME]
        in_window = (stamps >= window_start) & (stamps < window_end)
        daytime_rows = day_rows[in_window].sort_values(by=TIME)

        # A trapezoid needs at least two points
        if len(daytime_rows) < 2:
            continue

        hours_since_start = (daytime_rows[TIME] - window_start).dt.total_seconds() / 3600.0
        per_day_aucs.append(trapezoid(daytime_rows[GLUCOSE].values, x=hours_since_start))

    if per_day_aucs:
        return np.mean(per_day_aucs)
    return np.nan
|
921
|
+
|
922
|
+
def nocturnal_auc(df: pd.DataFrame) -> float:
    """
    Calculates the mean nocturnal AUC (Area Under the Curve) of glucose
    between 23:30 and 06:30 for each day, and then averages these nightly AUCs.

    For each date d, we define the nocturnal period as d 23:30 (inclusive) to
    (d+1) 06:30 (exclusive). Glucose is integrated with the trapezoidal rule,
    with time measured in hours from the period start. Nights with fewer than
    two readings are skipped.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean nocturnal AUC, or NaN if no night qualifies
    :rtype: float
    """
    df = df.dropna(subset=[GLUCOSE]).copy()
    df['date'] = df[TIME].dt.date

    daily_aucs = []
    unique_dates = sorted(df['date'].unique())

    for d in unique_dates:
        start_period = pd.to_datetime(d) + pd.Timedelta(hours=23, minutes=30)
        # The night is 7 hours long: anchoring the end at the next day's 06:30
        # (instead of adding 6h30m to 23:30, which stops at 06:00) keeps the
        # final half hour of the night in the integral.
        end_period = pd.to_datetime(d) + pd.Timedelta(days=1, hours=6, minutes=30)

        night_df = df[(df[TIME] >= start_period) & (df[TIME] < end_period)].sort_values(by=TIME)

        if len(night_df) < 2:
            # Not enough data points for integration
            continue

        # Compute time array in hours relative to start_period
        times_in_hours = (night_df[TIME] - start_period).dt.total_seconds() / 3600.0
        glucose_values = night_df[GLUCOSE].values

        # Use scipy's trapezoid to integrate glucose over the nocturnal period
        auc = trapezoid(glucose_values, x=times_in_hours)
        daily_aucs.append(auc)

    return np.nan if not daily_aucs else np.mean(daily_aucs)
|
959
|
+
|
960
|
+
|
961
|
+
def compute_features(id: str, data: pd.DataFrame) -> dict[str, any]:
    """Calculates statistics and metrics for a single patient

    The signature matches the (key, group) pairs produced by a pandas groupby,
    so it can be mapped directly over per-patient groups.

    :param id: the patient to calculate features for
    :type id: str
    :param data: Pandas DataFrame containing the preprocessed CGM data passed to every metric function
    :type data: 'pandas.DataFrame'
    :return: a dictionary (with each key referring to the name of a statistic or metric)
    :rtype: dict[str, any]
    """
    # Indexed positionally below; judging by the keys they feed, the positions
    # are 0=minimum, 1=first quartile, 2=median, 3=third quartile, 4=maximum.
    five_number_summary = summary_stats(data)

    return {
        ID: id,
        "ADRR": ADRR(data),
        "COGI": COGI(data),
        "CONGA": CONGA(data),
        "CV": CV(data),
        "Daytime AUC": auc_daytime(data),
        "eA1c": eA1c(data),
        "FBG": FBG(data),
        "First Quartile": five_number_summary[1],
        "GMI": GMI(data),
        "GRADE": GRADE(data),
        "GRADE (euglycemic)": GRADE_eugly(data),
        "GRADE (hyperglycemic)": GRADE_hyper(data),
        "GRADE (hypoglycemic)": GRADE_hypo(data),
        "GRI": GRI(data),
        "GVP": GVP(data),
        "HBGI": HBGI(data),
        "Hyperglycemia Index": hyper_index(data),
        "Hypoglycemia Index": hypo_index(data),
        "IGC": IGC(data),
        "J-Index": j_index(data),
        "LBGI": LBGI(data),
        "LSBG": LSBG(data),
        "MAG": MAG(data),
        "MAGE": MAGE(data),
        "Maximum": five_number_summary[4],
        "Mean": mean(data),
        "Mean 24h Glucose": mean_24h(data),
        "Mean 24h AUC": mean_24h_auc(data),
        "Mean Absolute Differences": mean_absolute_differences(data),
        "Mean Daytime": mean_daytime(data),
        "Mean Nocturnal": mean_nocturnal(data),
        "Median": five_number_summary[2],
        "Median Absolute Deviation": median_absolute_deviation(data),
        "Minimum": five_number_summary[0],
        "MODD": MODD(data),
        "M-Value": m_value(data),
        "Nocturnal AUC": nocturnal_auc(data),
        "Number of Readings": number_readings(data),
        "Percent Time Above Range (180)": percent_time_above_range(data),
        "Percent Time Below Range (70)": percent_time_below_range(data),
        "Percent Time in Hyperglycemia": percent_time_in_hyperglycemia(data),
        "Percent Time in Hyperglycemia (level 0)": percent_time_in_level_0_hyperglycemia(data),
        "Percent Time in Hyperglycemia (level 1)": percent_time_in_level_1_hyperglycemia(data),
        "Percent Time in Hyperglycemia (level 2)": percent_time_in_level_2_hyperglycemia(data),
        "Percent Time in Hypoglycemia": percent_time_in_hypoglycemia(data),
        "Percent Time in Hypoglycemia (level 1)": percent_time_in_level_1_hypoglycemia(data),
        "Percent Time in Hypoglycemia (level 2)": percent_time_in_level_2_hypoglycemia(data),
        "Percent Time In Range (70-180)": percent_time_in_range(data),
        "Percent Time In Tight Range (70-140)": percent_time_in_tight_range(data),
        "SD": SD(data),
        "Third Quartile": five_number_summary[3],
    }
|
1028
|
+
|
1029
|
+
|
1030
|
+
def create_features(dataset: pd.DataFrame) -> pd.DataFrame:
    """Takes in a multiindexed Pandas DataFrame containing CGM data for multiple patients/datasets, and
    returns a single indexed Pandas DataFrame containing summary metrics in the form of one row per patient/dataset

    The data is grouped by patient identifier and each group is handed to
    'compute_features' in a multiprocessing pool.

    :param dataset: a Pandas DataFrame containing the CGM data to calculate metrics for
    :type dataset: 'pandas.DataFrame'
    :return: a Pandas DataFrame with each row representing a patient in 'dataset' and each column representing a specific statistic or metric
    :rtype: 'pandas.DataFrame'
    """
    # Each group is an (identifier, frame) pair, which starmap unpacks into
    # compute_features(id, data).
    per_patient_groups = dataset.groupby(ID)
    with Pool() as workers:
        feature_rows = workers.starmap(compute_features, per_patient_groups)
    return pd.DataFrame(feature_rows).set_index([ID])
|