glucose360 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
glucose360/features.py ADDED
@@ -0,0 +1,1042 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import configparser
4
+ from multiprocessing import Pool
5
+ import os
6
+ from scipy.integrate import trapezoid
7
+
8
# Resolve config.ini relative to this module's own location rather than the
# current working directory.
dir_path = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(dir_path, "config.ini")
config = configparser.ConfigParser()
config.read(config_path)
# Values from the [variables] section; GLUCOSE and TIME are used below as the
# DataFrame column names holding glucose readings and timestamps.
ID = config['variables']['id']
GLUCOSE = config['variables']['glucose']
TIME = config['variables']['time']
15
+
16
+ """
17
+ All of the metric-calculating functions are designed for DataFrames that contain only one patient's data.
18
+ For example, if 'df' is the outputted DataFrame from 'import_data()', 'LBGI(df)' would not be accurate.
19
+ Instead, do 'LBGI(df.loc[PATIENT_ID])'.
20
+ """
21
+
22
def mean(df: pd.DataFrame) -> float:
    """Computes the average glucose value of the given CGM trace

    :param df: a Pandas DataFrame containing the preprocessed CGM data of a single patient
    :type df: 'pandas.DataFrame'
    :return: the arithmetic mean of the glucose column (missing values are skipped by pandas)
    :rtype: float
    """
    glucose = df[GLUCOSE]
    return glucose.mean()
31
+
32
def summary_stats(df: pd.DataFrame) -> list[float]:
    """Computes the five-number summary of the given CGM trace

    :param df: a Pandas DataFrame containing the preprocessed CGM data of a single patient
    :type df: 'pandas.DataFrame'
    :return: ``[minimum, first quartile, median, third quartile, maximum]`` of the glucose column
    :rtype: list[float]
    """
    glucose = df[GLUCOSE]
    lowest = glucose.min()
    lower_quartile = glucose.quantile(0.25)
    middle = glucose.median()
    upper_quartile = glucose.quantile(0.75)
    highest = glucose.max()
    return [lowest, lower_quartile, middle, upper_quartile, highest]
47
+
48
def SD(df: pd.DataFrame) -> float:
    """Computes the standard deviation of the glucose values in the given CGM trace

    :param df: a Pandas DataFrame containing the preprocessed CGM data of a single patient
    :type df: 'pandas.DataFrame'
    :return: the sample standard deviation (pandas default, ``ddof=1``) of the glucose column
    :rtype: float
    """
    glucose = df[GLUCOSE]
    return glucose.std()
57
+
58
def CV(df: pd.DataFrame) -> float:
    """Computes the coefficient of variation (CV) of the given CGM trace

    :param df: a Pandas DataFrame containing the preprocessed CGM data of a single patient
    :type df: 'pandas.DataFrame'
    :return: the standard deviation divided by the mean, expressed as a percentage
    :rtype: float
    """
    spread = SD(df)
    center = mean(df)
    return spread / center * 100
67
+
68
def eA1c(df: pd.DataFrame) -> float:
    """Computes the estimated A1c (eA1c) of the given CGM trace

    :param df: a Pandas DataFrame containing the preprocessed CGM data of a single patient
    :type df: 'pandas.DataFrame'
    :return: ``(46.7 + mean glucose) / 28.7``
    :rtype: float
    """
    average_glucose = mean(df)
    return (46.7 + average_glucose) / 28.7
77
+
78
def GMI(df: pd.DataFrame) -> float:
    """Computes the Glucose Management Indicator (GMI) of the given CGM trace

    :param df: a Pandas DataFrame containing the preprocessed CGM data of a single patient
    :type df: 'pandas.DataFrame'
    :return: ``0.02392 * mean glucose + 3.31``
    :rtype: float
    """
    average_glucose = mean(df)
    return (0.02392 * average_glucose) + 3.31
87
+
88
def percent_time_in_range(df: pd.DataFrame, low: int = 70, high: int = 180) -> float:
    """Computes the share of non-missing readings that lie between two bounds (both inclusive)

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param low: the lower bound of the acceptable glucose values, defaults to 70
    :type low: int, optional
    :param high: the upper bound of the acceptable glucose values, defaults to 180
    :type high: int, optional
    :return: the percentage of non-missing readings within ``[low, high]``, or NaN when there are no readings
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE])
    total = len(readings)
    if total <= 0:
        return np.nan

    glucose = readings[GLUCOSE]
    inside = readings[(glucose >= low) & (glucose <= high)]
    return 100 * len(inside) / total
105
+
106
def percent_time_in_tight_range(df: pd.DataFrame):
    """Computes the percent of readings within the tight target range of 70-140 mg/dL (inclusive)

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings between 70 and 140 mg/dL (inclusive)
    :rtype: float
    """
    tight_low, tight_high = 70, 140
    return percent_time_in_range(df, tight_low, tight_high)
115
+
116
def percent_time_above_range(df: pd.DataFrame, limit: int = 180) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were at or above a given threshold

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: the threshold for calculating the percent time above, defaults to 180
    :type limit: int, optional
    :return: the percentage of non-missing readings greater than or equal to ``limit``
    :rtype: float
    """
    # No finite upper bound: the previous hard-coded ceiling of 400 silently
    # excluded readings above 400 mg/dL from the "above range" share.
    return percent_time_in_range(df, low=limit, high=np.inf)
127
+
128
def percent_time_below_range(df: pd.DataFrame, limit: int = 70) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were at or below a given threshold

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: the threshold for calculating the percent time below, defaults to 70
    :type limit: int, optional
    :return: the percentage of non-missing readings less than or equal to ``limit``
    :rtype: float
    """
    # No finite lower bound: the previous hard-coded floor of 40 silently
    # excluded readings below 40 mg/dL from the "below range" share.
    return percent_time_in_range(df, low=-np.inf, high=limit)
139
+
140
def percent_time_in_hypoglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate hypoglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly below 70 mg/dL, or NaN when there are no readings
    :rtype: float
    """
    glucose = df[GLUCOSE].dropna()
    total = len(glucose)
    if total == 0:
        return np.nan
    # Strictly below 70: a reading of exactly 70 mg/dL belongs to the default
    # in-range band (70-180 inclusive) and must not be counted twice.
    return 100 * int((glucose < 70).sum()) / total
150
+
151
def percent_time_in_level_1_hypoglycemia(df: pd.DataFrame) -> float:
    """Computes the percent of readings that fall in the level 1 hypoglycemia band

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings between 54 and 69 mg/dL (both inclusive)
    :rtype: float
    """
    band_low, band_high = 54, 69
    return percent_time_in_range(df, band_low, band_high)
161
+
162
def percent_time_in_level_2_hypoglycemia(df: pd.DataFrame) -> float:
    """Computes the percent of readings that fall in the level 2 hypoglycemia band (< 54 mg/dL)

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the result of ``percent_time_below_range`` with a threshold of 53 mg/dL
    :rtype: float
    """
    level_2_ceiling = 53
    return percent_time_below_range(df, level_2_ceiling)
172
+
173
def percent_time_in_hyperglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate hyperglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly above 180 mg/dL, or NaN when there are no readings
    :rtype: float
    """
    glucose = df[GLUCOSE].dropna()
    total = len(glucose)
    if total == 0:
        return np.nan
    # Strictly above 180: a reading of exactly 180 mg/dL belongs to the default
    # in-range band (70-180 inclusive) and must not be counted twice. There is
    # also no upper ceiling, so readings above 400 mg/dL are included.
    return 100 * int((glucose > 180).sum()) / total
183
+
184
def percent_time_in_level_0_hyperglycemia(df: pd.DataFrame) -> float:
    """Computes the percent of readings that fall in the level 0 hyperglycemia band

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings between 140 and 180 mg/dL (both inclusive)
    :rtype: float
    """
    band_low, band_high = 140, 180
    return percent_time_in_range(df, band_low, band_high)
194
+
195
def percent_time_in_level_1_hyperglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate level 1 hyperglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings above 180 and up to (and including) 250 mg/dL,
             or NaN when there are no readings
    :rtype: float
    """
    glucose = df[GLUCOSE].dropna()
    total = len(glucose)
    if total == 0:
        return np.nan
    # 250 mg/dL is the top of level 1 (GRI in this module uses the same
    # 181-250 band); the previous 181-249 band pushed it into level 2.
    in_band = (glucose > 180) & (glucose <= 250)
    return 100 * int(in_band.sum()) / total


def percent_time_in_level_2_hyperglycemia(df: pd.DataFrame) -> float:
    """Returns the percent of total time the given CGM trace's glucose levels were within literature-defined
    ranges that indicate level 2 hyperglycemia

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the percentage of non-missing readings strictly above 250 mg/dL,
             or NaN when there are no readings
    :rtype: float
    """
    glucose = df[GLUCOSE].dropna()
    total = len(glucose)
    if total == 0:
        return np.nan
    # Strictly above 250, with no upper ceiling, so the two levels partition
    # everything above 180 mg/dL without overlap or gaps.
    return 100 * int((glucose > 250).sum()) / total
216
+
217
def ADRR(df: pd.DataFrame) -> float:
    """Calculates the Average Daily Risk Range (ADRR) for the given CGM trace.

    Each reading is mapped to a risk value; for every calendar day the highest
    low-side risk and the highest high-side risk are added, and the daily sums
    are averaged.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the ADRR for the given CGM trace, or NaN when there are no glucose readings
    :rtype: float
    """
    data = df.dropna(subset=[GLUCOSE]).copy()
    if data.empty:
        return np.nan

    # Group by calendar date of each reading
    data['date'] = pd.to_datetime(data[TIME]).dt.date

    bgi = (np.log(data[GLUCOSE]) ** 1.084) - 5.381
    data['right'] = 22.7 * np.maximum(bgi, 0) ** 2  # risk on the high-glucose side
    data['left'] = 22.7 * np.minimum(bgi, 0) ** 2   # risk on the low-glucose side

    # A column-wise max per day replaces groupby.apply with a lambda: it avoids
    # applying a function over the grouping column and the shadowing of `df`.
    daily_max = data.groupby('date')[['left', 'right']].max()
    return (daily_max['left'] + daily_max['right']).mean()
236
+
237
def BG_formula(ser: pd.Series) -> pd.Series:
    """Applies the transform ``1.509 * (ln(x) ** 1.084 - 5.381)`` to every glucose value.

    Used by LBGI() and HBGI() in this module before splitting values into their
    negative and positive parts.

    :param ser: a Pandas Series of glucose values
    :type ser: 'pandas.Series'
    :return: a Series with the transformed value for every entry of ``ser``
    :rtype: 'pandas.Series'
    """
    log_glucose = np.log(ser)
    powered = np.power(log_glucose, 1.084)
    return 1.509 * (powered - 5.381)
246
+
247
def LBGI(df: pd.DataFrame) -> float:
    """Computes the Low Blood Glucose Index (LBGI) of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of ``10 * min(0, BG_formula(glucose)) ** 2`` over the trace
    :rtype: float
    """
    transformed = BG_formula(df[GLUCOSE])
    # keep only the negative (low-glucose) side of the transformed scale
    low_side = np.minimum(0, transformed)
    return np.mean(10 * (low_side ** 2))
257
+
258
def HBGI(df: pd.DataFrame) -> float:
    """Computes the High Blood Glucose Index (HBGI) of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of ``10 * max(0, BG_formula(glucose)) ** 2`` over the trace
    :rtype: float
    """
    transformed = BG_formula(df[GLUCOSE])
    # keep only the positive (high-glucose) side of the transformed scale
    high_side = np.maximum(0, transformed)
    return np.mean(10 * (high_side ** 2))
268
+
269
def COGI(df: pd.DataFrame) -> float:
    """Calculates the Continuous Glucose Monitoring Index (COGI) for the given CGM trace.

    COGI is a weighted sum of three component scores: time in range (50%),
    time below range (35%) and glucose standard deviation (15%).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the COGI for the given CGM trace
    :rtype: float
    """
    tir = percent_time_in_range(df)
    tir_score = 0.5 * tir

    # Time below range is strictly < 70 mg/dL; an inclusive 0-70 band would
    # count readings of exactly 70 both here and in time in range (70-180).
    glucose = df[GLUCOSE].dropna()
    total = len(glucose)
    tbr = (100 * int((glucose < 70).sum()) / total) if total > 0 else np.nan
    # 0% below range scores 100, 15% or more scores 0, linear in between
    tbr_score = 0.35 * ((1 - (np.minimum(tbr, 15) / 15)) * 100)

    sd = SD(df)
    # SD <= 18 scores 100, SD >= 108 scores 0, linear in between
    sd_score = 100
    if sd >= 108:
        sd_score = 0
    elif sd > 18:
        sd_score = (1 - ((sd - 18) / 90)) * 100
    sd_score *= 0.15

    return tir_score + tbr_score + sd_score
293
+
294
def GRADE_formula(df: pd.DataFrame) -> pd.DataFrame:
    """Transforms each glucose value within the given CGM trace as needed to help calculate
    the Glycaemic Risk Assessment Diabetes Equation (GRADE).

    The per-reading score is ``425 * (log10(log10(glucose / 18)) + 0.16) ** 2``,
    capped at 50.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: a DataFrame with the original glucose column and a "GRADE" column holding the per-reading score
    :rtype: 'pandas.DataFrame'
    """
    df_GRADE = pd.DataFrame()
    df_GRADE[GLUCOSE] = df[GLUCOSE].copy()
    # glucose / 18 converts mg/dL to mmol/L before the double logarithm
    raw_score = ((np.log10(np.log10(df[GLUCOSE] / 18)) + 0.16) ** 2) * 425
    # GRADE scores are capped at 50; without the cap, very low readings yield
    # huge (or infinite) values that dominate every downstream mean and sum.
    df_GRADE["GRADE"] = raw_score.clip(upper=50)
    return df_GRADE
307
+
308
def GRADE_eugly(df: pd.DataFrame) -> float:
    """Computes the share of the total GRADE score contributed by readings
    in the target range (70-140 mg/dL, both inclusive).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the euglycemic GRADE contribution, as a percentage of the summed GRADE scores
    :rtype: float
    """
    scored = GRADE_formula(df)
    in_target = (scored[GLUCOSE] >= 70) & (scored[GLUCOSE] <= 140)
    target_total = np.sum(scored[in_target]["GRADE"])
    overall_total = np.sum(scored["GRADE"])
    return target_total / overall_total * 100
319
+
320
def GRADE_hypo(df: pd.DataFrame) -> float:
    """Computes the share of the total GRADE score contributed by readings
    in the hypoglycemic range (< 70 mg/dL).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the hypoglycemic GRADE contribution, as a percentage of the summed GRADE scores
    :rtype: float
    """
    scored = GRADE_formula(df)
    is_low = scored[GLUCOSE] < 70
    low_total = np.sum(scored[is_low]["GRADE"])
    overall_total = np.sum(scored["GRADE"])
    return low_total / overall_total * 100
331
+
332
def GRADE_hyper(df: pd.DataFrame) -> float:
    """Computes the share of the total GRADE score contributed by readings
    in the hyperglycemic range (> 140 mg/dL).

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the hyperglycemic GRADE contribution, as a percentage of the summed GRADE scores
    :rtype: float
    """
    scored = GRADE_formula(df)
    is_high = scored[GLUCOSE] > 140
    high_total = np.sum(scored[is_high]["GRADE"])
    overall_total = np.sum(scored["GRADE"])
    return high_total / overall_total * 100
343
+
344
def GRADE(df: pd.DataFrame) -> float:
    """Computes the Glycaemic Risk Assessment Diabetes Equation (GRADE) score of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of the per-reading GRADE scores produced by GRADE_formula()
    :rtype: float
    """
    per_reading = GRADE_formula(df)["GRADE"]
    return per_reading.mean()
354
+
355
def GRI(df: pd.DataFrame) -> float:
    """Computes the Glycemia Risk Index (GRI) of the given CGM trace.

    The index is a weighted sum of the percent time spent in four bands
    (0-53, 54-69, 181-250 and 251-500 mg/dL, all inclusive), capped at 100.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the GRI for the given CGM trace
    :rtype: float
    """
    very_low_pct = percent_time_in_range(df, 0, 53)
    low_pct = percent_time_in_range(df, 54, 69)
    high_pct = percent_time_in_range(df, 181, 250)
    very_high_pct = percent_time_in_range(df, 251, 500)

    weighted = (3 * very_low_pct) + (2.4 * low_pct) + (0.8 * high_pct) + (1.6 * very_high_pct)
    return min(weighted, 100)
369
+
370
def GVP(df: pd.DataFrame) -> float:
    """Calculates the Glucose Variability Percentage (GVP) for the given CGM trace.

    The trace is treated as a polyline with a fixed horizontal step of 5 between
    consecutive non-missing readings; the result is the polyline's length divided
    by the length of a flat line covering the same steps.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the ratio L / L_0 (1.0 for a perfectly flat trace), or NaN with fewer than two readings
    :rtype: float
    """
    # Use the configured glucose column rather than a hard-coded "Glucose" name.
    glucose = df[GLUCOSE].dropna().reset_index(drop=True)
    if glucose.size < 2:
        return np.nan

    # n readings span n - 1 segments; drop the leading NaN produced by diff().
    delta_y = glucose.diff().iloc[1:]
    delta_x = 5.0  # fixed spacing between readings, as in the original implementation

    L = np.sum(np.sqrt((delta_x ** 2) + (delta_y ** 2)))
    # The baseline must cover the same n - 1 segments as L; counting n of them
    # made a flat trace score below 1.
    L_0 = delta_x * delta_y.size
    # NOTE(review): GVP is often reported as (L / L_0 - 1) * 100; the ratio is
    # kept here so existing callers see the same scale - confirm intended units.
    return L / L_0
384
+
385
def hyper_index(df: pd.DataFrame, limit: int = 140, a: float = 1.1, c: float = 30) -> float:
    """Computes the Hyperglycemia Index of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: upper limit of the target range (readings above it count as hyperglycemia), defaults to 140 mg/dL
    :type limit: int, optional
    :param a: exponent applied to each excursion above ``limit``, defaults to 1.1
    :type a: float, optional
    :param c: scaling constant in the denominator, defaults to 30
    :type c: float, optional
    :return: the sum of ``(glucose - limit) ** a`` over readings above ``limit``, divided by ``number of readings * c``
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    excursions = readings[readings > limit] - limit
    numerator = np.sum(np.power(excursions, a))
    return numerator / (readings.size * c)
401
+
402
def hypo_index(df: pd.DataFrame, limit: int = 80, b: float = 2, d: float = 30) -> float:
    """Computes the Hypoglycemia Index of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param limit: lower limit of the target range (readings below it count as hypoglycemia), defaults to 80 mg/dL
    :type limit: int, optional
    :param b: exponent applied to each excursion below ``limit``, defaults to 2
    :type b: float, optional
    :param d: scaling constant in the denominator, defaults to 30
    :type d: float, optional
    :return: the sum of ``(limit - glucose) ** b`` over readings below ``limit``, divided by ``number of readings * d``
    :rtype: float
    """
    readings = df[GLUCOSE].dropna()
    shortfalls = limit - readings[readings < limit]
    numerator = np.sum(np.power(shortfalls, b))
    return numerator / (readings.size * d)
418
+
419
def IGC(df: pd.DataFrame) -> float:
    """Computes the Index of Glycemic Control (IGC) of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the sum of hyper_index() and hypo_index(), both with their default parameters
    :rtype: float
    """
    high_component = hyper_index(df)
    low_component = hypo_index(df)
    return high_component + low_component
428
+
429
def j_index(df: pd.DataFrame) -> float:
    """Computes the J-Index of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: ``0.001 * (mean + standard deviation) ** 2``
    :rtype: float
    """
    mean_plus_sd = mean(df) + SD(df)
    return 0.001 * (mean_plus_sd ** 2)
438
+
439
def CONGA(df: pd.DataFrame, n: int = 24) -> float:
    """Calculates the Continuous Overall Net Glycemic Action (CONGA) for the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param n: the difference in time (in hours) between observations used to calculate CONGA, defaults to 24
    :type n: int, optional
    :return: the standard deviation of the differences between readings ``n`` hours apart
    :rtype: float
    """
    # NOTE(review): this re-reads 'config.ini' relative to the working directory,
    # not the module directory used at import time - confirm that is intended.
    config.read('config.ini')
    interval = int(config["variables"]["interval"])
    # diff() needs a whole number of rows; round (as ROC does) so intervals
    # that do not divide the window evenly no longer raise.
    period = round(n * 60 / interval)
    return np.std(df[GLUCOSE].diff(periods=period))
453
+
454
+ # lag is in days
455
def MODD(df: pd.DataFrame, lag: int = 1) -> float:
    """Calculates the Mean Difference Between Glucose Values Obtained at the Same Time of Day (MODD) for the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param lag: the difference in time (in days) between observations used to calculate MODD, defaults to 1
    :type lag: int, optional
    :return: the mean absolute difference between readings ``lag`` days apart
    :rtype: float
    """
    # NOTE(review): this re-reads 'config.ini' relative to the working directory,
    # not the module directory used at import time - confirm that is intended.
    config.read('config.ini')
    interval = int(config["variables"]["interval"])
    # diff() needs a whole number of rows; round (as ROC does) so intervals
    # that do not divide a day evenly no longer raise.
    period = round(lag * 24 * 60 / interval)

    return np.mean(np.abs(df[GLUCOSE].diff(periods=period)))
470
+
471
def mean_absolute_differences(df: pd.DataFrame) -> float:
    """Computes the mean absolute difference between consecutive readings of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean of ``|glucose[i] - glucose[i - 1]|`` over the trace
    :rtype: float
    """
    step_changes = df[GLUCOSE].diff()
    return np.mean(np.abs(step_changes))
480
+
481
def median_absolute_deviation(df: pd.DataFrame, constant: float = 1.4826) -> float:
    """Computes the (scaled) Median Absolute Deviation of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param constant: factor to multiply the median absolute deviation by, defaults to 1.4826
    :type constant: float, optional
    :return: ``constant`` times the median of the absolute deviations from the median glucose (NaNs ignored)
    :rtype: float
    """
    glucose = df[GLUCOSE]
    center = np.nanmedian(glucose)
    deviations = np.abs(glucose - center)
    return constant * np.nanmedian(deviations)
492
+
493
def MAG(df: pd.DataFrame) -> float:
    """Calculates the Mean Absolute Glucose (MAG) for the given CGM trace.

    Only readings whose minute and second match those of the first non-missing
    reading are used (one reading per hour); the absolute differences between
    those readings are summed and divided by the number of readings used.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the MAG for the given CGM trace, or NaN when there are no glucose readings
    :rtype: float
    """
    # Work on a filtered copy: dropping rows in place would silently remove
    # them from the caller's DataFrame.
    valid = df.dropna(subset=[GLUCOSE])
    if valid.empty:
        return np.nan

    minutes = valid[TIME].dt.minute
    seconds = valid[TIME].dt.second
    same_offset = (minutes == minutes.iloc[0]) & (seconds == seconds.iloc[0])
    data = valid[same_offset][GLUCOSE]
    return np.sum(data.diff().abs()) / data.size
504
+
505
def MAGE(df: pd.DataFrame, short_ma: int = 5, long_ma: int = 32, max_gap: int = 180) -> float:
    """Calculates the Mean Amplitude of Glycemic Excursions (MAGE) for the given CGM trace.

    When the trace contains no run of missing values longer than ``max_gap``,
    the whole trace is passed to MAGE_helper(). Otherwise the trace is split at
    those gaps, MAGE_helper() is run per segment, and the results are averaged
    with each segment weighted by its duration in seconds.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param short_ma: number of data points utilized to calculate short moving average values, defaults to 5
    :type short_ma: int, optional
    :param long_ma: number of data points utilized to calculate long moving average values, defaults to 32
    :type long_ma: int, optional
    :param max_gap: number of minutes a gap between CGM data points can be without having to split the MAGE calculation into multiple segments, defaults to 180
    :type max_gap: int, optional
    :return: the MAGE for the given CGM trace
    :rtype: float
    """
    data = df.reset_index(drop=True)

    # NOTE(review): re-reads 'config.ini' relative to the working directory,
    # not the module directory used at import time - confirm that is intended.
    config.read('config.ini')
    interval = int(config["variables"]["interval"])

    missing = data[GLUCOSE].isnull()
    # create groups of consecutive missing values
    groups = missing.ne(missing.shift()).cumsum()
    # group by the created groups and count the size of each group, then apply it where values are missing
    size_of_groups = data.groupby([groups, missing])[GLUCOSE].transform('size').where(missing, 0)
    # filter groups where size is greater than 0 and take their indexes
    # (max_gap / interval converts the gap limit from minutes to a number of rows)
    indexes = size_of_groups[size_of_groups.diff() > (max_gap / interval)].index.tolist()

    if not indexes: # no gaps in data larger than max_gap
        return MAGE_helper(df, short_ma, long_ma)
    else: # calculate MAGE per segment and add them together (weighted)
        # bracket the split points so consecutive pairs delimit every segment;
        # None as the final bound makes the last slice run to the end of the data
        indexes.insert(0, 0); indexes.append(None)
        mage = 0
        total_duration = 0
        for i in range(len(indexes) - 1):
            segment = data.iloc[indexes[i]:indexes[i+1]]
            # drop the leading missing values (the gap itself) from the segment
            segment = segment.loc[segment[GLUCOSE].first_valid_index():].reset_index(drop=True)
            segment_duration = (segment.iloc[-1][TIME] - segment.iloc[0][TIME]).total_seconds(); total_duration += segment_duration
            # NOTE(review): MAGE_helper returns NaN for segments shorter than
            # long_ma, which would make the weighted sum NaN - confirm intended.
            mage += segment_duration * MAGE_helper(segment, short_ma, long_ma)
        return mage / total_duration
544
+
545
def MAGE_helper(df: pd.DataFrame, short_ma: int = 5, long_ma: int = 32) -> float:
    """Calculates the Mean Amplitude of Glycemic Excursions (MAGE) for a specific segment of a CGM trace.
    Algorithm for calculating MAGE is based on iglu's implementation, and this method is a helper for the MAGE() function.

    Steps: (1) compute short and long moving averages, (2) mark the points where
    their difference changes sign, (3) take the glucose minimum or maximum between
    successive crossings, (4) collect upward ("MAGE+") and downward ("MAGE-")
    excursions whose size reaches one standard deviation, and (5) return the mean
    amplitude of whichever excursion type comes first.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened listing. The nesting of the ``elif`` in the crossing-point loop and
    of the ``else`` clauses in the two ``while`` loops could not be read from the
    source text - verify against the upstream file.

    :param df: a Pandas DataFrame containing preprocessed CGM data without significant gaps (as defined in the MAGE() function)
    :type df: 'pandas.DataFrame'
    :param short_ma: number of data points utilized to calculate short moving average values, defaults to 5
    :type short_ma: int, optional
    :param long_ma: number of data points utilized to calculate long moving average values, defaults to 32
    :type long_ma: int, optional
    :raises Exception: if either span is smaller than 1, or if ``short_ma`` is not smaller than ``long_ma``
    :return: the MAGE for the given segment of a CGM trace, or NaN when the segment has fewer than ``long_ma`` rows
    :rtype: float
    """
    averages = pd.DataFrame()
    averages[GLUCOSE] = df[GLUCOSE]
    averages.reset_index(drop=True, inplace=True)

    if short_ma < 1 or long_ma < 1:
        raise Exception("Moving average spans must be positive, non-zero integers.")

    if short_ma >= long_ma:
        raise Exception("Short moving average span must be smaller than the long moving average span.")

    # not enough rows to fill a single long moving-average window
    if averages.shape[0] < long_ma:
        return np.nan

    # calculate rolling means, iglu does right align instead of center
    averages["MA_Short"] = averages[GLUCOSE].rolling(window=short_ma, min_periods=1).mean()
    averages["MA_Long"] = averages[GLUCOSE].rolling(window=long_ma, min_periods=1).mean()

    # fill in leading NaNs due to moving average calculation
    # (overwrites the first span-1 entries with the first full-window average)
    # NOTE(review): these are chained assignments (column selection, then .iloc);
    # they may not write through under pandas copy-on-write - confirm.
    averages["MA_Short"].iloc[:short_ma-1] = averages["MA_Short"].iloc[short_ma-1]
    averages["MA_Long"].iloc[:long_ma-1] = averages["MA_Long"].iloc[long_ma-1]
    averages["DELTA_SL"] = averages["MA_Short"] - averages["MA_Long"]

    # get crossing points
    glu = lambda i: averages[GLUCOSE].iloc[i]
    average = lambda i: averages["DELTA_SL"].iloc[i]
    # the first row always opens the list; its type follows the sign of the first delta
    crosses_list = [{"location": 0, "type": np.where(average(0) > 0, "peak", "nadir")}]

    for index in range(1, averages.shape[0]):
        current_actual = glu(index)
        current_average = average(index)
        previous_actual = glu(index-1)
        previous_average = average(index-1)

        if not (np.isnan(current_actual) or np.isnan(previous_actual) or np.isnan(current_average) or np.isnan(previous_average)):
            # a sign change between consecutive deltas marks a crossing
            if current_average * previous_average < 0:
                type = np.where(current_average < previous_average, "nadir", "peak")
                crosses_list.append({"location": index, "type": type})
            # NOTE(review): nesting reconstructed - this branch compares against the
            # delta at the last recorded crossing; it may instead belong to the outer `if`.
            elif (not np.isnan(current_average) and (current_average * average(crosses_list[-1]["location"]) < 0)):
                prev_delta = average(crosses_list[-1]["location"])
                type = np.where(current_average < prev_delta, "nadir", "peak")
                crosses_list.append({"location": index, "type": type})

    # closing sentinel: location None lets the final slice below run to the end
    crosses_list.append({"location": None, "type": np.where(average(-1) > 0, "peak", "nadir")})
    crosses = pd.DataFrame(crosses_list)

    num_extrema = crosses.shape[0] - 1
    minmax = np.tile(np.nan, num_extrema)             # glucose value of each extremum
    indexes = pd.Series(np.nan, index=range(num_extrema))  # row position of each extremum

    for index in range(num_extrema):
        # search from the previous extremum (or the first crossing) up to the next crossing
        s1 = int(np.where(index == 0, crosses["location"].iloc[index], indexes.iloc[index-1]))
        s2 = crosses["location"].iloc[index+1]

        values = averages[GLUCOSE].loc[s1:s2]
        if crosses["type"].iloc[index] == "nadir":
            minmax[index] = np.min(values)
            indexes.iloc[index] = values.idxmin()
        else:
            minmax[index] = np.max(values)
            indexes.iloc[index] = values.idxmax()

    # differences[a, b] == minmax[b] - minmax[a]
    differences = np.transpose(minmax[:, np.newaxis] - minmax)
    # np.std on a Series uses ddof=0 (population standard deviation)
    sd = np.std(df[GLUCOSE].dropna())
    N = len(minmax)

    # MAGE+
    mage_plus_heights = []
    mage_plus_tp_pairs = []
    j = 0; prev_j = 0
    while j < N:
        delta = differences[prev_j:j+1,j]

        max_v = np.max(delta)
        i = np.argmax(delta) + prev_j

        # an upward move of at least one SD starts an excursion ...
        if max_v >= sd:
            for k in range(j, N):
                # ... whose peak is pushed forward while values keep rising
                if minmax[k] > minmax[j]:
                    j = k
                # ... and which ends after a drop of more than one SD (or at the last extremum)
                if (differences[j, k] < (-1 * sd)) or (k == N - 1):
                    max_v = minmax[j] - minmax[i]
                    mage_plus_heights.append(max_v)
                    mage_plus_tp_pairs.append([i, j])

                    prev_j = k
                    j = k
                    break
        # NOTE(review): nesting reconstructed - `else` paired with `if max_v >= sd`
        else:
            j += 1

    # MAGE-
    mage_minus_heights = []
    mage_minus_tp_pairs = []
    j = 0; prev_j = 0
    while j < N:
        delta = differences[prev_j:j+1,j]
        min_v = np.min(delta)
        i = np.argmin(delta) + prev_j

        # mirror image of MAGE+: a downward move of at least one SD starts an excursion
        if min_v <= (-1 * sd):
            for k in range(j, N):
                if minmax[k] < minmax[j]:
                    j = k
                if (differences[j, k] > sd) or (k == N - 1):
                    min_v = minmax[j] - minmax[i]
                    mage_minus_heights.append(min_v)
                    mage_minus_tp_pairs.append([i, j, k])

                    prev_j = k
                    j = k
                    break
        # NOTE(review): nesting reconstructed - `else` paired with `if min_v <= (-1 * sd)`
        else:
            j += 1

    # report MAGE+ when an upward excursion exists and either no downward one
    # exists or the first upward one ends no later than the first downward one starts
    plus_first = len(mage_plus_heights) > 0 and ((len(mage_minus_heights) == 0) or (mage_plus_tp_pairs[0][1] <= mage_minus_tp_pairs[0][0]))
    return float(np.where(plus_first, np.mean(mage_plus_heights), np.mean(np.absolute(mage_minus_heights))))
674
+
675
def m_value(df: pd.DataFrame, r: int = 100) -> float:
    """Computes the M-value of the given CGM trace.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param r: a reference value utilized for calculating the M-value, defaults to 100
    :type r: int, optional
    :return: the mean of ``|10 * log10(glucose / r)| ** 3`` over the trace
    :rtype: float
    """
    relative = df[GLUCOSE] / r
    magnitude = abs(10 * np.log10(relative))
    cubed = magnitude ** 3
    return cubed.mean()
686
+
687
def ROC(df: pd.DataFrame, timedelta: int = 15) -> pd.Series:
    """Computes the rate of change in glucose at every data point

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :param timedelta: difference in time (in minutes) to utilize when calculating differences between data points, defaults to 15
    :type timedelta: int, optional
    :raises Exception: if ``timedelta`` is smaller than the configured resampling interval
    :return: a Pandas Series holding, for every row, the glucose change over ``timedelta`` minutes divided by ``timedelta``
    :rtype: 'pandas.Series'
    """
    config.read('config.ini')
    resample_minutes = int(config["variables"]["interval"])
    if timedelta < resample_minutes:
        raise Exception("Given timedelta must be greater than resampling interval.")

    # number of rows that corresponds to the requested time difference
    row_offset = round(timedelta / resample_minutes)
    glucose_change = df[GLUCOSE].diff(periods=row_offset)
    return glucose_change / timedelta
704
+
705
def number_readings(df: pd.DataFrame):
    """Counts the non-missing glucose readings in the given CGM trace

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the number of rows whose glucose value is not missing
    :rtype: int
    """
    glucose = df[GLUCOSE]
    return glucose.count()
707
+
708
def FBG(df: pd.DataFrame) -> float:
    """Computes a fasting-glucose style average from the 6 AM hour of each day.

    For every calendar day that has at least six non-missing readings between
    06:00 and 06:59, the first six of those readings are averaged; the daily
    averages are then averaged across days.

    :param df: a Pandas DataFrame containing preprocessed CGM data (the time column must already be datetime)
    :type df: 'pandas.DataFrame'
    :return: the mean of the daily 6 AM averages, or NaN when no day qualifies
    :rtype: float
    """
    readings = df.dropna(subset=[GLUCOSE]).copy()
    readings['date'] = readings[TIME].dt.date

    per_day_means = []
    for _, day_rows in readings.groupby('date'):
        # keep only the 06:00-06:59 readings, in chronological order
        six_am_rows = day_rows[(day_rows[TIME].dt.hour == 6)].sort_values(by=TIME)
        if len(six_am_rows) < 6:
            continue
        per_day_means.append(six_am_rows.head(6)[GLUCOSE].mean())

    if not per_day_means:
        return np.nan
    return np.mean(per_day_means)
725
+
726
def LSBG(df: pd.DataFrame) -> float:
    """Calculates the Lowest Sleeping Blood Glucose (LSBG).

    For each date d present in the trace, the night window runs from d 23:30
    (inclusive) to (d+1) 06:30 (exclusive). Within each window the mean of every
    run of six consecutive readings is computed and the lowest of those means is kept.
    Windows with fewer than six readings are skipped. The result is the average of the
    kept values over all nights.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the LSBG for the given CGM trace, or NaN if no night has enough readings
    :rtype: float
    """
    # Rows without a glucose value cannot contribute to any rolling window
    valid = df.dropna(subset=[GLUCOSE]).copy()
    valid['date'] = valid[TIME].dt.date

    nightly_minima = []
    for day in sorted(valid['date'].unique()):
        midnight = pd.to_datetime(day)
        window_open = midnight + pd.Timedelta(hours=23, minutes=30)
        window_close = midnight + pd.Timedelta(days=1, hours=6, minutes=30)

        in_window = (valid[TIME] >= window_open) & (valid[TIME] < window_close)
        night = valid[in_window].sort_values(by=TIME)
        if len(night) < 6:
            continue

        # Lowest mean over any six consecutive readings of this night
        lowest = night[GLUCOSE].rolling(window=6).mean().min()
        if not np.isnan(lowest):
            nightly_minima.append(lowest)

    if nightly_minima:
        return np.mean(nightly_minima)
    return np.nan
760
+
761
def mean_24h(df: pd.DataFrame) -> float:
    """Calculates the mean glucose over 24-hour periods that start at 23:30.

    For each date d present in the trace, the period runs from d 23:30 (inclusive)
    to (d+1) 23:30 (exclusive). The mean glucose of each non-empty period is computed,
    and the result is the average of those per-period means.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean 24-h BG value for the given CGM trace, or NaN if no period contains readings
    :rtype: float
    """
    # Rows without a glucose value are excluded up front
    valid = df.dropna(subset=[GLUCOSE]).copy()
    valid['date'] = valid[TIME].dt.date

    period_means = []
    for day in sorted(valid['date'].unique()):
        period_open = pd.to_datetime(day) + pd.Timedelta(hours=23, minutes=30)
        period_close = period_open + pd.Timedelta(days=1)

        inside = (valid[TIME] >= period_open) & (valid[TIME] < period_close)
        period = valid[inside]
        if period.empty:
            continue
        period_means.append(period[GLUCOSE].mean())

    if period_means:
        return np.mean(period_means)
    return np.nan
790
+
791
def mean_24h_auc(df: pd.DataFrame) -> float:
    """Calculates the mean 24-hour AUC (Area Under the Curve) using the trapezoidal rule.

    For each date d present in the trace, the period runs from d 23:30 (inclusive)
    to (d+1) 23:30 (exclusive). Glucose is integrated over the readings of each period
    with scipy.integrate.trapezoid, using the hours elapsed since the period start as
    the x-axis. Periods with fewer than two readings are skipped. The result is the
    average of the per-period AUCs.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean 24-hour AUC for the given CGM trace (23:30-23:30), or NaN if no period can be integrated
    :rtype: float
    """
    # Rows without a glucose value are excluded up front
    valid = df.dropna(subset=[GLUCOSE]).copy()
    valid['date'] = valid[TIME].dt.date

    period_aucs = []
    for day in sorted(valid['date'].unique()):
        period_open = pd.to_datetime(day) + pd.Timedelta(hours=23, minutes=30)
        period_close = period_open + pd.Timedelta(days=1)

        inside = (valid[TIME] >= period_open) & (valid[TIME] < period_close)
        period = valid[inside].sort_values(by=TIME)

        # A trapezoid needs at least two points
        if len(period) >= 2:
            elapsed_hours = (period[TIME] - period_open).dt.total_seconds() / 3600.0
            readings = period[GLUCOSE].values
            period_aucs.append(trapezoid(readings, x=elapsed_hours))

    if period_aucs:
        return np.mean(period_aucs)
    return np.nan
832
+
833
def mean_daytime(df: pd.DataFrame) -> float:
    """
    Calculates the mean daytime glucose: for each calendar day, the mean of all
    readings from 06:30 (inclusive) to 23:30 (exclusive), averaged across all days
    that have at least one such reading.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean daytime glucose level, or NaN if no day has daytime readings
    :rtype: float
    """
    valid = df.dropna(subset=[GLUCOSE]).copy()
    valid['date'] = valid[TIME].dt.date

    per_day = []
    for day, readings in valid.groupby('date'):
        midnight = pd.to_datetime(day)
        lower = midnight + pd.Timedelta(hours=6, minutes=30)
        upper = midnight + pd.Timedelta(hours=23, minutes=30)

        stamps = readings[TIME]
        daytime = readings[(stamps >= lower) & (stamps < upper)]
        if daytime.empty:
            continue
        per_day.append(daytime[GLUCOSE].mean())

    if per_day:
        return np.mean(per_day)
    return np.nan
858
+
859
def mean_nocturnal(df: pd.DataFrame) -> float:
    """
    Calculates the mean nocturnal glucose, defined as the mean of all measures
    between 23:30 and 06:30 (spanning midnight) for each night, averaged across all nights.

    For each date d present in the trace, the nocturnal window runs from d 23:30
    (inclusive) to (d+1) 06:30 (exclusive). Nights without any reading are skipped.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean nocturnal glucose level, or NaN if no night contains readings
    :rtype: float
    """
    df = df.dropna(subset=[GLUCOSE]).copy()
    df['date'] = df[TIME].dt.date

    daily_means = []
    for d in sorted(df['date'].unique()):
        # Define the nighttime window
        start_period = pd.to_datetime(d) + pd.Timedelta(hours=23, minutes=30)
        end_period = pd.to_datetime(d) + pd.Timedelta(days=1, hours=6, minutes=30)

        # The window crosses midnight, so it must be cut from the whole trace.
        # Restricting the search to rows dated d would drop every reading after
        # 00:00 and leave only the 23:30-23:59 slice of the night.
        night_df = df[(df[TIME] >= start_period) & (df[TIME] < end_period)]

        # If no readings in that interval for this particular night, skip it
        if not night_df.empty:
            daily_means.append(night_df[GLUCOSE].mean())

    return np.nan if not daily_means else np.mean(daily_means)
886
+
887
def auc_daytime(df: pd.DataFrame) -> float:
    """
    Calculates the mean daytime AUC (Area Under the Curve) of glucose.

    For each calendar day, glucose is integrated with the trapezoidal rule over the
    readings from 06:30 (inclusive) to 23:30 (exclusive), using the hours elapsed since
    06:30 as the x-axis. Days with fewer than two daytime readings are skipped. The
    result is the average of the daily AUCs.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean daytime AUC, or NaN if no day can be integrated
    :rtype: float
    """
    # Rows without a glucose value are excluded up front
    valid = df.dropna(subset=[GLUCOSE]).copy()
    valid['date'] = valid[TIME].dt.date

    per_day = []
    for day, readings in valid.groupby('date'):
        midnight = pd.to_datetime(day)
        lower = midnight + pd.Timedelta(hours=6, minutes=30)
        upper = midnight + pd.Timedelta(hours=23, minutes=30)

        stamps = readings[TIME]
        daytime = readings[(stamps >= lower) & (stamps < upper)].sort_values(by=TIME)

        # A trapezoid needs at least two points
        if len(daytime) >= 2:
            elapsed_hours = (daytime[TIME] - lower).dt.total_seconds() / 3600.0
            values = daytime[GLUCOSE].values
            per_day.append(trapezoid(values, x=elapsed_hours))

    if per_day:
        return np.mean(per_day)
    return np.nan
921
+
922
def nocturnal_auc(df: pd.DataFrame) -> float:
    """
    Calculates the mean nocturnal AUC (Area Under the Curve) of glucose
    between 23:30 and 06:30 for each day, and then averages these daily AUCs.

    For each date d, we define the nocturnal period as d 23:30 (inclusive) to
    (d+1) 06:30 (exclusive). Glucose is integrated with the trapezoidal rule using
    the hours elapsed since 23:30 as the x-axis. Nights with fewer than two readings
    are skipped.

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: the mean nocturnal AUC, or NaN if no night can be integrated
    :rtype: float
    """
    df = df.dropna(subset=[GLUCOSE]).copy()
    df['date'] = df[TIME].dt.date

    daily_aucs = []
    unique_dates = sorted(df['date'].unique())

    for d in unique_dates:
        start_period = pd.to_datetime(d) + pd.Timedelta(hours=23, minutes=30)
        # 23:30 -> 06:30 is seven hours. Anchor the end on the next day's 06:30
        # directly; adding 6h30m to the 23:30 start would stop at 06:00.
        end_period = pd.to_datetime(d) + pd.Timedelta(days=1, hours=6, minutes=30)

        night_df = df[(df[TIME] >= start_period) & (df[TIME] < end_period)].sort_values(by=TIME)

        if len(night_df) < 2:
            # Not enough data points for integration
            continue

        # Compute time array in hours relative to start_period
        times_in_hours = (night_df[TIME] - start_period).dt.total_seconds() / 3600.0
        glucose_values = night_df[GLUCOSE].values

        # Use scipy's trapezoid to integrate glucose over the nocturnal period
        auc = trapezoid(glucose_values, x=times_in_hours)
        daily_aucs.append(auc)

    return np.nan if not daily_aucs else np.mean(daily_aucs)
959
+
960
+
961
def compute_features(id: str, data: pd.DataFrame) -> dict[str, object]:
    """Calculates statistics and metrics for a single patient

    Every metric function is called on ``data`` as-is, so ``data`` should hold the
    readings of exactly one patient (e.g. one group of ``dataset.groupby(ID)``).

    :param id: the identifier of the patient the features belong to; stored in the result under the ID key
    :type id: str
    :param data: Pandas DataFrame containing preprocessed CGM data for that patient
    :type data: 'pandas.DataFrame'
    :return: a dictionary (with each key referring to the name of a statistic or metric)
    :rtype: dict[str, object]
    """
    # Computed once; its five entries are indexed as
    # [minimum, first quartile, median, third quartile, maximum] below.
    summary = summary_stats(data)

    features = {
        ID: id,
        "ADRR": ADRR(data),
        "COGI": COGI(data),
        "CONGA": CONGA(data),
        "CV": CV(data),
        "Daytime AUC": auc_daytime(data),
        "eA1c": eA1c(data),
        "FBG": FBG(data),
        "First Quartile": summary[1],
        "GMI": GMI(data),
        "GRADE": GRADE(data),
        "GRADE (euglycemic)": GRADE_eugly(data),
        "GRADE (hyperglycemic)": GRADE_hyper(data),
        "GRADE (hypoglycemic)": GRADE_hypo(data),
        "GRI": GRI(data),
        "GVP": GVP(data),
        "HBGI": HBGI(data),
        "Hyperglycemia Index": hyper_index(data),
        "Hypoglycemia Index": hypo_index(data),
        "IGC": IGC(data),
        "J-Index": j_index(data),
        "LBGI": LBGI(data),
        "LSBG": LSBG(data),
        "MAG": MAG(data),
        "MAGE": MAGE(data),
        "Maximum": summary[4],
        "Mean": mean(data),
        "Mean 24h Glucose": mean_24h(data),
        "Mean 24h AUC": mean_24h_auc(data),
        "Mean Absolute Differences": mean_absolute_differences(data),
        "Mean Daytime": mean_daytime(data),
        "Mean Nocturnal": mean_nocturnal(data),
        "Median": summary[2],
        "Median Absolute Deviation": median_absolute_deviation(data),
        "Minimum": summary[0],
        "MODD": MODD(data),
        "M-Value": m_value(data),
        "Nocturnal AUC": nocturnal_auc(data),
        "Number of Readings": number_readings(data),
        "Percent Time Above Range (180)": percent_time_above_range(data),
        "Percent Time Below Range (70)": percent_time_below_range(data),
        "Percent Time in Hyperglycemia": percent_time_in_hyperglycemia(data),
        "Percent Time in Hyperglycemia (level 0)": percent_time_in_level_0_hyperglycemia(data),
        "Percent Time in Hyperglycemia (level 1)": percent_time_in_level_1_hyperglycemia(data),
        "Percent Time in Hyperglycemia (level 2)": percent_time_in_level_2_hyperglycemia(data),
        "Percent Time in Hypoglycemia": percent_time_in_hypoglycemia(data),
        "Percent Time in Hypoglycemia (level 1)": percent_time_in_level_1_hypoglycemia(data),
        "Percent Time in Hypoglycemia (level 2)": percent_time_in_level_2_hypoglycemia(data),
        "Percent Time In Range (70-180)": percent_time_in_range(data),
        "Percent Time In Tight Range (70-140)": percent_time_in_tight_range(data),
        "SD": SD(data),
        "Third Quartile": summary[3],
    }
    return features
1028
+
1029
+
1030
def create_features(dataset: pd.DataFrame) -> pd.DataFrame:
    """Builds a table of summary metrics with one row per patient/dataset.

    The multiindexed input is split into one group per ID value, the features of every
    group are computed in a pool of worker processes, and the resulting dictionaries
    are assembled into a single DataFrame indexed by ID.

    :param dataset: a Pandas DataFrame containing the CGM data to calculate metrics for
    :type dataset: 'pandas.DataFrame'
    :return: a Pandas DataFrame with each row representing a patient in 'dataset' and each column representing a specific statistic or metric
    :rtype: 'pandas.DataFrame'
    """
    per_patient = dataset.groupby(ID)
    with Pool() as workers:
        # Each (id, group) pair is unpacked into compute_features(id, group)
        rows = workers.starmap(compute_features, per_patient)
    table = pd.DataFrame(rows)
    return table.set_index([ID])