glucose360 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
glucose360/__init__.py ADDED
@@ -0,0 +1 @@
1
# Names of the submodules exported by `from glucose360 import *`.
__all__ = ["preprocessing", "features", "events", "plots"]
glucose360/events.py ADDED
@@ -0,0 +1,680 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from scipy.integrate import trapezoid
4
+ import configparser
5
+ import glob, os, zipfile, tempfile
6
+ import math
7
+
8
# Load the config.ini that sits next to this module and expose the configured
# column names as module-level constants used by every function below.
dir_path = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(dir_path, "config.ini")

config = configparser.ConfigParser()
config.read(config_path)

# Look the section up once; each constant is one key of [variables].
_variables = config['variables']
ID = _variables['id']
GLUCOSE = _variables['glucose']
TIME = _variables['time']
BEFORE = _variables['before']
AFTER = _variables['after']
TYPE = _variables['type']
DESCRIPTION = _variables['description']
19
+
20
def import_events(
    path: str,
    id: str,
    name: str = None,
    day_col: str = "Day",
    time_col: str = "Time",
    before: int = 60,
    after: int = 60,
    type: str = "imported event"
) -> pd.DataFrame:
    """Bulk imports events from standalone .csv files or from those within a given directory or .zip file

    :param path: the path of the directory/zip/csv to import from
    :type path: str
    :param id: the identification of the patient that the imported events belong to
    :type id: str
    :param name: for .zip imports only, the name of the folder inside the archive that holds the .csv files;
        defaults to the archive's own file name without its extension
    :type name: str, optional
    :param day_col: the name of the column specifying the day the event occurred (year, month, and specific day), defaults to 'Day'
    :type day_col: str, optional
    :param time_col: the name of the column specifying what time during the day the event occurred, defaults to 'Time'
    :type time_col: str, optional
    :param before: the amount of minutes to also look at before the event timestamp, defaults to 60
    :type before: int, optional
    :param after: the amount of minutes to also look at after the event timestamp, defaults to 60
    :type after: int, optional
    :param type: the type of event to classify all the imported events as, defaults to 'imported event'
    :type type: str, optional
    :raises ValueError: if the path has no extension and is not a directory, if the extension is neither
        .csv nor .zip, or if the .csv/.zip file does not exist
    :return: a Pandas DataFrame containing all the imported events
    :rtype: 'pandas.DataFrame'
    """
    import_args = (id, day_col, time_col, before, after, type)

    # An existing directory is imported as such even when its name contains a dot
    # (e.g. "events.v2"), which an extension-only check would reject.
    if os.path.isdir(path):
        return import_events_directory(path, *import_args)

    ext = os.path.splitext(path)[1].lower()
    if ext == "":
        raise ValueError("Directory does not exist")
    if ext not in (".csv", ".zip"):
        raise ValueError("Invalid file type")
    if not os.path.isfile(path):
        raise ValueError("File does not exist")

    if ext == ".csv":
        return import_events_csv(path, *import_args)

    # otherwise has to be a .zip file: extract into a temporary directory and import from there
    with zipfile.ZipFile(path, 'r') as zip_ref, tempfile.TemporaryDirectory() as temp_dir:
        zip_ref.extractall(temp_dir)

        if name:
            candidates = [name]
        else:
            # Prefer the portable stem ("my.data.zip" -> "my.data"); keep the legacy
            # first-dot-segment guess ("my") as a second option.
            archive = os.path.basename(path)
            candidates = [os.path.splitext(archive)[0], archive.split(".")[0]]

        # Fall back to the extraction root when the archive holds its .csv files at top level.
        folder = temp_dir
        for candidate in candidates:
            candidate_path = os.path.join(temp_dir, candidate)
            if os.path.isdir(candidate_path):
                folder = candidate_path
                break

        return import_events_directory(folder, *import_args)
76
+
77
def import_events_directory(
    path: str,
    id: str,
    day_col: str = "Day",
    time_col: str = "Time",
    before: int = 60,
    after: int = 60,
    type: str = "imported event"
) -> pd.DataFrame:
    """Bulk imports events from .csv files within a given directory

    :param path: the path of the directory to import from
    :type path: str
    :param id: the identification of the patient that the imported events belong to
    :type id: str
    :param day_col: the name of the column specifying the day the event occurred (year, month, and specific day), defaults to 'Day'
    :type day_col: str, optional
    :param time_col: the name of the column specifying what time during the day the event occurred, defaults to 'Time'
    :type time_col: str, optional
    :param before: the amount of minutes to also look at before the event timestamp, defaults to 60
    :type before: int, optional
    :param after: the amount of minutes to also look at after the event timestamp, defaults to 60
    :type after: int, optional
    :param type: the type of event to classify all the imported events as, defaults to 'imported event'
    :type type: str, optional
    :raises Exception: if the directory contains no .csv files
    :return: a Pandas DataFrame containing all the imported events
    :rtype: 'pandas.DataFrame'
    """
    # Escape the directory part so characters such as '[' or '*' in the path are
    # matched literally; sort so the row order does not depend on the file system.
    pattern = os.path.join(glob.escape(path), "*.csv")
    csv_files = sorted(glob.glob(pattern))

    if not csv_files:
        raise Exception("No CSV files found.")

    return pd.concat(
        [import_events_csv(file, id, day_col, time_col, before, after, type) for file in csv_files]
    )
111
+
112
def import_events_csv(
    path: str,
    id: str,
    day_col: str = "Day",
    time_col: str = "Time",
    before: int = 60,
    after: int = 60,
    type: str = "imported event"
) -> pd.DataFrame:
    """Bulk imports events from a single .csv file

    Rows whose combined day/time value is missing are dropped from the result.

    :param path: the path of the .csv file to import from
    :type path: str
    :param id: the identification of the patient that the imported events belong to
    :type id: str
    :param day_col: the name of the column specifying the day the event occurred (year, month, and specific day), defaults to 'Day'
    :type day_col: str, optional
    :param time_col: the name of the column specifying what time during the day the event occurred, defaults to 'Time'
    :type time_col: str, optional
    :param before: the amount of minutes to also look at before the event timestamp, defaults to 60
    :type before: int, optional
    :param after: the amount of minutes to also look at after the event timestamp, defaults to 60
    :type after: int, optional
    :param type: the type of event to classify all the imported events as, defaults to 'imported event'
    :type type: str, optional
    :return: a Pandas DataFrame containing all the imported events
    :rtype: 'pandas.DataFrame'
    """
    df = pd.read_csv(path)
    # Only the file name goes into generated descriptions; the full path would leak
    # (possibly temporary) directory names into the event data.
    csv_name = os.path.splitext(os.path.basename(path))[0]

    events = pd.DataFrame()
    events[TIME] = pd.to_datetime(df[day_col] + " " + df[time_col])
    events[BEFORE] = before
    events[AFTER] = after
    events[TYPE] = type

    if "Food Name" in df.columns:
        events[DESCRIPTION] = df["Food Name"]
    else:
        # 1-based running number per row, e.g. "imported event #3 from meals"
        events[DESCRIPTION] = "imported event #" + (events.index + 1).astype(str) + f" from {csv_name}"

    events.insert(0, ID, id)

    return events.dropna(subset=[TIME])
152
+
153
def _episodes_helper(
    df: pd.DataFrame,
    id: str,
    type: str,
    threshold: int,
    level: int,
    min_length: int,
    end_length: int
) -> pd.DataFrame:
    """Retrieves all episodes of a specific type/level for a specific patient within the given CGM data

    Readings at or beyond the threshold (<= for 'hypo', >= otherwise) are grouped into runs of
    consecutive samples spaced exactly one configured interval apart. A run that lasts at least
    'min_length' minutes is merged with the following run unless every reading in the 'end_length'
    minutes after it is back on the other side of the threshold.

    :param df: Pandas DataFrame containing preprocessed CGM data
    :type df: pandas.DataFrame
    :param id: identification of the patient to retrieve episodes for
    :type id: str
    :param type: type of episode ('hypo' or 'hyper')
    :type type: str
    :param threshold: threshold (in mg/dL) above/below which glucose values are considered as part of an episode
    :type threshold: int
    :param level: the level the retrieved episodes are (0, 1, or 2)
    :type level: int
    :param min_length: minimum duration (in minutes) required for episodes
    :type min_length: int
    :param end_length: minimum amount of time (in minutes) that the glucose values must be within typical ranges
        at the end of an episode
    :type end_length: int
    :return: a Pandas DataFrame containing all episodes of a specific type/level for a specific patient within the given CGM data
        (an empty DataFrame when none are found)
    :rtype: pandas.DataFrame
    """
    # NOTE(review): 'config.ini' is resolved against the current working directory here,
    # not the package directory used at import time - confirm this is intended.
    config.read('config.ini')
    interval = int(config["variables"]["interval"])

    def timegap(timedelta):
        return timedelta.total_seconds() / 60

    data = df.reset_index(drop=True)
    beyond_threshold = (df[GLUCOSE] <= threshold) if type == "hypo" else (df[GLUCOSE] >= threshold)
    episode_df = df[beyond_threshold].copy()
    episode_df.reset_index(drop=True, inplace=True)
    episode_df["gap"] = episode_df[TIME].diff().apply(timegap)

    # Every row whose gap to its predecessor is not exactly one interval starts a new run
    # (the first row's gap is NaN, so it always does); -1 marks the end of the last run.
    edges = episode_df.index[episode_df["gap"] != interval].to_list()
    edges.append(-1)

    found = []
    index = 0
    while index < len(edges) - 1:
        is_last = index == len(edges) - 2
        start_i = edges[index]  # index of the start of the episode
        # index of the end of the episode (inclusive): the row before the next edge, or the final row
        end_i = edges[index + 1] if is_last else edges[index + 1] - 1
        start_time = episode_df.iloc[start_i][TIME]
        end_time = episode_df.iloc[end_i][TIME]
        episode_length = timegap(end_time - start_time)

        if episode_length >= min_length:
            if not is_last:
                end_counts = math.ceil(end_length / interval)

                end_index = data.index[data[TIME] == end_time].to_list()[0]
                end_data = data.iloc[end_index + 1 : end_index + 1 + end_counts][GLUCOSE]
                recovered = (end_data >= threshold) if type == "hypo" else (end_data <= threshold)
                if not recovered.all():
                    # glucose did not stay back in range long enough, so combine this run with the next
                    edges.pop(index + 1)
                    continue

            description = f"{start_time} to {end_time} level {level} {type}glycemic episode"
            found.append(pd.DataFrame.from_records([{
                ID: id, TIME: start_time, BEFORE: 0, AFTER: episode_length,
                TYPE: f"{type} level {level} episode", DESCRIPTION: description,
            }]))

        index += 1

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(found) if found else pd.DataFrame()
225
+
226
def get_episodes(
    df: pd.DataFrame,
    hypo_lvl2: int = 54,
    hypo_lvl1: int = 70,
    hyper_lvl0: int = 140,
    hyper_lvl1: int = 180,
    hyper_lvl2: int = 250,
    min_length: int = 15,
    end_length: int = 15
) -> pd.DataFrame:
    """Retrieves all episodes within the given CGM data

    :param df: Pandas DataFrame containing preprocessed CGM data
    :type df: pandas.DataFrame
    :param hypo_lvl2: threshold (in mg/dL) below which glucose values are considered level 2 hypoglycemic, defaults to 54
    :type hypo_lvl2: int, optional
    :param hypo_lvl1: threshold (in mg/dL) below which glucose values are considered level 1 hypoglycemic, defaults to 70
    :type hypo_lvl1: int, optional
    :param hyper_lvl0: threshold (in mg/dL) above which glucose values are considered level 0 hyperglycemic, defaults to 140
    :type hyper_lvl0: int, optional
    :param hyper_lvl1: threshold (in mg/dL) above which glucose values are considered level 1 hyperglycemic, defaults to 180
    :type hyper_lvl1: int, optional
    :param hyper_lvl2: threshold (in mg/dL) above which glucose values are considered level 2 hyperglycemic, defaults to 250
    :type hyper_lvl2: int, optional
    :param min_length: minimum duration (in minutes) required for episodes, defaults to 15
    :type min_length: int, optional
    :param end_length: minimum amount of time (in minutes) that the glucose values must be within typical ranges
        at the end of an episode, defaults to 15
    :type end_length: int, optional
    :return: a Pandas DataFrame containing all episodes within the given CGM data, sorted by time within
        each patient (an empty DataFrame when there are none)
    :rtype: pandas.DataFrame
    """
    # (type, threshold, level) for every episode category, in the order they are looked up
    categories = [
        ("hyper", hyper_lvl0, 0),
        ("hyper", hyper_lvl1, 1),
        ("hyper", hyper_lvl2, 2),
        ("hypo", hypo_lvl1, 1),
        ("hypo", hypo_lvl2, 2),
    ]

    per_patient = []
    for id, data in df.groupby(ID):
        found = [
            _episodes_helper(data, id, kind, threshold, level, min_length, end_length)
            for kind, threshold, level in categories
        ]
        found = [episodes for episodes in found if not episodes.empty]
        if not found:
            # A patient without any episode yields only column-less frames;
            # sorting those by TIME would raise a KeyError.
            continue

        per_patient.append(pd.concat(found).sort_values(by=[TIME]))

    return pd.concat(per_patient) if per_patient else pd.DataFrame()
270
+
271
def get_excursions(
    df: pd.DataFrame,
    z: int = 2,
    min_length: int = 15,
    end_length: int = 15
) -> pd.DataFrame:
    """Retrieves all excursions within the given CGM data

    For each patient, readings at least 'z' standard deviations away from that patient's mean are
    grouped into runs of consecutive samples. Each qualifying run is reported as one event that is
    timestamped at its most extreme reading and whose window is widened to the nearest local
    nadir (for 'hyper') or local peak (for 'hypo') on either side.

    :param df: Pandas DataFrame containing preprocessed CGM data
    :type df: pandas.DataFrame
    :param z: the number of standard deviations away from the mean that should define an 'excursion', defaults to 2
    :type z: int, optional
    :param min_length: minimum duration (in minutes) required for excursions, defaults to 15
    :type min_length: int, optional
    :param end_length: minimum amount of time (in minutes) that the glucose values must be within typical ranges
        at the end of an excursion, defaults to 15
    :type end_length: int, optional
    :return: a Pandas DataFrame containing all excursions within the given CGM data (an empty DataFrame when there are none)
    :rtype: pandas.DataFrame
    """
    # NOTE(review): 'config.ini' is resolved against the current working directory here,
    # not the package directory used at import time - confirm this is intended.
    config.read('config.ini')
    interval = int(config["variables"]["interval"])

    def timegap(timedelta):
        return timedelta.total_seconds() / 60

    found = []
    for id, data in df.groupby(ID):
        data = data.reset_index(drop=True)
        glucose = data[GLUCOSE]
        sd = glucose.std()
        mean = glucose.mean()
        upper = mean + (z * sd)
        lower = mean - (z * sd)

        # timestamps of strict local maxima / minima of the trace
        peaks = data[(glucose.shift(1) < glucose) & (glucose.shift(-1) < glucose)][TIME].copy()
        peaks.reset_index(drop=True, inplace=True)
        nadirs = data[(glucose.shift(1) > glucose) & (glucose.shift(-1) > glucose)][TIME].copy()
        nadirs.reset_index(drop=True, inplace=True)

        outliers = data[(glucose >= upper) | (glucose <= lower)].copy()
        outliers.reset_index(drop=True, inplace=True)

        # calculate the differences between each of the timestamps
        outliers["gaps"] = outliers[TIME].diff().apply(timegap)

        # rows that do not follow their predecessor by exactly one interval start a new run;
        # -1 marks the end of the last run
        edges = outliers.index[outliers["gaps"] != interval].to_list()
        edges.append(-1)

        i = 0
        while i < len(edges) - 1:
            kind = "hyper" if outliers.iloc[edges[i]][GLUCOSE] > mean else "hypo"
            is_last = i == len(edges) - 2
            start_time = outliers.iloc[edges[i]][TIME]
            start_index = data.index[data[TIME] == start_time].to_list()[0]
            end_time = outliers.iloc[edges[i + 1] if is_last else edges[i + 1] - 1][TIME]
            end_index = data.index[data[TIME] == end_time].to_list()[0]

            excursion_length = timegap(end_time - start_time)
            if excursion_length >= min_length:
                if not is_last:
                    end_counts = math.ceil(end_length / interval)

                    # 'data' has a fresh 0..n-1 index, so end_index is also the row position
                    last_data = data.iloc[end_index + 1 : end_index + 1 + end_counts][GLUCOSE]
                    recovered = (last_data <= upper) if kind == "hyper" else (last_data >= lower)
                    if not recovered.all():
                        # glucose did not stay back in range long enough, so combine this run with the next
                        edges.pop(i + 1)
                        continue

                # the event is timestamped at the most extreme reading of the run
                last_point = None if is_last else edges[i + 1]
                run = outliers.iloc[edges[i]:last_point].set_index(TIME)[GLUCOSE]
                timestamp = run.idxmax() if kind == "hyper" else run.idxmin()

                # widen the window to the surrounding local extrema, unless the run touches the data boundary
                extrema = peaks if kind == "hypo" else nadirs
                if start_index != 0:
                    earlier = extrema[extrema <= start_time]
                    if not earlier.empty:
                        start_time = earlier.iloc[-1]
                if end_index != data.shape[0] - 1:
                    later = extrema[extrema >= end_time]
                    if not later.empty:
                        end_time = later.iloc[0]

                description = f"{start_time} to {end_time} {kind}glycemic excursion"
                found.append(pd.DataFrame.from_records([{
                    ID: id, TIME: timestamp, BEFORE: timegap(timestamp - start_time),
                    AFTER: timegap(end_time - timestamp),
                    TYPE: f"{kind} excursion", DESCRIPTION: description,
                }]))

            i += 1

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(found) if found else pd.DataFrame()
358
+
359
def get_curated_events(df: pd.DataFrame) -> pd.DataFrame:
    """Collects the curated events (episodes followed by excursions) for every patient in the given DataFrame

    :param df: a Pandas DataFrame containing preprocessed CGM data
    :type df: 'pandas.DataFrame'
    :return: a Pandas DataFrame (in the usual event structure defined by the package) holding the episodes
        found with default settings, followed by the excursions found with default settings
    :rtype: 'pandas.DataFrame'
    """
    episodes = get_episodes(df)
    excursions = get_excursions(df)
    curated = [episodes, excursions]
    return pd.concat(curated)
368
+
369
def retrieve_event_data(
    df: pd.DataFrame,
    events: pd.DataFrame,
) -> pd.DataFrame:
    """Returns a Pandas DataFrame containing only patient data during the respective given events

    :param df: a Pandas DataFrame containing the preprocessed CGM traces to retrieve event subsets from
    :type df: 'pandas.DataFrame'
    :param events: either a single event as a Pandas Series, or a single indexed Pandas DataFrame with each
        row specifying a single event in the form of an id, a datetime, # of minutes before the datetime to
        include, # of minutes after to include, and a description
    :type events: 'pandas.DataFrame' or 'pandas.Series'
    :return: a Pandas DataFrame with the rows of 'df' that fall inside the window of each event, tagged with
        the event's id and description; events whose id is not in 'df' are skipped
    :rtype: 'pandas.DataFrame'
    """
    # A single event (one row passed as a Series) is handled the same way as a frame of events.
    if isinstance(events, pd.Series):
        rows = [events]
    else:
        rows = [row for _, row in events.iterrows()]

    subsets = []
    for row in rows:
        id = row[ID]
        if id not in df.index:
            continue

        datetime = pd.Timestamp(row[TIME])
        initial = datetime - pd.Timedelta(row[BEFORE], "m")
        final = datetime + pd.Timedelta(row[AFTER], "m")

        patient_data = df.loc[id]
        if isinstance(patient_data, pd.Series):
            # a patient with a single reading comes back as a Series; restore the frame shape
            patient_data = patient_data.to_frame().T
        data = patient_data[(patient_data[TIME] >= initial) & (patient_data[TIME] <= final)].copy()

        data[ID] = id
        data[DESCRIPTION] = row[DESCRIPTION]
        subsets.append(data)

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(subsets) if subsets else pd.DataFrame()
402
+
403
def event_summary(events: pd.DataFrame) -> pd.Series:
    """Counts how many events of each event type occur within 'events'

    :param events: a Pandas DataFrame containing events (as per package guidelines)
    :type events: 'pandas.DataFrame'
    :return: a Pandas Series mapping each unique event type found within 'events' to its number of occurrences
    :rtype: 'pandas.Series'
    """
    event_types = events[TYPE]
    return event_types.value_counts()
412
+
413
def AUC(df: pd.DataFrame) -> float:
    """Computes the total Area-Under-Curve (AUC) of the given CGM trace with the trapezoidal rule

    Consecutive readings are treated as spaced by the 'interval' value read from the configuration.

    :param df: a Pandas DataFrame containing the CGM trace to calculate the AUC of
    :type df: 'pandas.DataFrame'
    :return: the AUC of the given CGM trace
    :rtype: float
    """
    config.read('config.ini')
    spacing = int(config["variables"]["interval"])
    readings = df[GLUCOSE]
    return trapezoid(readings, dx=spacing)
424
+
425
def iAUC(df: pd.DataFrame, level: float) -> float:
    """Computes the incremental Area-Under-Curve (iAUC) of the given CGM trace relative to 'level'

    The glucose values are replaced by their absolute distance from 'level' before the AUC is taken.

    :param df: a Pandas DataFrame containing the CGM trace to calculate the iAUC of
    :type df: 'pandas.DataFrame'
    :param level: the reference glucose level the distances are measured from
    :type level: float
    :return: the iAUC of the given CGM trace
    :rtype: float
    """
    shifted = df.copy()
    shifted[GLUCOSE] = (shifted[GLUCOSE] - level).abs()

    # NOTE(review): after taking the absolute value nothing can be negative, so this clip never
    # changes anything and readings below 'level' add positive area - confirm that is intended.
    negative = shifted[GLUCOSE] < 0
    shifted.loc[negative, GLUCOSE] = 0

    return AUC(shifted)
439
+
440
def baseline(df: pd.DataFrame) -> float:
    """Gives the baseline glucose level of the given CGM trace, i.e. its first glucose reading

    :param df: a Pandas DataFrame containing the CGM trace to retrieve the baseline glucose level for
    :type df: 'pandas.DataFrame'
    :return: the baseline glucose level of the given CGM trace
    :rtype: float
    """
    readings = df[GLUCOSE]
    return readings.iloc[0]
449
+
450
def peak(df: pd.DataFrame) -> float:
    """Gives the highest glucose level found in the given CGM trace

    :param df: a Pandas DataFrame containing the CGM trace to retrieve the maximum glucose level for
    :type df: 'pandas.DataFrame'
    :return: the maximum glucose level of the given CGM trace
    :rtype: float
    """
    readings = df[GLUCOSE]
    return np.max(readings)
459
+
460
def nadir(df: pd.DataFrame) -> float:
    """Gives the lowest glucose level found in the given CGM trace

    :param df: a Pandas DataFrame containing the CGM trace to retrieve the minimum glucose level for
    :type df: 'pandas.DataFrame'
    :return: the minimum glucose level of the given CGM trace
    :rtype: float
    """
    readings = df[GLUCOSE]
    return np.min(readings)
469
+
470
def delta(df: pd.DataFrame) -> float:
    """Gives the absolute difference between the maximum and baseline glucose levels (delta) of the given CGM trace

    :param df: a Pandas DataFrame containing the CGM trace to retrieve the delta for
    :type df: 'pandas.DataFrame'
    :return: the delta of the given CGM trace
    :rtype: float
    """
    highest = peak(df)
    starting = baseline(df)
    return abs(highest - starting)
479
+
480
def post_event_glucoses(data: pd.DataFrame, event_time: pd.Timestamp, times: list[int], glucose_col: str = GLUCOSE) -> dict:
    """
    Returns the glucose values closest to the specified times (in minutes) after the given event_time.

    :param data: Pandas DataFrame containing the CGM data
    :type data: pd.DataFrame
    :param event_time: The time of the event
    :type event_time: pd.Timestamp
    :param times: A list of integers representing the number of minutes after event_time for which to find the glucose values;
        0 is always added as a reference point if it is missing
    :type times: list[int]
    :param glucose_col: The name of the glucose column in the data, defaults to GLUCOSE
    :type glucose_col: str, optional
    :return: A dictionary where keys are strings like "X-min Post Event" and values are the corresponding glucose readings,
        or np.nan if the requested time lies outside the time range covered by the data
    :rtype: dict
    """
    # Work on a copy so any sequence type is accepted and the caller's list is never touched.
    times = list(times)

    # Always include 0-min to have a reference point
    if 0 not in times:
        times = [0] + times

    result = {f"{t}-min Post Event": np.nan for t in times}
    if data.empty:
        return result

    # The covered time range does not change between requested offsets.
    earliest = data[TIME].min()
    latest = data[TIME].max()

    for t in times:
        post_time = event_time + pd.Timedelta(minutes=t)

        # Check if the desired time is within the range of the data
        if earliest <= post_time <= latest:
            # Look the reading up by position: index labels need not be unique
            # (e.g. a frame indexed by patient id), so a label lookup could return several rows.
            distances = (data[TIME] - post_time).abs().reset_index(drop=True)
            closest_pos = distances.idxmin()
            result[f"{t}-min Post Event"] = data[glucose_col].iloc[closest_pos]

    return result
512
+
513
def post_event_aucs(data: pd.DataFrame, event_time: pd.Timestamp, durations: list[int], glucose_col: str = GLUCOSE) -> dict:
    """
    Calculates AUC values for multiple durations (in minutes) starting from the given event_time.

    :param data: Pandas DataFrame containing the CGM data
    :type data: pd.DataFrame
    :param event_time: The time of the event
    :type event_time: pd.Timestamp
    :param durations: A list of integers representing the number of minutes after event_time for which to calculate the AUC
    :type durations: list[int]
    :param glucose_col: The name of the glucose column in the data, defaults to GLUCOSE
    :type glucose_col: str, optional
    :return: A dictionary where keys are strings like "X-min AUC" and values are the corresponding AUC readings or np.nan if no data is available
    :rtype: dict
    """
    result = {}
    for d in durations:
        key = f"{d}-min AUC"
        end_time = event_time + pd.Timedelta(minutes=d)
        in_window = (data[TIME] >= event_time) & (data[TIME] <= end_time)
        subset = data[in_window]

        if subset.empty:
            result[key] = np.nan
        else:
            # AUC() always integrates the GLUCOSE column, so present the requested
            # column under that name to honour 'glucose_col'.
            readings = subset[[glucose_col]].rename(columns={glucose_col: GLUCOSE})
            result[key] = AUC(readings)
    return result
539
+
540
def event_metrics(
    df: pd.DataFrame,
    event: pd.Series,
    post_times: list[int] = [60, 120],
    post_auc_times: list[int] = [120]
) -> pd.DataFrame:
    """Calculates basic metrics for events (baseline, peak, delta, AUC, iAUC, post event glucose
    values and their change from the 0-min value, and post event AUCs)

    :param df: Pandas DataFrame containing preprocessed CGM data
    :type df: pandas.DataFrame
    :param event: Pandas Series with fields that represent an 'event'
    :type event: pandas.Series
    :param post_times: A list of integers representing the number of minutes after the event time for which to find
        the glucose values, defaults to [60, 120]; the 0-min value is always reported as well
    :type post_times: list[int], optional
    :param post_auc_times: A list of integers representing the number of minutes after the event time over which
        to calculate the AUC, defaults to [120]
    :type post_auc_times: list[int], optional
    :return: single-row Pandas DataFrame containing the basic metrics for the given event
    :rtype: pandas.DataFrame
    """
    id = event[ID]

    datetime = pd.Timestamp(event[TIME])
    initial = datetime - pd.Timedelta(event[BEFORE], "m")
    final = datetime + pd.Timedelta(event[AFTER], "m")

    patient_data = df.loc[id]
    data = patient_data[(patient_data[TIME] >= initial) & (patient_data[TIME] <= final)].copy()

    # Collect everything in an insertion-ordered dict and build the Series once at the end.
    start_level = baseline(data)
    metrics = {
        "Baseline": start_level,
        "Peak": peak(data),
        "Delta": delta(data),
        "AUC": AUC(data),
        "iAUC": iAUC(data, start_level),
    }

    # Get post-event glucose values (including 0-min); pass a copy so the default list is never shared
    metrics.update(post_event_glucoses(data, datetime, list(post_times), GLUCOSE))

    # Compute deltas from 0-min Post Event
    zero_min_val = metrics["0-min Post Event"]
    for t in post_times:
        if t == 0:
            continue
        post_val = metrics.get(f"{t}-min Post Event", np.nan)
        if np.isnan(post_val) or np.isnan(zero_min_val):
            metrics[f"{t}-min Delta"] = np.nan
        else:
            metrics[f"{t}-min Delta"] = post_val - zero_min_val

    metrics.update(post_event_aucs(data, datetime, list(post_auc_times), GLUCOSE))

    # An explicit dtype keeps the output numeric regardless of the pandas version's default for empty Series.
    return pd.Series(metrics, dtype="float64").to_frame().T
596
+
597
def create_event_features(
    df: pd.DataFrame,
    events: pd.DataFrame,
) -> pd.DataFrame:
    """Builds a Pandas DataFrame of aggregate event metrics, with one row per patient found in 'df'

    :param df: a Pandas DataFrame containing all the relevant patient CGM data to generate event metrics for
    :type df: 'pandas.DataFrame'
    :param events: a single indexed Pandas DataFrame, with each row specifying a single event (id, datetime,
        before/after window, type and description)
    :type events: 'pandas.DataFrame'
    :return: a Pandas DataFrame indexed by patient id whose columns are the features computed for each event type
    :rtype: 'pandas.DataFrame'
    """
    event_features = {}
    for patient in df.index.unique():
        patient_events = events[events[ID] == patient]
        patient_data = df.loc[patient]

        # merge the per-type feature dictionaries into one flat dictionary for this patient
        merged = {}
        for event_type, group in patient_events.groupby(TYPE):
            merged.update(create_event_features_helper(patient_data, group, event_type))
        event_features[patient] = merged

    by_patient = pd.DataFrame(event_features)
    return by_patient.T
618
+
619
def create_event_features_helper(
    df: pd.DataFrame,
    sub_events: pd.DataFrame,
    type: str,
) -> dict[str, float]:
    """Calculates aggregate event-based metrics for a single patient and type of event. Helper method for 'create_event_features()'.

    Every feature except the per-day count is the mean of a per-event value. Values that are undefined for
    an event (e.g. a slope over a zero-length time span) are left out of the mean, and a feature with no
    defined value at all is reported as NaN.

    :param df: Pandas DataFrame containing the CGM trace for a single patient
    :type df: 'pandas.DataFrame'
    :param sub_events: Pandas DataFrame containing events of only one type solely for the patient whose CGM trace is also given
    :type sub_events: 'pandas.DataFrame'
    :param type: the type of event that 'sub_events' contains
    :type type: str
    :return: a dictionary with str-type keys that refer to the name of the calculated features and float-type values
    :rtype: dict[str, float]
    """
    duration_key = f"Mean {type} Duration"
    glucose_key = f"Mean Glucose During {type}s"
    up_slope_key = f"Mean Upwards Slope of {type}s (mg/dL per min)"
    down_slope_key = f"Mean Downwards Slope of {type}s (mg/dL per min)"
    min_key = f"Mean Minimum Glucose of {type}s"
    max_key = f"Mean Maximum Glucose of {type}s"
    amplitude_key = f"Mean Amplitude of {type}s"
    iauc_key = f"Mean iAUC of {type}s"

    features = {key: [] for key in (duration_key, glucose_key, up_slope_key, down_slope_key,
                                    min_key, max_key, amplitude_key, iauc_key)}

    def mean_of_defined(values):
        # mean over the non-NaN entries; NaN when there are none
        defined = np.asarray(values, dtype=float)
        defined = defined[~np.isnan(defined)]
        return defined.mean() if defined.size else np.nan

    for _, event in sub_events.iterrows():
        event_data = retrieve_event_data(df, event)

        # the event window spans BEFORE minutes ahead of the timestamp plus AFTER minutes past it
        features[duration_key].append(event[AFTER] + event[BEFORE])

        if event_data.empty:
            # no CGM readings inside this event's window, so nothing else can be measured
            continue

        # Positional labels make the idxmin/idxmax lookups below unambiguous even when the
        # incoming frame carries a repeated index label (e.g. the patient id).
        event_data = event_data.reset_index(drop=True)

        features[glucose_key].append(event_data[GLUCOSE].mean())
        features[min_key].append(nadir(event_data))
        features[max_key].append(peak(event_data))

        event_time = pd.Timestamp(event[TIME])
        closest_idx = (event_data[TIME] - event_time).abs().idxmin()
        event_glucose = event_data.loc[closest_idx, GLUCOSE]

        peak_glucose = peak(event_data)
        peak_time = event_data.loc[event_data[GLUCOSE].idxmax(), TIME]
        features[amplitude_key].append(abs(peak_glucose - event_glucose))

        time_diff_to_peak = (peak_time - event_time).total_seconds() / 60.0
        slope_to_peak = (peak_glucose - event_glucose) / time_diff_to_peak if time_diff_to_peak != 0 else np.nan
        features[up_slope_key].append(slope_to_peak)

        end_time = event_data[TIME].iloc[-1]
        end_glucose = event_data[GLUCOSE].iloc[-1]
        time_diff_peak_to_end = (end_time - peak_time).total_seconds() / 60.0
        slope_peak_to_end = (end_glucose - peak_glucose) / time_diff_peak_to_end if time_diff_peak_to_end != 0 else np.nan
        features[down_slope_key].append(slope_peak_to_end)

        features[iauc_key].append(iAUC(event_data, event_glucose))

    features = {k: mean_of_defined(v) for k, v in features.items()}
    features[f"Mean # of {type}s per day"] = sub_events.shape[0] / len(df[TIME].dt.date.unique())
    return features