paradigma 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
paradigma/util.py CHANGED
@@ -1,17 +1,45 @@
+import functools
 import os
+import warnings
+from datetime import datetime, timedelta
+
 import numpy as np
 import pandas as pd
-from datetime import datetime, timedelta
+import tsdf
 from dateutil import parser
-from typing import List, Tuple, Optional
 from scipy.stats import gaussian_kde
-
-import tsdf
 from tsdf import TSDFMetadata
 
 from paradigma.constants import DataColumns, TimeUnit
 
 
+def deprecated(reason: str = ""):
+    """
+    Decorator to mark functions as deprecated. It will show a warning when the
+    function is used.
+
+    Parameters
+    ----------
+    reason : str, optional
+        Additional message to explain why it is deprecated and what to use
+        instead.
+    """
+
+    def decorator(func):
+        message = f"Function {func.__name__} is deprecated."
+        if reason:
+            message += f" {reason}"
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            warnings.warn(message, category=DeprecationWarning, stacklevel=2)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
 def parse_iso8601_to_datetime(date_str):
     return parser.parse(date_str)
 
@@ -28,7 +56,7 @@ def get_end_iso8601(start_iso8601, window_length_seconds):
 
 def write_np_data(
     metadata_time: TSDFMetadata,
-    np_array_time: np.ndarray,
+    np_array_time: np.ndarray,
     metadata_values: TSDFMetadata,
     np_array_values: np.ndarray,
     output_path: str,
@@ -53,7 +81,7 @@ def write_np_data(
         The filename for the metadata.
 
     """
-
+
     if not os.path.exists(output_path):
        os.makedirs(output_path)
 
@@ -62,9 +90,19 @@
     metadata_values.file_dir_path = output_path
 
     # store binaries and metadata
-    time_tsdf = tsdf.write_binary_file(file_dir=output_path, file_name=metadata_time.file_name, data=np_array_time, metadata=metadata_time.get_plain_tsdf_dict_copy())
+    time_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_time.file_name,
+        data=np_array_time,
+        metadata=metadata_time.get_plain_tsdf_dict_copy(),
+    )
 
-    samples_tsdf = tsdf.write_binary_file(file_dir=output_path, file_name=metadata_values.file_name, data=np_array_values, metadata=metadata_values.get_plain_tsdf_dict_copy())
+    samples_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_values.file_name,
+        data=np_array_values,
+        metadata=metadata_values.get_plain_tsdf_dict_copy(),
+    )
 
     tsdf.write_metadata([time_tsdf, samples_tsdf], output_filename)
 
@@ -118,7 +156,7 @@ def write_df_data(
 
 def read_metadata(
     input_path: str, meta_filename: str, time_filename: str, values_filename: str
-) -> Tuple[TSDFMetadata, TSDFMetadata]:
+) -> tuple[TSDFMetadata, TSDFMetadata]:
     metadata_dict = tsdf.load_metadata_from_path(
         os.path.join(input_path, meta_filename)
     )
@@ -127,20 +165,30 @@ def read_metadata(
     return metadata_time, metadata_values
 
 
-def load_tsdf_dataframe(path_to_data, prefix, meta_suffix='meta.json', time_suffix='time.bin', values_suffix='values.bin'):
+def load_tsdf_dataframe(
+    path_to_data,
+    prefix,
+    meta_suffix="meta.json",
+    time_suffix="time.bin",
+    values_suffix="values.bin",
+):
     meta_filename = f"{prefix}_{meta_suffix}"
     time_filename = f"{prefix}_{time_suffix}"
     values_filename = f"{prefix}_{values_suffix}"
 
-    metadata_time, metadata_values = read_metadata(path_to_data, meta_filename, time_filename, values_filename)
-    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns)
+    metadata_time, metadata_values = read_metadata(
+        path_to_data, meta_filename, time_filename, values_filename
+    )
+    df = tsdf.load_dataframe_from_binaries(
+        [metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns
+    )
 
     return df, metadata_time, metadata_values
 
 
 def load_metadata_list(
-    dir_path: str, meta_filename: str, filenames: List[str]
-) -> List[TSDFMetadata]:
+    dir_path: str, meta_filename: str, filenames: list[str]
+) -> list[TSDFMetadata]:
     """
     Load the metadata objects from a metadata file according to the specified binaries.
 
@@ -152,11 +200,9 @@ def load_metadata_list(
         The filename of the metadata file.
     filenames : List[str]
         The list of binary files of which the metadata files need to be loaded
-
-    """
-    metadata_dict = tsdf.load_metadata_from_path(
-        os.path.join(dir_path, meta_filename)
-    )
+
+    """
+    metadata_dict = tsdf.load_metadata_from_path(os.path.join(dir_path, meta_filename))
     metadata_list = []
     for filename in filenames:
         metadata_list.append(metadata_dict[filename])
@@ -171,7 +217,8 @@ def transform_time_array(
     start_time: float = 0.0,
 ) -> np.ndarray:
     """
-    Transforms the time array to relative time (when defined in delta time) and scales the values.
+    Transforms the time array to relative time (when defined in delta time)
+    and scales the values.
 
     Parameters
     ----------
@@ -180,7 +227,8 @@ def transform_time_array(
     input_unit_type : str
         The time unit type of the input time array.
     output_unit_type : str
-        The time unit type of the output time array. ParaDigMa expects `TimeUnit.RELATIVE_S`.
+        The time unit type of the output time array. ParaDigMa expects
+        `TimeUnit.RELATIVE_S`.
     start_time : float, optional
         The start time of the time array in UNIX seconds (default is 0.0)
 
@@ -191,41 +239,65 @@ def transform_time_array(
 
     Notes
     -----
-    - The function handles different time units (`TimeUnit.RELATIVE_MS`, `TimeUnit.RELATIVE_S`, `TimeUnit.ABSOLUTE_MS`, `TimeUnit.ABSOLUTE_S`, `TimeUnit.DIFFERENCE_MS`, `TimeUnit.DIFFERENCE_S`).
-    - The transformation allows for scaling of the time array, converting between time unit types (e.g., relative, absolute, or difference).
-    - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the relative time starting from the provided or default start time.
+    - The function handles different time units (`TimeUnit.RELATIVE_MS`,
+      `TimeUnit.RELATIVE_S`, `TimeUnit.ABSOLUTE_MS`, `TimeUnit.ABSOLUTE_S`,
+      `TimeUnit.DIFFERENCE_MS`, `TimeUnit.DIFFERENCE_S`).
+    - The transformation allows for scaling of the time array, converting
+      between time unit types (e.g., relative, absolute, or difference).
+    - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the
+      relative time starting from the provided or default start time.
     """
-    input_units = input_unit_type.split('_')[-1].lower()
-    output_units = output_unit_type.split('_')[-1].lower()
+    input_units = input_unit_type.split("_")[-1].lower()
+    output_units = output_unit_type.split("_")[-1].lower()
 
     if input_units == output_units:
         scale_factor = 1
-    elif input_units == 's' and output_units == 'ms':
+    elif input_units == "s" and output_units == "ms":
         scale_factor = 1e3
-    elif input_units == 'ms' and output_units == 's':
+    elif input_units == "ms" and output_units == "s":
         scale_factor = 1 / 1e3
     else:
-        raise ValueError(f"Unsupported time units conversion: {input_units} to {output_units}")
-
-    # Transform to relative time (`TimeUnit.RELATIVE_MS`)
-    if input_unit_type == TimeUnit.DIFFERENCE_MS or input_unit_type == TimeUnit.DIFFERENCE_S:
-        # Convert a series of differences into cumulative sum to reconstruct original time series.
+        raise ValueError(
+            f"Unsupported time units conversion: {input_units} to {output_units}"
+        )
+
+    # Transform to relative time (`TimeUnit.RELATIVE_MS`)
+    if (
+        input_unit_type == TimeUnit.DIFFERENCE_MS
+        or input_unit_type == TimeUnit.DIFFERENCE_S
+    ):
+        # Convert a series of differences into cumulative sum to
+        # reconstruct original time series.
         time_array = np.cumsum(np.double(time_array))
-    elif input_unit_type == TimeUnit.ABSOLUTE_MS or input_unit_type == TimeUnit.ABSOLUTE_S:
+    elif (
+        input_unit_type == TimeUnit.ABSOLUTE_MS
+        or input_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Set the start time if not provided.
         if np.isclose(start_time, 0.0, rtol=1e-09, atol=1e-09):
             start_time = time_array[0]
         # Convert absolute time stamps into a time series relative to start_time.
-        time_array = (time_array - start_time)
-
-    # Transform the time array from `TimeUnit.RELATIVE_MS` to the specified time unit type
-    if output_unit_type == TimeUnit.ABSOLUTE_MS or output_unit_type == TimeUnit.ABSOLUTE_S:
+        time_array = time_array - start_time
+
+    # Transform the time array from `TimeUnit.RELATIVE_MS` to the
+    # specified time unit type
+    if (
+        output_unit_type == TimeUnit.ABSOLUTE_MS
+        or output_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Converts time array to absolute time by adding the start time to each element.
         time_array = time_array + start_time
-    elif output_unit_type == TimeUnit.DIFFERENCE_MS or output_unit_type == TimeUnit.DIFFERENCE_S:
-        # Creates a new array starting with 0, followed by the differences between consecutive elements.
+    elif (
+        output_unit_type == TimeUnit.DIFFERENCE_MS
+        or output_unit_type == TimeUnit.DIFFERENCE_S
+    ):
+        # Creates a new array starting with 0, followed by the
+        # differences between consecutive elements.
         time_array = np.diff(np.insert(time_array, 0, start_time))
-    elif output_unit_type == TimeUnit.RELATIVE_MS or output_unit_type == TimeUnit.RELATIVE_S:
+    elif (
+        output_unit_type == TimeUnit.RELATIVE_MS
+        or output_unit_type == TimeUnit.RELATIVE_S
+    ):
         # The array is already in relative format, do nothing.
         pass
 
@@ -256,25 +328,25 @@ def convert_units_accelerometer(data: np.ndarray, units: str) -> np.ndarray:
         return data
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
+
 
 def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
     """
     Convert gyroscope data to deg/s.
-
+
     Parameters
     ----------
     data : np.ndarray
         The gyroscope data.
-
+
     units : str
         The unit of the data (currently supports deg/s and rad/s).
-
+
     Returns
     -------
     np.ndarray
         The gyroscope data in deg/s.
-
+
     """
     if units == "deg/s":
         return data
@@ -282,9 +354,9 @@ def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
         return np.degrees(data)
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
 
-def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
+
+def invert_watch_side(df: pd.DataFrame, side: str, sensor="both") -> np.ndarray:
     """
     Invert the data based on the watch side.
 
@@ -305,78 +377,88 @@ def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
     """
     if side not in ["left", "right"]:
         raise ValueError(f"Unsupported side: {side}")
-    if sensor not in ['accelerometer', 'gyroscope', 'both']:
+    if sensor not in ["accelerometer", "gyroscope", "both"]:
         raise ValueError(f"Unsupported sensor: {sensor}")
 
     elif side == "right":
-        if sensor in ['gyroscope', 'both']:
+        if sensor in ["gyroscope", "both"]:
            df[DataColumns.GYROSCOPE_Y] *= -1
            df[DataColumns.GYROSCOPE_Z] *= -1
-        if sensor in ['accelerometer', 'both']:
+        if sensor in ["accelerometer", "both"]:
            df[DataColumns.ACCELEROMETER_X] *= -1
 
     return df
 
-def aggregate_parameter(parameter: np.ndarray, aggregate: str, evaluation_points: Optional[np.ndarray] = None) -> np.ndarray | int:
+
+def aggregate_parameter(
+    parameter: np.ndarray,
+    aggregate: str,
+    evaluation_points: np.ndarray | None = None,
+) -> np.ndarray | int:
     """
     Aggregate a parameter based on the specified method.
-
+
     Parameters
     ----------
     parameter : np.ndarray
         The parameter to aggregate.
-
+
     aggregate : str
         The aggregation method to apply.
 
     evaluation_points : np.ndarray, optional
-        Should be specified if the mode is derived for a continuous parameter.
-        Defines the evaluation points for the kernel density estimation function, from which the maximum is derived as the mode.
+        Should be specified if the mode is derived for a continuous parameter.
+        Defines the evaluation points for the kernel density estimation
+        function, from which the maximum is derived as the mode.
 
     Returns
     -------
     np.ndarray
         The aggregated parameter.
     """
-    if aggregate == 'mean':
+    if aggregate == "mean":
         return np.mean(parameter)
-    elif aggregate == 'median':
+    elif aggregate == "median":
         return np.median(parameter)
-    elif aggregate == 'mode_binned':
+    elif aggregate == "mode_binned":
         if evaluation_points is None:
-            raise ValueError("evaluation_points must be provided for 'mode_binned' aggregation.")
+            raise ValueError(
+                "evaluation_points must be provided for 'mode_binned' aggregation."
+            )
         else:
             kde = gaussian_kde(parameter)
             kde_values = kde(evaluation_points)
             max_index = np.argmax(kde_values)
             return evaluation_points[max_index]
-    elif aggregate == 'mode':
+    elif aggregate == "mode":
         unique_values, counts = np.unique(parameter, return_counts=True)
         return unique_values[np.argmax(counts)]
-    elif aggregate == '90p':
+    elif aggregate == "90p":
         return np.percentile(parameter, 90)
-    elif aggregate == '95p':
+    elif aggregate == "95p":
         return np.percentile(parameter, 95)
-    elif aggregate == '99p':
+    elif aggregate == "99p":
         return np.percentile(parameter, 99)
-    elif aggregate == 'std':
+    elif aggregate == "std":
         return np.std(parameter)
-    elif aggregate == 'cov':
+    elif aggregate == "cov":
         mean_value = np.mean(parameter)
         return np.std(parameter) / mean_value if mean_value != 0 else 0
     else:
         raise ValueError(f"Invalid aggregation method: {aggregate}")
 
+
 def merge_predictions_with_timestamps(
-        df_ts: pd.DataFrame,
-        df_predictions: pd.DataFrame,
-        pred_proba_colname: str,
-        window_length_s: float,
-        fs: int
-    ) -> pd.DataFrame:
+    df_ts: pd.DataFrame,
+    df_predictions: pd.DataFrame,
+    pred_proba_colname: str,
+    window_length_s: float,
+    fs: int,
+) -> pd.DataFrame:
     """
-    Merges prediction probabilities with timestamps by expanding overlapping windows
-    into individual timestamps and averaging probabilities per unique timestamp.
+    Merges prediction probabilities with timestamps by expanding overlapping
+    windows into individual timestamps and averaging probabilities per unique
+    timestamp.
 
     Parameters:
     ----------
@@ -385,10 +467,11 @@ def merge_predictions_with_timestamps(
         Must include the timestamp column specified in `DataColumns.TIME`.
 
     df_predictions : pd.DataFrame
-        DataFrame containing prediction windows with start times and probabilities.
-        Must include:
+        DataFrame containing prediction windows with start times and
+        probabilities. Must include:
         - A column for window start times (defined by `DataColumns.TIME`).
-        - A column for prediction probabilities (defined by `DataColumns.PRED_GAIT_PROBA`).
+        - A column for prediction probabilities (defined by
+          `DataColumns.PRED_GAIT_PROBA`).
 
     pred_proba_colname : str
         The column name for the prediction probabilities in `df_predictions`.
@@ -398,7 +481,7 @@ def merge_predictions_with_timestamps(
 
     fs : int
         The sampling frequency of the data.
-
+
     Returns:
     -------
     pd.DataFrame
@@ -419,22 +502,18 @@ def merge_predictions_with_timestamps(
     # Step 1: Generate all timestamps for prediction windows using NumPy broadcasting
     window_length = int(window_length_s * fs)
     timestamps = (
-        df_predictions[DataColumns.TIME].values[:, None] +
-        np.arange(0, window_length) / fs
+        df_predictions[DataColumns.TIME].values[:, None]
+        + np.arange(0, window_length) / fs
     )
-
+
     # Flatten timestamps and probabilities into a single array for efficient processing
     flat_timestamps = timestamps.ravel()
-    flat_proba = np.repeat(
-        df_predictions[pred_proba_colname].values,
-        window_length
-    )
+    flat_proba = np.repeat(df_predictions[pred_proba_colname].values, window_length)
 
     # Step 2: Create a DataFrame for expanded data
-    expanded_df = pd.DataFrame({
-        DataColumns.TIME: flat_timestamps,
-        pred_proba_colname: flat_proba
-    })
+    expanded_df = pd.DataFrame(
+        {DataColumns.TIME: flat_timestamps, pred_proba_colname: flat_proba}
+    )
 
     # Step 3: Round timestamps and aggregate probabilities
     expanded_df[DataColumns.TIME] = expanded_df[DataColumns.TIME].round(2)
@@ -442,14 +521,15 @@ def merge_predictions_with_timestamps(
 
     # Step 4: Round timestamps in `df_ts` and merge
     df_ts[DataColumns.TIME] = df_ts[DataColumns.TIME].round(2)
-    df_ts = pd.merge(df_ts, mean_proba, how='left', on=DataColumns.TIME)
+    df_ts = pd.merge(df_ts, mean_proba, how="left", on=DataColumns.TIME)
     df_ts = df_ts.dropna(subset=[pred_proba_colname])
 
     return df_ts
 
 
-def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
-
+def select_hours(
+    df: pd.DataFrame, select_hours_start: str, select_hours_end: str
+) -> pd.DataFrame:
     """
     Select hours of interest from the data to include in the aggregation step.
 
@@ -460,7 +540,7 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: st
 
     select_hours_start: str
         The start time of the selected hours in "HH:MM" format.
-
+
     select_hours_end: str
         The end time of the selected hours in "HH:MM" format.
 
@@ -471,14 +551,18 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: st
 
     """
 
-    select_hours_start = datetime.strptime(select_hours_start, '%H:%M').time() # convert to time object
-    select_hours_end = datetime.strptime(select_hours_end, '%H:%M').time()
-    df_subset = df[df['time_dt'].dt.time.between(select_hours_start, select_hours_end)] # select the hours of interest
+    select_hours_start = datetime.strptime(
+        select_hours_start, "%H:%M"
+    ).time()  # convert to time object
+    select_hours_end = datetime.strptime(select_hours_end, "%H:%M").time()
+    df_subset = df[
+        df["time_dt"].dt.time.between(select_hours_start, select_hours_end)
+    ]  # select the hours of interest
 
     return df_subset
 
-def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
 
+def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
     Select days of interest from the data to include in the aggregation step.
 
@@ -488,7 +572,8 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
         Input data with column 'time_dt' in which the date is stored.
 
     min_hours_per_day: int
-        The minimum number of hours per day required for including the day in the aggregation step.
+        The minimum number of hours per day required for including the day
+        in the aggregation step.
 
 
     Returns
@@ -499,8 +584,12 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
 
     min_s_per_day = min_hours_per_day * 3600
-    window_length_s = df['time_dt'].diff().dt.total_seconds().iloc[1] # determine the length of the first window in seconds
+    window_length_s = (
+        df["time_dt"].diff().dt.total_seconds().iloc[1]
+    )  # determine the length of the first window in seconds
     min_windows_per_day = min_s_per_day / window_length_s
-    df_subset = df.groupby(df['time_dt'].dt.date).filter(lambda x: len(x) >= min_windows_per_day)
+    df_subset = df.groupby(df["time_dt"].dt.date).filter(
+        lambda x: len(x) >= min_windows_per_day
+    )
 
-    return df_subset
+    return df_subset
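Aside from formatting changes (double-quoted strings, black-style line wrapping, and built-in `tuple`/`list` generics replacing `typing.Tuple`/`typing.List`/`typing.Optional`), the main functional addition in 1.1.0 is the `deprecated` decorator in paradigma/util.py. The following is a minimal usage sketch of that decorator; `old_resample` and `new_resample` are hypothetical names used only for illustration and are not part of the package.

import warnings

from paradigma.util import deprecated


# Hypothetical function, decorated only to show the warning emitted on each call.
@deprecated(reason="Use new_resample instead.")
def old_resample(x):
    return x


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    old_resample(42)

# Expected output: DeprecationWarning Function old_resample is deprecated. Use new_resample instead.
print(caught[0].category.__name__, caught[0].message)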