iints-sdk-python35 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. iints/__init__.py +183 -0
  2. iints/analysis/__init__.py +12 -0
  3. iints/analysis/algorithm_xray.py +387 -0
  4. iints/analysis/baseline.py +92 -0
  5. iints/analysis/clinical_benchmark.py +198 -0
  6. iints/analysis/clinical_metrics.py +551 -0
  7. iints/analysis/clinical_tir_analyzer.py +136 -0
  8. iints/analysis/diabetes_metrics.py +43 -0
  9. iints/analysis/edge_efficiency.py +33 -0
  10. iints/analysis/edge_performance_monitor.py +315 -0
  11. iints/analysis/explainability.py +94 -0
  12. iints/analysis/explainable_ai.py +232 -0
  13. iints/analysis/hardware_benchmark.py +221 -0
  14. iints/analysis/metrics.py +117 -0
  15. iints/analysis/population_report.py +188 -0
  16. iints/analysis/reporting.py +345 -0
  17. iints/analysis/safety_index.py +311 -0
  18. iints/analysis/sensor_filtering.py +54 -0
  19. iints/analysis/validator.py +273 -0
  20. iints/api/__init__.py +0 -0
  21. iints/api/base_algorithm.py +307 -0
  22. iints/api/registry.py +103 -0
  23. iints/api/template_algorithm.py +195 -0
  24. iints/assets/iints_logo.png +0 -0
  25. iints/cli/__init__.py +0 -0
  26. iints/cli/cli.py +2598 -0
  27. iints/core/__init__.py +1 -0
  28. iints/core/algorithms/__init__.py +0 -0
  29. iints/core/algorithms/battle_runner.py +138 -0
  30. iints/core/algorithms/correction_bolus.py +95 -0
  31. iints/core/algorithms/discovery.py +92 -0
  32. iints/core/algorithms/fixed_basal_bolus.py +58 -0
  33. iints/core/algorithms/hybrid_algorithm.py +92 -0
  34. iints/core/algorithms/lstm_algorithm.py +138 -0
  35. iints/core/algorithms/mock_algorithms.py +162 -0
  36. iints/core/algorithms/pid_controller.py +88 -0
  37. iints/core/algorithms/standard_pump_algo.py +64 -0
  38. iints/core/device.py +0 -0
  39. iints/core/device_manager.py +64 -0
  40. iints/core/devices/__init__.py +3 -0
  41. iints/core/devices/models.py +160 -0
  42. iints/core/patient/__init__.py +9 -0
  43. iints/core/patient/bergman_model.py +341 -0
  44. iints/core/patient/models.py +285 -0
  45. iints/core/patient/patient_factory.py +117 -0
  46. iints/core/patient/profile.py +41 -0
  47. iints/core/safety/__init__.py +12 -0
  48. iints/core/safety/config.py +37 -0
  49. iints/core/safety/input_validator.py +95 -0
  50. iints/core/safety/supervisor.py +39 -0
  51. iints/core/simulation/__init__.py +0 -0
  52. iints/core/simulation/scenario_parser.py +61 -0
  53. iints/core/simulator.py +874 -0
  54. iints/core/supervisor.py +367 -0
  55. iints/data/__init__.py +53 -0
  56. iints/data/adapter.py +142 -0
  57. iints/data/column_mapper.py +398 -0
  58. iints/data/datasets.json +132 -0
  59. iints/data/demo/__init__.py +1 -0
  60. iints/data/demo/demo_cgm.csv +289 -0
  61. iints/data/importer.py +275 -0
  62. iints/data/ingestor.py +162 -0
  63. iints/data/nightscout.py +128 -0
  64. iints/data/quality_checker.py +550 -0
  65. iints/data/registry.py +166 -0
  66. iints/data/tidepool.py +38 -0
  67. iints/data/universal_parser.py +813 -0
  68. iints/data/virtual_patients/clinic_safe_baseline.yaml +9 -0
  69. iints/data/virtual_patients/clinic_safe_hyper_challenge.yaml +9 -0
  70. iints/data/virtual_patients/clinic_safe_hypo_prone.yaml +9 -0
  71. iints/data/virtual_patients/clinic_safe_midnight.yaml +9 -0
  72. iints/data/virtual_patients/clinic_safe_pizza.yaml +9 -0
  73. iints/data/virtual_patients/clinic_safe_stress_meal.yaml +9 -0
  74. iints/data/virtual_patients/default_patient.yaml +11 -0
  75. iints/data/virtual_patients/patient_559_config.yaml +11 -0
  76. iints/emulation/__init__.py +80 -0
  77. iints/emulation/legacy_base.py +414 -0
  78. iints/emulation/medtronic_780g.py +337 -0
  79. iints/emulation/omnipod_5.py +367 -0
  80. iints/emulation/tandem_controliq.py +393 -0
  81. iints/highlevel.py +451 -0
  82. iints/learning/__init__.py +3 -0
  83. iints/learning/autonomous_optimizer.py +194 -0
  84. iints/learning/learning_system.py +122 -0
  85. iints/metrics.py +34 -0
  86. iints/population/__init__.py +11 -0
  87. iints/population/generator.py +131 -0
  88. iints/population/runner.py +327 -0
  89. iints/presets/__init__.py +28 -0
  90. iints/presets/presets.json +114 -0
  91. iints/research/__init__.py +30 -0
  92. iints/research/config.py +68 -0
  93. iints/research/dataset.py +319 -0
  94. iints/research/losses.py +73 -0
  95. iints/research/predictor.py +329 -0
  96. iints/scenarios/__init__.py +3 -0
  97. iints/scenarios/generator.py +92 -0
  98. iints/templates/__init__.py +0 -0
  99. iints/templates/default_algorithm.py +91 -0
  100. iints/templates/scenarios/__init__.py +0 -0
  101. iints/templates/scenarios/chaos_insulin_stacking.json +29 -0
  102. iints/templates/scenarios/chaos_runaway_ai.json +25 -0
  103. iints/templates/scenarios/example_scenario.json +35 -0
  104. iints/templates/scenarios/exercise_stress.json +30 -0
  105. iints/utils/__init__.py +3 -0
  106. iints/utils/plotting.py +50 -0
  107. iints/utils/run_io.py +152 -0
  108. iints/validation/__init__.py +133 -0
  109. iints/validation/schemas.py +94 -0
  110. iints/visualization/__init__.py +34 -0
  111. iints/visualization/cockpit.py +691 -0
  112. iints/visualization/uncertainty_cloud.py +612 -0
  113. iints_sdk_python35-0.0.18.dist-info/METADATA +225 -0
  114. iints_sdk_python35-0.0.18.dist-info/RECORD +118 -0
  115. iints_sdk_python35-0.0.18.dist-info/WHEEL +5 -0
  116. iints_sdk_python35-0.0.18.dist-info/entry_points.txt +10 -0
  117. iints_sdk_python35-0.0.18.dist-info/licenses/LICENSE +28 -0
  118. iints_sdk_python35-0.0.18.dist-info/top_level.txt +1 -0
@@ -0,0 +1,550 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Data Quality Checker - IINTS-AF
4
+ Validates data quality and calculates confidence scores with gap detection.
5
+ """
6
+
7
+ from typing import Dict, List, Optional, Tuple
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime, timedelta
10
+ import pandas as pd
11
+ import numpy as np
12
+
13
+
14
@dataclass
class QualityReport:
    """Comprehensive data quality report."""
    overall_score: float        # 0.0 - 1.0
    completeness_score: float   # Data coverage percentage
    consistency_score: float    # Temporal consistency
    validity_score: float       # Value range validation
    gaps: List['DataGap']
    anomalies: List['DataAnomaly']
    warnings: List[str]
    summary: str

    def to_dict(self) -> Dict:
        """Serialize the report, including nested gaps/anomalies, to a plain dict."""
        serialized = {
            'overall_score': self.overall_score,
            'completeness_score': self.completeness_score,
            'consistency_score': self.consistency_score,
            'validity_score': self.validity_score,
        }
        serialized['gaps'] = [gap.to_dict() for gap in self.gaps]
        serialized['anomalies'] = [anomaly.to_dict() for anomaly in self.anomalies]
        serialized['warnings'] = self.warnings
        serialized['summary'] = self.summary
        return serialized
37
+
38
+
39
@dataclass
class DataGap:
    """Represents a gap in the data."""
    start_time: float
    end_time: float
    duration_minutes: float
    data_points_missing: int
    percentage_of_total: float
    time_range_description: str

    # Field names serialized by to_dict, in output order.
    _FIELDS = ('start_time', 'end_time', 'duration_minutes',
               'data_points_missing', 'percentage_of_total',
               'time_range_description')

    def to_dict(self) -> Dict:
        """Return the gap as a plain dict keyed by field name."""
        return {name: getattr(self, name) for name in self._FIELDS}

    def get_warning_message(self) -> str:
        """Generate human-readable warning message."""
        parts = [
            f"[WARN] DATA GAP DETECTED: {self.percentage_of_total:.1f}% of data missing ",
            f"({self.data_points_missing} points) between {self.time_range_description} ",
            f"({self.duration_minutes:.0f} minutes)",
        ]
        return "".join(parts)
66
+
67
+
68
@dataclass
class DataAnomaly:
    """Represents an anomalous data point."""
    index: int
    timestamp: float
    value: float
    anomaly_type: str   # 'outlier', 'impossible_value', 'rapid_change'
    severity: str       # 'low', 'medium', 'high'
    description: str

    # Field names serialized by to_dict, in output order.
    _FIELDS = ('index', 'timestamp', 'value',
               'anomaly_type', 'severity', 'description')

    def to_dict(self) -> Dict:
        """Return the anomaly as a plain dict keyed by field name."""
        return {name: getattr(self, name) for name in self._FIELDS}
87
+
88
+
89
+ class DataQualityChecker:
90
+ """
91
+ Validates data quality and calculates confidence scores.
92
+
93
+ Performs comprehensive checks:
94
+ - Completeness: Detects missing data and gaps
95
+ - Consistency: Validates temporal sampling
96
+ - Validity: Checks value ranges
97
+
98
+ Outputs confidence score and detailed warnings.
99
+ """
100
+
101
+ # Physiological limits for glucose values
102
+ GLUCOSE_LIMITS = {
103
+ 'minimum': 20, # mg/dL - physiologically possible minimum
104
+ 'maximum': 600, # mg/dL - physiologically possible maximum
105
+ 'critical_low': 54, # mg/dL - clinically significant low
106
+ 'critical_high': 350 # mg/dL - clinically significant high
107
+ }
108
+
109
+ PHYSIOLOGICAL_RATES = {
110
+ 'max_glucose_change_per_min': 19.9 # mg/dL/min - Detecting changes of 20 mg/dL/min or more
111
+ }
112
+
113
+ # Expected sampling intervals (in minutes)
114
+ EXPECTED_INTERVALS = {
115
+ 'cgm': 5, # Continuous Glucose Monitor
116
+ 'bg_meter': 60, # Blood glucose meter
117
+ 'manual': 240 # Manual logging
118
+ }
119
+
120
+ def __init__(self, expected_interval: int = 5, source_type: str = 'cgm'):
121
+ """
122
+ Initialize quality checker.
123
+
124
+ Args:
125
+ expected_interval: Expected time between readings in minutes
126
+ source_type: Data source type ('cgm', 'bg_meter', 'manual')
127
+ """
128
+ self.expected_interval = expected_interval
129
+ self.source_type = source_type
130
+
131
+ def check_completeness(self, df: pd.DataFrame) -> Tuple[float, List[DataGap]]:
132
+ """
133
+ Check data completeness and detect gaps.
134
+
135
+ Args:
136
+ df: DataFrame with timestamp and glucose columns
137
+
138
+ Returns:
139
+ Tuple of (completeness_score, list of gaps)
140
+ """
141
+ if 'timestamp' not in df.columns:
142
+ return 1.0, [] # Can't check without timestamp
143
+
144
+ timestamps = df['timestamp'].dropna().sort_values().astype(float)
145
+
146
+ if len(timestamps) < 2:
147
+ return 1.0, []
148
+
149
+ # Calculate expected number of readings
150
+ time_span = timestamps.iloc[-1] - timestamps.iloc[0]
151
+ expected_readings = int((time_span / self.expected_interval) + 1)
152
+ actual_readings = len(timestamps)
153
+
154
+ # Completeness score
155
+ completeness = min(1.0, actual_readings / expected_readings)
156
+
157
+ # Detect gaps
158
+ gaps = self._detect_gaps(timestamps, time_span, actual_readings, int(expected_readings))
159
+
160
+ return completeness, gaps
161
+
162
+ def _detect_gaps(self,
163
+ timestamps: pd.Series,
164
+ time_span: float,
165
+ actual_readings: int,
166
+ expected_readings: int) -> List[DataGap]:
167
+ """Detect gaps in the data"""
168
+ gaps: List[DataGap] = []
169
+
170
+ if actual_readings < 2:
171
+ return gaps
172
+
173
+ # Calculate time differences between consecutive readings
174
+ time_diffs = timestamps.diff().dropna().astype(float)
175
+
176
+ # Threshold for gap detection (3x expected interval)
177
+ gap_threshold = float(self.expected_interval * 3)
178
+
179
+ # Find gap locations
180
+ gap_indices = time_diffs[time_diffs > gap_threshold].index
181
+
182
+ for idx in gap_indices:
183
+ # Get timestamps around the gap
184
+ before_idx = idx - 1
185
+ after_idx = idx
186
+
187
+ start_time = timestamps.loc[before_idx]
188
+ end_time = timestamps.loc[after_idx]
189
+
190
+ gap_duration = end_time - start_time
191
+ points_missing = int(gap_duration / self.expected_interval) - 1
192
+ gap_percentage = (points_missing / expected_readings) * 100 if expected_readings > 0 else 0
193
+
194
+ # Create time range description
195
+ start_minutes = int(start_time)
196
+ end_minutes = int(end_time)
197
+ hours_start = start_minutes // 60
198
+ mins_start = start_minutes % 60
199
+ hours_end = end_minutes // 60
200
+ mins_end = end_minutes % 60
201
+
202
+ time_range_desc = f"{hours_start:02d}:{mins_start:02d} - {hours_end:02d}:{mins_end:02d}"
203
+
204
+ gap = DataGap(
205
+ start_time=start_time,
206
+ end_time=end_time,
207
+ duration_minutes=gap_duration,
208
+ data_points_missing=points_missing,
209
+ percentage_of_total=gap_percentage,
210
+ time_range_description=time_range_desc
211
+ )
212
+ gaps.append(gap)
213
+
214
+ return gaps
215
+
216
+ def check_consistency(self, df: pd.DataFrame) -> float:
217
+ """
218
+ Check temporal consistency of data.
219
+
220
+ Args:
221
+ df: DataFrame with timestamp column
222
+
223
+ Returns:
224
+ Consistency score (0.0 - 1.0)
225
+ """
226
+ if 'timestamp' not in df.columns:
227
+ return 1.0
228
+
229
+ timestamps = df['timestamp'].dropna().sort_values()
230
+
231
+ if len(timestamps) < 3:
232
+ return 1.0
233
+
234
+ # Calculate time differences
235
+ time_diffs = timestamps.diff().dropna()
236
+
237
+ if len(time_diffs) == 0:
238
+ return 1.0
239
+
240
+ # Check for irregular intervals
241
+ mean_interval = time_diffs.mean()
242
+ std_interval = time_diffs.std()
243
+
244
+ # Coefficient of variation
245
+ cv = std_interval / mean_interval if mean_interval > 0 else 0
246
+
247
+ # Score based on CV (lower is better)
248
+ if cv < 0.1: # Very consistent
249
+ return 1.0
250
+ elif cv < 0.25: # Mostly consistent
251
+ return 0.9
252
+ elif cv < 0.5: # Somewhat inconsistent
253
+ return 0.7
254
+ else: # Very inconsistent
255
+ return 0.5
256
+
257
+ def check_validity(self, df: pd.DataFrame) -> Tuple[float, List[DataAnomaly]]:
258
+ """
259
+ Check data validity and detect anomalies.
260
+
261
+ Args:
262
+ df: DataFrame with glucose column
263
+
264
+ Returns:
265
+ Tuple of (validity_score, list of anomalies)
266
+ """
267
+ anomalies: List[DataAnomaly] = []
268
+
269
+ if 'glucose' not in df.columns:
270
+ return 1.0, anomalies
271
+
272
+ glucose = df['glucose'].dropna()
273
+
274
+ if len(glucose) == 0:
275
+ return 1.0, anomalies
276
+
277
+ # Check for impossible values
278
+ for idx, value in glucose.items():
279
+ if value < self.GLUCOSE_LIMITS['minimum']:
280
+ anomalies.append(DataAnomaly(
281
+ index=int(idx), # type: ignore
282
+ timestamp=float(df.at[idx, 'timestamp']), # type: ignore
283
+ value=value,
284
+ anomaly_type='impossible_value',
285
+ severity='high',
286
+ description=f"Glucose {value:.1f} mg/dL below physiological minimum ({self.GLUCOSE_LIMITS['minimum']})"
287
+ ))
288
+ elif value > self.GLUCOSE_LIMITS['maximum']:
289
+ anomalies.append(DataAnomaly(
290
+ index=int(idx), # type: ignore
291
+ timestamp=float(df.at[idx, 'timestamp']), # type: ignore
292
+ value=value,
293
+ anomaly_type='impossible_value',
294
+ severity='high',
295
+ description=f"Glucose {value:.1f} mg/dL above physiological maximum ({self.GLUCOSE_LIMITS['maximum']})"
296
+ ))
297
+
298
+ # Check for outliers using IQR method
299
+ q1 = glucose.quantile(0.25)
300
+ q3 = glucose.quantile(0.75)
301
+ iqr = q3 - q1
302
+ lower_bound = q1 - 3 * iqr # Using 3*IQR for extreme outliers
303
+ upper_bound = q3 + 3 * iqr
304
+
305
+ for idx, value in glucose.items():
306
+ if value < lower_bound or value > upper_bound:
307
+ severity = 'low' if (abs(value - glucose.median()) < 3 * iqr) else 'medium'
308
+ anomalies.append(DataAnomaly(
309
+ index=int(idx), # type: ignore
310
+ timestamp=float(df.at[idx, 'timestamp']), # type: ignore
311
+ value=value,
312
+ anomaly_type='outlier',
313
+ severity=severity,
314
+ description=f"Outlier glucose value {value:.1f} mg/dL"
315
+ ))
316
+
317
+ # Check for rapid glucose changes (physiologically impossible)
318
+ if 'timestamp' in df.columns:
319
+ glucose_with_time = df[['timestamp', 'glucose']].dropna().sort_values('timestamp')
320
+ if len(glucose_with_time) >= 2:
321
+ time_diff = glucose_with_time['timestamp'].diff() # type: ignore
322
+ glucose_diff = glucose_with_time['glucose'].diff() # type: ignore
323
+
324
+ # Rate of change in mg/dL per minute
325
+ rate_of_change = glucose_diff / time_diff
326
+
327
+ # Use the new class attribute
328
+ max_rate = self.PHYSIOLOGICAL_RATES['max_glucose_change_per_min']
329
+ rapid_change_mask = rate_of_change.abs() > max_rate
330
+
331
+ for idx in rate_of_change[rapid_change_mask].index:
332
+ change = glucose_diff.loc[idx] # type: ignore
333
+ time_delta = time_diff.loc[idx] # type: ignore
334
+ actual_rate = rate_of_change.loc[idx] # type: ignore
335
+
336
+ direction = "rise" if change > 0 else "drop"
337
+ description = (f"Impossible glucose {direction} of {actual_rate:.1f} mg/dL/min "
338
+ f"(changed by {change:.1f} in {time_delta:.1f} min)")
339
+
340
+ anomalies.append(DataAnomaly(
341
+ index=int(idx), # type: ignore
342
+ timestamp=float(df.at[idx, 'timestamp']), # type: ignore
343
+ value=glucose_with_time.loc[idx, 'glucose'], # type: ignore
344
+ anomaly_type='rapid_change',
345
+ severity='high',
346
+ description=description
347
+ ))
348
+
349
+ # Calculate validity score
350
+ total_points = len(glucose)
351
+ invalid_points = len(anomalies)
352
+
353
+ if total_points == 0:
354
+ return 1.0, anomalies
355
+
356
+ validity = 1.0 - (invalid_points / total_points)
357
+
358
+ return max(0.0, validity), anomalies
359
+
360
+ def check(self, df: pd.DataFrame) -> QualityReport:
361
+ """
362
+ Perform comprehensive data quality check.
363
+
364
+ Args:
365
+ df: DataFrame to check
366
+
367
+ Returns:
368
+ QualityReport with all findings
369
+ """
370
+ warnings = []
371
+
372
+ # Run all checks
373
+ completeness, gaps = self.check_completeness(df)
374
+ consistency = self.check_consistency(df)
375
+ validity, anomalies = self.check_validity(df)
376
+
377
+ # Calculate overall score (weighted average)
378
+ overall = (
379
+ completeness * 0.4 +
380
+ consistency * 0.3 +
381
+ validity * 0.3
382
+ )
383
+
384
+ # Generate warnings
385
+ for gap in gaps:
386
+ warnings.append(gap.get_warning_message())
387
+ warnings.append(
388
+ f" [INFO] Simulation confidence score decreases to {max(0, overall - gap.percentage_of_total * 0.01):.2f}"
389
+ )
390
+
391
+ for anomaly in anomalies:
392
+ if anomaly.severity == 'high':
393
+ warnings.append(
394
+ f"[WARN] CRITICAL ANOMALY: {anomaly.description} at index {anomaly.index}"
395
+ )
396
+ elif anomaly.severity == 'medium':
397
+ warnings.append(
398
+ f"[WARN] ANOMALY: {anomaly.description} at index {anomaly.index}"
399
+ )
400
+
401
+ # Summary generation
402
+ if overall >= 0.9:
403
+ summary = "Excellent data quality"
404
+ elif overall >= 0.75:
405
+ summary = "Good data quality with minor issues"
406
+ elif overall >= 0.5:
407
+ summary = "Moderate data quality - use with caution"
408
+ elif overall >= 0.25:
409
+ summary = "Poor data quality - significant gaps detected"
410
+ else:
411
+ summary = "Critical data quality issues - simulation may be unreliable"
412
+
413
+ return QualityReport(
414
+ overall_score=overall,
415
+ completeness_score=completeness,
416
+ consistency_score=consistency,
417
+ validity_score=validity,
418
+ gaps=gaps,
419
+ anomalies=anomalies,
420
+ warnings=warnings,
421
+ summary=summary
422
+ )
423
+
424
+ def get_confidence_score(self, df: pd.DataFrame) -> float:
425
+ """
426
+ Get overall confidence score for simulation.
427
+
428
+ Args:
429
+ df: DataFrame to check
430
+
431
+ Returns:
432
+ Confidence score (0.0 - 1.0)
433
+ """
434
+ report = self.check(df)
435
+ return report.overall_score
436
+
437
+ def print_report(self, report: QualityReport):
438
+ """Print formatted quality report"""
439
+ print("\n" + "=" * 70)
440
+ print("DATA QUALITY REPORT")
441
+ print("=" * 70)
442
+
443
+ # Overall score with visual indicator
444
+ score_bar = "█" * int(report.overall_score * 20) + "░" * (20 - int(report.overall_score * 20))
445
+ print(f"\nOverall Score: [{score_bar}] {report.overall_score:.1%}")
446
+ print(f"Summary: {report.summary}")
447
+
448
+ # Component scores
449
+ print(f"\nComponent Scores:")
450
+ print(f" Completeness: {report.completeness_score:.1%}")
451
+ print(f" Consistency: {report.consistency_score:.1%}")
452
+ print(f" Validity: {report.validity_score:.1%}")
453
+
454
+ # Gaps
455
+ if report.gaps:
456
+ print(f"\nData Gaps Found: {len(report.gaps)}")
457
+ for gap in report.gaps:
458
+ print(f" {gap.get_warning_message()}")
459
+
460
+ # Anomalies
461
+ high_anomalies = [a for a in report.anomalies if a.severity == 'high']
462
+ medium_anomalies = [a for a in report.anomalies if a.severity == 'medium']
463
+
464
+ if high_anomalies:
465
+ print(f"\nCRITICAL Anomalies: {len(high_anomalies)}")
466
+ for anomaly in high_anomalies:
467
+ print(f" {anomaly.description}")
468
+
469
+ if medium_anomalies:
470
+ print(f"\nWarnings: {len(medium_anomalies)}")
471
+ for anomaly in medium_anomalies[:5]: # Show first 5
472
+ print(f" {anomaly.description}")
473
+ if len(medium_anomalies) > 5:
474
+ print(f" ... and {len(medium_anomalies) - 5} more")
475
+
476
+ # Warnings
477
+ if report.warnings:
478
+ print(f"\n{'='*70}")
479
+ print("WARNINGS")
480
+ print("=" * 70)
481
+ for warning in report.warnings:
482
+ print(f" {warning}")
483
+
484
+ print("\n" + "=" * 70)
485
+
486
+
487
def demo_quality_checker():
    """Demonstrate data quality checking on three synthetic datasets."""
    separator = "=" * 70
    print(separator)
    print("DATA QUALITY CHECKER DEMONSTRATION")
    print(separator)

    quality_checker = DataQualityChecker(expected_interval=5, source_type='cgm')

    # --- Test case 1: clean, regularly sampled data -------------------
    print("\nTest Case 1: Clean Data (Simulated)")
    print("-" * 50)

    np.random.seed(42)
    minute_axis = np.arange(0, 480, 5)  # 8 hours at 5-minute resolution
    noisy_glucose = 120 + 30 * np.sin(minute_axis / 60) + np.random.normal(0, 5, len(minute_axis))
    noisy_glucose = np.clip(noisy_glucose, 40, 400)  # Keep within reasonable range
    carb_events = np.random.choice([0, 30, 60], len(minute_axis), p=[0.8, 0.15, 0.05])
    insulin_events = np.random.choice([0, 1, 2], len(minute_axis), p=[0.7, 0.2, 0.1])

    baseline_df = pd.DataFrame({
        'timestamp': minute_axis,
        'glucose': noisy_glucose,
        'carbs': carb_events,
        'insulin': insulin_events,
    })

    quality_checker.print_report(quality_checker.check(baseline_df))

    # --- Test case 2: same data with a block of readings removed ------
    print("\n\nTest Case 2: Data with Gaps (14:00-16:00)")
    print("-" * 50)

    # Drop 30 consecutive rows from the middle to simulate a sensor dropout.
    with_gap = baseline_df.copy().drop(range(100, 130)).reset_index(drop=True)

    quality_checker.print_report(quality_checker.check(with_gap))

    # --- Test case 3: injected impossible values and an outlier -------
    print("\n\nTest Case 3: Data with Anomalies")
    print("-" * 50)

    with_anomalies = baseline_df.copy()
    with_anomalies.loc[50, 'glucose'] = 15    # below physiological minimum
    with_anomalies.loc[100, 'glucose'] = 700  # above physiological maximum
    with_anomalies.loc[75, 'glucose'] = 400   # extreme but possible outlier

    quality_checker.print_report(quality_checker.check(with_anomalies))

    print("\n" + separator)
    print("DATA QUALITY CHECKER DEMONSTRATION COMPLETE")