iints-sdk-python35 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iints/__init__.py +183 -0
- iints/analysis/__init__.py +12 -0
- iints/analysis/algorithm_xray.py +387 -0
- iints/analysis/baseline.py +92 -0
- iints/analysis/clinical_benchmark.py +198 -0
- iints/analysis/clinical_metrics.py +551 -0
- iints/analysis/clinical_tir_analyzer.py +136 -0
- iints/analysis/diabetes_metrics.py +43 -0
- iints/analysis/edge_efficiency.py +33 -0
- iints/analysis/edge_performance_monitor.py +315 -0
- iints/analysis/explainability.py +94 -0
- iints/analysis/explainable_ai.py +232 -0
- iints/analysis/hardware_benchmark.py +221 -0
- iints/analysis/metrics.py +117 -0
- iints/analysis/population_report.py +188 -0
- iints/analysis/reporting.py +345 -0
- iints/analysis/safety_index.py +311 -0
- iints/analysis/sensor_filtering.py +54 -0
- iints/analysis/validator.py +273 -0
- iints/api/__init__.py +0 -0
- iints/api/base_algorithm.py +307 -0
- iints/api/registry.py +103 -0
- iints/api/template_algorithm.py +195 -0
- iints/assets/iints_logo.png +0 -0
- iints/cli/__init__.py +0 -0
- iints/cli/cli.py +2598 -0
- iints/core/__init__.py +1 -0
- iints/core/algorithms/__init__.py +0 -0
- iints/core/algorithms/battle_runner.py +138 -0
- iints/core/algorithms/correction_bolus.py +95 -0
- iints/core/algorithms/discovery.py +92 -0
- iints/core/algorithms/fixed_basal_bolus.py +58 -0
- iints/core/algorithms/hybrid_algorithm.py +92 -0
- iints/core/algorithms/lstm_algorithm.py +138 -0
- iints/core/algorithms/mock_algorithms.py +162 -0
- iints/core/algorithms/pid_controller.py +88 -0
- iints/core/algorithms/standard_pump_algo.py +64 -0
- iints/core/device.py +0 -0
- iints/core/device_manager.py +64 -0
- iints/core/devices/__init__.py +3 -0
- iints/core/devices/models.py +160 -0
- iints/core/patient/__init__.py +9 -0
- iints/core/patient/bergman_model.py +341 -0
- iints/core/patient/models.py +285 -0
- iints/core/patient/patient_factory.py +117 -0
- iints/core/patient/profile.py +41 -0
- iints/core/safety/__init__.py +12 -0
- iints/core/safety/config.py +37 -0
- iints/core/safety/input_validator.py +95 -0
- iints/core/safety/supervisor.py +39 -0
- iints/core/simulation/__init__.py +0 -0
- iints/core/simulation/scenario_parser.py +61 -0
- iints/core/simulator.py +874 -0
- iints/core/supervisor.py +367 -0
- iints/data/__init__.py +53 -0
- iints/data/adapter.py +142 -0
- iints/data/column_mapper.py +398 -0
- iints/data/datasets.json +132 -0
- iints/data/demo/__init__.py +1 -0
- iints/data/demo/demo_cgm.csv +289 -0
- iints/data/importer.py +275 -0
- iints/data/ingestor.py +162 -0
- iints/data/nightscout.py +128 -0
- iints/data/quality_checker.py +550 -0
- iints/data/registry.py +166 -0
- iints/data/tidepool.py +38 -0
- iints/data/universal_parser.py +813 -0
- iints/data/virtual_patients/clinic_safe_baseline.yaml +9 -0
- iints/data/virtual_patients/clinic_safe_hyper_challenge.yaml +9 -0
- iints/data/virtual_patients/clinic_safe_hypo_prone.yaml +9 -0
- iints/data/virtual_patients/clinic_safe_midnight.yaml +9 -0
- iints/data/virtual_patients/clinic_safe_pizza.yaml +9 -0
- iints/data/virtual_patients/clinic_safe_stress_meal.yaml +9 -0
- iints/data/virtual_patients/default_patient.yaml +11 -0
- iints/data/virtual_patients/patient_559_config.yaml +11 -0
- iints/emulation/__init__.py +80 -0
- iints/emulation/legacy_base.py +414 -0
- iints/emulation/medtronic_780g.py +337 -0
- iints/emulation/omnipod_5.py +367 -0
- iints/emulation/tandem_controliq.py +393 -0
- iints/highlevel.py +451 -0
- iints/learning/__init__.py +3 -0
- iints/learning/autonomous_optimizer.py +194 -0
- iints/learning/learning_system.py +122 -0
- iints/metrics.py +34 -0
- iints/population/__init__.py +11 -0
- iints/population/generator.py +131 -0
- iints/population/runner.py +327 -0
- iints/presets/__init__.py +28 -0
- iints/presets/presets.json +114 -0
- iints/research/__init__.py +30 -0
- iints/research/config.py +68 -0
- iints/research/dataset.py +319 -0
- iints/research/losses.py +73 -0
- iints/research/predictor.py +329 -0
- iints/scenarios/__init__.py +3 -0
- iints/scenarios/generator.py +92 -0
- iints/templates/__init__.py +0 -0
- iints/templates/default_algorithm.py +91 -0
- iints/templates/scenarios/__init__.py +0 -0
- iints/templates/scenarios/chaos_insulin_stacking.json +29 -0
- iints/templates/scenarios/chaos_runaway_ai.json +25 -0
- iints/templates/scenarios/example_scenario.json +35 -0
- iints/templates/scenarios/exercise_stress.json +30 -0
- iints/utils/__init__.py +3 -0
- iints/utils/plotting.py +50 -0
- iints/utils/run_io.py +152 -0
- iints/validation/__init__.py +133 -0
- iints/validation/schemas.py +94 -0
- iints/visualization/__init__.py +34 -0
- iints/visualization/cockpit.py +691 -0
- iints/visualization/uncertainty_cloud.py +612 -0
- iints_sdk_python35-0.0.18.dist-info/METADATA +225 -0
- iints_sdk_python35-0.0.18.dist-info/RECORD +118 -0
- iints_sdk_python35-0.0.18.dist-info/WHEEL +5 -0
- iints_sdk_python35-0.0.18.dist-info/entry_points.txt +10 -0
- iints_sdk_python35-0.0.18.dist-info/licenses/LICENSE +28 -0
- iints_sdk_python35-0.0.18.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "baseline_t1d",
|
|
4
|
+
"description": "Clinic-safe baseline with stable parameters and moderate meals.",
|
|
5
|
+
"patient_config": "clinic_safe_baseline",
|
|
6
|
+
"duration_minutes": 1440,
|
|
7
|
+
"time_step_minutes": 5,
|
|
8
|
+
"critical_glucose_threshold": 40.0,
|
|
9
|
+
"critical_glucose_duration_minutes": 30,
|
|
10
|
+
"scenario": {
|
|
11
|
+
"scenario_name": "Clinic Safe Baseline",
|
|
12
|
+
"scenario_version": "1.0",
|
|
13
|
+
"stress_events": [
|
|
14
|
+
{"start_time": 60, "event_type": "meal", "value": 45, "absorption_delay_minutes": 15, "duration": 60},
|
|
15
|
+
{"start_time": 360, "event_type": "meal", "value": 60, "absorption_delay_minutes": 20, "duration": 90},
|
|
16
|
+
{"start_time": 720, "event_type": "meal", "value": 70, "absorption_delay_minutes": 15, "duration": 90},
|
|
17
|
+
{"start_time": 1000, "event_type": "exercise", "value": 0.4, "duration": 45}
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"name": "stress_test_meal",
|
|
23
|
+
"description": "Clinic-safe meal stress test with larger lunch and safety clamps enabled.",
|
|
24
|
+
"patient_config": "clinic_safe_stress_meal",
|
|
25
|
+
"duration_minutes": 720,
|
|
26
|
+
"time_step_minutes": 5,
|
|
27
|
+
"critical_glucose_threshold": 40.0,
|
|
28
|
+
"critical_glucose_duration_minutes": 30,
|
|
29
|
+
"scenario": {
|
|
30
|
+
"scenario_name": "Clinic Safe Meal Stress",
|
|
31
|
+
"scenario_version": "1.0",
|
|
32
|
+
"stress_events": [
|
|
33
|
+
{"start_time": 90, "event_type": "meal", "value": 55, "absorption_delay_minutes": 15, "duration": 60},
|
|
34
|
+
{"start_time": 300, "event_type": "meal", "value": 90, "absorption_delay_minutes": 20, "duration": 90},
|
|
35
|
+
{"start_time": 600, "event_type": "meal", "value": 60, "absorption_delay_minutes": 15, "duration": 60}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
38
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"name": "hypo_prone_night",
|
|
42
|
+
"description": "Overnight hypo risk assessment with reduced basal and conservative carbs.",
|
|
43
|
+
"patient_config": "clinic_safe_hypo_prone",
|
|
44
|
+
"duration_minutes": 720,
|
|
45
|
+
"time_step_minutes": 5,
|
|
46
|
+
"critical_glucose_threshold": 40.0,
|
|
47
|
+
"critical_glucose_duration_minutes": 30,
|
|
48
|
+
"scenario": {
|
|
49
|
+
"scenario_name": "Hypo-Prone Overnight",
|
|
50
|
+
"scenario_version": "1.0",
|
|
51
|
+
"stress_events": [
|
|
52
|
+
{"start_time": 60, "event_type": "meal", "value": 35, "absorption_delay_minutes": 10, "duration": 45},
|
|
53
|
+
{"start_time": 240, "event_type": "exercise", "value": 0.3, "duration": 30}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"name": "hyper_challenge",
|
|
59
|
+
"description": "Post-prandial hyperglycemia challenge with a large meal.",
|
|
60
|
+
"patient_config": "clinic_safe_hyper_challenge",
|
|
61
|
+
"duration_minutes": 480,
|
|
62
|
+
"time_step_minutes": 5,
|
|
63
|
+
"critical_glucose_threshold": 40.0,
|
|
64
|
+
"critical_glucose_duration_minutes": 30,
|
|
65
|
+
"scenario": {
|
|
66
|
+
"scenario_name": "Hyper Challenge",
|
|
67
|
+
"scenario_version": "1.0",
|
|
68
|
+
"stress_events": [
|
|
69
|
+
{"start_time": 90, "event_type": "meal", "value": 110, "absorption_delay_minutes": 15, "duration": 90}
|
|
70
|
+
]
|
|
71
|
+
}
|
|
72
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"name": "pizza_paradox",
|
|
76
|
+
"description": "Delayed glucose rise after a high-fat meal (pizza paradox).",
|
|
77
|
+
"patient_config": "clinic_safe_pizza",
|
|
78
|
+
"duration_minutes": 900,
|
|
79
|
+
"time_step_minutes": 5,
|
|
80
|
+
"critical_glucose_threshold": 40.0,
|
|
81
|
+
"critical_glucose_duration_minutes": 30,
|
|
82
|
+
"scenario": {
|
|
83
|
+
"scenario_name": "The Pizza Paradox",
|
|
84
|
+
"scenario_version": "1.0",
|
|
85
|
+
"stress_events": [
|
|
86
|
+
{
|
|
87
|
+
"start_time": 240,
|
|
88
|
+
"event_type": "meal",
|
|
89
|
+
"value": 90,
|
|
90
|
+
"reported_value": 90,
|
|
91
|
+
"absorption_delay_minutes": 180,
|
|
92
|
+
"duration": 120
|
|
93
|
+
}
|
|
94
|
+
]
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"name": "midnight_crash",
|
|
99
|
+
"description": "Overnight crash risk after evening exercise.",
|
|
100
|
+
"patient_config": "clinic_safe_midnight",
|
|
101
|
+
"duration_minutes": 720,
|
|
102
|
+
"time_step_minutes": 5,
|
|
103
|
+
"critical_glucose_threshold": 40.0,
|
|
104
|
+
"critical_glucose_duration_minutes": 30,
|
|
105
|
+
"scenario": {
|
|
106
|
+
"scenario_name": "Midnight Crash",
|
|
107
|
+
"scenario_version": "1.0",
|
|
108
|
+
"stress_events": [
|
|
109
|
+
{"start_time": 120, "event_type": "meal", "value": 45, "absorption_delay_minutes": 10, "duration": 60},
|
|
110
|
+
{"start_time": 300, "event_type": "exercise", "value": 0.6, "duration": 45}
|
|
111
|
+
]
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .config import PredictorConfig, TrainingConfig
|
|
2
|
+
from .dataset import (
|
|
3
|
+
build_sequences,
|
|
4
|
+
subject_split,
|
|
5
|
+
FeatureScaler,
|
|
6
|
+
load_parquet,
|
|
7
|
+
save_parquet,
|
|
8
|
+
load_dataset,
|
|
9
|
+
save_dataset,
|
|
10
|
+
)
|
|
11
|
+
from .predictor import LSTMPredictor, load_predictor, PredictorService, load_predictor_service
|
|
12
|
+
from .losses import QuantileLoss, SafetyWeightedMSE
|
|
13
|
+
|
|
14
|
+
# Public API of the research subpackage: predictor/training configs,
# dataset windowing and scaling utilities, the LSTM predictor wrappers,
# and the (torch-optional) loss functions.
__all__ = [
    "PredictorConfig",
    "TrainingConfig",
    "build_sequences",
    "subject_split",
    "FeatureScaler",
    "load_parquet",
    "save_parquet",
    "load_dataset",
    "save_dataset",
    "LSTMPredictor",
    "load_predictor",
    "PredictorService",
    "load_predictor_service",
    "QuantileLoss",
    "SafetyWeightedMSE",
]
|
iints/research/config.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class PredictorConfig:
    """Window configuration for the glucose predictor.

    All durations are expressed in minutes; the derived ``history_steps``
    and ``horizon_steps`` properties convert them into discrete model
    steps using ``time_step_minutes`` as the sampling interval.
    """

    # Length of the look-back window fed to the model (minutes).
    history_minutes: int = 240
    # Length of the forecast window the model must predict (minutes).
    horizon_minutes: int = 60
    # Sampling interval of the underlying time series (minutes).
    time_step_minutes: int = 5
    # Input feature columns expected in the training dataframe.
    feature_columns: List[str] = field(default_factory=lambda: [
        "glucose_actual_mgdl",
        "patient_iob_units",
        "patient_cob_grams",
        "effective_isf",
        "effective_icr",
        "effective_basal_rate_u_per_hr",
        "glucose_trend_mgdl_min",
    ])
    # Column the model is trained to forecast.
    target_column: str = "glucose_actual_mgdl"

    @property
    def history_steps(self) -> int:
        """Number of input steps covering the look-back window."""
        steps = self.history_minutes / self.time_step_minutes
        return int(steps)

    @property
    def horizon_steps(self) -> int:
        """Number of prediction steps covering the forecast window."""
        steps = self.horizon_minutes / self.time_step_minutes
        return int(steps)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class TrainingConfig:
    """Hyperparameters for training / fine-tuning the LSTM glucose predictor.

    Groups optimiser and model-size settings, subject-level data-split
    controls, normalisation and loss selection, fine-tuning knobs, and
    the shaping parameters of the safety-weighted loss.
    """

    # --- Optimiser / model size ---
    epochs: int = 20
    batch_size: int = 128
    learning_rate: float = 1e-3
    weight_decay: float = 0.0
    hidden_size: int = 64
    num_layers: int = 2
    dropout: float = 0.1

    # P0-2: Subject-level split fractions (must sum to <= 1.0).
    # The remaining fraction after val + test goes to training.
    # Set subject_level_split=True (default) to split by subject ID to
    # prevent data leakage between train/val/test sets.
    subject_level_split: bool = True
    validation_split: float = 0.15  # fraction of subjects for validation
    test_split: float = 0.15  # fraction of subjects for held-out test

    seed: int = 42  # RNG seed for reproducible shuffling/splitting

    # P3-10: Normalization strategy. Options: "zscore", "robust", "none".
    normalization: str = "zscore"

    # P3-12: Loss function. Options: "mse", "quantile".
    # For quantile loss, also set `quantile` (0 < q < 1).
    loss: str = "mse"
    quantile: Optional[float] = None  # e.g. 0.9 for 90th-percentile upper bound

    # Fine-tuning controls
    freeze_lstm_layers: int = 0  # freeze first N LSTM layers
    early_stopping_patience: int = 0  # 0 disables early stopping
    early_stopping_min_delta: float = 0.0  # min val-loss improvement to reset patience

    # Safety-weighted loss (penalize low-glucose errors more strongly)
    safety_weighted_low_threshold: float = 80.0  # mg/dL boundary for up-weighting
    safety_weighted_alpha: float = 2.0  # weight growth rate below the threshold
    safety_weighted_max_weight: float = 4.0  # cap on the per-sample weight
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Dict, Iterable, List, Optional, Tuple
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
# Sequence building
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
def build_sequences(
    df: pd.DataFrame,
    history_steps: int,
    horizon_steps: int,
    feature_columns: List[str],
    target_column: str,
    subject_column: Optional[str] = "subject_id",
    segment_column: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Slice a time-series dataframe into supervised (X, y) windows.

    X shape: [N, history_steps, num_features]
    y shape: [N, horizon_steps]

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe, sorted by time within each subject/segment.
    history_steps : int
        Number of past steps used as input features.
    horizon_steps : int
        Number of future steps to predict.
    feature_columns : list of str
        Columns to use as input features.
    target_column : str
        Column to predict.
    subject_column : str or None
        If provided, windows never span a change of subject.
        Defaults to "subject_id".
    segment_column : str or None
        If provided, windows additionally never span a change of segment
        (e.g. gaps in CGM data). Defaults to None.

    Returns
    -------
    X : np.ndarray of shape [N, history_steps, num_features]
    y : np.ndarray of shape [N, horizon_steps]

    Raises
    ------
    ValueError
        If the step counts are non-positive, a required column is absent,
        or no window fits inside any group.
    """
    if history_steps <= 0 or horizon_steps <= 0:
        raise ValueError("history_steps and horizon_steps must be > 0")
    missing = [col for col in feature_columns + [target_column] if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # A row opens a new group whenever its subject (or segment) differs
    # from the previous row; no window may cross such a boundary.
    new_group = pd.Series(False, index=df.index)
    for group_col in (subject_column, segment_column):
        if group_col and group_col in df.columns:
            new_group |= df[group_col] != df[group_col].shift(1)
    if len(new_group) > 0:
        new_group.iloc[0] = True  # the very first row always opens a group

    feats = df[feature_columns].to_numpy(dtype=np.float32)
    targets = df[target_column].to_numpy(dtype=np.float32)
    opens_group = new_group.to_numpy(dtype=bool)

    xs: List[np.ndarray] = []
    ys: List[np.ndarray] = []
    window = history_steps + horizon_steps

    for start in range(len(df) - window + 1):
        split_at = start + history_steps  # exclusive end of history / first forecast step
        stop = start + window
        # A group boundary strictly after `start` (inside the history or
        # anywhere in the horizon) invalidates this window. `start` itself
        # being a boundary is fine — that just means a fresh subject.
        if opens_group[start + 1 : stop].any():
            continue
        xs.append(feats[start:split_at])
        ys.append(targets[split_at:stop])

    if not xs:
        raise ValueError("Not enough rows to build sequences with current window sizes.")

    return np.stack(xs, axis=0), np.stack(ys, axis=0)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# P0-2: Subject-level train / val / test split
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
def subject_split(
    df: pd.DataFrame,
    val_fraction: float = 0.15,
    test_fraction: float = 0.15,
    subject_column: str = "subject_id",
    seed: int = 42,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Split a dataframe into train / val / test sets by subject ID.

    All rows belonging to one subject stay in the same split, which prevents
    data leakage between sets (a subject's glucose patterns are unique and
    would otherwise trivially inflate validation metrics).

    Parameters
    ----------
    df : pd.DataFrame
        Full dataset with a subject identifier column.
    val_fraction : float
        Fraction of *subjects* assigned to the validation set.
        A value of 0.0 yields an empty validation set.
    test_fraction : float
        Fraction of *subjects* assigned to the held-out test set.
        A value of 0.0 yields an empty test set.
    subject_column : str
        Name of the subject identifier column.
    seed : int
        Random seed for reproducible shuffling.

    Returns
    -------
    train_df, val_df, test_df : pd.DataFrame
        Three non-overlapping subsets.

    Raises
    ------
    ValueError
        If the subject column is missing, a fraction is negative, the
        fractions sum to >= 1.0, or no subject would remain for training.
    """
    if subject_column not in df.columns:
        raise ValueError(
            f"Column '{subject_column}' not found in dataframe. "
            "Cannot perform subject-level split."
        )
    if val_fraction < 0.0 or test_fraction < 0.0:
        raise ValueError("val_fraction and test_fraction must be >= 0")
    if val_fraction + test_fraction >= 1.0:
        raise ValueError("val_fraction + test_fraction must be < 1.0")

    rng = np.random.default_rng(seed)
    # Sort before shuffling so the split depends only on `seed`, not on
    # the row order of the incoming dataframe.
    subjects = np.array(sorted(df[subject_column].unique()))
    rng.shuffle(subjects)

    n = len(subjects)
    # BUGFIX: a strictly positive fraction gets at least one subject, but a
    # zero fraction now gets exactly zero. Previously max(1, ...) forced a
    # subject into val/test even when the caller asked for none.
    n_test = max(1, round(n * test_fraction)) if test_fraction > 0 else 0
    n_val = max(1, round(n * val_fraction)) if val_fraction > 0 else 0
    # Ensure we have at least one training subject
    if n - n_val - n_test < 1:
        raise ValueError(
            f"Not enough subjects ({n}) for the requested val/test fractions. "
            "Reduce val_fraction or test_fraction."
        )

    test_subjects = set(subjects[:n_test])
    val_subjects = set(subjects[n_test: n_test + n_val])
    train_subjects = set(subjects[n_test + n_val:])

    train_df = df[df[subject_column].isin(train_subjects)].reset_index(drop=True)
    val_df = df[df[subject_column].isin(val_subjects)].reset_index(drop=True)
    test_df = df[df[subject_column].isin(test_subjects)].reset_index(drop=True)

    return train_df, val_df, test_df
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ---------------------------------------------------------------------------
|
|
170
|
+
# P3-10: Feature normalisation
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
class FeatureScaler:
    """
    Per-feature scaler for LSTM input arrays.

    Strategies:

    ``"zscore"``
        Standardisation: (x - mean) / std. Interpretable and common, but
        sensitive to outliers.

    ``"robust"``
        Robust scaling: (x - median) / IQR. Dampens outlier glucose spikes
        and extreme bolus values.

    ``"none"``
        Identity transform (no scaling).

    The scaler is fitted on training data only; the same parameters are
    then applied to val/test splits to avoid leakage.

    Parameters
    ----------
    strategy : str
        One of ``"zscore"``, ``"robust"``, or ``"none"``.
    """

    def __init__(self, strategy: str = "zscore") -> None:
        if strategy not in {"zscore", "robust", "none"}:
            raise ValueError(f"Unknown normalization strategy: {strategy!r}. "
                             "Choose from 'zscore', 'robust', 'none'.")
        self.strategy = strategy
        self._center: Optional[np.ndarray] = None
        self._scale: Optional[np.ndarray] = None
        self._fitted = False

    def fit(self, X: np.ndarray) -> "FeatureScaler":
        """
        Fit scaler on X of shape [N, T, F] or [N, F].

        Parameters are computed per feature (last axis).
        """
        flat = X.reshape(-1, X.shape[-1])
        spread: Optional[np.ndarray]
        if self.strategy == "zscore":
            self._center = flat.mean(axis=0)
            spread = flat.std(axis=0)
        elif self.strategy == "robust":
            self._center = np.median(flat, axis=0)
            spread = np.percentile(flat, 75, axis=0) - np.percentile(flat, 25, axis=0)
        else:
            # "none": identity parameters
            self._center = np.zeros(X.shape[-1], dtype=np.float32)
            spread = None
        if spread is None:
            self._scale = np.ones(X.shape[-1], dtype=np.float32)
        else:
            # Guard constant features: a near-zero spread would divide by zero.
            self._scale = np.where(spread < 1e-8, 1.0, spread)
        self._fitted = True
        return self

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Apply scaling. X shape: [N, T, F] or [N, F]."""
        if not self._fitted:
            raise RuntimeError("FeatureScaler must be fitted before transform().")
        return ((X - self._center) / self._scale).astype(np.float32)

    def fit_transform(self, X: np.ndarray) -> np.ndarray:
        """Fit on X, then scale it, in one call."""
        return self.fit(X).transform(X)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Map scaled values back to the original feature space."""
        if not self._fitted:
            raise RuntimeError("FeatureScaler must be fitted before inverse_transform().")
        return (X * self._scale + self._center).astype(np.float32)

    def to_dict(self) -> dict:
        """Serialise scaler parameters for storage in a model checkpoint."""
        center = None if self._center is None else self._center.tolist()
        scale = None if self._scale is None else self._scale.tolist()
        return {"strategy": self.strategy, "center": center, "scale": scale}

    @classmethod
    def from_dict(cls, d: dict) -> "FeatureScaler":
        """Rebuild a scaler from :meth:`to_dict` output."""
        scaler = cls(strategy=d["strategy"])
        if d.get("center") is not None:
            scaler._center = np.array(d["center"], dtype=np.float32)
            scaler._scale = np.array(d["scale"], dtype=np.float32)
            scaler._fitted = True
        return scaler
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ---------------------------------------------------------------------------
|
|
269
|
+
# Persistence helpers
|
|
270
|
+
# ---------------------------------------------------------------------------
|
|
271
|
+
|
|
272
|
+
def save_parquet(df: pd.DataFrame, path: Path) -> None:
    """Write *df* to *path* as parquet, creating parent directories as needed.

    Raises
    ------
    RuntimeError
        If no parquet engine (pyarrow) is installed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        df.to_parquet(path, index=False)
    except ImportError as exc:
        # BUGFIX: only a missing engine earns the install hint. The previous
        # blanket `except Exception` reported unrelated failures (permission
        # errors, unserialisable dtypes) as "install pyarrow", hiding the
        # real cause; those now propagate unchanged.
        raise RuntimeError(
            "Parquet support requires pyarrow. Install with `pip install iints-sdk-python35[research]`."
        ) from exc
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def save_dataset(df: pd.DataFrame, path: Path) -> None:
    """Save dataset as parquet when available, or CSV as a fallback."""
    suffix = path.suffix.lower()
    if suffix in {".parquet", ".pq"}:
        save_parquet(df, path)
    else:
        # CSV fallback: ensure the destination directory exists first.
        path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(path, index=False)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def load_parquet(path: Path) -> pd.DataFrame:
    """Read a parquet file into a DataFrame.

    Raises
    ------
    RuntimeError
        If no parquet engine (pyarrow) is installed.
    FileNotFoundError
        If *path* does not exist (propagates unchanged).
    """
    try:
        return pd.read_parquet(path)
    except ImportError as exc:
        # BUGFIX: only a missing engine earns the install hint. The previous
        # blanket `except Exception` told users to install pyarrow even when
        # the real problem was a missing or corrupt file; those errors now
        # propagate unchanged.
        raise RuntimeError(
            "Parquet support requires pyarrow. Install with `pip install iints-sdk-python35[research]`."
        ) from exc
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def load_dataset(path: Path) -> pd.DataFrame:
    """Load a dataset from parquet (.parquet/.pq) or delimited text (.csv/.txt)."""
    suffix = path.suffix.lower()
    if suffix in {".parquet", ".pq"}:
        return load_parquet(path)
    if suffix in {".csv", ".txt"}:
        return pd.read_csv(path)
    raise ValueError(f"Unsupported dataset format: {path.suffix}")
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def concat_runs(frames: Iterable[pd.DataFrame]) -> pd.DataFrame:
    """Stack multiple run dataframes into one, renumbering the index from 0."""
    combined = pd.concat(list(frames), ignore_index=True)
    return combined
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def basic_stats(df: pd.DataFrame, columns: List[str]) -> Dict[str, float]:
    """Return ``{col}_mean`` / ``{col}_std`` entries for each requested
    column that is present in *df*; absent columns are silently skipped."""
    summary: Dict[str, float] = {}
    for name in columns:
        if name not in df.columns:
            continue
        series = df[name]
        summary[f"{name}_mean"] = float(series.mean())
        summary[f"{name}_std"] = float(series.std())
    return summary
|
iints/research/losses.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
# Torch is an optional dependency: import it when available, otherwise leave
# module-level placeholders so this file can still be imported and the loss
# classes can raise a helpful ImportError on instantiation instead.
try:
    import torch
    from torch import nn
except Exception as exc:  # pragma: no cover
    torch = None  # type: ignore
    nn = None  # type: ignore
    # Remember why the import failed so the fallback classes below can chain
    # it into the ImportError they raise.
    _IMPORT_ERROR: Optional[BaseException] = exc
else:
    _IMPORT_ERROR = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Three-way definition of the loss classes:
#   1. Under TYPE_CHECKING, typed stubs so static analysers see the real
#      signatures without importing torch at runtime.
#   2. At runtime without torch (nn is None), stand-ins that raise a
#      helpful ImportError on construction, chained to the original
#      import failure.
#   3. At runtime with torch, the real nn.Module implementations.
if TYPE_CHECKING:
    import torch  # pragma: no cover
    from torch import nn  # pragma: no cover

    class QuantileLoss(nn.Module):
        def __init__(self, quantile: float = 0.9) -> None: ...
        def forward(self, preds: "torch.Tensor", targets: "torch.Tensor") -> "torch.Tensor": ...

    class SafetyWeightedMSE(nn.Module):
        def __init__(self, low_threshold: float = 80.0, alpha: float = 2.0, max_weight: float = 4.0) -> None: ...
        def forward(self, preds: "torch.Tensor", targets: "torch.Tensor") -> "torch.Tensor": ...
else:
    if nn is None:  # pragma: no cover
        class QuantileLoss:  # type: ignore[no-redef]
            """Placeholder used when torch is not installed; fails on use."""

            def __init__(self, *args: object, **kwargs: object) -> None:
                raise ImportError(
                    "Torch is required for QuantileLoss. Install with `pip install iints-sdk-python35[research]`."
                ) from _IMPORT_ERROR

        class SafetyWeightedMSE:  # type: ignore[no-redef]
            """Placeholder used when torch is not installed; fails on use."""

            def __init__(self, *args: object, **kwargs: object) -> None:
                raise ImportError(
                    "Torch is required for SafetyWeightedMSE. Install with `pip install iints-sdk-python35[research]`."
                ) from _IMPORT_ERROR
    else:
        class QuantileLoss(nn.Module):  # type: ignore[misc,no-redef]
            """Pinball / quantile loss for probabilistic forecasting."""

            def __init__(self, quantile: float = 0.9) -> None:
                super().__init__()
                if not 0.0 < quantile < 1.0:
                    raise ValueError(f"quantile must be in (0, 1), got {quantile}")
                self.quantile = quantile

            def forward(self, preds: "torch.Tensor", targets: "torch.Tensor") -> "torch.Tensor":
                # Under-prediction (positive error) is weighted by q,
                # over-prediction by (1 - q): the standard pinball loss.
                errors = targets - preds
                loss = torch.where(
                    errors >= 0,
                    self.quantile * errors,
                    (self.quantile - 1.0) * errors,
                )
                return loss.mean()

        class SafetyWeightedMSE(nn.Module):  # type: ignore[misc,no-redef]
            """MSE with extra weight on low-glucose targets (safety-critical)."""

            def __init__(self, low_threshold: float = 80.0, alpha: float = 2.0, max_weight: float = 4.0) -> None:
                super().__init__()
                self.low_threshold = float(low_threshold)
                self.alpha = float(alpha)
                self.max_weight = float(max_weight)

            def forward(self, preds: "torch.Tensor", targets: "torch.Tensor") -> "torch.Tensor":
                # Emphasize errors below the low threshold: weight grows
                # linearly with how far the target sits below it, from 1.0
                # up to max_weight. The max(..., 1.0) guards a zero/near-zero
                # threshold from dividing by zero.
                delta = torch.clamp(self.low_threshold - targets, min=0.0)
                weights = 1.0 + self.alpha * (delta / max(self.low_threshold, 1.0))
                weights = torch.clamp(weights, max=self.max_weight)
                return ((preds - targets) ** 2 * weights).mean()
|