discontinuum 1.0.3-py3-none-any.whl → 1.0.5-py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- discontinuum/_version.py +2 -2
- discontinuum/engines/gpytorch.py +143 -14
- {discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/METADATA +4 -3
- {discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/RECORD +8 -8
- rating_gp/models/gpytorch.py +29 -7
- {discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/WHEEL +0 -0
- {discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/licenses/LICENSE.md +0 -0
- {discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/top_level.txt +0 -0
discontinuum/_version.py
CHANGED
discontinuum/engines/gpytorch.py
CHANGED
@@ -49,7 +49,11 @@ class MarginalGPyTorch(BaseModel):
         target: Dataset,
         target_unc: Dataset = None,
         iterations: int = 100,
-        optimizer: str = "
+        optimizer: str = "adamw",
+        learning_rate: float = None,
+        early_stopping: bool = False,
+        patience: int = 60,
+        gradient_noise: bool = False,
     ):
         """Fit the model to data.

@@ -64,7 +68,15 @@ class MarginalGPyTorch(BaseModel):
         iterations : int, optional
             Number of iterations for optimization. The default is 100.
         optimizer : str, optional
-            Optimization method. The default is "
+            Optimization method. Supported: "adam", "adamw". The default is "adamw".
+        learning_rate : float, optional
+            Learning rate for optimization. If None, uses adaptive defaults.
+        early_stopping : bool, optional
+            Whether to use early stopping. The default is False.
+        patience : int, optional
+            Number of iterations to wait without improvement before stopping. The default is 60.
+        gradient_noise : bool, optional
+            Whether to inject Gaussian noise into gradients each step (std = 0.1 × current learning rate). The default is False.
         """
         self.is_fitted = True
         # setup data manager (self.dm)
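Read together with the new signature above, a call using the 1.0.5 options might look like the sketch below; `model`, `covariates`, and `target` are placeholders, and only the keyword names shown in this diff come from the source.

```python
# Hypothetical usage of the new fit() options; the data arguments and the
# model construction are illustrative, not confirmed by this diff.
model.fit(
    covariates,
    target,
    iterations=1000,
    optimizer="adamw",      # "adam" or "adamw"; the default is now "adamw"
    learning_rate=None,     # None selects the adaptive default (0.1)
    early_stopping=True,    # stop once `patience` iterations pass without improvement
    patience=60,
    gradient_noise=False,   # optionally add noise with std = 0.1 * current LR
)
```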
@@ -86,26 +98,143 @@
         self.model.train()
         self.likelihood.train()

-
-
-
+        if learning_rate is None:
+            if optimizer == "adam":
+                learning_rate = 0.1  # Aggressive default for faster convergence
+            elif optimizer == "adamw":
+                learning_rate = 0.1
+
+        if optimizer == "adamw":
+            optimizer_obj = torch.optim.AdamW(
+                self.model.parameters(),
+                lr=learning_rate,
+                betas=(0.9, 0.999),
+                eps=1e-8,
+                weight_decay=1e-2  # Stronger regularization for AdamW
+            )
+        elif optimizer == "adam":
+            optimizer_obj = torch.optim.Adam(
+                self.model.parameters(),
+                lr=learning_rate,
+                betas=(0.9, 0.999),
+                eps=1e-8,
+                weight_decay=1e-4  # Lighter regularization for Adam
+            )
         else:
-            raise NotImplementedError("Only
+            raise NotImplementedError(f"Only 'adam' and 'adamw' optimizers are supported. Got '{optimizer}'.")
+
+        # Use ReduceLROnPlateau for more stable learning rate adaptation
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer_obj,
+            mode='min',
+            factor=0.5,  # Reduce LR by half
+            patience=max(2, patience),
+            threshold=1e-4,
+            min_lr=1e-5
+        )

         # "Loss" for GPs - the marginal log likelihood
         mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)

-
+        # Training loop with stability features
+        pbar = tqdm.tqdm(range(iterations), ncols=100)  # Wider progress bar
+        jitter = 1e-6  # Dynamic jitter for numerical stability
+        best_loss = float('inf')
+        patience_counter = 0
+        min_lr_for_early_stop = 2e-5  # Stop if patience is exceeded and LR is below this
+
         for i in pbar:
-            #
-
-            # Output from model
+            # Adam/AdamW optimizer with stability features
+            optimizer_obj.zero_grad()
             output = self.model(train_x)
-
-            loss
+
+            # Attempt loss calculation with dynamic jitter
+            try:
+                with gpytorch.settings.cholesky_jitter(jitter):
+                    loss = -mll(output, train_y)
+            except Exception as e:
+                # Increase jitter if numerical issues occur
+                jitter = min(jitter * 10, 1e-2)
+                current_lr = optimizer_obj.param_groups[0]['lr']
+                pbar.set_postfix_str(
+                    f'lr={current_lr:.1e} jitter={jitter:.1e} | Numerical issue - increasing jitter'
+                )
+                continue
+
+            # Check for NaN loss
+            if torch.isnan(loss) or torch.isinf(loss):
+                current_lr = optimizer_obj.param_groups[0]['lr']
+                pbar.set_postfix_str(
+                    f'lr={current_lr:.1e} jitter={jitter:.1e} | NaN/Inf loss detected - skipping step'
+                )
+                continue
+
             loss.backward()
-
-
+
+            # Get current learning rate before gradient noise injection
+            current_lr = optimizer_obj.param_groups[0]['lr']
+
+            # Gradient noise injection (if enabled)
+            if gradient_noise:
+                gradient_noise_scale = 0.1
+                adaptive_noise = gradient_noise_scale * current_lr
+                for param in self.model.parameters():
+                    if param.grad is not None:
+                        noise = torch.normal(mean=0.0, std=adaptive_noise, size=param.grad.shape, device=param.grad.device)
+                        param.grad.add_(noise)
+
+            # Gradient clipping for stability
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+
+            # Check for NaN gradients
+            has_nan_grad = False
+            for param in self.model.parameters():
+                if param.grad is not None and torch.isnan(param.grad).any():
+                    has_nan_grad = True
+                    break
+
+            if has_nan_grad:
+                # Don't update scheduler on NaN gradients - this prevents rapid LR decay
+                # The scheduler should only respond to actual optimization progress
+                current_lr = optimizer_obj.param_groups[0]['lr']
+
+                # Update best loss tracking (loss is still valid, just gradients are NaN)
+                if loss.item() < best_loss:
+                    best_loss = loss.item()
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+
+                # Display comprehensive info even with NaN gradients, skip normal progress update
+                pbar.set_postfix_str(
+                    f'loss={loss.item():.4f} lr={current_lr:.1e} jitter={jitter:.1e} best={best_loss:.4f} | NaN gradients - skipping step'
+                )
+                continue
+
+            optimizer_obj.step()
+
+            # Update learning rate scheduler for Adam/AdamW
+            scheduler.step(loss.item())
+            current_lr = optimizer_obj.param_groups[0]['lr']
+
+            # Early stopping check (more aggressive)
+            if loss.item() < best_loss:
+                best_loss = loss.item()
+                patience_counter = 0
+            else:
+                patience_counter += 1
+
+            # Only update progress bar if not skipped above
+            if not has_nan_grad:
+                progress_info = f'loss={loss.item():.4f} lr={current_lr:.1e} jitter={jitter:.1e} best={best_loss:.4f}'
+                if early_stopping:
+                    progress_info += f' patience={patience_counter}/{patience}'
+                pbar.set_postfix_str(progress_info)
+
+            if early_stopping and patience_counter >= patience and current_lr <= min_lr_for_early_stop:
+                print(f"\nEarly stopping triggered after {i+1} iterations")
+                print(f"Best loss: {best_loss:.6f}")
+                break

     @is_fitted
     def predict(self,
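The new training loop combines AdamW (or Adam) with a ReduceLROnPlateau scheduler, dynamic Cholesky jitter, NaN screening, gradient clipping, optional gradient-noise injection, and patience-based early stopping. Below is a minimal, self-contained sketch of that loop skeleton on a toy PyTorch model; the GP, likelihood, and tqdm reporting are omitted, and every name here is local to the example.

```python
import torch

# Toy model and data standing in for the GP and its marginal log likelihood.
model = torch.nn.Linear(3, 1)
x, y = torch.randn(64, 3), torch.randn(64, 1)
loss_fn = torch.nn.MSELoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=0.1, weight_decay=1e-2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=60, threshold=1e-4, min_lr=1e-5
)

best_loss, patience_counter = float("inf"), 0
patience, min_lr_for_early_stop, gradient_noise = 60, 2e-5, False

for i in range(1000):
    optimizer.zero_grad()
    loss = loss_fn(model(x), y)
    if torch.isnan(loss) or torch.isinf(loss):
        continue  # skip steps with a bad loss
    loss.backward()

    lr = optimizer.param_groups[0]["lr"]
    if gradient_noise:  # optional noise, std scaled by the current learning rate
        for p in model.parameters():
            if p.grad is not None:
                p.grad.add_(torch.randn_like(p.grad) * 0.1 * lr)

    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    if any(p.grad is not None and torch.isnan(p.grad).any() for p in model.parameters()):
        continue  # skip steps with NaN gradients

    optimizer.step()
    scheduler.step(loss.item())  # plateau scheduler tracks the loss
    lr = optimizer.param_groups[0]["lr"]

    # Patience-based early stopping, gated on a sufficiently small learning rate.
    if loss.item() < best_loss:
        best_loss, patience_counter = loss.item(), 0
    else:
        patience_counter += 1
    if patience_counter >= patience and lr <= min_lr_for_early_stop:
        break
```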
{discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: discontinuum
-Version: 1.0.
+Version: 1.0.5
 Summary: Estimate discontinuous timeseries from continuous covariates.
 Maintainer-email: Timothy Hodson <thodson@usgs.gov>
 License: License
@@ -124,11 +124,12 @@ However, LOADEST has several serious limitations
 the more flexible Weighted Regression on Time Discharge and Season (WRTDS),
 which allows the relation between target and covariate to vary through time.
 `loadest-gp` takes the WRTDS idea and reimplements it as a GP.
-
+github/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb
+Try it out in the [loadest-gp demo](https://github.com/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb).

 ### rating-gp
 `rating-gp` is a Gaussian-process model for estimating river flow from stage time series.
-Try it out in the [rating-gp demo](https://
+Try it out in the [rating-gp demo](https://github.com/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/rating-gp-demo.ipynb).

 ## Engines
 Currently, the only supported engines are the marginal likelihood implementation in `pymc` and `gpytorch`.
{discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 discontinuum/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-discontinuum/_version.py,sha256=
+discontinuum/_version.py,sha256=cafGA7j4exK_daS29O0wW2JM2p5b8FwYDLqAYJR0jWo,511
 discontinuum/data_manager.py,sha256=LiZoPR0nnu7YAUfh5L1ZDRfaS3dgfVIELXIHkzUKyBg,4416
 discontinuum/pipeline.py,sha256=1avuZnFai-b3HmihcpZ8M3WFNQ8lXAFSNTrnfl2NrY0,10074
 discontinuum/plot.py,sha256=eZQS6-Ydq8FFcEukPtNuDVB-weV6lHyWMyJ1hqTkVrU,2969
 discontinuum/utils.py,sha256=07hIHQk_oDlkjz7tasgBjqqPOC6D0iNcy0eu-88aNbM,1540
 discontinuum/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 discontinuum/engines/base.py,sha256=OlHd4ssIQoWvYHKoVqk5fKAVBcKsIIkR4ul9iNBvaYg,2396
-discontinuum/engines/gpytorch.py,sha256=
+discontinuum/engines/gpytorch.py,sha256=kRyAgCfxjKZbAJhJGViaDU_y8NO8sW4rSWRyEQlomHo,14383
 discontinuum/engines/pymc.py,sha256=phbtE-3UCSVcP1MhbXwAHIWDZWDr56wK9U7aRt-w-2o,5961
 discontinuum/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 discontinuum/providers/base.py,sha256=Yn2EHS1b4fYl09-m2MYuf2P9VRUXAP-WDpSoZrCbRvY,720
 discontinuum/tests/test_pipeline.py,sha256=_FhkGxbFIxNb35lGaIdZk7Zjgs6CkxEF3gFUX3PE8EU,918
-discontinuum-1.0.
+discontinuum-1.0.5.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
 loadest_gp/__init__.py,sha256=YISfvbc7Zy2y0BOxS1A2KzqxyoNJTz0EnLMnRW6iVT8,740
 loadest_gp/plot.py,sha256=x2PK7vBCc44dX9lu5YV-rvw1u4pvXSLdcrTSvYLiHMA,2595
 loadest_gp/utils.py,sha256=m5QaqR_0JiuRXPfryH8nI5lODp8PqvQla5C05WDN3LY,2772
@@ -25,11 +25,11 @@ rating_gp/pipeline.py,sha256=1HgxN6DD3ZL5lhUb3DK2in2IXiml7W4Ja272GBMTc08,1884
 rating_gp/plot.py,sha256=CJphwqWWAfIY22j5Oz5DRwj7TcQCRyIQvM79_3KEdlc,9635
 rating_gp/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rating_gp/models/base.py,sha256=e2Kq644I88YLHWPNA0qyRgitF5wimdLW4618vKX-o_s,1474
-rating_gp/models/gpytorch.py,sha256=
+rating_gp/models/gpytorch.py,sha256=eFHwtnW44GZ1zz0fLx5REbzIWwnb_x_uq-cGjDcHyWs,6907
 rating_gp/models/kernels.py,sha256=3xg2mhY3aEgjI3r5vyAll9MA4c3M5UKqRi3FApNhJJQ,11579
 rating_gp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rating_gp/providers/usgs.py,sha256=KmKYN3c8Mi-ly2l6X80WT3taEhqCPXeEcRNi9HvbJmY,8134
-discontinuum-1.0.
-discontinuum-1.0.
-discontinuum-1.0.
-discontinuum-1.0.
+discontinuum-1.0.5.dist-info/METADATA,sha256=GDh-fscmNYYMmXXoUE5Xd1QafuKEOPjgZjlssmjqGVg,6302
+discontinuum-1.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+discontinuum-1.0.5.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
+discontinuum-1.0.5.dist-info/RECORD,,
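For context, each RECORD entry has the form `path,sha256=<digest>,<size>`, where the digest is the URL-safe, unpadded base64 encoding of the file's SHA-256 hash and the size is in bytes. A small sketch for producing (or checking) one such line; the path is illustrative:

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    """Build a RECORD-style line: path,sha256=<urlsafe b64, no padding>,<size>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"


# Example (path is illustrative):
# print(record_entry("discontinuum/_version.py"))
```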
rating_gp/models/gpytorch.py
CHANGED
@@ -71,8 +71,9 @@ class RatingGPMarginalGPyTorch(
         # noise, *and* you did not specify noise. This is treated as a no-op."
         self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(
             noise=noise,
+            #learn_additional_noise=False,
             learn_additional_noise=True,
-            noise_prior=gpytorch.priors.HalfNormalPrior(scale=0.
+            noise_prior=gpytorch.priors.HalfNormalPrior(scale=0.005),
         )

         model = ExactGPModel(X, y, self.likelihood)
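The likelihood still treats the per-observation noise as fixed but learns a small additional homoscedastic term, now regularized by a tighter HalfNormal(scale=0.005) prior. A minimal sketch of the same construction on placeholder noise values (everything outside the likelihood call is illustrative):

```python
import torch
import gpytorch

# Placeholder per-observation noise variances (e.g. from measurement uncertainty).
noise = 0.01 * torch.ones(50)

likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(
    noise=noise,                  # fixed, known noise per data point
    learn_additional_noise=True,  # also learn a small shared noise term
    noise_prior=gpytorch.priors.HalfNormalPrior(scale=0.005),  # keeps that term near zero
)
```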
@@ -109,17 +110,20 @@ class ExactGPModel(gpytorch.models.ExactGP):
         # + stage * time kernel only at low stage with smaller time length.
         # Note that stage gets transformed to q, so the kernel is actually
         # q * time
+        b_min = np.quantile(train_y, 0.10)
+        b_max = np.quantile(train_y, 0.90)
         self.covar_module = (
-            (self.cov_stage()
+            (self.cov_stage(ls_prior=GammaPrior(concentration=1, rate=1))
             * self.cov_time(ls_prior=GammaPrior(concentration=1, rate=1)))
-            + (self.cov_stage(ls_prior=GammaPrior(concentration=
+            + (self.cov_stage(ls_prior=GammaPrior(concentration=3, rate=1))
             * self.cov_time(ls_prior=GammaPrior(concentration=2, rate=5))
             * SigmoidKernel(
                 active_dims=self.stage_dim,
                 # a_prior=NormalPrior(loc=20, scale=1),
+                # b_prior=NormalPrior(loc=0.5, scale=0.2),
                 b_constraint=gpytorch.constraints.Interval(
-
-
+                    b_min,
+                    b_max,
                 ),
             )
         )
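The new `b_constraint` confines the SigmoidKernel switch point to the 10th-90th percentile range of the training targets. A short sketch of building such a quantile-based interval constraint; the SigmoidKernel itself lives in rating_gp/models/kernels.py and is not reproduced here, and `train_y` is placeholder data:

```python
import numpy as np
import gpytorch

# Placeholder training targets; in the model these are the (transformed) observations.
train_y = np.random.lognormal(mean=0.0, sigma=1.0, size=200)

# Restrict the kernel's switch-point parameter to the bulk of the observed range.
b_min = np.quantile(train_y, 0.10)
b_max = np.quantile(train_y, 0.90)
b_constraint = gpytorch.constraints.Interval(float(b_min), float(b_max))
```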
@@ -141,11 +145,12 @@ class ExactGPModel(gpytorch.models.ExactGP):

     def cov_stage(self, ls_prior=None):
         eta = HalfNormalPrior(scale=1)
-
+
         return ScaleKernel(
             MaternKernel(
                 active_dims=self.stage_dim,
                 lengthscale_prior=ls_prior,
+                nu=2.5,  # Smoother kernel (was nu=1.5)
             ),
             outputscale_prior=eta,
         )
@@ -153,14 +158,31 @@ class ExactGPModel(gpytorch.models.ExactGP):
     def cov_time(self, ls_prior=None):
         eta = HalfNormalPrior(scale=1)

-
+        # Base Matern kernel for long-term trends
+        base_kernel = ScaleKernel(
             MaternKernel(
                 active_dims=self.time_dim,
                 lengthscale_prior=ls_prior,
+                nu=1.5,  # was 2.5
             ),
             outputscale_prior=eta,
         )

+        # Periodic performs beter than a locally periodic kernel
+        periodic_kernel = ScaleKernel(
+            gpytorch.kernels.PeriodicKernel(
+                active_dims=self.time_dim,
+                period_length_prior=NormalPrior(loc=1.0, scale=0.1),  # ~1 year
+                lengthscale_prior=GammaPrior(concentration=2, rate=4),
+            ),
+            outputscale_prior=HalfNormalPrior(scale=0.5),
+        )
+
+        return base_kernel + periodic_kernel
+
+
+
+
     def cov_stagetime(self):
         eta = HalfNormalPrior(scale=1)
         ls = GammaPrior(concentration=2, rate=1)
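The reworked `cov_time` is now the sum of a rougher Matern term (nu=1.5) for the long-term trend and a periodic term whose period prior is centred near one year. A self-contained sketch of that composition, assuming the time feature sits in column 0; the surrounding ExactGPModel and its data pipeline are omitted:

```python
from gpytorch.kernels import MaternKernel, PeriodicKernel, ScaleKernel
from gpytorch.priors import GammaPrior, HalfNormalPrior, NormalPrior

time_dim = (0,)  # placeholder: index of the time feature in the input matrix

# Long-term trend: rougher Matern (nu=1.5) on the time dimension.
base_kernel = ScaleKernel(
    MaternKernel(
        nu=1.5,
        active_dims=time_dim,
        lengthscale_prior=GammaPrior(concentration=2, rate=5),
    ),
    outputscale_prior=HalfNormalPrior(scale=1),
)

# Seasonal component: periodic kernel with a period prior near 1 (~1 year in scaled time).
periodic_kernel = ScaleKernel(
    PeriodicKernel(
        active_dims=time_dim,
        period_length_prior=NormalPrior(loc=1.0, scale=0.1),
        lengthscale_prior=GammaPrior(concentration=2, rate=4),
    ),
    outputscale_prior=HalfNormalPrior(scale=0.5),
)

# Additive composition, as in the diff above.
cov_time = base_kernel + periodic_kernel
```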
{discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/WHEEL
File without changes
{discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/licenses/LICENSE.md
File without changes
{discontinuum-1.0.3.dist-info → discontinuum-1.0.5.dist-info}/top_level.txt
File without changes