discontinuum 1.0.2-py3-none-any.whl → 1.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- discontinuum/_version.py +2 -2
- discontinuum/engines/gpytorch.py +134 -15
- {discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/METADATA +4 -3
- {discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/RECORD +8 -8
- rating_gp/models/gpytorch.py +37 -9
- {discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/WHEEL +0 -0
- {discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/licenses/LICENSE.md +0 -0
- {discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/top_level.txt +0 -0
discontinuum/_version.py
CHANGED
discontinuum/engines/gpytorch.py
CHANGED
@@ -50,6 +50,9 @@ class MarginalGPyTorch(BaseModel):
         target_unc: Dataset = None,
         iterations: int = 100,
         optimizer: str = "adam",
+        learning_rate: float = None,
+        early_stopping: bool = False,
+        early_stopping_patience: int = 100,
     ):
         """Fit the model to data.
 
@@ -65,6 +68,12 @@ class MarginalGPyTorch(BaseModel):
             Number of iterations for optimization. The default is 100.
         optimizer : str, optional
             Optimization method. The default is "adam".
+        learning_rate : float, optional
+            Learning rate for optimization. If None, uses adaptive defaults.
+        early_stopping : bool, optional
+            Whether to use early stopping. The default is False.
+        early_stopping_patience : int, optional
+            Number of iterations to wait without improvement before stopping. The default is 100.
         """
         self.is_fitted = True
         # setup data manager (self.dm)
@@ -86,26 +95,136 @@ class MarginalGPyTorch(BaseModel):
         self.model.train()
         self.likelihood.train()
 
-        #
-        if
-        optimizer
-
-
+        # Adaptive learning rate selection for faster convergence
+        if learning_rate is None:
+            if optimizer == "adam":
+                learning_rate = 0.1  # More aggressive default for faster convergence
+            elif optimizer == "lbfgs":
+                learning_rate = 1.0  # L-BFGS doesn't use learning rate the same way
+
+        # Use the specified optimizer with stabilization
+        if optimizer != "adam":
+            raise NotImplementedError(f"Only 'adam' optimizer is supported. Got '{optimizer}'.")
+        optimizer = torch.optim.Adam(
+            self.model.parameters(),
+            lr=learning_rate,
+            betas=(0.9, 0.999),  # Slightly more conservative momentum
+            eps=1e-8,  # Numerical stability
+            weight_decay=1e-4  # Small L2 regularization
+        )
+        # More responsive learning rate scheduler for faster adaptation
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer,
+            mode='min',
+            factor=0.6,  # Reduce LR by 40% when loss plateaus (more aggressive)
+            patience=40,  # Reduce sooner for faster adaptation
+            min_lr=1e-5,  # Higher minimum learning rate
+            threshold=1e-4  # Less sensitive to plateaus
+        )
 
         # "Loss" for GPs - the marginal log likelihood
         mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
 
-
+        # Training loop with stability features
+        pbar = tqdm.tqdm(range(iterations), ncols=100)  # Wider progress bar
+        jitter = 1e-6  # Dynamic jitter for numerical stability
+        best_loss = float('inf')
+        patience_counter = 0
+        min_lr_for_early_stop = 2e-5  # Stop if patience is exceeded and LR is below this
+
         for i in pbar:
-
-
-
-
-
-
-
-
-
+            if optimizer.__class__.__name__ == "LBFGS":
+                # L-BFGS requires a closure function
+                def closure():
+                    optimizer.zero_grad()
+                    output = self.model(train_x)
+                    with gpytorch.settings.cholesky_jitter(jitter):
+                        loss = -mll(output, train_y).sum()
+                    loss.backward()
+                    return loss
+
+                loss = optimizer.step(closure)
+                pbar.set_postfix(loss=loss.item())
+            else:
+                # Adam optimizer with stability features
+                optimizer.zero_grad()
+                output = self.model(train_x)
+
+                # Attempt loss calculation with dynamic jitter
+                try:
+                    with gpytorch.settings.cholesky_jitter(jitter):
+                        loss = -mll(output, train_y)
+                except Exception as e:
+                    # Increase jitter if numerical issues occur
+                    jitter = min(jitter * 10, 1e-2)
+                    current_lr = optimizer.param_groups[0]['lr']
+                    pbar.set_postfix_str(
+                        f'lr={current_lr:.1e} jitter={jitter:.1e} | Numerical issue - increasing jitter'
+                    )
+                    continue
+
+                # Check for NaN loss
+                if torch.isnan(loss) or torch.isinf(loss):
+                    current_lr = optimizer.param_groups[0]['lr']
+                    pbar.set_postfix_str(
+                        f'lr={current_lr:.1e} jitter={jitter:.1e} | NaN/Inf loss detected - skipping step'
+                    )
+                    continue
+
+                loss.backward()
+
+                # Gradient clipping for stability
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+
+                # Check for NaN gradients
+                has_nan_grad = False
+                for param in self.model.parameters():
+                    if param.grad is not None and torch.isnan(param.grad).any():
+                        has_nan_grad = True
+                        break
+
+                if has_nan_grad:
+                    # Don't update scheduler on NaN gradients - this prevents rapid LR decay
+                    # The scheduler should only respond to actual optimization progress
+                    current_lr = optimizer.param_groups[0]['lr']
+
+                    # Update best loss tracking (loss is still valid, just gradients are NaN)
+                    if loss.item() < best_loss:
+                        best_loss = loss.item()
+                        patience_counter = 0
+                    else:
+                        patience_counter += 1
+
+                    # Display comprehensive info even with NaN gradients
+                    pbar.set_postfix_str(
+                        f'loss={loss.item():.4f} lr={current_lr:.1e} jitter={jitter:.1e} best={best_loss:.4f} | NaN gradients - skipping step'
+                    )
+                    continue
+
+                optimizer.step()
+
+                # Update learning rate scheduler for Adam
+                scheduler.step(loss)
+                current_lr = optimizer.param_groups[0]['lr']
+
+                # Early stopping check (more aggressive)
+                if loss.item() < best_loss:
+                    best_loss = loss.item()
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+
+                # Display progress with comprehensive metadata
+                progress_info = f'loss={loss.item():.4f} lr={current_lr:.1e} jitter={jitter:.1e} best={best_loss:.4f}'
+                if early_stopping:
+                    progress_info += f' patience={patience_counter}/25'
+                pbar.set_postfix_str(progress_info)
+
+                # More aggressive early stopping: patience=25 and require LR to be low
+                if early_stopping and patience_counter >= 25 and current_lr <= min_lr_for_early_stop:
+                    print(f"\nEarly stopping triggered after {i+1} iterations (patience exceeded and LR low)")
+                    print(f"Best loss: {best_loss:.6f}")
+                    break
 
     @is_fitted
     def predict(self,
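The bulk of the gpytorch engine change replaces the fixed-rate Adam loop with a stabilized one: an adaptive default learning rate, a ReduceLROnPlateau scheduler, dynamic Cholesky jitter, gradient clipping, NaN guards, and optional early stopping that requires both exhausted patience and an already-decayed learning rate. Below is a minimal, self-contained sketch of that optimizer/scheduler/early-stopping pattern on a toy least-squares problem; it uses plain PyTorch and is illustrative only, not discontinuum API.

# Sketch of the stabilized training pattern on a toy problem (fit y = 2x).
# Plain PyTorch; hyperparameters mirror the values used in the diff above.
import torch

x = torch.linspace(0, 1, 50)
y = 2.0 * x
w = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.Adam([w], lr=0.1, betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.6, patience=40, min_lr=1e-5, threshold=1e-4
)

best_loss, patience_counter = float('inf'), 0
for i in range(2000):
    optimizer.zero_grad()
    loss = ((w * x - y) ** 2).mean()
    if torch.isnan(loss) or torch.isinf(loss):
        continue  # skip the step rather than propagate NaN/Inf
    loss.backward()
    torch.nn.utils.clip_grad_norm_([w], max_norm=1.0)  # gradient clipping
    optimizer.step()
    scheduler.step(loss)  # decay the learning rate when the loss plateaus

    if loss.item() < best_loss:
        best_loss, patience_counter = loss.item(), 0
    else:
        patience_counter += 1
    # stop only when progress has stalled *and* the LR has already decayed
    if patience_counter >= 25 and optimizer.param_groups[0]['lr'] <= 2e-5:
        break

Gating the stop condition on a low learning rate gives the plateau scheduler a chance to recover slow progress before training is abandoned.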
{discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: discontinuum
-Version: 1.0.2
+Version: 1.0.4
 Summary: Estimate discontinuous timeseries from continuous covariates.
 Maintainer-email: Timothy Hodson <thodson@usgs.gov>
 License: License
@@ -124,11 +124,12 @@ However, LOADEST has several serious limitations
 the more flexible Weighted Regression on Time Discharge and Season (WRTDS),
 which allows the relation between target and covariate to vary through time.
 `loadest-gp` takes the WRTDS idea and reimplements it as a GP.
-
+github/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb
+Try it out in the [loadest-gp demo](https://github.com/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb).
 
 ### rating-gp
 `rating-gp` is a Gaussian-process model for estimating river flow from stage time series.
-Try it out in the [rating-gp demo](https://
+Try it out in the [rating-gp demo](https://github.com/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/rating-gp-demo.ipynb).
 
 ## Engines
 Currently, the only supported engines are the marginal likelihood implementation in `pymc` and `gpytorch`.
{discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 discontinuum/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-discontinuum/_version.py,sha256=
+discontinuum/_version.py,sha256=rXTOeD0YpRo_kJ2LqUiMnTKEFf43sO_PBvJHDh0SQUA,511
 discontinuum/data_manager.py,sha256=LiZoPR0nnu7YAUfh5L1ZDRfaS3dgfVIELXIHkzUKyBg,4416
 discontinuum/pipeline.py,sha256=1avuZnFai-b3HmihcpZ8M3WFNQ8lXAFSNTrnfl2NrY0,10074
 discontinuum/plot.py,sha256=eZQS6-Ydq8FFcEukPtNuDVB-weV6lHyWMyJ1hqTkVrU,2969
 discontinuum/utils.py,sha256=07hIHQk_oDlkjz7tasgBjqqPOC6D0iNcy0eu-88aNbM,1540
 discontinuum/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 discontinuum/engines/base.py,sha256=OlHd4ssIQoWvYHKoVqk5fKAVBcKsIIkR4ul9iNBvaYg,2396
-discontinuum/engines/gpytorch.py,sha256=
+discontinuum/engines/gpytorch.py,sha256=36TxE_qfRUjuOB16eXmyrxPlicKzXkdQ7xnfqL2ucy0,14539
 discontinuum/engines/pymc.py,sha256=phbtE-3UCSVcP1MhbXwAHIWDZWDr56wK9U7aRt-w-2o,5961
 discontinuum/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 discontinuum/providers/base.py,sha256=Yn2EHS1b4fYl09-m2MYuf2P9VRUXAP-WDpSoZrCbRvY,720
 discontinuum/tests/test_pipeline.py,sha256=_FhkGxbFIxNb35lGaIdZk7Zjgs6CkxEF3gFUX3PE8EU,918
-discontinuum-1.0.
+discontinuum-1.0.4.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
 loadest_gp/__init__.py,sha256=YISfvbc7Zy2y0BOxS1A2KzqxyoNJTz0EnLMnRW6iVT8,740
 loadest_gp/plot.py,sha256=x2PK7vBCc44dX9lu5YV-rvw1u4pvXSLdcrTSvYLiHMA,2595
 loadest_gp/utils.py,sha256=m5QaqR_0JiuRXPfryH8nI5lODp8PqvQla5C05WDN3LY,2772
@@ -25,11 +25,11 @@ rating_gp/pipeline.py,sha256=1HgxN6DD3ZL5lhUb3DK2in2IXiml7W4Ja272GBMTc08,1884
 rating_gp/plot.py,sha256=CJphwqWWAfIY22j5Oz5DRwj7TcQCRyIQvM79_3KEdlc,9635
 rating_gp/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rating_gp/models/base.py,sha256=e2Kq644I88YLHWPNA0qyRgitF5wimdLW4618vKX-o_s,1474
-rating_gp/models/gpytorch.py,sha256=
+rating_gp/models/gpytorch.py,sha256=4SqOdWIvI93kDq9S4cDPHXX25EHNjT_hKwZijhAR4C0,7121
 rating_gp/models/kernels.py,sha256=3xg2mhY3aEgjI3r5vyAll9MA4c3M5UKqRi3FApNhJJQ,11579
 rating_gp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rating_gp/providers/usgs.py,sha256=KmKYN3c8Mi-ly2l6X80WT3taEhqCPXeEcRNi9HvbJmY,8134
-discontinuum-1.0.
-discontinuum-1.0.
-discontinuum-1.0.
-discontinuum-1.0.
+discontinuum-1.0.4.dist-info/METADATA,sha256=A6T6BQocZmIox600f7nU5Tb9r7x5YthC5ba1WRET2XM,6302
+discontinuum-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+discontinuum-1.0.4.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
+discontinuum-1.0.4.dist-info/RECORD,,
rating_gp/models/gpytorch.py
CHANGED
@@ -71,7 +71,9 @@ class RatingGPMarginalGPyTorch(
         # noise, *and* you did not specify noise. This is treated as a no-op."
         self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(
             noise=noise,
-            learn_additional_noise=False,
+            #learn_additional_noise=False,
+            learn_additional_noise=True,
+            noise_prior=gpytorch.priors.HalfNormalPrior(scale=0.005),
         )
 
         model = ExactGPModel(X, y, self.likelihood)
@@ -108,17 +110,20 @@ class ExactGPModel(gpytorch.models.ExactGP):
         # + stage * time kernel only at low stage with smaller time length.
         # Note that stage gets transformed to q, so the kernel is actually
         # q * time
+        b_min = np.quantile(train_y, 0.30)
+        b_max = np.quantile(train_y, 0.90)
         self.covar_module = (
-            (self.cov_stage()
-             * self.cov_time(ls_prior=GammaPrior(concentration=
-
-             * self.cov_time(ls_prior=GammaPrior(concentration=
+            (self.cov_stage(ls_prior=GammaPrior(concentration=2, rate=1))
+             * self.cov_time(ls_prior=GammaPrior(concentration=1, rate=1)))
+            + (self.cov_stage(ls_prior=GammaPrior(concentration=5, rate=1))
+               * self.cov_time(ls_prior=GammaPrior(concentration=1, rate=5))
             * SigmoidKernel(
                 active_dims=self.stage_dim,
                 # a_prior=NormalPrior(loc=20, scale=1),
+                # b_prior=NormalPrior(loc=0.5, scale=0.2),
                 b_constraint=gpytorch.constraints.Interval(
-
-
+                    b_min,
+                    b_max,
                 ),
             )
         )
@@ -140,11 +145,12 @@ class ExactGPModel(gpytorch.models.ExactGP):
 
     def cov_stage(self, ls_prior=None):
         eta = HalfNormalPrior(scale=1)
-
+
         return ScaleKernel(
             MaternKernel(
                 active_dims=self.stage_dim,
                 lengthscale_prior=ls_prior,
+                nu=2.5,  # Smoother kernel (was nu=1.5)
             ),
             outputscale_prior=eta,
         )
@@ -152,13 +158,35 @@ class ExactGPModel(gpytorch.models.ExactGP):
     def cov_time(self, ls_prior=None):
         eta = HalfNormalPrior(scale=1)
 
-        return ScaleKernel(
+        # Base Matern kernel for long-term trends
+        base_kernel = ScaleKernel(
             MaternKernel(
                 active_dims=self.time_dim,
                 lengthscale_prior=ls_prior,
+                nu=1.5,  # was 2.5
             ),
             outputscale_prior=eta,
         )
+
+        # Periodic kernel for annual seasonality
+        # Locally periodic kernel: Periodic * Matern
+        periodic_kernel = ScaleKernel(
+            gpytorch.kernels.PeriodicKernel(
+                active_dims=self.time_dim,
+                period_length_prior=NormalPrior(loc=1.0, scale=0.05),  # ~1 year
+                lengthscale_prior=GammaPrior(concentration=6, rate=1),
+            ) * MaternKernel(
+                active_dims=self.time_dim,
+                nu=2.5,
+                lengthscale_prior=GammaPrior(concentration=4, rate=3),
+            ),
+            outputscale_prior=HalfNormalPrior(scale=0.2),
+        )
+
+        return base_kernel + periodic_kernel
+
+
 
     def cov_stagetime(self):
         eta = HalfNormalPrior(scale=1)
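In the kernel change above, cov_time now returns the sum of a Matern trend kernel and a locally periodic kernel (a PeriodicKernel multiplied by a MaternKernel), so the annual cycle can drift in shape from year to year. A simplified standalone sketch of that composition follows; the priors, constraints, and rating_gp's active_dims bookkeeping are omitted, so treat it as illustrative rather than the package's exact kernel.

# Sketch of a trend + locally periodic time kernel in GPyTorch.
from gpytorch.kernels import MaternKernel, PeriodicKernel, ScaleKernel

time_dim = 0  # assume time is column 0 of the model inputs

# Long-term trend: a single Matern kernel over time
trend = ScaleKernel(MaternKernel(nu=1.5, active_dims=[time_dim]))

# Locally periodic seasonal term: Periodic (the ~1-year cycle) * Matern,
# which lets the cycle's shape evolve slowly from year to year
seasonal = ScaleKernel(
    PeriodicKernel(active_dims=[time_dim])
    * MaternKernel(nu=2.5, active_dims=[time_dim])
)

cov_time = trend + seasonal  # GPyTorch kernels compose with + and *

Adding kernels models independent additive effects (trend plus seasonality), while multiplying the periodic term by a Matern term localizes the periodicity in time.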
{discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/WHEEL
File without changes
{discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/licenses/LICENSE.md
File without changes
{discontinuum-1.0.2.dist-info → discontinuum-1.0.4.dist-info}/top_level.txt
File without changes