discontinuum-1.0.2-py3-none-any.whl → discontinuum-1.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
discontinuum/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '1.0.2'
- __version_tuple__ = version_tuple = (1, 0, 2)
+ __version__ = version = '1.0.4'
+ __version_tuple__ = version_tuple = (1, 0, 4)
discontinuum/engines/gpytorch.py CHANGED
@@ -50,6 +50,9 @@ class MarginalGPyTorch(BaseModel):
              target_unc: Dataset = None,
              iterations: int = 100,
              optimizer: str = "adam",
+             learning_rate: float = None,
+             early_stopping: bool = False,
+             early_stopping_patience: int = 100,
              ):
          """Fit the model to data.

@@ -65,6 +68,12 @@ class MarginalGPyTorch(BaseModel):
              Number of iterations for optimization. The default is 100.
          optimizer : str, optional
              Optimization method. The default is "adam".
+         learning_rate : float, optional
+             Learning rate for optimization. If None, uses adaptive defaults.
+         early_stopping : bool, optional
+             Whether to use early stopping. The default is False.
+         early_stopping_patience : int, optional
+             Number of iterations to wait without improvement before stopping. The default is 100.
          """
          self.is_fitted = True
          # setup data manager (self.dm)
@@ -86,26 +95,136 @@ class MarginalGPyTorch(BaseModel):
          self.model.train()
          self.likelihood.train()

-         # Use the adam optimizer
-         if optimizer == "adam":
-             optimizer = torch.optim.Adam(self.model.parameters(), lr=0.05)  # default previously lr=0.1
-         else:
-             raise NotImplementedError("Only Adam optimizer is implemented")
+         # Adaptive learning rate selection for faster convergence
+         if learning_rate is None:
+             if optimizer == "adam":
+                 learning_rate = 0.1  # More aggressive default for faster convergence
+             elif optimizer == "lbfgs":
+                 learning_rate = 1.0  # L-BFGS doesn't use learning rate the same way
+
+         # Use the specified optimizer with stabilization
+         if optimizer != "adam":
+             raise NotImplementedError(f"Only 'adam' optimizer is supported. Got '{optimizer}'.")
+         optimizer = torch.optim.Adam(
+             self.model.parameters(),
+             lr=learning_rate,
+             betas=(0.9, 0.999),  # Slightly more conservative momentum
+             eps=1e-8,  # Numerical stability
+             weight_decay=1e-4  # Small L2 regularization
+         )
+         # More responsive learning rate scheduler for faster adaptation
+         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+             optimizer,
+             mode='min',
+             factor=0.6,  # Reduce LR by 40% when loss plateaus (more aggressive)
+             patience=40,  # Reduce sooner for faster adaptation
+             min_lr=1e-5,  # Higher minimum learning rate
+             threshold=1e-4  # Less sensitive to plateaus
+         )

          # "Loss" for GPs - the marginal log likelihood
          mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)

-         pbar = tqdm.tqdm(range(iterations), ncols=70)
+         # Training loop with stability features
+         pbar = tqdm.tqdm(range(iterations), ncols=100)  # Wider progress bar
+         jitter = 1e-6  # Dynamic jitter for numerical stability
+         best_loss = float('inf')
+         patience_counter = 0
+         min_lr_for_early_stop = 2e-5  # Stop if patience is exceeded and LR is below this
+
          for i in pbar:
-             # Zero gradients from previous iteration
-             optimizer.zero_grad()
-             # Output from model
-             output = self.model(train_x)
-             # Calc loss and backprop gradients
-             loss = -mll(output, train_y)
-             loss.backward()
-             pbar.set_postfix(loss=loss.item())
-             optimizer.step()
+             if optimizer.__class__.__name__ == "LBFGS":
+                 # L-BFGS requires a closure function
+                 def closure():
+                     optimizer.zero_grad()
+                     output = self.model(train_x)
+                     with gpytorch.settings.cholesky_jitter(jitter):
+                         loss = -mll(output, train_y).sum()
+                     loss.backward()
+                     return loss
+
+                 loss = optimizer.step(closure)
+                 pbar.set_postfix(loss=loss.item())
+             else:
+                 # Adam optimizer with stability features
+                 optimizer.zero_grad()
+                 output = self.model(train_x)
+
+                 # Attempt loss calculation with dynamic jitter
+                 try:
+                     with gpytorch.settings.cholesky_jitter(jitter):
+                         loss = -mll(output, train_y)
+                 except Exception as e:
+                     # Increase jitter if numerical issues occur
+                     jitter = min(jitter * 10, 1e-2)
+                     current_lr = optimizer.param_groups[0]['lr']
+                     pbar.set_postfix_str(
+                         f'lr={current_lr:.1e} jitter={jitter:.1e} | Numerical issue - increasing jitter'
+                     )
+                     continue
+
+                 # Check for NaN loss
+                 if torch.isnan(loss) or torch.isinf(loss):
+                     current_lr = optimizer.param_groups[0]['lr']
+                     pbar.set_postfix_str(
+                         f'lr={current_lr:.1e} jitter={jitter:.1e} | NaN/Inf loss detected - skipping step'
+                     )
+                     continue
+
+                 loss.backward()
+
+                 # Gradient clipping for stability
+                 torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+
+                 # Check for NaN gradients
+                 has_nan_grad = False
+                 for param in self.model.parameters():
+                     if param.grad is not None and torch.isnan(param.grad).any():
+                         has_nan_grad = True
+                         break
+
+                 if has_nan_grad:
+                     # Don't update scheduler on NaN gradients - this prevents rapid LR decay
+                     # The scheduler should only respond to actual optimization progress
+                     current_lr = optimizer.param_groups[0]['lr']
+
+                     # Update best loss tracking (loss is still valid, just gradients are NaN)
+                     if loss.item() < best_loss:
+                         best_loss = loss.item()
+                         patience_counter = 0
+                     else:
+                         patience_counter += 1
+
+                     # Display comprehensive info even with NaN gradients
+                     pbar.set_postfix_str(
+                         f'loss={loss.item():.4f} lr={current_lr:.1e} jitter={jitter:.1e} best={best_loss:.4f} | NaN gradients - skipping step'
+                     )
+                     continue
+
+                 optimizer.step()
+
+                 # Update learning rate scheduler for Adam
+                 scheduler.step(loss)
+                 current_lr = optimizer.param_groups[0]['lr']
+
+                 # Early stopping check (more aggressive)
+                 if loss.item() < best_loss:
+                     best_loss = loss.item()
+                     patience_counter = 0
+                 else:
+                     patience_counter += 1
+
+                 # Display progress with comprehensive metadata
+                 progress_info = f'loss={loss.item():.4f} lr={current_lr:.1e} jitter={jitter:.1e} best={best_loss:.4f}'
+                 if early_stopping:
+                     progress_info += f' patience={patience_counter}/25'
+                 pbar.set_postfix_str(progress_info)
+
+                 # More aggressive early stopping: patience=25 and require LR to be low
+                 if early_stopping and patience_counter >= 25 and current_lr <= min_lr_for_early_stop:
+                     print(f"\nEarly stopping triggered after {i+1} iterations (patience exceeded and LR low)")
+                     print(f"Best loss: {best_loss:.6f}")
+                     break

      @is_fitted
      def predict(self,
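Note on the training loop above: the pattern it adds is Adam plus a ReduceLROnPlateau scheduler, best-loss tracking, and an early stop that fires only when patience is exhausted and the learning rate has already decayed. The following is a minimal, self-contained sketch of that bookkeeping on a toy least-squares problem. It is an illustration of the logic, not discontinuum's API; the toy data, iteration count, and variable names are assumptions made for the example.

```python
import torch

# Toy least-squares problem, purely to illustrate the scheduler and
# early-stopping bookkeeping used in the new fit() loop above.
torch.manual_seed(0)
x = torch.linspace(0, 1, 100)
y = 3.0 * x + 0.1 * torch.randn(100)
w = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.Adam([w], lr=0.1, betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.6, patience=40, min_lr=1e-5, threshold=1e-4
)

best_loss = float("inf")
patience_counter = 0
patience = 25            # the loop above hard-codes 25 for the stopping check
min_lr_for_stop = 2e-5   # and also requires the LR to have decayed this far

for i in range(2000):
    optimizer.zero_grad()
    loss = ((w * x - y) ** 2).mean()
    loss.backward()
    # Gradient clipping, as in the diff, guards against occasional huge steps.
    torch.nn.utils.clip_grad_norm_([w], max_norm=1.0)
    optimizer.step()
    scheduler.step(loss)  # LR is reduced only when the loss plateaus

    if loss.item() < best_loss:
        best_loss = loss.item()
        patience_counter = 0
    else:
        patience_counter += 1

    current_lr = optimizer.param_groups[0]["lr"]
    if patience_counter >= patience and current_lr <= min_lr_for_stop:
        print(f"early stop at iteration {i + 1}, best loss {best_loss:.6f}")
        break
```

In the actual `fit()` above, the same bookkeeping is wrapped in additional guards for NaN/Inf losses, NaN gradients, and a dynamically increased Cholesky jitter.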
discontinuum-1.0.2.dist-info/METADATA → discontinuum-1.0.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: discontinuum
- Version: 1.0.2
+ Version: 1.0.4
  Summary: Estimate discontinuous timeseries from continuous covariates.
  Maintainer-email: Timothy Hodson <thodson@usgs.gov>
  License: License
@@ -124,11 +124,12 @@ However, LOADEST has several serious limitations
  the more flexible Weighted Regression on Time Discharge and Season (WRTDS),
  which allows the relation between target and covariate to vary through time.
  `loadest-gp` takes the WRTDS idea and reimplements it as a GP.
- Try it out in the [loadest-gp demo](https://code.usgs.gov/wma/uncertainty/discontinuum/-/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb).
+ github/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb
+ Try it out in the [loadest-gp demo](https://github.com/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/loadest-gp-demo.ipynb).

  ### rating-gp
  `rating-gp` is a Gaussian-process model for estimating river flow from stage time series.
- Try it out in the [rating-gp demo](https://code.usgs.gov/wma/uncertainty/discontinuum/-/blob/main/docs/source/notebooks/rating-gp-demo.ipynb).
+ Try it out in the [rating-gp demo](https://github.com/thodson-usgs/discontinuum/blob/main/docs/source/notebooks/rating-gp-demo.ipynb).

  ## Engines
  Currently, the only supported engines are the marginal likelihood implementation in `pymc` and `gpytorch`.
discontinuum-1.0.2.dist-info/RECORD → discontinuum-1.0.4.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
  discontinuum/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- discontinuum/_version.py,sha256=98Mtoja-WpqDUnppClywkGhX6oxX6l-ZHnSYMYYbBUE,511
+ discontinuum/_version.py,sha256=rXTOeD0YpRo_kJ2LqUiMnTKEFf43sO_PBvJHDh0SQUA,511
  discontinuum/data_manager.py,sha256=LiZoPR0nnu7YAUfh5L1ZDRfaS3dgfVIELXIHkzUKyBg,4416
  discontinuum/pipeline.py,sha256=1avuZnFai-b3HmihcpZ8M3WFNQ8lXAFSNTrnfl2NrY0,10074
  discontinuum/plot.py,sha256=eZQS6-Ydq8FFcEukPtNuDVB-weV6lHyWMyJ1hqTkVrU,2969
  discontinuum/utils.py,sha256=07hIHQk_oDlkjz7tasgBjqqPOC6D0iNcy0eu-88aNbM,1540
  discontinuum/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  discontinuum/engines/base.py,sha256=OlHd4ssIQoWvYHKoVqk5fKAVBcKsIIkR4ul9iNBvaYg,2396
- discontinuum/engines/gpytorch.py,sha256=oJMNvNAwKwxQyt3j-QyRE-pjkYDv4i-qqhQfimNQ2HQ,8654
+ discontinuum/engines/gpytorch.py,sha256=36TxE_qfRUjuOB16eXmyrxPlicKzXkdQ7xnfqL2ucy0,14539
  discontinuum/engines/pymc.py,sha256=phbtE-3UCSVcP1MhbXwAHIWDZWDr56wK9U7aRt-w-2o,5961
  discontinuum/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  discontinuum/providers/base.py,sha256=Yn2EHS1b4fYl09-m2MYuf2P9VRUXAP-WDpSoZrCbRvY,720
  discontinuum/tests/test_pipeline.py,sha256=_FhkGxbFIxNb35lGaIdZk7Zjgs6CkxEF3gFUX3PE8EU,918
- discontinuum-1.0.2.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
+ discontinuum-1.0.4.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
  loadest_gp/__init__.py,sha256=YISfvbc7Zy2y0BOxS1A2KzqxyoNJTz0EnLMnRW6iVT8,740
  loadest_gp/plot.py,sha256=x2PK7vBCc44dX9lu5YV-rvw1u4pvXSLdcrTSvYLiHMA,2595
  loadest_gp/utils.py,sha256=m5QaqR_0JiuRXPfryH8nI5lODp8PqvQla5C05WDN3LY,2772
@@ -25,11 +25,11 @@ rating_gp/pipeline.py,sha256=1HgxN6DD3ZL5lhUb3DK2in2IXiml7W4Ja272GBMTc08,1884
  rating_gp/plot.py,sha256=CJphwqWWAfIY22j5Oz5DRwj7TcQCRyIQvM79_3KEdlc,9635
  rating_gp/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rating_gp/models/base.py,sha256=e2Kq644I88YLHWPNA0qyRgitF5wimdLW4618vKX-o_s,1474
- rating_gp/models/gpytorch.py,sha256=rSxuTMoSeGK2LwqANMDMrmxDsOCXyAWSmENm6KFjRZ0,5930
+ rating_gp/models/gpytorch.py,sha256=4SqOdWIvI93kDq9S4cDPHXX25EHNjT_hKwZijhAR4C0,7121
  rating_gp/models/kernels.py,sha256=3xg2mhY3aEgjI3r5vyAll9MA4c3M5UKqRi3FApNhJJQ,11579
  rating_gp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rating_gp/providers/usgs.py,sha256=KmKYN3c8Mi-ly2l6X80WT3taEhqCPXeEcRNi9HvbJmY,8134
- discontinuum-1.0.2.dist-info/METADATA,sha256=0oCct8VOrQib5sCzdi2cBNI8KEcpYTYTKj1oCL3DCsc,6231
- discontinuum-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- discontinuum-1.0.2.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
- discontinuum-1.0.2.dist-info/RECORD,,
+ discontinuum-1.0.4.dist-info/METADATA,sha256=A6T6BQocZmIox600f7nU5Tb9r7x5YthC5ba1WRET2XM,6302
+ discontinuum-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ discontinuum-1.0.4.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
+ discontinuum-1.0.4.dist-info/RECORD,,
rating_gp/models/gpytorch.py CHANGED
@@ -71,7 +71,9 @@ class RatingGPMarginalGPyTorch(
          # noise, *and* you did not specify noise. This is treated as a no-op."
          self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(
              noise=noise,
-             learn_additional_noise=False,
+             #learn_additional_noise=False,
+             learn_additional_noise=True,
+             noise_prior=gpytorch.priors.HalfNormalPrior(scale=0.005),
          )

          model = ExactGPModel(X, y, self.likelihood)
@@ -108,17 +110,20 @@ class ExactGPModel(gpytorch.models.ExactGP):
          # + stage * time kernel only at low stage with smaller time length.
          # Note that stage gets transformed to q, so the kernel is actually
          # q * time
+         b_min = np.quantile(train_y, 0.30)
+         b_max = np.quantile(train_y, 0.90)
          self.covar_module = (
-             (self.cov_stage()
-              * self.cov_time(ls_prior=GammaPrior(concentration=10, rate=5)))
-             + (self.cov_stage()
-                * self.cov_time(ls_prior=GammaPrior(concentration=2, rate=5))
+             (self.cov_stage(ls_prior=GammaPrior(concentration=2, rate=1))
+              * self.cov_time(ls_prior=GammaPrior(concentration=1, rate=1)))
+             + (self.cov_stage(ls_prior=GammaPrior(concentration=5, rate=1))
+                * self.cov_time(ls_prior=GammaPrior(concentration=1, rate=5))
                 * SigmoidKernel(
                     active_dims=self.stage_dim,
                     # a_prior=NormalPrior(loc=20, scale=1),
+                    # b_prior=NormalPrior(loc=0.5, scale=0.2),
                     b_constraint=gpytorch.constraints.Interval(
-                         train_y.min(),
-                         train_y.max(),
+                         b_min,
+                         b_max,
                     ),
                 )
             )
@@ -140,11 +145,12 @@ class ExactGPModel(gpytorch.models.ExactGP):

      def cov_stage(self, ls_prior=None):
          eta = HalfNormalPrior(scale=1)
-
+
          return ScaleKernel(
              MaternKernel(
                  active_dims=self.stage_dim,
                  lengthscale_prior=ls_prior,
+                 nu=2.5,  # Smoother kernel (was nu=1.5)
              ),
              outputscale_prior=eta,
          )
@@ -152,13 +158,35 @@ class ExactGPModel(gpytorch.models.ExactGP):
      def cov_time(self, ls_prior=None):
          eta = HalfNormalPrior(scale=1)

-         return ScaleKernel(
+         # Base Matern kernel for long-term trends
+         base_kernel = ScaleKernel(
              MaternKernel(
                  active_dims=self.time_dim,
                  lengthscale_prior=ls_prior,
+                 nu=1.5,  # was 2.5
              ),
              outputscale_prior=eta,
          )
+
+         # Periodic kernel for annual seasonality
+         # Locally periodic kernel: Periodic * Matern
+         periodic_kernel = ScaleKernel(
+             gpytorch.kernels.PeriodicKernel(
+                 active_dims=self.time_dim,
+                 period_length_prior=NormalPrior(loc=1.0, scale=0.05),  # ~1 year
+                 lengthscale_prior=GammaPrior(concentration=6, rate=1),
+             ) * MaternKernel(
+                 active_dims=self.time_dim,
+                 nu=2.5,
+                 lengthscale_prior=GammaPrior(concentration=4, rate=3),
+             ),
+             outputscale_prior=HalfNormalPrior(scale=0.2),
+         )
+
+         return base_kernel + periodic_kernel
+
+
+

      def cov_stagetime(self):
          eta = HalfNormalPrior(scale=1)
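For readers unfamiliar with the "locally periodic" construction introduced in `cov_time` above (a Matern trend plus a Periodic × Matern seasonal term), the sketch below assembles the same composition in isolation with gpytorch and evaluates it on a toy time grid. The priors mirror the diff; the time grid, the assumption that time is scaled so one year ≈ 1.0, and the version-compatibility shim at the end are illustrative assumptions, not package code.

```python
import torch
import gpytorch
from gpytorch.kernels import MaternKernel, PeriodicKernel, ScaleKernel
from gpytorch.priors import GammaPrior, HalfNormalPrior, NormalPrior

# Toy time grid in "years" (assumes time is scaled so one year ~ 1.0).
t = torch.linspace(0.0, 5.0, 50).unsqueeze(-1)

# Long-term trend: Matern(nu=1.5), as in the updated cov_time().
base_kernel = ScaleKernel(
    MaternKernel(nu=1.5, lengthscale_prior=GammaPrior(concentration=1, rate=1)),
    outputscale_prior=HalfNormalPrior(scale=1),
)

# Annual seasonality: Periodic * Matern ("locally periodic"), which lets the
# seasonal shape drift slowly from year to year instead of repeating exactly.
periodic_kernel = ScaleKernel(
    PeriodicKernel(period_length_prior=NormalPrior(loc=1.0, scale=0.05))
    * MaternKernel(nu=2.5, lengthscale_prior=GammaPrior(concentration=4, rate=3)),
    outputscale_prior=HalfNormalPrior(scale=0.2),
)

time_kernel = base_kernel + periodic_kernel

# Evaluate the covariance matrix; it shows a smooth trend band with
# repeating yearly ridges superimposed.
with torch.no_grad():
    K = time_kernel(t, t)
    K = K.to_dense() if hasattr(K, "to_dense") else K.evaluate()  # gpytorch version shim
print(K.shape)  # torch.Size([50, 50])
```

Summing the two kernels treats the time effect as a slowly varying trend plus a quasi-annual cycle whose amplitude is kept modest by the HalfNormal(scale=0.2) prior on its output scale.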