sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sgptools/utils/metrics.py CHANGED
@@ -16,159 +16,432 @@ from scipy.stats import multivariate_normal
  import tensorflow as tf
  import numpy as np
  import gpflow
+ from typing import Tuple
+ from sgptools.objectives import SLogMI
 
 
- def gaussian_entropy(K):
-     """Computes GP-based entropy from a kernel matrix
+ def gaussian_entropy(K: np.ndarray) -> float:
+     """
+     Computes the entropy of a multivariate Gaussian distribution defined by its
+     covariance matrix `K`. This is often used to quantify the uncertainty or
+     information content of a Gaussian Process.
 
      Args:
-         K (ndarray): (n, n); kernel matrix
+         K (np.ndarray): (n, n); A square NumPy array representing the covariance matrix.
+             Must be positive semi-definite.
 
      Returns:
-         entropy (float): Entropy computed from the kernel matrix
-     """
-     return multivariate_normal(mean=None, cov=K, allow_singular=True).entropy()
+         float: The entropy of the multivariate Gaussian distribution.
 
- def get_mi(Xu, candidate_locs, noise_variance, kernel):
-     """Computes mutual information between the sensing locations and the candidate locations
+     Usage:
+         ```python
+         import numpy as np
+         from sgptools.utils.metrics import gaussian_entropy
 
-     Args:
-         Xu (ndarray): (m, d); Sensing locations
-         candidate_locs (ndarray): (n, d); Candidate sensing locations
-         noise_variance (float): data variance
-         kernel (gpflow.kernels.Kernel): gpflow kernel function
-
-     Returns:
-         mi (float): Mutual information computed using a GP
+         # Example covariance matrix for 2 variables
+         covariance_matrix = np.array([[1.0, 0.5], [0.5, 1.0]])
+         entropy_value = gaussian_entropy(covariance_matrix)
+         ```
      """
-     Xu = np.array(Xu)
-     candidate_locs = np.array(candidate_locs)
+     # Use scipy's multivariate_normal for the entropy calculation;
+     # allow_singular=True handles covariance matrices that are singular or
+     # near-singular due to numerical issues, preventing errors.
+     return float(
+         multivariate_normal(mean=None, cov=K, allow_singular=True).entropy())
+
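
Note: for a Gaussian, `entropy()` evaluates the closed form $H(K) = \frac{1}{2}\ln\det(2\pi e K)$. A minimal cross-check of that identity (my sketch, assuming a well-conditioned `K`; not part of the package):

```python
import numpy as np
from scipy.stats import multivariate_normal

K = np.array([[1.0, 0.5], [0.5, 1.0]])
n = K.shape[0]

# Closed-form differential entropy of N(0, K): 0.5 * ln((2*pi*e)^n * det(K))
closed_form = 0.5 * (n * np.log(2 * np.pi * np.e) + np.linalg.slogdet(K)[1])
scipy_value = multivariate_normal(mean=None, cov=K, allow_singular=True).entropy()
assert np.isclose(closed_form, scipy_value)
```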
 
-     gp = gpflow.models.GPR(data=(Xu, np.zeros((len(Xu), 1))),
-                            kernel=kernel,
-                            noise_variance=noise_variance)
-     _, sigma_a = gp.predict_f(candidate_locs, full_cov=True)
-     sigma_a = sigma_a.numpy()[0]
-     cond_entropy = gaussian_entropy(sigma_a)
+ def get_mi(Xu: np.ndarray, X_objective: np.ndarray, noise_variance: float,
+            kernel: gpflow.kernels.Kernel) -> float:
+     """
+     Computes the Mutual Information (MI) between a set of sensing locations (`Xu`)
+     and a set of objective/candidate locations (`X_objective`) using a Gaussian
+     Process model. MI quantifies the reduction in uncertainty about `X_objective`
+     given `Xu`. Internally, it uses the `SLogMI` objective from
+     `sgptools.objectives` for numerical stability.
 
-     K = kernel(candidate_locs, full_cov=True).numpy()
-     K += noise_variance * np.eye(len(candidate_locs))
-     entropy = gaussian_entropy(K)
-
-     return float(entropy - cond_entropy)
+     Args:
+         Xu (np.ndarray): (m, d); NumPy array of sensing locations. `m` is the
+             number of sensing points, `d` is the dimensionality.
+         X_objective (np.ndarray): (n, d); NumPy array of candidate or objective
+             locations. `n` is the number of objective points, `d` is the dimensionality.
+         noise_variance (float): The noise variance of the Gaussian Process likelihood.
+         kernel (gpflow.kernels.Kernel): A GPflow kernel object used to compute covariances.
 
- def get_elbo(Xu, X_env, noise_variance, kernel, baseline=False):
-     """Computes the ELBO of the SGP, corrected to be positive
+     Returns:
+         float: The computed Mutual Information value.
+
+     Usage:
+         ```python
+         import numpy as np
+         import gpflow
+         from sgptools.utils.metrics import get_mi
+
+         # Dummy data
+         X_sensing_locs = np.array([[0.1, 0.1], [0.5, 0.5]], dtype=np.float64)
+         X_candidate_locs = np.array([[0.2, 0.2], [0.6, 0.6], [0.9, 0.9]], dtype=np.float64)
+         noise_var = 0.1
+         rbf_kernel = gpflow.kernels.SquaredExponential(lengthscales=1.0, variance=1.0)
+
+         mi_value = get_mi(X_sensing_locs, X_candidate_locs, noise_var, rbf_kernel)
+         ```
+     """
+     # SLogMI's constructor is assumed to accept X_objective as a NumPy array;
+     # if not, convert it with tf.constant(X_objective, dtype=tf.float64).
+     # The jitter is added to noise_variance inside SLogMI for numerical stability.
+     mi_model = SLogMI(X_objective=X_objective,
+                       kernel=kernel,
+                       noise_variance=noise_variance,
+                       jitter=1e-6)
+     # SLogMI's __call__ method expects a tf.Tensor for the sensing locations Xu
+     return float(mi_model(tf.constant(Xu, dtype=tf.float64)).numpy())
+
+
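
Note: version 1.2.0 computed MI explicitly as H(candidates) − H(candidates | Xu) via GPR posteriors; 2.0.0 delegates to `SLogMI`. For reference, the MI between noisy Gaussian observations at the two location sets can be written purely in log-determinants. A NumPy sketch of that identity (an illustration assuming independent Gaussian noise on both sets; `SLogMI`'s exact parameterization may differ):

```python
import numpy as np

def gaussian_mi(K_uu, K_oo, K_uo, noise_variance):
    """MI = 0.5 * [logdet(A) + logdet(B) - logdet(joint)] for jointly Gaussian sets."""
    m, n = K_uo.shape
    A = K_uu + noise_variance * np.eye(m)       # noisy covariance at Xu
    B = K_oo + noise_variance * np.eye(n)       # noisy covariance at X_objective
    joint = np.block([[A, K_uo], [K_uo.T, B]])  # joint covariance of both sets
    logdet = lambda M: np.linalg.slogdet(M)[1]
    return 0.5 * (logdet(A) + logdet(B) - logdet(joint))
```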
+ def get_elbo(Xu: np.ndarray,
+              X_env: np.ndarray,
+              noise_variance: float,
+              kernel: gpflow.kernels.Kernel,
+              baseline: bool = False) -> float:
+     """
+     Computes the Evidence Lower Bound (ELBO) of a Sparse Gaussian Process (SGP) model.
+     The ELBO is a lower bound on the marginal likelihood and is commonly used as
+     an optimization objective for sparse GPs. Optionally, a baseline can be
+     subtracted to ensure the ELBO is positive or to compare against a trivial model.
 
      Args:
-         Xu (ndarray): (m, d); Sensing locations
-         X_env (ndarray): (n, d); Data points used to approximate the bounds of the environment
-         noise_variance (float): data variance
-         kernel (gpflow.kernels.Kernel): gpflow kernel function
-         baseline (bool): If True, the ELBO is adjusted to be positive
+         Xu (np.ndarray): (m, d); NumPy array of inducing points. `m` is the number of
+             inducing points, `d` is the dimensionality.
+         X_env (np.ndarray): (n, d); NumPy array of data points representing the environment
+             or training data. `n` is the number of data points, `d` is the dimensionality.
+         noise_variance (float): The noise variance of the Gaussian Process likelihood.
+         kernel (gpflow.kernels.Kernel): A GPflow kernel object.
+         baseline (bool): If True, a baseline ELBO (computed with a single inducing
+             point at [0, 0]) is subtracted from the main ELBO. This can normalize
+             the ELBO value. Defaults to False.
 
      Returns:
-         elbo (float): ELBO of the SGP
+         float: The computed ELBO value.
+
+     Usage:
+         ```python
+         import numpy as np
+         import gpflow
+         from sgptools.utils.metrics import get_elbo
+
+         # Dummy data
+         X_environment = np.random.rand(100, 2) * 10  # Environment data
+         inducing_points = np.array([[2.0, 2.0], [8.0, 8.0]], dtype=np.float64)
+         noise_var = 0.1
+         rbf_kernel = gpflow.kernels.SquaredExponential(lengthscales=2.0, variance=1.0)
+
+         # Compute ELBO without baseline
+         elbo_no_baseline = get_elbo(inducing_points, X_environment, noise_var, rbf_kernel)
+
+         # Compute ELBO with baseline
+         elbo_with_baseline = get_elbo(inducing_points, X_environment, noise_var,
+                                       rbf_kernel, baseline=True)
+         ```
      """
+     # Convert Xu to a TensorFlow tensor for SGPR
+     tf_Xu = tf.constant(Xu, dtype=tf.float64)
+     # SGPR expects data as an (X, Y) tuple; no labels are needed for the ELBO's
+     # covariance terms, so pass zeros for Y
+     tf_X_env = tf.constant(X_env, dtype=tf.float64)
+     y_dummy = tf.zeros((tf_X_env.shape[0], 1), dtype=tf.float64)
+
+     baseline_value = 0.0
      if baseline:
-         sgpr = gpflow.models.SGPR(X_env,
-                                   noise_variance=noise_variance,
-                                   kernel=kernel,
-                                   inducing_variable=[[0, 0]])
-         baseline = sgpr.elbo().numpy()
-     else:
-         baseline = 0.0
-
-     sgpr = gpflow.models.SGPR(X_env,
-                               noise_variance=noise_variance,
-                               kernel=kernel,
-                               inducing_variable=Xu)
-     return (sgpr.elbo() - baseline).numpy()
-
- def get_kl(Xu, X_env, noise_variance, kernel):
-     """Computes the KL divergence between the SGP and the GP
+         # Create a temporary SGPR model with a single dummy inducing point
+         # to compute the baseline
+         sgpr_baseline = gpflow.models.SGPR(data=(tf_X_env, y_dummy),
+                                            noise_variance=noise_variance,
+                                            kernel=kernel,
+                                            inducing_variable=tf.constant(
+                                                [[0.0, 0.0]], dtype=tf.float64))
+         baseline_value = float(sgpr_baseline.elbo().numpy())
+
+     # Create the main SGPR model with the provided inducing points
+     sgpr_model = gpflow.models.SGPR(data=(tf_X_env, y_dummy),
+                                     noise_variance=noise_variance,
+                                     kernel=kernel,
+                                     inducing_variable=tf_Xu)
+
+     return float((sgpr_model.elbo() - baseline_value).numpy())
+
+
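
Note: because the labels are all zeros, the ELBO's quadratic data-fit term vanishes; what remains depends on the log-determinant and trace terms, i.e. on how well the low-rank approximation Qff matches Kff over `X_env`. A quick illustration (my sketch, using `get_elbo` as defined in this diff; with well-spread inducing points the ELBO typically increases with `m`):

```python
import numpy as np
import gpflow
from sgptools.utils.metrics import get_elbo

X_env = np.random.rand(200, 2)
kernel = gpflow.kernels.SquaredExponential(lengthscales=0.5)

for m in (1, 5, 25):
    Xu = np.random.rand(m, 2)  # random inducing sets of increasing size
    print(m, get_elbo(Xu, X_env, noise_variance=0.1, kernel=kernel))
```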
+ def get_kl(Xu: np.ndarray, X_env: np.ndarray, noise_variance: float,
+            kernel: gpflow.kernels.Kernel) -> float:
+     """
+     Computes the Kullback-Leibler (KL) divergence between a full Gaussian Process (GP)
+     and a Sparse Gaussian Process (SGP) approximation. This KL divergence term is
+     part of the ELBO objective in sparse GPs.
 
      Args:
-         Xu (ndarray): (m, d); Sensing locations
-         X_env (ndarray): (n, d); Data points used to approximate the bounds of the environment
-         noise_variance (float): data variance
-         kernel (gpflow.kernels.Kernel): gpflow kernel function
+         Xu (np.ndarray): (m, d); NumPy array of inducing points for the SGP.
+         X_env (np.ndarray): (n, d); NumPy array of data points representing the environment
+             or training data.
+         noise_variance (float): The noise variance of the Gaussian Process likelihood.
+         kernel (gpflow.kernels.Kernel): A GPflow kernel object.
 
      Returns:
-         kl (float): KL divergence between the SGP and the GP
+         float: The computed KL divergence value (specifically, the trace term
+             from the KL divergence in the ELBO formulation,
+             $0.5 \text{Tr}(K_{ff} - Q_{ff}) / \sigma^2$).
+
+     Usage:
+         ```python
+         import numpy as np
+         import gpflow
+         from sgptools.utils.metrics import get_kl
+
+         # Dummy data
+         X_environment = np.random.rand(100, 2) * 10
+         inducing_points = np.array([[2.0, 2.0], [8.0, 8.0]], dtype=np.float64)
+         noise_var = 0.1
+         rbf_kernel = gpflow.kernels.SquaredExponential(lengthscales=2.0, variance=1.0)
+
+         kl_value = get_kl(inducing_points, X_environment, noise_var, rbf_kernel)
+         ```
      """
-     sgpr = gpflow.models.SGPR(X_env,
-                               noise_variance=noise_variance,
-                               kernel=kernel,
-                               inducing_variable=Xu)
-
-     common = sgpr._common_calculation()
+     tf_Xu = tf.constant(Xu, dtype=tf.float64)
+     tf_X_env = tf.constant(X_env, dtype=tf.float64)
+     y_dummy = tf.zeros((tf_X_env.shape[0], 1), dtype=tf.float64)
+
+     sgpr_model = gpflow.models.SGPR(data=(tf_X_env, y_dummy),
+                                     noise_variance=noise_variance,
+                                     kernel=kernel,
+                                     inducing_variable=tf_Xu)
+
+     # Access common terms used in the ELBO calculation from GPflow's internals.
+     # This involves a private method (_common_calculation), so be aware of
+     # potential breaking changes in future GPflow versions.
+     common = sgpr_model._common_calculation()
      sigma_sq = common.sigma_sq
-     AAT = common.AAT
+     AAT = common.AAT  # AAT = A @ A.T, where A = L⁻¹Kuf/σ
 
-     x, _ = sgpr.data
-     kdiag = sgpr.kernel(x, full_cov=False)
+     # kdiag: diagonal of Kff (prior covariance at all data points)
+     kdiag = sgpr_model.kernel(tf_X_env, full_cov=False)
 
-     # tr(K) / σ²
+     # trace_k: Tr(Kff) / σ²
      trace_k = tf.reduce_sum(kdiag / sigma_sq)
-     # tr(Q) / σ²
+     # trace_q: Tr(Qff) / σ² = Tr(Kuf.T @ Kuu⁻¹ @ Kuf) / σ²
+     # From the ELBO derivation, Tr(Qff) / σ² equals Tr(AAT)
      trace_q = tf.reduce_sum(tf.linalg.diag_part(AAT))
-     # tr(K - Q) / σ²
-     trace = trace_k - trace_q
-     trace = 0.5 * trace
-
-     return float(trace.numpy())
 
- def get_rmse(y_pred, y_test):
-     """Computes the root-mean-square error between `y_pred` and `y_test`
+     # KL divergence trace term: 0.5 * Tr(Kff - Qff) / σ²
+     trace_term = 0.5 * (trace_k - trace_q)
+
+     return float(trace_term.numpy())
+
+
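
Note: the returned quantity is $0.5 \text{Tr}(K_{ff} - Q_{ff}) / \sigma^2$ with $Q_{ff} = K_{fu} K_{uu}^{-1} K_{uf}$, and it can be cross-checked against the GPflow internals with plain NumPy. A sketch (the jitter on `K_uu` is my addition for numerical stability):

```python
import numpy as np

def kl_trace_term(K_ff_diag, K_uf, K_uu, noise_variance, jitter=1e-8):
    # diag(Qff) = diag(Kfu @ Kuu^-1 @ Kuf), computed without forming the full matrix
    L = np.linalg.cholesky(K_uu + jitter * np.eye(K_uu.shape[0]))
    A = np.linalg.solve(L, K_uf)    # A = L^-1 @ Kuf
    q_diag = np.sum(A * A, axis=0)  # column sums of A*A give diag(Qff)
    return 0.5 * np.sum(K_ff_diag - q_diag) / noise_variance
```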
+ def get_rmse(y_pred: np.ndarray, y_test: np.ndarray) -> float:
+     """
+     Computes the Root Mean Square Error (RMSE) between predicted and ground truth values.
 
      Args:
-         y_pred (ndarray): (n, 1); Predicted data field estimate
-         y_test (ndarray): (n, 1); Ground truth data field
+         y_pred (np.ndarray): (n, 1); NumPy array of predicted values.
+         y_test (np.ndarray): (n, 1); NumPy array of ground truth values.
 
      Returns:
-         rmse (float): Computed RMSE
+         float: The computed RMSE.
+
+     Usage:
+         ```python
+         import numpy as np
+         from sgptools.utils.metrics import get_rmse
+
+         # Dummy data
+         predictions = np.array([[1.1], [2.2], [3.3]])
+         ground_truth = np.array([[1.0], [2.0], [3.0]])
+
+         rmse_value = get_rmse(predictions, ground_truth)
+         ```
      """
-     return np.sqrt(np.mean(np.square(y_pred - y_test)))
+     error = y_pred - y_test
+     return float(np.sqrt(np.mean(np.square(error))))
+
 
- def get_reconstruction(sensor_data, X_test, noise_variance, kernel):
-     """Computes the GP-based data field estimates with the solution placements as the training set
+ def get_reconstruction(
+         sensor_data: Tuple[np.ndarray, np.ndarray], X_test: np.ndarray,
+         noise_variance: float,
+         kernel: gpflow.kernels.Kernel) -> Tuple[np.ndarray, np.ndarray]:
+     """
+     Computes the Gaussian Process (GP)-based reconstruction (mean prediction and variance)
+     of a data field. The provided `sensor_data` serves as the training set for the GP model,
+     and predictions are made over `X_test`.
 
      Args:
-         sensor_data (ndarray tuple): ((m, d), (m, 1)); Sensing locations' input
-             and corresponding ground truth labels
-         X_test (ndarray): (n, d); Testing data input locations
-         noise_variance (float): data variance
-         kernel (gpflow.kernels.Kernel): gpflow kernel function
+         sensor_data (Tuple[np.ndarray, np.ndarray]): A tuple containing:
+             - Xu_X (np.ndarray): (m, d); Input locations from sensor measurements.
+             - Xu_y (np.ndarray): (m, 1); Corresponding labels (measurements) from sensors.
+         X_test (np.ndarray): (n, d); NumPy array of testing input locations
+             (points where the data field needs to be estimated).
+         noise_variance (float): The noise variance of the Gaussian Process likelihood.
+         kernel (gpflow.kernels.Kernel): A GPflow kernel object.
 
      Returns:
-         y_pred (ndarray): (n, 1); Predicted data field estimates
-         y_var (ndarray): (n, 1); Prediction variance at each location in the data field
+         Tuple[np.ndarray, np.ndarray]: A tuple containing:
+             - y_pred (np.ndarray): (n, 1); Predicted mean estimates of the data field at `X_test`.
+             - y_var (np.ndarray): (n, 1); Predicted variance of the data field at `X_test`.
+
+     Usage:
+         ```python
+         import numpy as np
+         import gpflow
+         from sgptools.utils.metrics import get_reconstruction
+
+         # Dummy sensor data (training data for the GP)
+         sensor_locs = np.array([[0.1, 0.1], [0.3, 0.3], [0.7, 0.7]], dtype=np.float64)
+         sensor_vals = np.array([[0.5], [1.5], [2.5]], dtype=np.float64)
+
+         # Dummy test locations (where we want predictions)
+         test_locs = np.array([[0.2, 0.2], [0.4, 0.4], [0.6, 0.6], [0.8, 0.8]], dtype=np.float64)
+
+         noise_var = 0.05
+         rbf_kernel = gpflow.kernels.SquaredExponential(lengthscales=1.0, variance=1.0)
+
+         predicted_means, predicted_vars = get_reconstruction(
+             (sensor_locs, sensor_vals), test_locs, noise_var, rbf_kernel
+         )
+         ```
      """
      Xu_X, Xu_y = sensor_data
 
-     # Get the GP predictions
-     gpr = gpflow.models.GPR((Xu_X, Xu_y),
+     # Initialize a GP Regression (GPR) model; hyperparameters are used as
+     # given, no training step is performed
+     gpr = gpflow.models.GPR(data=(Xu_X, Xu_y),
                              noise_variance=noise_variance,
                              kernel=kernel)
-     y_pred, y_var = gpr.predict_f(X_test)
-     y_pred = y_pred.numpy().reshape(-1, 1)
+
+     # Predict the mean and variance at the test locations
+     y_pred_tf, y_var_tf = gpr.predict_f(X_test)
+
+     # Convert TensorFlow tensors to NumPy arrays and reshape
+     y_pred = y_pred_tf.numpy().reshape(-1, 1)
+     y_var = y_var_tf.numpy().reshape(-1, 1)
 
      return y_pred, y_var
 
- def get_distance(X):
-     """Compute the length of a path (L2-norm)
+
+ def get_distance(X: np.ndarray) -> float:
+     """
+     Computes the total length of a path defined by a sequence of waypoints.
+     The length is calculated as the sum of Euclidean distances between consecutive waypoints.
 
      Args:
-         X (ndarray): (m, d); Waypoints of a path
+         X (np.ndarray): (m, d); NumPy array where each row represents a waypoint
+             and columns represent its coordinates (e.g., (x, y) or (x, y, z)).
+             `m` is the number of waypoints, `d` is the dimensionality.
 
      Returns:
-         dist (float): Total path length
+         float: The total length of the path.
+
+     Usage:
+         ```python
+         import numpy as np
+         from sgptools.utils.metrics import get_distance
+
+         # Example 2D path with 3 waypoints
+         path_waypoints_2d = np.array([[0.0, 0.0], [3.0, 4.0], [3.0, 7.0]])
+         distance_2d = get_distance(path_waypoints_2d)
+
+         # Example 3D path
+         path_waypoints_3d = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
+         distance_3d = get_distance(path_waypoints_3d)
+         ```
+     """
+     if X.shape[0] < 2:
+         return 0.0  # A path needs at least two points to have a length
+
+     # Compute the Euclidean distance (L2-norm) between consecutive points;
+     # `X[1:] - X[:-1]` gives the vector differences between adjacent waypoints
+     dist_segments = np.linalg.norm(X[1:] - X[:-1], axis=-1)
+
+     # Sum the lengths of all segments to get the total path length
+     total_distance = np.sum(dist_segments)
+     return float(total_distance)
+
+
+ def get_smse(y_pred: np.ndarray, y_test: np.ndarray, var: np.ndarray) -> float:
      """
-     dist = np.linalg.norm(X[1:] - X[:-1], axis=-1)
-     dist = np.sum(dist)
-     return dist
+     Computes the Standardized Mean Square Error (SMSE).
+     SMSE is a variant of MSE where each squared error term is divided by
+     the predicted variance. It is particularly useful in Bayesian contexts,
+     as it accounts for the model's uncertainty in its predictions.
+
+     Args:
+         y_pred (np.ndarray): (n, 1); NumPy array of predicted values.
+         y_test (np.ndarray): (n, 1); NumPy array of ground truth values.
+         var (np.ndarray): (n, 1); NumPy array of predicted variances for each prediction.
+
+     Returns:
+         float: The computed SMSE value.
+
+     Raises:
+         ValueError: If `var` contains zero or negative values, which would lead to
+             division by zero or invalid results.
 
+     Usage:
+         ```python
+         import numpy as np
+         from sgptools.utils.metrics import get_smse
 
- if __name__=='__main__':
-     pass
+         # Dummy data
+         predictions = np.array([[1.1], [2.2], [3.3]])
+         ground_truth = np.array([[1.0], [2.0], [3.0]])
+         # Predicted variances (must be positive)
+         variances = np.array([[0.01], [0.04], [0.09]])
+
+         smse_value = get_smse(predictions, ground_truth, variances)
+         ```
+     """
+     if np.any(var <= 0):
+         raise ValueError(
+             "Predicted variance (var) must be strictly positive for SMSE calculation."
+         )
+
+     error = y_pred - y_test
+     # Element-wise division by the predicted variance
+     smse_val = np.mean(np.square(error) / var)
+     return float(smse_val)
+
+
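
Note: as implemented, SMSE is the mean squared error standardized by the predictive variance, $\frac{1}{n}\sum_i (\hat{y}_i - y_i)^2 / \sigma_i^2$; for a well-calibrated Gaussian predictor it should be close to 1. The docstring's dummy values make this concrete:

```python
import numpy as np

predictions = np.array([[1.1], [2.2], [3.3]])
ground_truth = np.array([[1.0], [2.0], [3.0]])
variances = np.array([[0.01], [0.04], [0.09]])

# (0.1**2/0.01 + 0.2**2/0.04 + 0.3**2/0.09) / 3 = (1 + 1 + 1) / 3 = 1.0
smse = np.mean(np.square(predictions - ground_truth) / variances)
assert np.isclose(smse, 1.0)
```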
+ def get_nlpd(y_pred: np.ndarray, y_test: np.ndarray, var: np.ndarray) -> float:
+     """
+     Computes the Negative Log Predictive Density (NLPD).
+     NLPD is a measure of how well a probabilistic model predicts new data.
+     A lower NLPD indicates a better fit. For a Gaussian predictive distribution,
+     it is derived from the log-likelihood of the true observations under the
+     predicted Gaussian.
+
+     Args:
+         y_pred (np.ndarray): (n, 1); NumPy array of predicted mean values.
+         y_test (np.ndarray): (n, 1); NumPy array of ground truth values.
+         var (np.ndarray): (n, 1); NumPy array of predicted variances for each prediction.
+
+     Returns:
+         float: The computed NLPD value.
+
+     Raises:
+         ValueError: If `var` contains zero or negative values, which would lead to
+             an invalid logarithm or division by zero.
+
+     Usage:
+         ```python
+         import numpy as np
+         from sgptools.utils.metrics import get_nlpd
+
+         # Dummy data
+         predictions = np.array([[1.1], [2.2], [3.3]])
+         ground_truth = np.array([[1.0], [2.0], [3.0]])
+         # Predicted variances (must be positive)
+         variances = np.array([[0.01], [0.04], [0.09]])
+
+         nlpd_value = get_nlpd(predictions, ground_truth, variances)
+         ```
+     """
+     if np.any(var <= 0):
+         raise ValueError(
+             "Predicted variance (var) must be strictly positive for NLPD calculation."
+         )
+
+     error = y_pred - y_test
+     # Per-point NLPD: 0.5*log(2π) + 0.5*log(σ²) + 0.5*(error)²/σ²
+     nlpd_terms = (0.5 * np.log(2 * np.pi) + 0.5 * np.log(var) +
+                   0.5 * np.square(error) / var)
+
+     # Return the mean NLPD across all points
+     return float(np.mean(nlpd_terms))
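
Note: each term is the negative log-density of a Gaussian, $-\log \mathcal{N}(y_i \mid \hat{y}_i, \sigma_i^2) = \frac{1}{2}\log(2\pi\sigma_i^2) + \frac{(y_i - \hat{y}_i)^2}{2\sigma_i^2}$, and the function returns their mean. A minimal cross-check against SciPy (my sketch, not part of the package):

```python
import numpy as np
from scipy.stats import norm

y_pred = np.array([[1.1], [2.2], [3.3]])
y_test = np.array([[1.0], [2.0], [3.0]])
var = np.array([[0.01], [0.04], [0.09]])

# Mean negative log-density of y_test under N(y_pred, var)
nlpd_scipy = -np.mean(norm.logpdf(y_test, loc=y_pred, scale=np.sqrt(var)))
nlpd_manual = np.mean(0.5 * np.log(2 * np.pi * var) +
                      0.5 * np.square(y_pred - y_test) / var)
assert np.isclose(nlpd_scipy, nlpd_manual)
```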