dragon-ml-toolbox 14.8.0__py3-none-any.whl → 16.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (44)
  1. {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/METADATA +9 -5
  2. dragon_ml_toolbox-16.0.0.dist-info/RECORD +51 -0
  3. ml_tools/ETL_cleaning.py +20 -20
  4. ml_tools/ETL_engineering.py +23 -25
  5. ml_tools/GUI_tools.py +20 -20
  6. ml_tools/MICE_imputation.py +3 -3
  7. ml_tools/ML_callbacks.py +43 -26
  8. ml_tools/ML_configuration.py +204 -11
  9. ml_tools/ML_datasetmaster.py +198 -280
  10. ml_tools/ML_evaluation.py +132 -41
  11. ml_tools/ML_evaluation_multi.py +96 -35
  12. ml_tools/ML_inference.py +249 -207
  13. ml_tools/ML_models.py +13 -102
  14. ml_tools/ML_models_advanced.py +1 -1
  15. ml_tools/ML_optimization.py +12 -12
  16. ml_tools/ML_scaler.py +11 -11
  17. ml_tools/ML_sequence_datasetmaster.py +341 -0
  18. ml_tools/ML_sequence_evaluation.py +215 -0
  19. ml_tools/ML_sequence_inference.py +391 -0
  20. ml_tools/ML_sequence_models.py +139 -0
  21. ml_tools/ML_trainer.py +1237 -354
  22. ml_tools/ML_utilities.py +1 -1
  23. ml_tools/ML_vision_datasetmaster.py +73 -67
  24. ml_tools/ML_vision_evaluation.py +26 -6
  25. ml_tools/ML_vision_inference.py +117 -140
  26. ml_tools/ML_vision_models.py +1 -1
  27. ml_tools/ML_vision_transformers.py +121 -40
  28. ml_tools/PSO_optimization.py +6 -6
  29. ml_tools/SQL.py +4 -4
  30. ml_tools/{keys.py → _keys.py} +43 -0
  31. ml_tools/_schema.py +1 -1
  32. ml_tools/ensemble_evaluation.py +1 -1
  33. ml_tools/ensemble_inference.py +7 -33
  34. ml_tools/ensemble_learning.py +1 -1
  35. ml_tools/optimization_tools.py +2 -2
  36. ml_tools/path_manager.py +5 -5
  37. ml_tools/utilities.py +1 -2
  38. dragon_ml_toolbox-14.8.0.dist-info/RECORD +0 -49
  39. ml_tools/RNN_forecast.py +0 -56
  40. ml_tools/_ML_vision_recipe.py +0 -88
  41. {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/WHEEL +0 -0
  42. {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE +0 -0
  43. {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  44. {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,215 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from sklearn.metrics import (
6
+ mean_squared_error,
7
+ mean_absolute_error,
8
+ r2_score,
9
+ median_absolute_error,
10
+ )
11
+ from pathlib import Path
12
+ from typing import Union, Optional
13
+
14
+ from .path_manager import make_fullpath
15
+ from ._logger import _LOGGER
16
+ from ._script_info import _script_info
17
+ from .ML_configuration import SequenceValueMetricsFormat, SequenceSequenceMetricsFormat
18
+
19
+
20
# Names exported by this module (also the list handed to `_script_info` in `info()`).
__all__ = ["sequence_to_value_metrics", "sequence_to_sequence_metrics"]

# Resolution, in dots per inch, passed to every `plt.subplots` call in this module.
DPI_value = 250
26
+
27
+
28
def sequence_to_value_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    save_dir: Union[str, Path],
    config: Optional[SequenceValueMetricsFormat] = None
):
    """
    Saves regression metrics and plots for sequence-to-value (many-to-one) tasks.

    Three files are written into `save_dir`:
        - "sequence_to_value_report.txt": RMSE, MAE, MedAE and R².
        - "sequence_to_value_residual_plot.svg": residuals (true - predicted) vs. predictions.
        - "sequence_to_value_true_vs_predicted_plot.svg": predictions vs. ground truth,
          with the identity line drawn over the range of `y_true`.

    The Matplotlib font size from `config` is applied only while the plots are
    being drawn; the global rcParams are restored afterwards, even on failure.

    Args:
        y_true (np.ndarray): Ground truth values. Any array-like is accepted and flattened to 1D.
        y_pred (np.ndarray): Predicted values. Any array-like is accepted and flattened to 1D.
        save_dir (str | Path): Directory to save plots and report.
        config (SequenceValueMetricsFormat): Formatting configuration object.
            A default instance is created when None.
    """
    # --- Ensure 1D array input ---
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)

    # --- Parse Config or use defaults ---
    if config is None:
        config = SequenceValueMetricsFormat()

    # --- Calculate Metrics ---
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    medae = median_absolute_error(y_true, y_pred)

    report_lines = [
        "--- Sequence-to-Value Regression Report ---",
        f" Root Mean Squared Error (RMSE): {rmse:.4f}",
        f" Mean Absolute Error (MAE): {mae:.4f}",
        f" Median Absolute Error (MedAE): {medae:.4f}",
        f" Coefficient of Determination (R²): {r2:.4f}"
    ]
    report_string = "\n".join(report_lines)

    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")

    # --- Save text report ---
    report_path = save_dir_path / "sequence_to_value_report.txt"
    # Explicit encoding: the report contains a non-ASCII character ("²"), and the
    # platform default encoding is not the same everywhere.
    report_path.write_text(report_string, encoding="utf-8")
    _LOGGER.info(f"📝 Seq-to-Value report saved as '{report_path.name}'")

    # rc_context restores the global rcParams on exit, including when plotting raises.
    with plt.rc_context({'font.size': config.font_size}):
        # --- Save residual plot ---
        residuals = y_true - y_pred
        fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=DPI_value)
        try:
            ax_res.scatter(y_pred, residuals,
                           alpha=config.scatter_alpha,
                           color=config.scatter_color)
            ax_res.axhline(0, color=config.residual_line_color, linestyle='--')
            ax_res.set_xlabel("Predicted Values")
            ax_res.set_ylabel("Residuals")
            ax_res.set_title("Sequence-to-Value Residual Plot")
            ax_res.grid(True)
            fig_res.tight_layout()
            res_path = save_dir_path / "sequence_to_value_residual_plot.svg"
            # Save through the figure object instead of the implicit "current figure".
            fig_res.savefig(res_path)
            _LOGGER.info(f"📈 Seq-to-Value residual plot saved as '{res_path.name}'")
        finally:
            plt.close(fig_res)

        # --- Save true vs predicted plot ---
        fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=DPI_value)
        try:
            ax_tvp.scatter(y_true, y_pred,
                           alpha=config.scatter_alpha,
                           color=config.scatter_color)
            # Identity line ("perfect prediction") spanning the range of the ground truth.
            true_min, true_max = y_true.min(), y_true.max()
            ax_tvp.plot([true_min, true_max], [true_min, true_max],
                        linestyle='--',
                        lw=2,
                        color=config.ideal_line_color)
            ax_tvp.set_xlabel('True Values')
            ax_tvp.set_ylabel('Predictions')
            ax_tvp.set_title('Sequence-to-Value: True vs. Predicted')
            ax_tvp.grid(True)
            fig_tvp.tight_layout()
            tvp_path = save_dir_path / "sequence_to_value_true_vs_predicted_plot.svg"
            fig_tvp.savefig(tvp_path)
            _LOGGER.info(f"📉 Seq-to-Value True vs. Predicted plot saved as '{tvp_path.name}'")
        finally:
            plt.close(fig_tvp)
116
+
117
+
118
def sequence_to_sequence_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    save_dir: Union[str, Path],
    config: Optional[SequenceSequenceMetricsFormat] = None
):
    """
    Saves per-step regression metrics for sequence-to-sequence (many-to-many) tasks.

    RMSE and MAE are computed independently for every position along the
    sequence axis (axis 1). Two files are written into `save_dir`:
        - "sequence_to_sequence_report.csv": columns "step" (1-based), "rmse", "mae".
        - "sequence_to_sequence_metrics_plot.svg": RMSE (left axis) and MAE
          (right axis) plotted against the prediction step.

    The Matplotlib font size from `config` is applied only while the plot is
    being drawn; the global rcParams are restored afterwards, even on failure.

    Args:
        y_true (np.ndarray): Ground truth sequences (n_samples, sequence_length).
        y_pred (np.ndarray): Predicted sequences (n_samples, sequence_length).
        save_dir (str | Path): Directory to save plots and report.
        config (SequenceSequenceMetricsFormat): Formatting configuration object.
            A default instance is created when None.

    Raises:
        ValueError: If either input is not 2D, or if the two shapes differ.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.ndim != 2 or y_pred.ndim != 2:
        _LOGGER.error(f"Input arrays must be 2D (n_samples, sequence_length). Got y_true: {y_true.shape}, y_pred: {y_pred.shape}")
        raise ValueError("Invalid input dimensions for sequence-to-sequence metrics.")

    if y_true.shape != y_pred.shape:
        _LOGGER.error(f"Input shapes do not match. Got y_true: {y_true.shape}, y_pred: {y_pred.shape}")
        raise ValueError("Mismatched input shapes.")

    # --- Parse Config or use defaults ---
    if config is None:
        config = SequenceSequenceMetricsFormat()

    sequence_length = y_true.shape[1]
    steps = list(range(1, sequence_length + 1))

    # --- Calculate metrics for each time step ---
    # multioutput='raw_values' yields one score per column, i.e. per time step,
    # which replaces a Python-level loop over the sequence axis.
    per_step_rmse = np.sqrt(mean_squared_error(y_true, y_pred, multioutput='raw_values'))
    per_step_mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')

    # --- Create and save DataFrame ---
    report_df = pd.DataFrame({
        "step": steps,
        "rmse": per_step_rmse,
        "mae": per_step_mae
    })

    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
    report_path = save_dir_path / "sequence_to_sequence_report.csv"
    report_df.to_csv(report_path, index=False)
    _LOGGER.info(f"📝 Seq-to-Seq per-step report saved as '{report_path.name}'")

    # --- Create and save plot ---
    # rc_context restores the global rcParams on exit, including when plotting raises.
    with plt.rc_context({'font.size': config.font_size}):
        fig, ax1 = plt.subplots(figsize=config.plot_figsize, dpi=DPI_value)
        try:
            # Plot RMSE on the primary (left) y-axis
            color_rmse = config.rmse_color
            ax1.set_xlabel('Prediction Step')
            ax1.set_ylabel('RMSE', color=color_rmse)
            ax1.plot(steps, per_step_rmse, config.rmse_marker, color=color_rmse, label='RMSE')
            ax1.tick_params(axis='y', labelcolor=color_rmse)
            ax1.grid(True, linestyle=config.grid_style)

            # Plot MAE on a secondary (right) y-axis sharing the same x-axis
            ax2 = ax1.twinx()
            color_mae = config.mae_color
            ax2.set_ylabel('MAE', color=color_mae)
            ax2.plot(steps, per_step_mae, config.mae_marker, color=color_mae, label='MAE')
            ax2.tick_params(axis='y', labelcolor=color_mae)

            fig.suptitle('Sequence-to-Sequence Metrics (Per-Step)')

            # Merge the handles of both axes into a single legend
            lines, labels = ax1.get_legend_handles_labels()
            lines2, labels2 = ax2.get_legend_handles_labels()
            ax2.legend(lines + lines2, labels + labels2, loc='best')

            fig.tight_layout(rect=(0, 0.03, 1, 0.95))  # Adjust for suptitle

            plot_path = save_dir_path / "sequence_to_sequence_metrics_plot.svg"
            # Save through the figure object instead of the implicit "current figure".
            fig.savefig(plot_path)
            _LOGGER.info(f"📈 Seq-to-Seq per-step metrics plot saved as '{plot_path.name}'")
        finally:
            plt.close(fig)
212
+
213
+
214
def info():
    """Passes this module's `__all__` list to `_script_info`."""
    exported_names = __all__
    _script_info(exported_names)
@@ -0,0 +1,391 @@
1
+ import torch
2
+ from torch import nn
3
+ import numpy as np
4
+ from pathlib import Path
5
+ from typing import Union, Literal, Dict, Any, Optional
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+
9
+ from .ML_scaler import DragonScaler
10
+ from ._script_info import _script_info
11
+ from ._logger import _LOGGER
12
+ from .path_manager import make_fullpath, sanitize_filename
13
+ from ._keys import PyTorchInferenceKeys, MLTaskKeys, PyTorchCheckpointKeys
14
+ from .ML_inference import _BaseInferenceHandler
15
+
16
+
17
# Names exported by this module (also the list handed to `_script_info` in `info()`).
__all__ = ["DragonSequenceInferenceHandler"]
20
+
21
+
22
class DragonSequenceInferenceHandler(_BaseInferenceHandler):
    """
    Handles loading a PyTorch sequence model's state and performing inference
    for univariate sequence tasks.

    This handler automatically scales inputs and de-scales outputs.
    """
    def __init__(self,
                 model: nn.Module,
                 state_dict: Union[str, Path],
                 prediction_mode: Literal["sequence-to-sequence", "sequence-to-value"],
                 scaler: Union[DragonScaler, str, Path],
                 device: str = 'cpu'):
        """
        Initializes the handler for sequence tasks.

        Args:
            model (nn.Module): An instantiated PyTorch model architecture.
            state_dict (str | Path): Path to the saved .pth model state_dict file.
            prediction_mode (str): The type of sequence task.
            scaler (DragonScaler | str | Path): A DragonScaler instance or
                the file path to a saved DragonScaler state. This is required
                to correctly scale inputs and de-scale predictions.
            device (str): The device to run inference on ('cpu', 'cuda', 'mps').

        Raises:
            ValueError: If `prediction_mode` is not recognized, or if no scaler
                is available after the parent constructor has run.
        """
        # Call the parent constructor to handle model loading and device
        super().__init__(model, state_dict, device, scaler)

        self.sequence_length: Optional[int] = None
        self.initial_sequence: Optional[np.ndarray] = None

        if prediction_mode not in [MLTaskKeys.SEQUENCE_SEQUENCE, MLTaskKeys.SEQUENCE_VALUE]:
            _LOGGER.error(f"'prediction_mode' not recognized: '{prediction_mode}'.")
            raise ValueError()
        self.prediction_mode = prediction_mode

        if self.scaler is None:
            _LOGGER.error("A 'scaler' is required for DragonSequenceInferenceHandler to scale inputs and de-scale predictions.")
            raise ValueError()

        # Load sequence length from the loaded dict (populated by _BaseInferenceHandler)
        if PyTorchCheckpointKeys.SEQUENCE_LENGTH in self._loaded_data_dict:
            try:
                self.sequence_length = int(self._loaded_data_dict[PyTorchCheckpointKeys.SEQUENCE_LENGTH])
                _LOGGER.info(f"'{PyTorchCheckpointKeys.SEQUENCE_LENGTH}' found and set to {self.sequence_length}")
            except Exception as e_int:
                # Best effort: a malformed value only disables length validation later on.
                _LOGGER.warning(f"State Dictionary has the key '{PyTorchCheckpointKeys.SEQUENCE_LENGTH}' but an error occurred when retrieving it:\n{e_int}")
        else:
            _LOGGER.warning(f"'{PyTorchCheckpointKeys.SEQUENCE_LENGTH}' not found in model file. Forecasting validation will be skipped.")

        # Load initial sequence
        if PyTorchCheckpointKeys.INITIAL_SEQUENCE in self._loaded_data_dict:
            try:
                self.initial_sequence = self._loaded_data_dict[PyTorchCheckpointKeys.INITIAL_SEQUENCE]
                _LOGGER.info(f"Default 'initial_sequence' for forecasting loaded from model file.")
                # Optional: Validate shape (only warns, the sequence is kept either way)
                if self.sequence_length and len(self.initial_sequence) != self.sequence_length:  # type: ignore
                    _LOGGER.warning(f"Loaded 'initial_sequence' length ({len(self.initial_sequence)}) mismatches 'sequence_length' ({self.sequence_length}).")  # type: ignore
            except Exception as e_seq:
                _LOGGER.warning(f"State Dictionary has the key '{PyTorchCheckpointKeys.INITIAL_SEQUENCE}' but an error occurred when retrieving it:\n{e_seq}")
        else:
            _LOGGER.info("No default 'initial_sequence' found in model file. Must be provided for forecasting.")

    def _preprocess_input(self, features: torch.Tensor) -> torch.Tensor:
        """
        Casts the input sequences to float, applies scaling, and moves them to the correct device.

        Overrides _BaseInferenceHandler._preprocess_input.

        Args:
            features (torch.Tensor): Input tensor of shape (batch_size, sequence_length).

        Returns:
            torch.Tensor: Scaled tensor of the same shape, on the handler's device.

        Raises:
            RuntimeError: If no scaler is available.
        """
        if self.scaler is None:
            # This check is redundant due to __init__ check, but good for safety.
            _LOGGER.error("Scaler is not available for preprocessing.")
            raise RuntimeError()

        features_tensor = features.float()

        # The scaler is fed a single column: (batch, seq_len) -> (batch * seq_len, 1)
        batch_size, seq_len = features_tensor.shape
        features_flat = features_tensor.reshape(-1, 1)

        scaled_flat = self.scaler.transform(features_flat)

        # (batch * seq_len, 1) -> (batch, seq_len)
        scaled_features = scaled_flat.reshape(batch_size, seq_len)

        return scaled_features.to(self.device)

    def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Core batch prediction method for sequences.
        Runs a batch of sequences through the model, de-scales the output,
        and returns the predictions.

        Args:
            features (np.ndarray | torch.Tensor): A 2D array/tensor of input sequences, shape (batch_size, sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction tensors under
            `PyTorchInferenceKeys.PREDICTIONS`: shape (batch_size,) for
            'sequence-to-value', and the model's 2D output shape for
            'sequence-to-sequence'.

        Raises:
            ValueError: If `features` is not 2D.
            RuntimeError: If the scaler is missing or the prediction mode is invalid.
        """
        if features.ndim != 2:
            _LOGGER.error("Input for batch prediction must be a 2D array or tensor (batch_size, sequence_length).")
            raise ValueError()

        if isinstance(features, np.ndarray):
            features_tensor = torch.from_numpy(features).float()
        else:
            features_tensor = features.float()

        # _preprocess_input scales the data and moves it to the correct device
        input_tensor = self._preprocess_input(features_tensor)

        with torch.no_grad():
            scaled_output = self.model(input_tensor)

        # De-scale the output using the scaler
        if self.scaler is None:  # Should be impossible due to __init__
            raise RuntimeError("Scaler not found for de-scaling.")

        if self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
            # One value per sample: reshape to a single column for the scaler, then drop it again
            scaled_output_reshaped = scaled_output.reshape(-1, 1)
            descaled_output = self.scaler.inverse_transform(scaled_output_reshaped)
            descaled_output = descaled_output.squeeze(-1)  # (batch)

        elif self.prediction_mode == MLTaskKeys.SEQUENCE_SEQUENCE:
            # scaled_output is (batch, seq_len): flatten to one column, de-scale, restore shape
            batch_size, seq_len = scaled_output.shape
            scaled_flat = scaled_output.reshape(-1, 1)
            descaled_flat = self.scaler.inverse_transform(scaled_flat)
            descaled_output = descaled_flat.reshape(batch_size, seq_len)

        else:
            # Should not happen
            _LOGGER.error(f"Invalid prediction mode: {self.prediction_mode}")
            raise RuntimeError()

        return {PyTorchInferenceKeys.PREDICTIONS: descaled_output}

    def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Core single-sample prediction method for sequences.
        Runs a single sequence through the model, de-scales the output,
        and returns the prediction.

        Args:
            features (np.ndarray | torch.Tensor): A 1D array/tensor of
                input features, shape (sequence_length). A 2D input of shape
                (1, sequence_length) is accepted as well.

        Returns:
            A dictionary containing the de-scaled prediction tensor.

        Raises:
            ValueError: If the input does not describe exactly one sequence.
        """
        if features.ndim == 1:
            features = features.reshape(1, -1)  # Reshape (seq_len) to (1, seq_len)

        # Check the rank first: a 0-dim input has no shape[0], and must end in
        # the ValueError below rather than an IndexError.
        if features.ndim != 2 or features.shape[0] != 1:
            _LOGGER.error("The 'predict()' method is for a single sequence (1D tensor). Use 'predict_batch()' for multiple sequences (2D tensor).")
            raise ValueError()

        batch_results = self.predict_batch(features)

        # Extract the first (and only) result from the batch output
        # For seq-to-value, result is shape ()
        # For seq-to-seq, result is shape (seq_len)
        single_results = {key: value[0] for key, value in batch_results.items()}
        return single_results

    # --- NumPy Convenience Wrappers (on CPU) ---

    def predict_batch_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, np.ndarray]:
        """
        Convenience wrapper for predict_batch that returns NumPy arrays.

        Args:
            features (np.ndarray | torch.Tensor): A 2D array/tensor of
                input sequences, shape (batch_size, sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction as a NumPy array.
        """
        tensor_results = self.predict_batch(features)
        numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
        return numpy_results

    def predict_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
        """
        Convenience wrapper for predict that returns NumPy arrays or scalars.

        Args:
            features (np.ndarray | torch.Tensor): A 1D array/tensor of
                input features, shape (sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction.
            - For 'sequence-to-value', the value is a Python scalar.
            - For 'sequence-to-sequence', the value is a 1D NumPy array.
        """
        tensor_results = self.predict(features)
        prediction = tensor_results[PyTorchInferenceKeys.PREDICTIONS]

        if self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
            # Prediction is a 0-dim tensor, .item() gets the scalar
            return {PyTorchInferenceKeys.PREDICTIONS: prediction.item()}
        else:  # sequence-to-sequence
            # Prediction is a 1D tensor
            return {PyTorchInferenceKeys.PREDICTIONS: prediction.cpu().numpy()}

    def forecast(self,
                 n_steps: int,
                 initial_sequence: Optional[Union[np.ndarray, torch.Tensor]]=None) -> np.ndarray:
        """
        Autoregressively forecasts 'n_steps' into the future.

        This method works for both 'sequence-to-value' and
        'sequence-to-sequence' models. At every step the model is run on the
        current (scaled) window, one new value is taken from its output (the
        only value for 'sequence-to-value', the last one for
        'sequence-to-sequence'), and the window is rolled forward by one.

        If 'initial_sequence' is not provided, this method will use the
        default sequence that was saved with the model (if available).

        Args:
            n_steps (int): The number of future time steps to predict.
            initial_sequence (np.ndarray | torch.Tensor): The sequence
                to start forecasting from. If None, uses the loaded default.
                This should be a 1D array of *un-scaled* data.

        Returns:
            np.ndarray: A 1D array containing the 'n_steps' forecasted values.

        Raises:
            ValueError: If no starting sequence is available, if it is not 1D,
                or if its length does not match the known 'sequence_length'.
            RuntimeError: If the scaler is missing.
        """
        # --- Validation ---
        if initial_sequence is None:
            if self.initial_sequence is None:
                _LOGGER.error("No 'initial_sequence' provided and no default sequence was loaded. Cannot forecast.")
                raise ValueError()
            _LOGGER.info("Using default 'initial_sequence' loaded from model file for forecast.")
            source_sequence = self.initial_sequence
        else:
            source_sequence = initial_sequence

        # torch.as_tensor accepts NumPy arrays, tensors and plain sequences alike,
        # so a default sequence stored in the checkpoint as a tensor or a list
        # works as well as an ndarray.
        initial_sequence_tensor = torch.as_tensor(source_sequence).float()

        if initial_sequence_tensor.ndim != 1:
            _LOGGER.error(f"initial_sequence must be a 1D array. Got {initial_sequence_tensor.ndim} dimensions.")
            raise ValueError()

        if self.sequence_length is not None:
            if len(initial_sequence_tensor) != self.sequence_length:
                _LOGGER.error(f"Input sequence length ({len(initial_sequence_tensor)}) does not match model's required sequence_length ({self.sequence_length}).")
                raise ValueError()
        else:
            _LOGGER.warning("Model's 'sequence_length' is unknown. Cannot validate input sequence length. Assuming it is correct.")

        # --- Pre-processing ---
        if self.scaler is None:  # Should be impossible due to __init__
            raise RuntimeError("Scaler not found for forecasting.")

        # Scale the entire initial sequence: (seq_len) -> (seq_len, 1) -> (seq_len)
        scaled_sequence_flat = self.scaler.transform(initial_sequence_tensor.reshape(-1, 1))
        current_scaled_sequence = scaled_sequence_flat.squeeze(-1).to(self.device)

        descaled_predictions = []

        # --- Autoregressive Loop ---
        self.model.eval()  # Ensure model is in eval mode
        with torch.no_grad():
            for _ in range(n_steps):
                # (seq_len) -> (1, seq_len)
                input_tensor = current_scaled_sequence.reshape(1, -1)

                # Flatten the output explicitly. A bare .squeeze() would also
                # collapse a length-1 output sequence to a 0-dim tensor, which
                # cannot be indexed with [-1].
                flat_output = self.model(input_tensor).reshape(-1)

                # Extract the single new prediction as a 0-dim tensor
                if self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
                    # Exactly one value is expected; reshape(()) fails otherwise
                    scaled_prediction = flat_output.reshape(())
                else:  # MLTaskKeys.SEQUENCE_SEQUENCE
                    # The last value of the predicted sequence is the new step
                    scaled_prediction = flat_output[-1]

                # De-scale the prediction for storage; the scaler takes a (1, 1) input
                descaled_prediction = self.scaler.inverse_transform(scaled_prediction.reshape(1, 1)).item()
                descaled_predictions.append(descaled_prediction)

                # "autoregression": roll the window by dropping the first value and appending the new scaled prediction.
                # .unsqueeze(0) is needed to make the 0-dim tensor 1-dim for cat
                current_scaled_sequence = torch.cat((current_scaled_sequence[1:], scaled_prediction.unsqueeze(0)))

        return np.array(descaled_predictions)

    def plot_forecast(self,
                      n_steps: int,
                      save_dir: Union[str, Path],
                      filename: str = "forecast_plot.svg",
                      initial_sequence: Optional[Union[np.ndarray, torch.Tensor]]=None):
        """
        Runs a forecast and saves a plot of the results.

        The starting sequence is drawn over time steps 0..seq_len-1 and the
        forecast over the following 'n_steps' steps, with a vertical marker at
        the last historical step. A failure while writing the file is logged,
        not raised.

        Args:
            n_steps (int): The number of future time steps to predict.
            save_dir (str | Path): Directory to save the plot.
            filename (str, optional): Name for the saved plot file.
            initial_sequence (np.ndarray | torch.Tensor | None): The sequence
                to start forecasting from. If None, uses the loaded default.
        """
        # --- 1. Get Forecast Data ---
        predictions = self.forecast(n_steps=n_steps,
                                    initial_sequence=initial_sequence)

        # --- 2. Determine which initial sequence was used for plotting ---
        if initial_sequence is None:
            plot_initial_sequence = self.initial_sequence
            if plot_initial_sequence is None:  # Should be caught by forecast() but good to check
                _LOGGER.error("Cannot plot: No 'initial_sequence' provided and no default found.")
                return
        else:
            plot_initial_sequence = initial_sequence

        # Matplotlib needs host-side data: tensors are detached and copied to
        # the CPU, anything else is viewed as a NumPy array.
        if isinstance(plot_initial_sequence, torch.Tensor):
            plot_initial_sequence = plot_initial_sequence.detach().cpu().numpy()
        else:
            plot_initial_sequence = np.asarray(plot_initial_sequence)

        # --- 3. Create X-axis indices ---
        # The x-axis will be integer time steps
        seq_len = len(plot_initial_sequence)
        history_x = np.arange(0, seq_len)
        forecast_x = np.arange(seq_len, seq_len + n_steps)

        # --- 4. Resolve the output path ---
        # Done before the figure exists so that a path error cannot leave an
        # open figure behind.
        dir_path = make_fullpath(save_dir, make=True, enforce="directory")
        full_path = dir_path / sanitize_filename(filename)

        # --- 5. Plot ---
        sns.set_theme(style="darkgrid")
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            # Plot the historical data
            ax.plot(history_x, plot_initial_sequence, label="Historical Data")

            # Plot the forecasted data
            ax.plot(forecast_x, predictions, label="Forecasted Data", linestyle="--")

            # Add a vertical line to mark the start of the forecast
            ax.axvline(x=history_x[-1], color='red', linestyle=':', label='Forecast Start')

            ax.set_title(f"{n_steps}-Step Forecast")
            ax.set_xlabel("Time Step")
            ax.set_ylabel("Value")
            ax.legend()
            fig.tight_layout()

            # --- 6. Save Plot ---
            try:
                fig.savefig(full_path)
                _LOGGER.info(f"📈 Forecast plot saved to '{full_path.name}'.")
            except Exception as e:
                # Saving is best effort: report the failure without raising.
                _LOGGER.error(f"Failed to save plot:\n{e}")
        finally:
            plt.close(fig)
388
+
389
+
390
def info():
    """Passes this module's `__all__` list to `_script_info`."""
    exported_names = __all__
    _script_info(exported_names)