dragon-ml-toolbox 13.3.0__py3-none-any.whl → 16.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/METADATA +20 -6
  2. dragon_ml_toolbox-16.2.0.dist-info/RECORD +51 -0
  3. {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +10 -0
  4. ml_tools/ETL_cleaning.py +20 -20
  5. ml_tools/ETL_engineering.py +23 -25
  6. ml_tools/GUI_tools.py +20 -20
  7. ml_tools/MICE_imputation.py +207 -5
  8. ml_tools/ML_callbacks.py +43 -26
  9. ml_tools/ML_configuration.py +788 -0
  10. ml_tools/ML_datasetmaster.py +303 -448
  11. ml_tools/ML_evaluation.py +351 -93
  12. ml_tools/ML_evaluation_multi.py +139 -42
  13. ml_tools/ML_inference.py +290 -209
  14. ml_tools/ML_models.py +33 -106
  15. ml_tools/ML_models_advanced.py +323 -0
  16. ml_tools/ML_optimization.py +12 -12
  17. ml_tools/ML_scaler.py +11 -11
  18. ml_tools/ML_sequence_datasetmaster.py +341 -0
  19. ml_tools/ML_sequence_evaluation.py +219 -0
  20. ml_tools/ML_sequence_inference.py +391 -0
  21. ml_tools/ML_sequence_models.py +139 -0
  22. ml_tools/ML_trainer.py +1604 -179
  23. ml_tools/ML_utilities.py +351 -4
  24. ml_tools/ML_vision_datasetmaster.py +1540 -0
  25. ml_tools/ML_vision_evaluation.py +284 -0
  26. ml_tools/ML_vision_inference.py +405 -0
  27. ml_tools/ML_vision_models.py +641 -0
  28. ml_tools/ML_vision_transformers.py +284 -0
  29. ml_tools/PSO_optimization.py +6 -6
  30. ml_tools/SQL.py +4 -4
  31. ml_tools/_keys.py +171 -0
  32. ml_tools/_schema.py +1 -1
  33. ml_tools/custom_logger.py +37 -14
  34. ml_tools/data_exploration.py +502 -93
  35. ml_tools/ensemble_evaluation.py +54 -11
  36. ml_tools/ensemble_inference.py +7 -33
  37. ml_tools/ensemble_learning.py +1 -1
  38. ml_tools/math_utilities.py +1 -1
  39. ml_tools/optimization_tools.py +2 -2
  40. ml_tools/path_manager.py +5 -5
  41. ml_tools/serde.py +2 -2
  42. ml_tools/utilities.py +192 -4
  43. dragon_ml_toolbox-13.3.0.dist-info/RECORD +0 -41
  44. ml_tools/RNN_forecast.py +0 -56
  45. ml_tools/keys.py +0 -87
  46. {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/WHEEL +0 -0
  47. {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE +0 -0
  48. {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,391 @@
1
+ import torch
2
+ from torch import nn
3
+ import numpy as np
4
+ from pathlib import Path
5
+ from typing import Union, Literal, Dict, Any, Optional
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+
9
+ from .ML_scaler import DragonScaler
10
+ from ._script_info import _script_info
11
+ from ._logger import _LOGGER
12
+ from .path_manager import make_fullpath, sanitize_filename
13
+ from ._keys import PyTorchInferenceKeys, MLTaskKeys, PyTorchCheckpointKeys
14
+ from .ML_inference import _BaseInferenceHandler
15
+
16
+
17
+ __all__ = [
18
+ "DragonSequenceInferenceHandler"
19
+ ]
20
+
21
+
22
class DragonSequenceInferenceHandler(_BaseInferenceHandler):
    """
    Handles loading a PyTorch sequence model's state and performing inference
    for univariate sequence tasks.

    This handler automatically scales inputs and de-scales outputs.
    """
    def __init__(self,
                 model: nn.Module,
                 state_dict: Union[str, Path],
                 prediction_mode: Literal["sequence-to-sequence", "sequence-to-value"],
                 scaler: Union[DragonScaler, str, Path],
                 device: str = 'cpu'):
        """
        Initializes the handler for sequence tasks.

        Args:
            model (nn.Module): An instantiated PyTorch model architecture.
            state_dict (str | Path): Path to the saved .pth model state_dict file.
            prediction_mode (str): The type of sequence task.
            scaler (DragonScaler | str | Path): A DragonScaler instance or
                the file path to a saved DragonScaler state. This is required
                to correctly scale inputs and de-scale predictions.
            device (str): The device to run inference on ('cpu', 'cuda', 'mps').

        Raises:
            ValueError: If 'prediction_mode' is not recognized or no scaler is available.
        """
        # The parent constructor handles model loading, device and scaler setup
        super().__init__(model, state_dict, device, scaler)

        self.sequence_length: Optional[int] = None
        self.initial_sequence: Optional[np.ndarray] = None

        if prediction_mode not in [MLTaskKeys.SEQUENCE_SEQUENCE, MLTaskKeys.SEQUENCE_VALUE]:
            _LOGGER.error(f"'prediction_mode' not recognized: '{prediction_mode}'.")
            raise ValueError()
        self.prediction_mode = prediction_mode

        if self.scaler is None:
            _LOGGER.error("A 'scaler' is required for DragonSequenceInferenceHandler to scale inputs and de-scale predictions.")
            raise ValueError()

        # Optional metadata stored next to the weights (dict populated by _BaseInferenceHandler).
        # Failures here are deliberately non-fatal: the handler stays usable without them.
        if PyTorchCheckpointKeys.SEQUENCE_LENGTH in self._loaded_data_dict:
            try:
                self.sequence_length = int(self._loaded_data_dict[PyTorchCheckpointKeys.SEQUENCE_LENGTH])
                _LOGGER.info(f"'{PyTorchCheckpointKeys.SEQUENCE_LENGTH}' found and set to {self.sequence_length}")
            except Exception as e_int:
                _LOGGER.warning(f"State Dictionary has the key '{PyTorchCheckpointKeys.SEQUENCE_LENGTH}' but an error occurred when retrieving it:\n{e_int}")
        else:
            _LOGGER.warning(f"'{PyTorchCheckpointKeys.SEQUENCE_LENGTH}' not found in model file. Forecasting validation will be skipped.")

        # Default starting window for forecast()
        if PyTorchCheckpointKeys.INITIAL_SEQUENCE in self._loaded_data_dict:
            try:
                self.initial_sequence = self._loaded_data_dict[PyTorchCheckpointKeys.INITIAL_SEQUENCE]
                _LOGGER.info("Default 'initial_sequence' for forecasting loaded from model file.")
                # Only warn on a length mismatch; forecast() enforces the length strictly
                if self.sequence_length and len(self.initial_sequence) != self.sequence_length:  # type: ignore
                    _LOGGER.warning(f"Loaded 'initial_sequence' length ({len(self.initial_sequence)}) mismatches 'sequence_length' ({self.sequence_length}).")  # type: ignore
            except Exception as e_seq:
                _LOGGER.warning(f"State Dictionary has the key '{PyTorchCheckpointKeys.INITIAL_SEQUENCE}' but an error occurred when retrieving it:\n{e_seq}")
        else:
            _LOGGER.info("No default 'initial_sequence' found in model file. Must be provided for forecasting.")

    def _preprocess_input(self, features: torch.Tensor) -> torch.Tensor:
        """
        Casts the input sequences to float, applies scaling, and moves the result to the correct device.

        Overrides _BaseInferenceHandler._preprocess_input.

        Args:
            features (torch.Tensor): Input tensor of shape (batch_size, sequence_length).

        Returns:
            torch.Tensor: Scaled tensor on the correct device.

        Raises:
            RuntimeError: If no scaler is available.
        """
        if self.scaler is None:
            # Redundant with the __init__ check, kept as a safety net.
            _LOGGER.error("Scaler is not available for preprocessing.")
            raise RuntimeError()

        features_tensor = features.float()

        # The scaler is fed a single column: (batch, seq_len) -> (batch * seq_len, 1)
        batch_size, seq_len = features_tensor.shape
        scaled_flat = self.scaler.transform(features_tensor.reshape(-1, 1))

        # (batch * seq_len, 1) -> (batch, seq_len)
        return scaled_flat.reshape(batch_size, seq_len).to(self.device)

    def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Core batch prediction method for sequences.
        Runs a batch of sequences through the model, de-scales the output,
        and returns the predictions.

        Args:
            features (np.ndarray | torch.Tensor): A 2D array/tensor of input sequences, shape (batch_size, sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction tensors.

        Raises:
            ValueError: If 'features' is not 2D.
            RuntimeError: If the scaler is missing or the prediction mode is invalid.
        """
        if features.ndim != 2:
            _LOGGER.error("Input for batch prediction must be a 2D array or tensor (batch_size, sequence_length).")
            raise ValueError()

        if isinstance(features, np.ndarray):
            features_tensor = torch.from_numpy(features).float()
        else:
            features_tensor = features.float()

        # _preprocess_input scales the data and moves it to the correct device
        input_tensor = self._preprocess_input(features_tensor)

        with torch.no_grad():
            scaled_output = self.model(input_tensor)

        if self.scaler is None:  # Should be impossible due to __init__
            raise RuntimeError("Scaler not found for de-scaling.")

        if self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
            # One value per sequence: (batch) -> (batch, 1) for the scaler -> (batch)
            descaled_output = self.scaler.inverse_transform(scaled_output.reshape(-1, 1)).squeeze(-1)

        elif self.prediction_mode == MLTaskKeys.SEQUENCE_SEQUENCE:
            # Flatten to one column for the scaler, then restore the model's output shape
            descaled_flat = self.scaler.inverse_transform(scaled_output.reshape(-1, 1))
            descaled_output = descaled_flat.reshape(scaled_output.shape)

        else:
            # Should not happen: the mode is validated in __init__
            _LOGGER.error(f"Invalid prediction mode: {self.prediction_mode}")
            raise RuntimeError()

        return {PyTorchInferenceKeys.PREDICTIONS: descaled_output}

    def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Core single-sample prediction method for sequences.
        Runs a single sequence through the model, de-scales the output,
        and returns the prediction.

        Args:
            features (np.ndarray | torch.Tensor): A 1D array/tensor of
                input features, shape (sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction tensor.

        Raises:
            ValueError: If 'features' does not describe exactly one sequence.
        """
        if features.ndim == 1:
            features = features.reshape(1, -1)  # (seq_len) -> (1, seq_len)

        # Check the rank first: indexing shape[0] on a 0-dim input would raise IndexError
        if features.ndim != 2 or features.shape[0] != 1:
            _LOGGER.error("The 'predict()' method is for a single sequence (1D tensor). Use 'predict_batch()' for multiple sequences (2D tensor).")
            raise ValueError()

        batch_results = self.predict_batch(features)

        # Drop the batch dimension:
        #   seq-to-value -> shape (), seq-to-seq -> shape (seq_len)
        return {key: value[0] for key, value in batch_results.items()}

    # --- NumPy Convenience Wrappers (on CPU) ---

    def predict_batch_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, np.ndarray]:
        """
        Convenience wrapper for predict_batch that returns NumPy arrays.

        Args:
            features (np.ndarray | torch.Tensor): A 2D array/tensor of
                input sequences, shape (batch_size, sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction as a NumPy array.
        """
        tensor_results = self.predict_batch(features)
        return {key: value.cpu().numpy() for key, value in tensor_results.items()}

    def predict_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
        """
        Convenience wrapper for predict that returns NumPy arrays or scalars.

        Args:
            features (np.ndarray | torch.Tensor): A 1D array/tensor of
                input features, shape (sequence_length).

        Returns:
            A dictionary containing the de-scaled prediction.
                - For 'sequence-to-value', the value is a Python scalar.
                - For 'sequence-to-sequence', the value is a 1D NumPy array.
        """
        prediction = self.predict(features)[PyTorchInferenceKeys.PREDICTIONS]

        if self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
            # 0-dim tensor -> Python scalar
            return {PyTorchInferenceKeys.PREDICTIONS: prediction.item()}
        # sequence-to-sequence: 1D tensor -> 1D array
        return {PyTorchInferenceKeys.PREDICTIONS: prediction.cpu().numpy()}

    def forecast(self,
                 n_steps: int,
                 initial_sequence: Optional[Union[np.ndarray, torch.Tensor]]=None) -> np.ndarray:
        """
        Autoregressively forecasts 'n_steps' into the future.

        This method works for both 'sequence-to-value' and
        'sequence-to-sequence' models.

        If 'initial_sequence' is not provided, this method will use the
        default sequence that was saved with the model (if available).

        Args:
            n_steps (int): The number of future time steps to predict.
            initial_sequence (np.ndarray | torch.Tensor): The sequence
                to start forecasting from. If None, uses the loaded default.
                This should be a 1D array of *un-scaled* data.

        Returns:
            np.ndarray: A 1D array containing the 'n_steps' forecasted values.

        Raises:
            ValueError: If 'n_steps' is negative, no starting sequence is available,
                the sequence is not 1D, or its length mismatches the known 'sequence_length'.
            RuntimeError: If no scaler is available.
        """
        # --- Validation ---
        if n_steps < 0:
            _LOGGER.error(f"'n_steps' must be a non-negative integer. Got {n_steps}.")
            raise ValueError()

        if initial_sequence is None:
            if self.initial_sequence is None:
                _LOGGER.error("No 'initial_sequence' provided and no default sequence was loaded. Cannot forecast.")
                raise ValueError()
            _LOGGER.info("Using default 'initial_sequence' loaded from model file for forecast.")
            initial_sequence = self.initial_sequence

        # as_tensor accepts arrays, tensors and plain sequences alike, so a default
        # sequence stored in the checkpoint as a tensor or list also works.
        initial_sequence_tensor = torch.as_tensor(initial_sequence).detach().float()

        if initial_sequence_tensor.ndim != 1:
            _LOGGER.error(f"initial_sequence must be a 1D array. Got {initial_sequence_tensor.ndim} dimensions.")
            raise ValueError()

        if self.sequence_length is not None:
            if len(initial_sequence_tensor) != self.sequence_length:
                _LOGGER.error(f"Input sequence length ({len(initial_sequence_tensor)}) does not match model's required sequence_length ({self.sequence_length}).")
                raise ValueError()
        else:
            _LOGGER.warning("Model's 'sequence_length' is unknown. Cannot validate input sequence length. Assuming it is correct.")

        # --- Pre-processing ---
        if self.scaler is None:  # Should be impossible due to __init__
            raise RuntimeError("Scaler not found for forecasting.")

        # Scale the whole window: (seq_len) -> (seq_len, 1) -> scaler -> (seq_len)
        scaled_sequence_flat = self.scaler.transform(initial_sequence_tensor.reshape(-1, 1))
        current_scaled_sequence = scaled_sequence_flat.squeeze(-1).to(self.device)

        descaled_predictions = []

        # --- Autoregressive Loop ---
        self.model.eval()  # Ensure model is in eval mode
        with torch.no_grad():
            for _ in range(n_steps):
                # (seq_len) -> (1, seq_len)
                input_tensor = current_scaled_sequence.reshape(1, -1)

                # Flatten the output and keep the last element:
                #   seq-to-value -> the single predicted value
                #   seq-to-seq   -> the value for the final time step
                # Flattening (rather than a bare squeeze) keeps the indexing valid
                # even when the output sequence has length 1.
                scaled_prediction = self.model(input_tensor).reshape(-1)[-1]

                # De-scale for storage; the scaler expects shape (1, 1)
                descaled_prediction = self.scaler.inverse_transform(scaled_prediction.reshape(1, 1)).item()
                descaled_predictions.append(descaled_prediction)

                # Roll the window: drop the oldest value and append the new scaled prediction.
                # unsqueeze(0) turns the 0-dim prediction into a 1-dim tensor for cat.
                current_scaled_sequence = torch.cat((current_scaled_sequence[1:], scaled_prediction.unsqueeze(0)))

        return np.array(descaled_predictions)

    def plot_forecast(self,
                      n_steps: int,
                      save_dir: Union[str, Path],
                      filename: str = "forecast_plot.svg",
                      initial_sequence: Optional[Union[np.ndarray, torch.Tensor]]=None):
        """
        Runs a forecast and saves a plot of the results.

        Args:
            n_steps (int): The number of future time steps to predict.
            save_dir (str | Path): Directory to save the plot.
            filename (str, optional): Name for the saved plot file.
            initial_sequence (np.ndarray | torch.Tensor | None): The sequence
                to start forecasting from. If None, uses the loaded default.
        """
        # --- 1. Get Forecast Data ---
        predictions = self.forecast(n_steps=n_steps,
                                    initial_sequence=initial_sequence)

        # --- 2. Determine which initial sequence was used for plotting ---
        history_source = self.initial_sequence if initial_sequence is None else initial_sequence
        if history_source is None:  # Should be caught by forecast() but good to check
            _LOGGER.error("Cannot plot: No 'initial_sequence' provided and no default found.")
            return
        if isinstance(history_source, np.ndarray):
            plot_initial_sequence = history_source
        else:
            # detach() so tensors that track gradients can still be converted
            plot_initial_sequence = torch.as_tensor(history_source).detach().cpu().numpy()

        # --- 3. Create X-axis indices ---
        # The x-axis is integer time steps; the forecast continues right after the history
        seq_len = len(plot_initial_sequence)
        history_x = np.arange(0, seq_len)
        forecast_x = np.arange(seq_len, seq_len + n_steps)

        # --- 4. Plot ---
        sns.set_theme(style="darkgrid")
        fig, ax = plt.subplots(figsize=(12, 6))

        # The outer try/finally guarantees this figure is released even if
        # plotting or path resolution raises.
        try:
            ax.plot(history_x, plot_initial_sequence, label="Historical Data")
            ax.plot(forecast_x, predictions, label="Forecasted Data", linestyle="--")

            # Vertical line marking the start of the forecast
            ax.axvline(x=history_x[-1], color='red', linestyle=':', label='Forecast Start')

            ax.set_title(f"{n_steps}-Step Forecast")
            ax.set_xlabel("Time Step")
            ax.set_ylabel("Value")
            ax.legend()
            fig.tight_layout()

            # --- 5. Save Plot ---
            dir_path = make_fullpath(save_dir, make=True, enforce="directory")
            full_path = dir_path / sanitize_filename(filename)

            # Saving is best-effort: a failure is logged, not raised
            try:
                fig.savefig(full_path)
                _LOGGER.info(f"📈 Forecast plot saved to '{full_path.name}'.")
            except Exception as e:
                _LOGGER.error(f"Failed to save plot:\n{e}")
        finally:
            plt.close(fig)
389
+
390
def info():
    """Pass this module's `__all__` list to `_script_info`."""
    _script_info(__all__)
@@ -0,0 +1,139 @@
1
+ import torch
2
+ from torch import nn
3
+ from typing import Literal
4
+
5
+ from ._logger import _LOGGER
6
+ from ._script_info import _script_info
7
+ from ._keys import MLTaskKeys
8
+ from .ML_models import _ArchitectureHandlerMixin
9
+
10
+
11
+ __all__ = [
12
+ "DragonSequenceLSTM"
13
+ ]
14
+
15
+
16
class DragonSequenceLSTM(nn.Module, _ArchitectureHandlerMixin):
    """
    An LSTM-based network for single-feature (univariate) sequence prediction tasks.
    It can be configured for:
        1. 'sequence-to-sequence': Predicts a full sequence.
        2. 'sequence-to-value': Predicts a single value from the last time step.
    """
    def __init__(self,
                 prediction_mode: Literal["sequence-to-sequence", "sequence-to-value"],
                 hidden_size: int = 100,
                 recurrent_layers: int = 1,
                 dropout: float = 0.1):
        """
        Args:
            prediction_mode (str): Determines the model's output behavior.
                - 'sequence-to-sequence': Returns a full sequence.
                - 'sequence-to-value': Returns a single prediction based on the last time step.
            hidden_size (int): The number of features in the LSTM's hidden state.
            recurrent_layers (int): The number of recurrent LSTM layers.
            dropout (float): The dropout probability for all but the last LSTM layer.
                It has no effect when 'recurrent_layers' is 1.

        Raises:
            ValueError: If any argument fails validation.
        """
        super().__init__()

        # --- Validation ---
        if prediction_mode not in [MLTaskKeys.SEQUENCE_SEQUENCE, MLTaskKeys.SEQUENCE_VALUE]:
            _LOGGER.error(f"Unrecognized prediction mode: '{prediction_mode}'.")
            raise ValueError()
        self.prediction_mode = prediction_mode

        if not isinstance(hidden_size, int) or hidden_size < 1:
            _LOGGER.error("hidden_size must be a positive integer.")
            raise ValueError()
        if not isinstance(recurrent_layers, int) or recurrent_layers < 1:
            _LOGGER.error("recurrent_layers must be a positive integer.")
            raise ValueError()
        if not (0.0 <= dropout < 1.0):
            _LOGGER.error("dropout must be a float between 0.0 and 1.0.")
            raise ValueError()

        # --- Save configuration ---
        self.features = 1  # Univariate
        self.hidden_size = hidden_size
        self.recurrent_layers = recurrent_layers
        self.dropout = dropout  # the requested value is kept for get_architecture_config()

        # --- Build model ---
        # nn.LSTM applies dropout only between stacked layers and warns when it
        # receives a non-zero value with a single layer, so pass 0.0 in that case.
        self.lstm = nn.LSTM(
            input_size=self.features,
            hidden_size=hidden_size,
            num_layers=recurrent_layers,
            dropout=dropout if recurrent_layers > 1 else 0.0,
            batch_first=True  # This is crucial for (batch, seq, feature) input
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=self.features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Defines the forward pass.

        Args:
            x (torch.Tensor): The input tensor. Can be 2D (batch_size, sequence_length)
                or 3D (batch_size, sequence_length, features).
                The model will automatically handle 2D inputs
                by assuming a feature size of 1.

        Returns:
            torch.Tensor: The output tensor, with the trailing feature dimension squeezed out.
                - (batch_size, sequence_length) if 'sequence-to-sequence'
                - (batch_size) if 'sequence-to-value'
        """
        # --- Handle Input Shape ---
        if x.ndim == 2:
            # A 2D input is only meaningful for a single-feature model
            if self.features != 1:
                _LOGGER.error(f"Received 2D input (shape {x.shape}), but model was initialized with features={self.features}.")
                raise ValueError()

            # Add the feature dimension: (batch_size, seq_len) -> (batch_size, seq_len, 1)
            x = x.unsqueeze(-1)

        # The LSTM returns the full output sequence and the final hidden/cell states
        lstm_out, _ = self.lstm(x)

        # --- Handle Output Shape based on mode ---
        if self.prediction_mode == MLTaskKeys.SEQUENCE_SEQUENCE:
            # Project every time step: (batch_size, seq_len, hidden_size) -> (batch_size, seq_len, 1)
            projected = self.linear(lstm_out)
        elif self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
            # Project only the last time step: (batch_size, hidden_size) -> (batch_size, 1)
            projected = self.linear(lstm_out[:, -1, :])
        else:
            # Only reachable if 'prediction_mode' was changed after construction
            _LOGGER.error(f"Unrecognized prediction mode: '{self.prediction_mode}'.")
            raise ValueError()

        # Squeeze the 'features' dim to match the target/label shape
        return projected.squeeze(-1)

    def get_architecture_config(self) -> dict:
        """Returns the configuration of the model."""
        return {
            'hidden_size': self.hidden_size,
            'recurrent_layers': self.recurrent_layers,
            'prediction_mode': self.prediction_mode,
            'dropout': self.dropout
        }

    def __repr__(self) -> str:
        """Returns the developer-friendly string representation of the model."""
        return (
            f"DragonSequenceLSTM(features={self.lstm.input_size}, "
            f"hidden_size={self.lstm.hidden_size}, "
            f"recurrent_layers={self.lstm.num_layers}, "
            f"mode='{self.prediction_mode}')")
136
+
137
+
138
def info():
    """Pass this module's `__all__` list to `_script_info`."""
    _script_info(__all__)