qadence 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qadence/__init__.py +1 -1
- qadence/analog/parse_analog.py +1 -2
- qadence/backends/gpsr.py +8 -2
- qadence/backends/pulser/backend.py +7 -23
- qadence/backends/pyqtorch/backend.py +80 -5
- qadence/backends/pyqtorch/config.py +10 -3
- qadence/backends/pyqtorch/convert_ops.py +63 -2
- qadence/blocks/primitive.py +1 -0
- qadence/execution.py +0 -2
- qadence/log_config.yaml +10 -0
- qadence/measurements/shadow.py +97 -128
- qadence/measurements/utils.py +2 -2
- qadence/mitigations/readout.py +12 -6
- qadence/ml_tools/__init__.py +4 -8
- qadence/ml_tools/callbacks/__init__.py +30 -0
- qadence/ml_tools/callbacks/callback.py +451 -0
- qadence/ml_tools/callbacks/callbackmanager.py +214 -0
- qadence/ml_tools/{saveload.py → callbacks/saveload.py} +11 -11
- qadence/ml_tools/callbacks/writer_registry.py +430 -0
- qadence/ml_tools/config.py +132 -258
- qadence/ml_tools/data.py +7 -3
- qadence/ml_tools/loss/__init__.py +10 -0
- qadence/ml_tools/loss/loss.py +87 -0
- qadence/ml_tools/optimize_step.py +45 -10
- qadence/ml_tools/stages.py +46 -0
- qadence/ml_tools/train_utils/__init__.py +7 -0
- qadence/ml_tools/train_utils/base_trainer.py +548 -0
- qadence/ml_tools/train_utils/config_manager.py +184 -0
- qadence/ml_tools/trainer.py +692 -0
- qadence/model.py +1 -1
- qadence/noise/__init__.py +2 -2
- qadence/noise/protocols.py +18 -53
- qadence/operations/ham_evo.py +87 -26
- qadence/transpile/noise.py +12 -5
- qadence/types.py +15 -3
- {qadence-1.8.0.dist-info → qadence-1.9.0.dist-info}/METADATA +3 -4
- {qadence-1.8.0.dist-info → qadence-1.9.0.dist-info}/RECORD +39 -32
- {qadence-1.8.0.dist-info → qadence-1.9.0.dist-info}/WHEEL +1 -1
- qadence/ml_tools/printing.py +0 -154
- qadence/ml_tools/train_grad.py +0 -395
- qadence/ml_tools/train_no_grad.py +0 -199
- qadence/noise/readout.py +0 -218
- {qadence-1.8.0.dist-info → qadence-1.9.0.dist-info}/licenses/LICENSE +0 -0
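At a glance, 1.9.0 reorganizes `qadence.ml_tools`: the procedural training entry points (`train_grad.py`, `train_no_grad.py`) and `printing.py` are removed, a class-based `Trainer` (`trainer.py`, shown in full below) plus a `callbacks` package take their place, and `saveload.py` moves under `callbacks/`. As a rough orientation for downstream code, here is a hedged sketch of the 1.9.0 import surface; only the import paths themselves appear verbatim in this diff, while the "replaces"/"previously" notes are inferences from the file list above, not statements from the package changelog:

```python
# Import paths that appear in the qadence 1.9.0 diff below.
from qadence.ml_tools.trainer import Trainer                    # new class-based training loop (replaces train_grad.py / train_no_grad.py)
from qadence.ml_tools.config import TrainConfig                 # same module, reworked internals
from qadence.ml_tools.optimize_step import optimize_step, update_ng_parameters
from qadence.ml_tools.callbacks.saveload import (               # previously qadence.ml_tools.saveload
    get_latest_checkpoint_name,
)
```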
qadence/ml_tools/trainer.py
ADDED
@@ -0,0 +1,692 @@
+from __future__ import annotations
+
+import copy
+from itertools import islice
+from logging import getLogger
+from typing import Any, Callable, Iterable, cast
+
+import torch
+from nevergrad.optimization.base import Optimizer as NGOptimizer
+from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn
+from torch import complex128, float32, float64, nn, optim
+from torch import device as torch_device
+from torch import dtype as torch_dtype
+from torch.utils.data import DataLoader
+
+from qadence.ml_tools.config import TrainConfig
+from qadence.ml_tools.data import OptimizeResult
+from qadence.ml_tools.optimize_step import optimize_step, update_ng_parameters
+from qadence.ml_tools.stages import TrainingStage
+
+from .train_utils.base_trainer import BaseTrainer
+
+logger = getLogger("ml_tools")
+
+
+class Trainer(BaseTrainer):
+    """Trainer class to manage and execute training, validation, and testing loops for a model (eg.
+
+    QNN).
+
+    This class handles the overall training process, including:
+    - Managing epochs and steps
+    - Handling data loading and batching
+    - Computing and updating gradients
+    - Logging and monitoring training metrics
+
+    Attributes:
+        current_epoch (int): The current epoch number.
+        global_step (int): The global step across all epochs.
+        log_device (str): Device for logging, default is "cpu".
+        device (torch_device): Device used for computation.
+        dtype (torch_dtype | None): Data type used for computation.
+        data_dtype (torch_dtype | None): Data type for data.
+            Depends on the model's data type.
+
+    Inherited Attributes:
+        use_grad (bool): Indicates if gradients are used for optimization. Default is True.
+
+        model (nn.Module): The neural network model.
+        optimizer (optim.Optimizer | NGOptimizer | None): The optimizer for training.
+        config (TrainConfig): The configuration settings for training.
+        train_dataloader (DataLoader | None): DataLoader for training data.
+        val_dataloader (DataLoader | None): DataLoader for validation data.
+        test_dataloader (DataLoader | None): DataLoader for testing data.
+
+        optimize_step (Callable): Function for performing an optimization step.
+        loss_fn (Callable): loss function to use.
+
+        num_training_batches (int): Number of training batches.
+        num_validation_batches (int): Number of validation batches.
+        num_test_batches (int): Number of test batches.
+
+        state (str): Current state in the training process
+
+    Default training routine
+    ```
+    for epoch in max_iter + 1:
+        # Training
+        for batch in train_batches:
+            train model
+        # Validation
+        if val_every % epoch == 0:
+            for batch in val_batches:
+                train model
+    ```
+
+    Notes:
+        - In case of InfiniteTensorDataset, number of batches = 1.
+        - In case of TensorDataset, number of batches are default.
+        - Training is run for max_iter + 1 epochs. Epoch 0 logs untrained model.
+        - Please look at the CallbackManager initialize_callbacks method to review the default
+            logging behavior.
+
+    Examples:
+
+    ```python
+    import torch
+    from torch.optim import SGD
+    from qadence import (
+        feature_map,
+        hamiltonian_factory,
+        hea,
+        QNN,
+        QuantumCircuit,
+        TrainConfig,
+        Z,
+    )
+    from qadence.ml_tools.trainer import Trainer
+    from qadence.ml_tools.optimize_step import optimize_step
+    from qadence.ml_tools import TrainConfig
+    from qadence.ml_tools.data import to_dataloader
+
+    # Initialize the model
+    n_qubits = 2
+    fm = feature_map(n_qubits)
+    ansatz = hea(n_qubits=n_qubits, depth=2)
+    observable = hamiltonian_factory(n_qubits, detuning=Z)
+    circuit = QuantumCircuit(n_qubits, fm, ansatz)
+    model = QNN(circuit, observable, backend="pyqtorch", diff_mode="ad")
+
+    # Set up the optimizer
+    optimizer = SGD(model.parameters(), lr=0.001)
+
+    # Use TrainConfig for configuring the training process
+    config = TrainConfig(
+        max_iter=100,
+        print_every=10,
+        write_every=10,
+        checkpoint_every=10,
+        val_every=10
+    )
+
+    # Create the Trainer instance with TrainConfig
+    trainer = Trainer(
+        model=model,
+        optimizer=optimizer,
+        config=config,
+        loss_fn="mse",
+        optimize_step=optimize_step
+    )
+
+    batch_size = 25
+    x = torch.linspace(0, 1, 32).reshape(-1, 1)
+    y = torch.sin(x)
+    train_loader = to_dataloader(x, y, batch_size=batch_size, infinite=True)
+    val_loader = to_dataloader(x, y, batch_size=batch_size, infinite=False)
+
+    # Train the model
+    model, optimizer = trainer.fit(train_loader, val_loader)
+    ```
+
+    This also supports both gradient based and gradient free optimization.
+    The default support is for gradient based optimization.
+
+    Notes:
+
+    - **set_use_grad()** (*class level*):This method is used to set the global `use_grad` flag,
+    controlling whether the trainer uses gradient-based optimization.
+    ```python
+    # gradient based
+    Trainer.set_use_grad(True)
+
+    # gradient free
+    Trainer.set_use_grad(False)
+    ```
+    - **Context Managers** (*instance level*): `enable_grad_opt()` and `disable_grad_opt()` are
+    context managers that temporarily switch the optimization mode for specific code blocks.
+    This is useful when you want to mix gradient-based and gradient-free optimization
+    in the same training process.
+    ```python
+    # gradient based
+    with trainer.enable_grad_opt(optimizer):
+        trainer.fit()
+
+    # gradient free
+    with trainer.disable_grad_opt(ng_optimizer):
+        trainer.fit()
+    ```
+
+    Examples
+
+    *Gradient based optimization example Usage*:
+    ```python
+    from torch import optim
+    optimizer = optim.SGD(model.parameters(), lr=0.01)
+
+    Trainer.set_use_grad(True)
+    trainer = Trainer(
+        model=model,
+        optimizer=optimizer,
+        config=config,
+        loss_fn="mse"
+    )
+    trainer.fit(train_loader, val_loader)
+    ```
+    or
+    ```python
+    trainer = Trainer(
+        model=model,
+        config=config,
+        loss_fn="mse"
+    )
+    with trainer.enable_grad_opt(optimizer):
+        trainer.fit(train_loader, val_loader)
+    ```
+
+    *Gradient free optimization example Usage*:
+    ```python
+    import nevergrad as ng
+    from qadence.ml_tools.parameters import num_parameters
+    ng_optimizer = ng.optimizers.NGOpt(
+        budget=config.max_iter, parametrization= num_parameters(model)
+    )
+
+    Trainer.set_use_grad(False)
+    trainer = Trainer(
+        model=model,
+        optimizer=ng_optimizer,
+        config=config,
+        loss_fn="mse"
+    )
+    trainer.fit(train_loader, val_loader)
+    ```
+    or
+    ```python
+    import nevergrad as ng
+    from qadence.ml_tools.parameters import num_parameters
+    ng_optimizer = ng.optimizers.NGOpt(
+        budget=config.max_iter, parametrization= num_parameters(model)
+    )
+
+    trainer = Trainer(
+        model=model,
+        config=config,
+        loss_fn="mse"
+    )
+    with trainer.disable_grad_opt(ng_optimizer):
+        trainer.fit(train_loader, val_loader)
+    ```
+    """
+
+    def __init__(
+        self,
+        model: nn.Module,
+        optimizer: optim.Optimizer | NGOptimizer | None,
+        config: TrainConfig,
+        loss_fn: str | Callable = "mse",
+        train_dataloader: DataLoader | None = None,
+        val_dataloader: DataLoader | None = None,
+        test_dataloader: DataLoader | None = None,
+        optimize_step: Callable = optimize_step,
+        device: torch_device | None = None,
+        dtype: torch_dtype | None = None,
+        max_batches: int | None = None,
+    ):
+        """
+        Initializes the Trainer class.
+
+        Args:
+            model (nn.Module): The PyTorch model to train.
+            optimizer (optim.Optimizer | NGOptimizer | None): The optimizer for training.
+            config (TrainConfig): Training configuration object.
+            loss_fn (str | Callable ): Loss function used for training.
+                If not specified, default mse loss will be used.
+            train_dataloader (DataLoader | None): DataLoader for training data.
+            val_dataloader (DataLoader | None): DataLoader for validation data.
+            test_dataloader (DataLoader | None): DataLoader for test data.
+            optimize_step (Callable): Function to execute an optimization step.
+            device (torch_device): Device to use for computation.
+            dtype (torch_dtype): Data type for computation.
+            max_batches (int | None): Maximum number of batches to process per epoch.
+                This is only valid in case of finite TensorDataset dataloaders.
+                if max_batches is not None, the maximum number of batches used will
+                be min(max_batches, len(dataloader.dataset))
+                In case of InfiniteTensorDataset only 1 batch per epoch is used.
+        """
+        super().__init__(
+            model=model,
+            optimizer=optimizer,
+            config=config,
+            loss_fn=loss_fn,
+            optimize_step=optimize_step,
+            train_dataloader=train_dataloader,
+            val_dataloader=val_dataloader,
+            test_dataloader=test_dataloader,
+            max_batches=max_batches,
+        )
+        self.current_epoch: int = 0
+        self.global_step: int = 0
+        self.log_device: str = "cpu" if device is None else device
+        self.device: torch_device | None = device
+        self.dtype: torch_dtype | None = dtype
+        self.data_dtype: torch_dtype | None = None
+        if self.dtype:
+            self.data_dtype = float64 if (self.dtype == complex128) else float32
+
+    def fit(
+        self, train_dataloader: DataLoader | None = None, val_dataloader: DataLoader | None = None
+    ) -> tuple[nn.Module, optim.Optimizer]:
+        """
+        Fits the model using the specified training configuration.
+
+        The dataloaders can be provided to train on new datasets, or the default dataloaders
+        provided in the trainer will be used.
+
+        Args:
+            train_dataloader (DataLoader | None): DataLoader for training data.
+            val_dataloader (DataLoader | None): DataLoader for validation data.
+
+        Returns:
+            tuple[nn.Module, optim.Optimizer]: The trained model and optimizer.
+        """
+        if train_dataloader is not None:
+            self.train_dataloader = train_dataloader
+        if val_dataloader is not None:
+            self.val_dataloader = val_dataloader
+
+        self._fit_setup()
+        self._train()
+        self._fit_end()
+        self.training_stage = TrainingStage("idle")
+        return self.model, self.optimizer
+
+    def _fit_setup(self) -> None:
+        """
+        Sets up the training environment, initializes configurations,.
+
+        and moves the model to the specified device and data type.
+        The callback_manager.start_training takes care of loading checkpoint,
+        and setting up the writer.
+        """
+        self.config_manager.initialize_config()
+        self.callback_manager.start_training(trainer=self)
+
+        # Move model to device
+        if isinstance(self.model, nn.DataParallel):
+            self.model = self.model.module.to(device=self.device, dtype=self.dtype)
+        else:
+            self.model = self.model.to(device=self.device, dtype=self.dtype)
+
+        # Progress bar for training visualization
+        self.progress: Progress = Progress(
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TaskProgressColumn(),
+            TimeRemainingColumn(elapsed_when_finished=True),
+        )
+
+        # Quick Fix for build_optimize_step
+        # Please review run_train_batch for more details
+        self.model_old = copy.deepcopy(self.model)
+        self.optimizer_old = copy.deepcopy(self.optimizer)
+
+        # Run validation at the start if specified in the configuration
+        self.perform_val = self.config.val_every > 0
+        if self.perform_val:
+            self.run_validation(self.val_dataloader)
+
+    def _fit_end(self) -> None:
+        """Finalizes the training and closes the writer."""
+        self.callback_manager.end_training(trainer=self)
+
+    @BaseTrainer.callback("train")
+    def _train(self) -> list[list[tuple[torch.Tensor, dict[str, Any]]]]:
+        """
+        Runs the main training loop, iterating over epochs.
+
+        Returns:
+            list[list[tuple[torch.Tensor, dict[str, Any]]]]: Training loss
+                metrics for all epochs.
+                list -> list -> tuples
+                Epochs -> Training Batches -> (loss, metrics)
+        """
+        self.on_train_start()
+        train_losses = []
+        val_losses = []
+
+        with self.progress:
+            train_task = self.progress.add_task(
+                "Training", total=self.config_manager.config.max_iter
+            )
+            if self.perform_val:
+                val_task = self.progress.add_task(
+                    "Validation",
+                    total=(self.config_manager.config.max_iter + 1) / self.config.val_every,
+                )
+            for epoch in range(
+                self.global_step, self.global_step + self.config_manager.config.max_iter + 1
+            ):
+                try:
+                    self.current_epoch = epoch
+                    self.on_train_epoch_start()
+                    train_epoch_loss_metrics = self.run_training(self.train_dataloader)
+                    train_losses.append(train_epoch_loss_metrics)
+                    self.on_train_epoch_end(train_epoch_loss_metrics)
+
+                    # Run validation periodically if specified
+                    if self.perform_val and self.current_epoch % self.config.val_every == 0:
+                        self.on_val_epoch_start()
+                        val_epoch_loss_metrics = self.run_validation(self.val_dataloader)
+                        val_losses.append(val_epoch_loss_metrics)
+                        self.on_val_epoch_end(val_epoch_loss_metrics)
+                        self.progress.update(val_task, advance=1)
+
+                    self.progress.update(train_task, advance=1)
+                except KeyboardInterrupt:
+                    logger.info("Terminating training gracefully after the current iteration.")
+                    break
+
+        self.on_train_end(train_losses, val_losses)
+        return train_losses
+
+    @BaseTrainer.callback("train_epoch")
+    def run_training(self, dataloader: DataLoader) -> list[tuple[torch.Tensor, dict[str, Any]]]:
+        """
+        Runs the training for a single epoch, iterating over multiple batches.
+
+        Args:
+            dataloader (DataLoader): DataLoader for training data.
+
+        Returns:
+            list[tuple[torch.Tensor, dict[str, Any]]]: Loss and metrics for each batch.
+                list -> tuples
+                Training Batches -> (loss, metrics)
+        """
+        self.model.train()
+        train_epoch_loss_metrics = []
+        # Deep copy model and optimizer to maintain checkpoints
+        # We do this because optimize step provides loss, metrics
+        # before step of optimization
+        # To align them with model/optimizer correctly, we checkpoint
+        # the older copy of the model.
+        # TODO: review optimize_step to provide iteration aligned model and loss.
+        self.model_old = copy.deepcopy(self.model)
+        self.optimizer_old = copy.deepcopy(self.optimizer)
+
+        for batch in self.batch_iter(dataloader, self.num_training_batches):
+            self.on_train_batch_start(batch)
+            train_batch_loss_metrics = self.run_train_batch(batch)
+            train_epoch_loss_metrics.append(train_batch_loss_metrics)
+            self.on_train_batch_end(train_batch_loss_metrics)
+
+        return train_epoch_loss_metrics
+
+    @BaseTrainer.callback("train_batch")
+    def run_train_batch(
+        self, batch: tuple[torch.Tensor, ...]
+    ) -> tuple[torch.Tensor, dict[str, Any]]:
+        """
+        Runs a single training batch, performing optimization.
+
+        We use the step function to optimize the model based on use_grad.
+            use_grad = True entails gradient based optimization, for which we use
+            optimize_step function.
+            use_grad = False entails gradient free optimization, for which we use
+            update_ng_parameters function.
+
+        Args:
+            batch (tuple[torch.Tensor, ...]): Batch of data from the DataLoader.
+
+        Returns:
+            tuple[torch.Tensor, dict[str, Any]]: Loss and metrics for the batch.
+                tuple of (loss, metrics)
+        """
+
+        if self.use_grad:
+            # Perform gradient-based optimization
+            loss_metrics = self.optimize_step(
+                model=self.model,
+                optimizer=self.optimizer,
+                loss_fn=self.loss_fn,
+                xs=batch,
+                device=self.device,
+                dtype=self.data_dtype,
+            )
+        else:
+            # Perform optimization using Nevergrad
+            loss, metrics, ng_params = update_ng_parameters(
+                model=self.model,
+                optimizer=self.optimizer,
+                loss_fn=self.loss_fn,
+                data=batch,
+                ng_params=self.ng_params,  # type: ignore[arg-type]
+            )
+            self.ng_params = ng_params
+            loss_metrics = loss, metrics
+
+        return self.modify_batch_end_loss_metrics(loss_metrics)
+
+    @BaseTrainer.callback("val_epoch")
+    def run_validation(self, dataloader: DataLoader) -> list[tuple[torch.Tensor, dict[str, Any]]]:
+        """
+        Runs the validation loop for a single epoch, iterating over multiple batches.
+
+        Args:
+            dataloader (DataLoader): DataLoader for validation data.
+
+        Returns:
+            list[tuple[torch.Tensor, dict[str, Any]]]: Loss and metrics for each batch.
+                list -> tuples
+                Validation Batches -> (loss, metrics)
+        """
+        self.model.eval()
+        val_epoch_loss_metrics = []
+
+        for batch in self.batch_iter(dataloader, self.num_validation_batches):
+            self.on_val_batch_start(batch)
+            val_batch_loss_metrics = self.run_val_batch(batch)
+            val_epoch_loss_metrics.append(val_batch_loss_metrics)
+            self.on_val_batch_end(val_batch_loss_metrics)
+
+        return val_epoch_loss_metrics
+
+    @BaseTrainer.callback("val_batch")
+    def run_val_batch(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, dict[str, Any]]:
+        """
+        Runs a single validation batch.
+
+        Args:
+            batch (tuple[torch.Tensor, ...]): Batch of data from the DataLoader.
+
+        Returns:
+            tuple[torch.Tensor, dict[str, Any]]: Loss and metrics for the batch.
+        """
+        with torch.no_grad():
+            loss_metrics = self.loss_fn(self.model, batch)
+            return self.modify_batch_end_loss_metrics(loss_metrics)
+
+    def test(self, test_dataloader: DataLoader = None) -> list[tuple[torch.Tensor, dict[str, Any]]]:
+        """
+        Runs the testing loop if a test DataLoader is provided.
+
+        if the test_dataloader is not provided, default test_dataloader defined
+        in the Trainer class is used.
+
+        Args:
+            test_dataloader (DataLoader): DataLoader for test data.
+
+        Returns:
+            list[tuple[torch.Tensor, dict[str, Any]]]: Loss and metrics for each batch.
+                list -> tuples
+                Test Batches -> (loss, metrics)
+        """
+        if test_dataloader is not None:
+            self.test_dataloader = test_dataloader
+
+        self.model.eval()
+        test_loss_metrics = []
+
+        for batch in self.batch_iter(test_dataloader, self.num_training_batches):
+            self.on_test_batch_start(batch)
+            loss_metrics = self.run_test_batch(batch)
+            test_loss_metrics.append(loss_metrics)
+            self.on_test_batch_end(loss_metrics)
+
+        return test_loss_metrics
+
+    @BaseTrainer.callback("test_batch")
+    def run_test_batch(
+        self, batch: tuple[torch.Tensor, ...]
+    ) -> tuple[torch.Tensor, dict[str, Any]]:
+        """
+        Runs a single test batch.
+
+        Args:
+            batch (tuple[torch.Tensor, ...]): Batch of data from the DataLoader.
+
+        Returns:
+            tuple[torch.Tensor, dict[str, Any]]: Loss and metrics for the batch.
+        """
+        with torch.no_grad():
+            loss_metrics = self.loss_fn(self.model, batch)
+            return self.modify_batch_end_loss_metrics(loss_metrics)
+
+    def batch_iter(
+        self,
+        dataloader: DataLoader,
+        num_batches: int,
+    ) -> Iterable[tuple[torch.Tensor, ...] | None]:
+        """
+        Yields batches from the provided dataloader.
+
+        Args:
+            dataloader ([DataLoader]): The dataloader to iterate over.
+            num_batches (int): The maximum number of batches to yield.
+
+        Yields:
+            Iterable[tuple[torch.Tensor, ...] | None]: A batch from the dataloader moved to the
+                specified device and dtype.
+        """
+        if dataloader is None:
+            for _ in range(num_batches):
+                yield None
+        else:
+            for batch in islice(dataloader, num_batches):
+                # batch is moved to device inside optimize step
+                # batch = data_to_device(batch, device=self.device, dtype=self.data_dtype)
+                yield batch
+
+    def modify_batch_end_loss_metrics(
+        self, loss_metrics: tuple[torch.Tensor, dict[str, Any]]
+    ) -> tuple[torch.Tensor, dict[str, Any]]:
+        """
+        Modifies the loss and metrics at the end of batch for proper logging.
+
+        All metrics are prefixed with the proper state of the training process
+        - "train_" or "val_" or "test_"
+        A "{state}_loss" is added to metrics.
+
+        Args:
+            loss_metrics (tuple[torch.Tensor, dict[str, Any]]): Original loss and metrics.
+
+        Returns:
+            tuple[None | torch.Tensor, dict[str, Any]]: Modified loss and metrics.
+        """
+        for phase in ["train", "val", "test"]:
+            if phase in self.training_stage:
+                loss, metrics = loss_metrics
+                updated_metrics = {f"{phase}_{key}": value for key, value in metrics.items()}
+                updated_metrics[f"{phase}_loss"] = loss
+                return loss, updated_metrics
+        return loss_metrics
+
+    def build_optimize_result(
+        self,
+        result: None
+        | tuple[torch.Tensor, dict[Any, Any]]
+        | list[tuple[torch.Tensor, dict[Any, Any]]]
+        | list[list[tuple[torch.Tensor, dict[Any, Any]]]],
+    ) -> None:
+        """
+        Builds and stores the optimization result by calculating the average loss and metrics.
+
+        Result (or loss_metrics) can have multiple formats:
+        - `None` Indicates no loss or metrics data is provided.
+        - `tuple[torch.Tensor, dict[str, Any]]` A single tuple containing the loss tensor
+            and metrics dictionary - at the end of batch.
+        - `list[tuple[torch.Tensor, dict[str, Any]]]` A list of tuples for
+            multiple batches.
+        - `list[list[tuple[torch.Tensor, dict[str, Any]]]]` A list of lists of tuples,
+            where each inner list represents metrics across multiple batches within an epoch.
+
+        Args:
+            result: (None |
+                    tuple[torch.Tensor, dict[Any, Any]] |
+                    list[tuple[torch.Tensor, dict[Any, Any]]] |
+                    list[list[tuple[torch.Tensor, dict[Any, Any]]]])
+                    The loss and metrics data, which can have multiple formats
+
+        Returns:
+            None: This method does not return anything. It sets `self.opt_result` with
+                the computed average loss and metrics.
+        """
+        loss_metrics = result
+        if loss_metrics is None:
+            loss = None
+            metrics: dict[Any, Any] = {}
+        elif isinstance(loss_metrics, tuple):
+            # Single tuple case
+            loss, metrics = loss_metrics
+        else:
+            last_epoch: list[tuple[torch.Tensor, dict[Any, Any]]] = []
+            if isinstance(loss_metrics, list):
+                # Check if it's a list of tuples
+                if all(isinstance(item, tuple) for item in loss_metrics):
+                    last_epoch = cast(list[tuple[torch.Tensor, dict[Any, Any]]], loss_metrics)
+                # Check if it's a list of lists of tuples
+                elif all(isinstance(item, list) for item in loss_metrics):
+                    last_epoch = cast(
+                        list[tuple[torch.Tensor, dict[Any, Any]]],
+                        loss_metrics[-1] if loss_metrics else [],
+                    )
+                else:
+                    raise ValueError(
+                        "Invalid format for result: Expected None, tuple, list of tuples,"
+                        " or list of lists of tuples."
+                    )
+
+            if not last_epoch:
+                loss, metrics = None, {}
+            else:
+                # Compute the average loss over the batches
+                loss_tensor = torch.stack([loss_batch for loss_batch, _ in last_epoch])
+                avg_loss = loss_tensor.mean()
+
+                # Collect and average metrics for all batches
+                metric_keys = last_epoch[0][1].keys()
+                metrics_stacked: dict = {key: [] for key in metric_keys}
+
+                for _, metrics_batch in last_epoch:
+                    for key in metric_keys:
+                        value = metrics_batch[key]
+                        metrics_stacked[key].append(value)
+
+                avg_metrics = {key: torch.stack(metrics_stacked[key]).mean() for key in metric_keys}
+
+                loss, metrics = avg_loss, avg_metrics
+
+        # Store the optimization result
+        self.opt_result = OptimizeResult(
+            self.current_epoch, self.model_old, self.optimizer_old, loss, metrics
+        )
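The validation and test paths above call `self.loss_fn(self.model, batch)` and expect a `(loss, metrics)` tuple back, and `build_optimize_result` later aggregates the metric values with `torch.stack`, so metric values should themselves be tensors. Below is a minimal sketch of a custom callable loss compatible with that contract, assuming batches are the `(x, y)` pairs produced by `to_dataloader` and that the gradient-based path's `optimize_step` forwards the same two arguments to `loss_fn`; the function name and the scaling factor are illustrative, not part of the package:

```python
import torch


def scaled_mse_loss(
    model: torch.nn.Module, batch: tuple[torch.Tensor, torch.Tensor]
) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
    # (model, batch) -> (loss, metrics): the shape expected by run_val_batch / run_test_batch above.
    x, y = batch
    pred = model(x)
    mse = torch.mean((pred - y) ** 2)
    loss = 0.5 * mse  # illustrative scaling only
    # Metric values stay tensors so build_optimize_result can torch.stack them.
    return loss, {"mse": mse.detach()}


# Usage sketch (model, optimizer, config as in the docstring example above):
# trainer = Trainer(model=model, optimizer=optimizer, config=config, loss_fn=scaled_mse_loss)
```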
qadence/model.py
CHANGED
@@ -514,7 +514,7 @@ class QuantumModel(nn.Module):
         if isinstance(file_path, str):
             file_path = Path(file_path)
         if os.path.isdir(file_path):
-            from qadence.ml_tools.saveload import get_latest_checkpoint_name
+            from qadence.ml_tools.callbacks.saveload import get_latest_checkpoint_name
 
             file_path = file_path / get_latest_checkpoint_name(file_path, "model")
 
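The only change to `QuantumModel` is the import path of the checkpoint helper, following the `saveload.py → callbacks/saveload.py` move in the file list above. Code that called the helper directly needs the same one-line update; here is a hedged sketch mirroring the snippet above (the directory name is illustrative):

```python
from pathlib import Path

from qadence.ml_tools.callbacks.saveload import get_latest_checkpoint_name

# Resolve the most recent "model" checkpoint in a training output folder,
# as QuantumModel does above when given a directory path.
ckpt_dir = Path("./runs/qnn_experiment")  # illustrative location
latest_ckpt = ckpt_dir / get_latest_checkpoint_name(ckpt_dir, "model")
print(latest_ckpt)
```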