Trajectree 0.0.0__py3-none-any.whl → 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trajectree/__init__.py +3 -0
- trajectree/fock_optics/devices.py +1 -1
- trajectree/fock_optics/light_sources.py +2 -2
- trajectree/fock_optics/measurement.py +3 -3
- trajectree/fock_optics/utils.py +6 -6
- trajectree/quimb/docs/_pygments/_pygments_dark.py +118 -0
- trajectree/quimb/docs/_pygments/_pygments_light.py +118 -0
- trajectree/quimb/docs/conf.py +158 -0
- trajectree/quimb/docs/examples/ex_mpi_expm_evo.py +62 -0
- trajectree/quimb/quimb/__init__.py +507 -0
- trajectree/quimb/quimb/calc.py +1491 -0
- trajectree/quimb/quimb/core.py +2279 -0
- trajectree/quimb/quimb/evo.py +712 -0
- trajectree/quimb/quimb/experimental/__init__.py +0 -0
- trajectree/quimb/quimb/experimental/autojittn.py +129 -0
- trajectree/quimb/quimb/experimental/belief_propagation/__init__.py +109 -0
- trajectree/quimb/quimb/experimental/belief_propagation/bp_common.py +397 -0
- trajectree/quimb/quimb/experimental/belief_propagation/d1bp.py +316 -0
- trajectree/quimb/quimb/experimental/belief_propagation/d2bp.py +653 -0
- trajectree/quimb/quimb/experimental/belief_propagation/hd1bp.py +571 -0
- trajectree/quimb/quimb/experimental/belief_propagation/hv1bp.py +775 -0
- trajectree/quimb/quimb/experimental/belief_propagation/l1bp.py +316 -0
- trajectree/quimb/quimb/experimental/belief_propagation/l2bp.py +537 -0
- trajectree/quimb/quimb/experimental/belief_propagation/regions.py +194 -0
- trajectree/quimb/quimb/experimental/cluster_update.py +286 -0
- trajectree/quimb/quimb/experimental/merabuilder.py +865 -0
- trajectree/quimb/quimb/experimental/operatorbuilder/__init__.py +15 -0
- trajectree/quimb/quimb/experimental/operatorbuilder/operatorbuilder.py +1631 -0
- trajectree/quimb/quimb/experimental/schematic.py +7 -0
- trajectree/quimb/quimb/experimental/tn_marginals.py +130 -0
- trajectree/quimb/quimb/experimental/tnvmc.py +1483 -0
- trajectree/quimb/quimb/gates.py +36 -0
- trajectree/quimb/quimb/gen/__init__.py +2 -0
- trajectree/quimb/quimb/gen/operators.py +1167 -0
- trajectree/quimb/quimb/gen/rand.py +713 -0
- trajectree/quimb/quimb/gen/states.py +479 -0
- trajectree/quimb/quimb/linalg/__init__.py +6 -0
- trajectree/quimb/quimb/linalg/approx_spectral.py +1109 -0
- trajectree/quimb/quimb/linalg/autoblock.py +258 -0
- trajectree/quimb/quimb/linalg/base_linalg.py +719 -0
- trajectree/quimb/quimb/linalg/mpi_launcher.py +397 -0
- trajectree/quimb/quimb/linalg/numpy_linalg.py +244 -0
- trajectree/quimb/quimb/linalg/rand_linalg.py +514 -0
- trajectree/quimb/quimb/linalg/scipy_linalg.py +293 -0
- trajectree/quimb/quimb/linalg/slepc_linalg.py +892 -0
- trajectree/quimb/quimb/schematic.py +1518 -0
- trajectree/quimb/quimb/tensor/__init__.py +401 -0
- trajectree/quimb/quimb/tensor/array_ops.py +610 -0
- trajectree/quimb/quimb/tensor/circuit.py +4824 -0
- trajectree/quimb/quimb/tensor/circuit_gen.py +411 -0
- trajectree/quimb/quimb/tensor/contraction.py +336 -0
- trajectree/quimb/quimb/tensor/decomp.py +1255 -0
- trajectree/quimb/quimb/tensor/drawing.py +1646 -0
- trajectree/quimb/quimb/tensor/fitting.py +385 -0
- trajectree/quimb/quimb/tensor/geometry.py +583 -0
- trajectree/quimb/quimb/tensor/interface.py +114 -0
- trajectree/quimb/quimb/tensor/networking.py +1058 -0
- trajectree/quimb/quimb/tensor/optimize.py +1818 -0
- trajectree/quimb/quimb/tensor/tensor_1d.py +4778 -0
- trajectree/quimb/quimb/tensor/tensor_1d_compress.py +1854 -0
- trajectree/quimb/quimb/tensor/tensor_1d_tebd.py +662 -0
- trajectree/quimb/quimb/tensor/tensor_2d.py +5954 -0
- trajectree/quimb/quimb/tensor/tensor_2d_compress.py +96 -0
- trajectree/quimb/quimb/tensor/tensor_2d_tebd.py +1230 -0
- trajectree/quimb/quimb/tensor/tensor_3d.py +2869 -0
- trajectree/quimb/quimb/tensor/tensor_3d_tebd.py +46 -0
- trajectree/quimb/quimb/tensor/tensor_approx_spectral.py +60 -0
- trajectree/quimb/quimb/tensor/tensor_arbgeom.py +3237 -0
- trajectree/quimb/quimb/tensor/tensor_arbgeom_compress.py +565 -0
- trajectree/quimb/quimb/tensor/tensor_arbgeom_tebd.py +1138 -0
- trajectree/quimb/quimb/tensor/tensor_builder.py +5411 -0
- trajectree/quimb/quimb/tensor/tensor_core.py +11179 -0
- trajectree/quimb/quimb/tensor/tensor_dmrg.py +1472 -0
- trajectree/quimb/quimb/tensor/tensor_mera.py +204 -0
- trajectree/quimb/quimb/utils.py +892 -0
- trajectree/quimb/tests/__init__.py +0 -0
- trajectree/quimb/tests/test_accel.py +501 -0
- trajectree/quimb/tests/test_calc.py +788 -0
- trajectree/quimb/tests/test_core.py +847 -0
- trajectree/quimb/tests/test_evo.py +565 -0
- trajectree/quimb/tests/test_gen/__init__.py +0 -0
- trajectree/quimb/tests/test_gen/test_operators.py +361 -0
- trajectree/quimb/tests/test_gen/test_rand.py +296 -0
- trajectree/quimb/tests/test_gen/test_states.py +261 -0
- trajectree/quimb/tests/test_linalg/__init__.py +0 -0
- trajectree/quimb/tests/test_linalg/test_approx_spectral.py +368 -0
- trajectree/quimb/tests/test_linalg/test_base_linalg.py +351 -0
- trajectree/quimb/tests/test_linalg/test_mpi_linalg.py +127 -0
- trajectree/quimb/tests/test_linalg/test_numpy_linalg.py +84 -0
- trajectree/quimb/tests/test_linalg/test_rand_linalg.py +134 -0
- trajectree/quimb/tests/test_linalg/test_slepc_linalg.py +283 -0
- trajectree/quimb/tests/test_tensor/__init__.py +0 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/__init__.py +0 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/test_d1bp.py +39 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/test_d2bp.py +67 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/test_hd1bp.py +64 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/test_hv1bp.py +51 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/test_l1bp.py +142 -0
- trajectree/quimb/tests/test_tensor/test_belief_propagation/test_l2bp.py +101 -0
- trajectree/quimb/tests/test_tensor/test_circuit.py +816 -0
- trajectree/quimb/tests/test_tensor/test_contract.py +67 -0
- trajectree/quimb/tests/test_tensor/test_decomp.py +40 -0
- trajectree/quimb/tests/test_tensor/test_mera.py +52 -0
- trajectree/quimb/tests/test_tensor/test_optimizers.py +488 -0
- trajectree/quimb/tests/test_tensor/test_tensor_1d.py +1171 -0
- trajectree/quimb/tests/test_tensor/test_tensor_2d.py +606 -0
- trajectree/quimb/tests/test_tensor/test_tensor_2d_tebd.py +144 -0
- trajectree/quimb/tests/test_tensor/test_tensor_3d.py +123 -0
- trajectree/quimb/tests/test_tensor/test_tensor_arbgeom.py +226 -0
- trajectree/quimb/tests/test_tensor/test_tensor_builder.py +441 -0
- trajectree/quimb/tests/test_tensor/test_tensor_core.py +2066 -0
- trajectree/quimb/tests/test_tensor/test_tensor_dmrg.py +388 -0
- trajectree/quimb/tests/test_tensor/test_tensor_spectral_approx.py +63 -0
- trajectree/quimb/tests/test_tensor/test_tensor_tebd.py +270 -0
- trajectree/quimb/tests/test_utils.py +85 -0
- trajectree/trajectory.py +2 -2
- {trajectree-0.0.0.dist-info → trajectree-0.0.1.dist-info}/METADATA +2 -2
- trajectree-0.0.1.dist-info/RECORD +126 -0
- trajectree-0.0.0.dist-info/RECORD +0 -16
- {trajectree-0.0.0.dist-info → trajectree-0.0.1.dist-info}/WHEEL +0 -0
- {trajectree-0.0.0.dist-info → trajectree-0.0.1.dist-info}/licenses/LICENSE +0 -0
- {trajectree-0.0.0.dist-info → trajectree-0.0.1.dist-info}/top_level.txt +0 -0
trajectree/quimb/quimb/tensor/optimize.py
@@ -0,0 +1,1818 @@
"""Support for optimizing tensor networks using automatic differentiation to
automatically derive gradients for input to scipy optimizers.
"""

import functools
import importlib.util
import re
import warnings
from collections.abc import Iterable

import numpy as np
import tqdm
from autoray import astype, get_dtype_name, to_numpy

from ..core import prod
from ..utils import (
    default_to_neutral_style,
    ensure_dict,
    tree_flatten,
    tree_map,
    tree_unflatten,
)
from .contraction import contract_backend
from .interface import get_jax
from .tensor_core import (
    TensorNetwork,
    tags_to_oset,
)

if importlib.util.find_spec("jax") is not None:
    _DEFAULT_BACKEND = "jax"
elif importlib.util.find_spec("tensorflow") is not None:
    _DEFAULT_BACKEND = "tensorflow"
elif importlib.util.find_spec("torch") is not None:
    _DEFAULT_BACKEND = "torch"
else:
    _DEFAULT_BACKEND = "autograd"


_REAL_CONVERSION = {
    "float32": "float32",
    "float64": "float64",
    "complex64": "float32",
    "complex128": "float64",
}

_COMPLEX_CONVERSION = {
    "float32": "complex64",
    "float64": "complex128",
    "complex64": "complex64",
    "complex128": "complex128",
}


class ArrayInfo:
    """Simple container for recording size and dtype information about arrays."""

    __slots__ = (
        "shape",
        "size",
        "dtype",
        "iscomplex",
        "real_size",
        "equivalent_real_type",
        "equivalent_complex_type",
    )

    def __init__(self, array):
        self.shape = array.shape
        self.size = prod(self.shape)
        self.dtype = get_dtype_name(array)
        self.equivalent_real_type = _REAL_CONVERSION[self.dtype]
        self.equivalent_complex_type = _COMPLEX_CONVERSION[self.dtype]
        self.iscomplex = "complex" in self.dtype
        self.real_size = self.size * (2 if self.iscomplex else 1)

    def __repr__(self):
        return (
            "ArrayInfo("
            f"shape={self.shape}, "
            f"size={self.size}, "
            f"dtype={self.dtype}"
            ")"
        )


class Vectorizer:
    """Object for mapping back and forth between any pytree of mixed
    real/complex n-dimensional arrays to a single, real, double precision numpy
    vector, as required by ``scipy.optimize`` routines.

    Parameters
    ----------
    tree : pytree of array
        Any nested container of arrays, which will be flattened and packed into
        a single float64 vector.
    is_leaf : callable, optional
        A function which takes a single argument and returns ``True`` if it is
        a leaf node in the tree and should be extracted, ``False`` otherwise.
        Defaults to everything that is not a tuple, list or dict.
    """

    def __init__(self, tree):
        arrays = []
        self.infos = []
        self.d = 0

        def extracter(x):
            arrays.append(x)
            info = ArrayInfo(x)
            self.infos.append(info)
            self.d += info.real_size
            return info

        self.ref_tree = tree_map(extracter, tree)
        self.pack(arrays)

    def pack(self, tree, name="vector"):
        """Take ``arrays`` and pack their values into attribute `.{name}`, by
        default `.vector`.
        """
        arrays = tree_flatten(tree)

        # create the vector if it doesn't exist yet
        if not hasattr(self, name):
            setattr(self, name, np.empty(self.d, "float64"))
        x = getattr(self, name)

        i = 0
        for array, info in zip(arrays, self.infos):
            if not isinstance(array, np.ndarray):
                array = to_numpy(array)
            # flatten
            if info.iscomplex:
                # view as real array of double the length
                real_view = array.reshape(-1).view(info.equivalent_real_type)
            else:
                real_view = array.reshape(-1)
            # pack into our vector
            f = i + info.real_size
            x[i:f] = real_view
            i = f

        return x

    def unpack(self, vector=None):
        """Turn the single, flat ``vector`` into a sequence of arrays."""
        if vector is None:
            vector = self.vector

        i = 0
        arrays = []
        for info in self.infos:
            # get the linear slice
            f = i + info.real_size
            array = vector[i:f]
            i = f
            if info.iscomplex:
                # view as complex array of half the length
                array = array.view(np.complex128)
            # reshape (inplace)
            array.shape = info.shape
            if get_dtype_name(array) != info.dtype:
                # cast as original dtype
                array = astype(array, info.dtype)
            arrays.append(array)

        return tree_unflatten(
            arrays, self.ref_tree, lambda x: isinstance(x, ArrayInfo)
        )


_VARIABLE_TAG = "__VARIABLE{}__"
variable_finder = re.compile(r"__VARIABLE(\d+)__")


def _parse_opt_in(tn, tags, shared_tags, to_constant):
    """Parse a tensor network where tensors are assumed to be constant unless
    tagged.
    """
    tn_ag = tn.copy()
    variables = []

    # tags where each individual tensor should get a separate variable
    individual_tags = tags - shared_tags

    # handle tagged tensors that are not shared
    for t in tn_ag.select_tensors(individual_tags, "any"):
        # append the raw data but mark the corresponding tensor
        # for reinsertion
        data = t.get_params()
        variables.append(data)
        t.add_tag(_VARIABLE_TAG.format(len(variables) - 1))

    # handle shared tags
    for tag in shared_tags:
        var_name = _VARIABLE_TAG.format(len(variables))
        test_data = None

        for t in tn_ag.select_tensors(tag):
            data = t.get_params()

            # detect that this tensor is already variable tagged and skip
            # if it is
            if any(variable_finder.match(tag) for tag in t.tags):
                warnings.warn(
                    "TNOptimizer warning, tensor tagged with"
                    " multiple `tags` or `shared_tags`."
                )
                continue

            if test_data is None:
                # create variable and store data
                variables.append(data)
                test_data = data
            else:
                # check that the shape of the variable's data matches the
                # data of this new tensor
                if test_data.shape != data.shape:
                    raise ValueError(
                        "TNOptimizer error, a `shared_tags` tag "
                        "covers tensors with different numbers of"
                        " params."
                    )

            # mark the corresponding tensor for reinsertion
            t.add_tag(var_name)

    # iterate over tensors which *don't* have any of the given tags
    for t in tn_ag.select_tensors(tags, which="!any"):
        t.modify(apply=to_constant)

    return tn_ag, variables


def _parse_opt_out(
    tn,
    constant_tags,
    to_constant,
):
    """Parse a tensor network where tensors are assumed to be variables unless
    tagged.
    """
    tn_ag = tn.copy()
    variables = []

    for t in tn_ag:
        if t.tags & constant_tags:
            t.modify(apply=to_constant)
            continue

        # append the raw data but mark the corresponding tensor
        # for reinsertion
        data = t.get_params()
        variables.append(data)
        t.add_tag(_VARIABLE_TAG.format(len(variables) - 1))

    return tn_ag, variables


def _parse_pytree_to_backend(x, to_constant):
    """Parse a arbitrary pytree, collecting variables. There is not opting in
    or out, all networks, tensors and raw arrays are considered variables.
    """
    variables = []

    def collect(x):
        if hasattr(x, "get_params"):

            if hasattr(x, "apply_to_arrays"):
                x.apply_to_arrays(to_constant)

            # variables can be a pytree
            variables.append(x.get_params())
            return x.copy()
        else:
            # raw array
            variables.append(x)
            return None

    ref = tree_map(collect, x)
    return ref, variables


def parse_network_to_backend(
    tn,
    to_constant,
    tags=None,
    shared_tags=None,
    constant_tags=None,
):
    """
    Parse tensor network to:

    - identify the dimension of the optimisation space and the initial
      point of the optimisation from the current values in the tensor
      network,
    - add variable tags to individual tensors so that optimisation vector
      values can be efficiently reinserted into the tensor network.

    There are two different modes:

    - 'opt in' : `tags` (and optionally `shared_tags`) are specified and
      only these tensor tags will be optimised over. In this case
      `constant_tags` is ignored if it is passed,
    - 'opt out' : `tags` is not specified. In this case all tensors will be
      optimised over, unless they have one of `constant_tags` tags.

    Parameters
    ----------
    tn : TensorNetwork
        The initial tensor network to parse.
    to_constant : Callable
        Function that fixes a tensor as constant.
    tags : str, or sequence of str, optional
        Set of opt-in tags to optimise.
    shared_tags : str, or sequence of str, optional
        Subset of opt-in tags to joint optimise i.e. all tensors with tags in
        shared_tags will correspond to the same optimisation variables.
    constant_tags : str, or sequence of str, optional
        Set of opt-out tags if `tags` not passed.

    Returns
    -------
    tn_ag : TensorNetwork
        Tensor network tagged for reinsertion of optimisation variable values.
    variables : list
        List of variables extracted from ``tn``.
    """
    tags = tags_to_oset(tags)
    shared_tags = tags_to_oset(shared_tags)
    constant_tags = tags_to_oset(constant_tags)

    if not isinstance(tn, TensorNetwork):
        if any((tags, shared_tags, constant_tags)):
            raise ValueError(
                "TNOptimizer error, if `tags`, `shared_tags`, or"
                " `constant_tags` are specified then `tn` must be a"
                " TensorNetwork, rather than a general pytree."
            )
        return _parse_pytree_to_backend(tn, to_constant=to_constant)

    if tags | shared_tags:
        # opt_in
        if not (tags & shared_tags) == shared_tags:
            tags = tags | shared_tags
            warnings.warn(
                "TNOptimizer warning, some `shared_tags` are missing"
                " from `tags`. Automatically adding these missing"
                " `shared_tags` to `tags`."
            )
        if constant_tags:
            warnings.warn(
                "TNOptimizer warning, if `tags` or `shared_tags` are"
                " specified then `constant_tags` is ignored - "
                "consider instead untagging those tensors."
            )
        return _parse_opt_in(
            tn,
            tags,
            shared_tags,
            to_constant,
        )

    # opt-out
    return _parse_opt_out(
        tn,
        constant_tags,
        to_constant,
    )


def _inject_variables_pytree(arrays, tree):
    arrays = iter(arrays)

    def inject(x):
        if hasattr(x, "set_params"):
            x.set_params(next(arrays))
            return x
        else:
            return next(arrays)

    return tree_map(inject, tree)


def inject_variables(arrays, tn):
    """Given the list of optimized variables ``arrays`` and the target tensor
    network or pytree ``tn``, inject the variables back in.
    """
    if not isinstance(tn, TensorNetwork):
        return _inject_variables_pytree(arrays, tn)

    tn = tn.copy()
    for t in tn:
        for tag in t.tags:
            match = variable_finder.match(tag)
            if match is not None:
                i = int(match.groups(1)[0])
                t.set_params(arrays[i])
                break
    return tn


def convert_raw_arrays(x, f):
    """Given a ``TensorNetwork``, ``Tensor``, or other possibly structured raw
    array, return a copy where the underyling data has had ``f``
    applied to it. Structured raw arrays should implement the
    ``tree = get_params()`` and ``set_params(tree)`` methods which get or set
    their underlying data using an arbitrary pytree.
    """
    try:
        # Tensor, TensorNetwork...
        x = x.copy()
        x.apply_to_arrays(f)
        return x
    except AttributeError:
        pass

    try:
        # raw structured arrays that provide the {get|set}_params interface
        x = x.copy()
        x.set_params(tree_map(f, x.get_params()))
        return x
    except AttributeError:
        pass

    # other raw arrays
    return f(x)


def convert_variables_to_numpy(x):
    if hasattr(x, "apply_to_arrays"):
        x.apply_to_arrays(to_numpy)
        return x
    elif hasattr(x, "get_params"):
        old_params = x.get_params()
        new_params = tree_map(to_numpy, old_params)
        x.set_params(new_params)
        return x
    else:
        return to_numpy(x)


@functools.lru_cache(1)
def get_autograd():
    import autograd

    return autograd


class AutoGradHandler:
    def __init__(self, device="cpu"):
        if device != "cpu":
            raise ValueError(
                "`autograd` currently is only " "backed by cpu, numpy arrays."
            )

    def to_variable(self, x):
        return np.asarray(x)

    def to_constant(self, x):
        return np.asarray(x)

    def setup_fn(self, fn):
        autograd = get_autograd()
        self._backend_fn = fn
        self._value_and_grad = autograd.value_and_grad(fn)
        self._hvp = autograd.hessian_vector_product(fn)

    def value(self, arrays):
        return self._backend_fn(arrays)

    def value_and_grad(self, arrays):
        loss, grads = self._value_and_grad(arrays)
        return loss, tree_map(lambda x: x.conj(), grads)


class JaxHandler:
    def __init__(self, jit_fn=True, device=None):
        self.jit_fn = jit_fn
        self.device = device

    def to_variable(self, x):
        jax = get_jax()
        return jax.numpy.asarray(x)

    def to_constant(self, x):
        jax = get_jax()
        return jax.numpy.asarray(x)

    def setup_fn(self, fn):
        jax = get_jax()
        if self.jit_fn:
            self._backend_fn = jax.jit(fn, backend=self.device)
            self._value_and_grad = jax.jit(
                jax.value_and_grad(fn), backend=self.device
            )
        else:
            self._backend_fn = fn
            self._value_and_grad = jax.value_and_grad(fn)

        self._setup_hessp(fn)

    def _setup_hessp(self, fn):
        jax = get_jax()

        def hvp(primals, tangents):
            return jax.jvp(jax.grad(fn), (primals,), (tangents,))[1]

        if self.jit_fn:
            hvp = jax.jit(hvp, device=self.device)

        self._hvp = hvp

    def value(self, arrays):
        jax_arrays = tree_map(self.to_constant, arrays)
        return to_numpy(self._backend_fn(jax_arrays))

    def value_and_grad(self, arrays):
        loss, grads = self._value_and_grad(arrays)
        return (
            loss,
            tree_map(lambda x: to_numpy(x.conj()), grads),
        )

    def hessp(self, primals, tangents):
        jax_arrays = self._hvp(primals, tangents)
        return tree_map(to_numpy, jax_arrays)


@functools.lru_cache(1)
def get_tensorflow():
    import tensorflow

    return tensorflow


class TensorFlowHandler:
    def __init__(
        self,
        jit_fn=False,
        autograph=False,
        experimental_compile=False,
        device=None,
    ):
        self.jit_fn = jit_fn
        self.autograph = autograph
        self.experimental_compile = experimental_compile
        self.device = device

    def to_variable(self, x):
        tf = get_tensorflow()
        if self.device is None:
            return tf.Variable(x)
        with tf.device(self.device):
            return tf.Variable(x)

    def to_constant(self, x):
        tf = get_tensorflow()
        if self.device is None:
            return tf.constant(x)
        with tf.device(self.device):
            return tf.constant(x)

    def setup_fn(self, fn):
        tf = get_tensorflow()
        if self.jit_fn:
            self._backend_fn = tf.function(
                fn,
                autograph=self.autograph,
                experimental_compile=self.experimental_compile,
            )
        else:
            self._backend_fn = fn

    def value(self, arrays):
        tf_arrays = tree_map(self.to_constant, arrays)
        return to_numpy(self._backend_fn(tf_arrays))

    def value_and_grad(self, arrays):
        tf = get_tensorflow()

        variables = tree_map(self.to_variable, arrays)

        with tf.GradientTape() as t:
            result = self._backend_fn(variables)

        tf_grads = t.gradient(
            result,
            variables,
            # want to return zeros for unconnected gradients
            unconnected_gradients=tf.UnconnectedGradients.ZERO,
        )

        grads = tree_map(to_numpy, tf_grads)
        loss = to_numpy(result)
        return loss, grads


@functools.lru_cache(1)
def get_torch():
    import torch

    return torch


class TorchHandler:
    def __init__(self, jit_fn=False, device=None):
        torch = get_torch()
        self.jit_fn = jit_fn
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device

    def to_variable(self, x):
        torch = get_torch()
        return torch.tensor(x).to(self.device).requires_grad_()

    def to_constant(self, x):
        torch = get_torch()
        return torch.tensor(x).to(self.device)

    def setup_fn(self, fn):
        self._fn = fn
        self._backend_fn = None

    def _setup_backend_fn(self, arrays):
        torch = get_torch()
        if self.jit_fn:
            example_inputs = (tree_map(self.to_variable, arrays),)
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    action="ignore",
                    message=".*can't record the data flow of Python values.*",
                )
                self._backend_fn = torch.jit.trace(
                    self._fn, example_inputs=example_inputs
                )
        else:
            self._backend_fn = self._fn

    def value(self, arrays):
        if self._backend_fn is None:
            self._setup_backend_fn(arrays)
        torch_arrays = tree_map(self.to_constant, arrays)
        return to_numpy(self._backend_fn(torch_arrays))

    def value_and_grad(self, arrays):
        if self._backend_fn is None:
            self._setup_backend_fn(arrays)

        variables = tree_map(self.to_variable, arrays)
        result = self._backend_fn(variables)

        def get_gradient_from_torch(t):
            if t.grad is None:
                return np.zeros(t.shape, dtype=get_dtype_name(t))
            return to_numpy(t.grad).conj()

        result.backward()
        grads = tree_map(get_gradient_from_torch, variables)

        loss = to_numpy(result)
        return loss, grads


_BACKEND_HANDLERS = {
    "numpy": AutoGradHandler,
    "autograd": AutoGradHandler,
    "jax": JaxHandler,
    "tensorflow": TensorFlowHandler,
    "torch": TorchHandler,
}


class MultiLossHandler:
    def __init__(self, autodiff_backend, executor=None, **backend_opts):
        self.autodiff_backend = autodiff_backend
        self.backend_opts = backend_opts
        self.executor = executor

        # start just with one, as we don't don't know how many functions yet
        h0 = _BACKEND_HANDLERS[autodiff_backend](**backend_opts)
        self.handlers = [h0]
        # ... but we do need access to `to_constant`
        self.to_constant = h0.to_constant

    def setup_fn(self, funcs):
        fn0, *fns = funcs
        self.handlers[0].setup_fn(fn0)
        for fn in fns:
            h = _BACKEND_HANDLERS[self.autodiff_backend](**self.backend_opts)
            h.setup_fn(fn)
            self.handlers.append(h)

    def _value_seq(self, arrays):
        return sum(h.value(arrays) for h in self.handlers)

    def _value_par_seq(self, arrays):
        futures = [
            self.executor.submit(h.value, arrays) for h in self.handlers
        ]
        return sum(f.result() for f in futures)

    def value(self, arrays):
        if self.executor is not None:
            return self._value_par(arrays)
        return self._value_seq(arrays)

    def _value_and_grad_seq(self, arrays):
        h0, *hs = self.handlers
        loss, grads = h0.value_and_grad(arrays)
        # need to make arrays writeable for efficient inplace sum
        grads = list(map(np.array, grads))
        for h in hs:
            loss_i, grads_i = h.value_and_grad(arrays)
            loss = loss + loss_i
            for i, g_i in enumerate(grads_i):
                grads[i] += g_i
        return loss, grads

    def _value_and_grad_par(self, arrays):
        futures = [
            self.executor.submit(h.value_and_grad, arrays)
            for h in self.handlers
        ]
        results = (f.result() for f in futures)

        # get first result
        loss, grads = next(results)
        grads = list(map(np.array, grads))

        # process remaining results
        for loss_i, grads_i in results:
            loss = loss + loss_i
            for i, g_i in enumerate(grads_i):
                grads[i] += g_i

        return loss, grads

    def value_and_grad(self, arrays):
        if self.executor is not None:
            return self._value_and_grad_par(arrays)
        return self._value_and_grad_seq(arrays)


class SGD:
    """Stateful ``scipy.optimize.minimize`` compatible implementation of
    stochastic gradient descent with momentum.

    Adapted from ``autograd/misc/optimizers.py``.
    """

    def __init__(self):
        from scipy.optimize import OptimizeResult

        self.OptimizeResult = OptimizeResult
        self._i = 0
        self._velocity = None

    def get_velocity(self, x):
        if self._velocity is None:
            self._velocity = np.zeros_like(x)
        return self._velocity

    def __call__(
        self,
        fun,
        x0,
        jac,
        args=(),
        learning_rate=0.1,
        mass=0.9,
        maxiter=1000,
        callback=None,
        bounds=None,
        **kwargs,
    ):
        x = x0
        velocity = self.get_velocity(x)

        for _ in range(maxiter):
            self._i += 1

            g = jac(x)

            if callback and callback(x):
                break

            velocity = mass * velocity - (1.0 - mass) * g
            x = x + learning_rate * velocity

            if bounds is not None:
                x = np.clip(x, bounds[:, 0], bounds[:, 1])

        # save for restart
        self._velocity = velocity

        return self.OptimizeResult(
            x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True
        )


class RMSPROP:
    """Stateful ``scipy.optimize.minimize`` compatible implementation of
    root mean squared prop: See Adagrad paper for details.

    Adapted from ``autograd/misc/optimizers.py``.
    """

    def __init__(self):
        from scipy.optimize import OptimizeResult

        self.OptimizeResult = OptimizeResult
        self._i = 0
        self._avg_sq_grad = None

    def get_avg_sq_grad(self, x):
        if self._avg_sq_grad is None:
            self._avg_sq_grad = np.ones_like(x)
        return self._avg_sq_grad

    def __call__(
        self,
        fun,
        x0,
        jac,
        args=(),
        learning_rate=0.1,
        gamma=0.9,
        eps=1e-8,
        maxiter=1000,
        callback=None,
        bounds=None,
        **kwargs,
    ):
        x = x0
        avg_sq_grad = self.get_avg_sq_grad(x)

        for _ in range(maxiter):
            self._i += 1

            g = jac(x)

            if callback and callback(x):
                break

            avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
            x = x - learning_rate * g / (np.sqrt(avg_sq_grad) + eps)

            if bounds is not None:
                x = np.clip(x, bounds[:, 0], bounds[:, 1])

        # save for restart
        self._avg_sq_grad = avg_sq_grad

        return self.OptimizeResult(
            x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True
        )


class ADAM:
    """Stateful ``scipy.optimize.minimize`` compatible implementation of
    ADAM - http://arxiv.org/pdf/1412.6980.pdf.

    Adapted from ``autograd/misc/optimizers.py``.
    """

    def __init__(self):
        from scipy.optimize import OptimizeResult

        self.OptimizeResult = OptimizeResult
        self._i = 0
        self._m = None
        self._v = None

    def get_m(self, x):
        if self._m is None:
            self._m = np.zeros_like(x)
        return self._m

    def get_v(self, x):
        if self._v is None:
            self._v = np.zeros_like(x)
        return self._v

    def __call__(
        self,
        fun,
        x0,
        jac,
        args=(),
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        maxiter=1000,
        callback=None,
        bounds=None,
        **kwargs,
    ):
        x = x0
        m = self.get_m(x)
        v = self.get_v(x)

        for _ in range(maxiter):
            self._i += 1

            g = jac(x)

            if callback and callback(x):
                break

            m = (1 - beta1) * g + beta1 * m  # first moment estimate.
            v = (1 - beta2) * (g**2) + beta2 * v  # second moment estimate.
            mhat = m / (1 - beta1**self._i)  # bias correction.
            vhat = v / (1 - beta2**self._i)
            x = x - learning_rate * mhat / (np.sqrt(vhat) + eps)

            if bounds is not None:
                x = np.clip(x, bounds[:, 0], bounds[:, 1])

        # save for restart
        self._m = m
        self._v = v

        return self.OptimizeResult(
            x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True
        )


class NADAM:
    """Stateful ``scipy.optimize.minimize`` compatible implementation of
    NADAM - [Dozat - http://cs229.stanford.edu/proj2015/054_report.pdf].

    Adapted from ``autograd/misc/optimizers.py``.
    """

    def __init__(self):
        from scipy.optimize import OptimizeResult

        self.OptimizeResult = OptimizeResult
        self._i = 0
        self._m = None
        self._v = None
        self._mus = None

    def get_m(self, x):
        if self._m is None:
            self._m = np.zeros_like(x)
        return self._m

    def get_v(self, x):
        if self._v is None:
            self._v = np.zeros_like(x)
        return self._v

    def get_mus(self, beta1):
        if self._mus is None:
            self._mus = [1, beta1 * (1 - 0.5 * 0.96**0.004)]
        return self._mus

    def __call__(
        self,
        fun,
        x0,
        jac,
        args=(),
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        maxiter=1000,
        callback=None,
        bounds=None,
        **kwargs,
    ):
        x = x0
        m = self.get_m(x)
        v = self.get_v(x)
        mus = self.get_mus(beta1)

        for _ in range(maxiter):
            self._i += 1

            # this is ``mu[t + 1]`` -> already computed ``mu[t]``
            self._mus.append(
                beta1 * (1 - 0.5 * 0.96 ** (0.004 * (self._i + 1)))
            )

            g = jac(x)

            if callback and callback(x):
                break

            gd = g / (1 - np.prod(self._mus[:-1]))
            m = beta1 * m + (1 - beta1) * g
            md = m / (1 - np.prod(self._mus))
            v = beta2 * v + (1 - beta2) * g**2
            vd = v / (1 - beta2**self._i)
            mhat = (1 - self._mus[self._i]) * gd + self._mus[self._i + 1] * md

            x = x - learning_rate * mhat / (np.sqrt(vd) + eps)

            if bounds is not None:
                x = np.clip(x, bounds[:, 0], bounds[:, 1])

        # save for restart
        self._m = m
        self._v = v
        self._mus = mus

        return self.OptimizeResult(
            x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True
        )


class ADABELIEF:
    """Stateful ``scipy.optimize.minimize`` compatible implementation of
    ADABELIEF - https://arxiv.org/abs/2010.07468.

    Adapted from ``autograd/misc/optimizers.py``.
    """

    def __init__(self):
        from scipy.optimize import OptimizeResult

        self.OptimizeResult = OptimizeResult
        self._i = 0
        self._m = None
        self._s = None

    def get_m(self, x):
        if self._m is None:
            self._m = np.zeros_like(x)
        return self._m

    def get_s(self, x):
        if self._s is None:
            self._s = np.zeros_like(x)
        return self._s

    def __call__(
        self,
        fun,
        x0,
        jac,
        args=(),
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        maxiter=1000,
        callback=None,
        bounds=None,
        **kwargs,
    ):
        x = x0
        m = self.get_m(x)
        s = self.get_s(x)

        for _ in range(maxiter):
            self._i += 1

            g = jac(x)

            if callback and callback(x):
                break

            m = (1 - beta1) * g + beta1 * m
            s = (1 - beta2) * (g - m) ** 2 + beta2 * s + eps
            # bias correction
            mhat = m / (1 - beta1**self._i)
            shat = s / (1 - beta2**self._i)
            x = x - learning_rate * mhat / (np.sqrt(shat) + eps)

            if bounds is not None:
                x = np.clip(x, bounds[:, 0], bounds[:, 1])

        # save for restart
        self._m = m
        self._s = s

        return self.OptimizeResult(
            x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True
        )


_STOC_GRAD_METHODS = {
    "sgd": SGD,
    "rmsprop": RMSPROP,
    "adam": ADAM,
    "nadam": NADAM,
    "adabelief": ADABELIEF,
}


class MakeArrayFn:
    """Class wrapper so picklable."""

    __name__ = "MakeArrayFn"

    def __init__(self, tn_opt, loss_fn, norm_fn, autodiff_backend):
        self.tn_opt = tn_opt
        self.loss_fn = loss_fn
        self.norm_fn = norm_fn
        self.autodiff_backend = autodiff_backend

    def __call__(self, arrays):
        tn_compute = inject_variables(arrays, self.tn_opt)

        # set backend explicitly as maybe mixing with numpy arrays
        with contract_backend(self.autodiff_backend):
            return self.loss_fn(self.norm_fn(tn_compute))


def identity_fn(x):
    return x


class TNOptimizer:
    """Globally optimize tensors within a tensor network with respect to any
    loss function via automatic differentiation. If parametrized tensors are
    used, optimize the parameters rather than the raw arrays.

    Parameters
    ----------
    tn : TensorNetwork
        The core tensor network structure within which to optimize tensors.
    loss_fn : callable or sequence of callable
        The function that takes ``tn`` (as well as ``loss_constants`` and
        ``loss_kwargs``) and returns a single real 'loss' to be minimized.
        For Hamiltonians which can be represented as a sum over terms, an
        iterable collection of terms (e.g. list) can be given instead. In that
        case each term is evaluated independently and the sum taken as loss_fn.
        This can reduce the total memory requirements or allow for
        parallelization (see ``executor``).
    norm_fn : callable, optional
        A function to call before ``loss_fn`` that prepares or 'normalizes' the
        raw tensor network in some way.
    loss_constants : dict, optional
        Extra tensor networks, tensors, dicts/list/tuples of arrays, or arrays
        which will be supplied to ``loss_fn`` but also converted to the correct
        backend array type.
    loss_kwargs : dict, optional
        Extra options to supply to ``loss_fn`` (unlike ``loss_constants`` these
        are assumed to be simple options that don't need conversion).
    tags : str, or sequence of str, optional
        If supplied, only optimize tensors with any of these tags.
    shared_tags : str, or sequence of str, optional
        If supplied, each tag in ``shared_tags`` corresponds to a group of
        tensors to be optimized together.
    constant_tags : str, or sequence of str, optional
        If supplied, skip optimizing tensors with any of these tags. This
        'opt-out' mode is overridden if either ``tags`` or ``shared_tags`` is
        supplied.
    loss_target : float, optional
        Stop optimizing once this loss value is reached.
    optimizer : str, optional
        Which ``scipy.optimize.minimize`` optimizer to use (the ``'method'``
        kwarg of that function). In addition, ``quimb`` implements a few custom
        optimizers compatible with this interface that you can reference by
        name - ``{'adam', 'nadam', 'rmsprop', 'sgd'}``.
    executor : None or Executor, optional
        To be used with term-by-term Hamiltonians. If supplied, this executor
        is used to parallelize the evaluation. Otherwise each term is
        evaluated in sequence. It should implement the basic
        concurrent.futures (PEP 3148) interface.
    progbar : bool, optional
        Whether to show live progress.
    bounds : None or (float, float), optional
        Constrain the optimized tensor entries within this range (if the scipy
        optimizer supports it).
    autodiff_backend : {'jax', 'autograd', 'tensorflow', 'torch'}, optional
        Which backend library to use to perform the automatic differentation
        (and computation).
    callback : callable, optional
        A function to call after each optimization step. It should take the
        current ``TNOptimizer`` instance as its only argument. Information such
        as the current loss and number of evaluations can then be accessed::

            def callback(tnopt):
                print(tnopt.nevals, tnopt.loss)

    backend_opts
        Supplied to the backend function compiler and array handler. For
        example ``jit_fn=True`` or ``device='cpu'`` .
    """

    def __init__(
        self,
        tn,
        loss_fn,
        norm_fn=None,
        loss_constants=None,
        loss_kwargs=None,
        tags=None,
        shared_tags=None,
        constant_tags=None,
        loss_target=None,
        optimizer="L-BFGS-B",
        progbar=True,
        bounds=None,
        autodiff_backend="AUTO",
        executor=None,
        callback=None,
        **backend_opts,
    ):
        self.progbar = progbar
        self.tags = tags
        self.shared_tags = shared_tags
        self.constant_tags = constant_tags

        if autodiff_backend.upper() == "AUTO":
            autodiff_backend = _DEFAULT_BACKEND
        self._autodiff_backend = autodiff_backend
        self._multiloss = isinstance(loss_fn, Iterable)

        # the object that handles converting to backend + computing gradient
        if self._multiloss:
            # special meta-handler if loss function is sequence to sum
            backend_opts["executor"] = executor
            self.handler = MultiLossHandler(autodiff_backend, **backend_opts)
        else:
            self.handler = _BACKEND_HANDLERS[autodiff_backend](**backend_opts)

        # use identity if no nomalization required
        if norm_fn is None:
            norm_fn = identity_fn
        self.norm_fn = norm_fn

        self.reset(tn, loss_target=loss_target)

        # convert constant raw arrays ahead of time to correct backend
        self.loss_constants = tree_map(
            functools.partial(convert_raw_arrays, f=self.handler.to_constant),
            ensure_dict(loss_constants),
        )
        self.loss_kwargs = ensure_dict(loss_kwargs)
        kws = {**self.loss_constants, **self.loss_kwargs}

        # inject these constant options to the loss function(s)
        if self._multiloss:
            # loss is a sum of independent terms
            self.loss_fn = [functools.partial(fn, **kws) for fn in loss_fn]
        else:
            # loss is all in one
            self.loss_fn = functools.partial(loss_fn, **kws)

        # first we wrap the function to convert from array args to TN arg
        # (i.e. to autodiff library compatible form)
        if self._multiloss:
            array_fn = [
                MakeArrayFn(self._tn_opt, fn, self.norm_fn, autodiff_backend)
                for fn in self.loss_fn
            ]
        else:
            array_fn = MakeArrayFn(
                self._tn_opt, self.loss_fn, self.norm_fn, autodiff_backend
            )

        # then we pass it to the handler which generates a function that
        # computes both the value and gradients (still in array form)
        self.handler.setup_fn(array_fn)
|
|
1265
|
+
|
|
1266
|
+
# options to do with the minimizer
|
|
1267
|
+
self.bounds = bounds
|
|
1268
|
+
self.optimizer = optimizer
|
|
1269
|
+
self.callback = callback
|
|
1270
|
+
|
|
1271
|
+
def _set_tn(self, tn):
|
|
1272
|
+
# work out which tensors to optimize and get the underlying data
|
|
1273
|
+
self._tn_opt, variables = parse_network_to_backend(
|
|
1274
|
+
tn,
|
|
1275
|
+
tags=self.tags,
|
|
1276
|
+
shared_tags=self.shared_tags,
|
|
1277
|
+
constant_tags=self.constant_tags,
|
|
1278
|
+
to_constant=self.handler.to_constant,
|
|
1279
|
+
)
|
|
1280
|
+
# handles storing and packing / unpacking many arrays as a vector
|
|
1281
|
+
self.vectorizer = Vectorizer(variables)
|
|
1282
|
+
|
|
1283
|
+
def _reset_tracking_info(self, loss_target=None):
|
|
1284
|
+
# tracking info
|
|
1285
|
+
self.loss = float("inf")
|
|
1286
|
+
self.loss_best = float("inf")
|
|
1287
|
+
self.loss_target = loss_target
|
|
1288
|
+
self.losses = []
|
|
1289
|
+
self._n = 0
|
|
1290
|
+
self._pbar = None
|
|
1291
|
+
|
|
1292
|
+
def reset(self, tn=None, clear_info=True, loss_target=None):
|
|
1293
|
+
"""Reset this optimizer without losing the compiled loss and gradient
|
|
1294
|
+
functions.
|
|
1295
|
+
|
|
1296
|
+
Parameters
|
|
1297
|
+
----------
|
|
1298
|
+
tn : TensorNetwork, optional
|
|
1299
|
+
Set this tensor network as the current state of the optimizer, it
|
|
1300
|
+
must exactly match the original tensor network.
|
|
1301
|
+
clear_info : bool, optional
|
|
1302
|
+
Clear the tracked losses and iterations.
|
|
1303
|
+
"""
|
|
1304
|
+
if tn is not None:
|
|
1305
|
+
self._set_tn(tn)
|
|
1306
|
+
if clear_info:
|
|
1307
|
+
self._reset_tracking_info(loss_target=loss_target)
|
|
1308
|
+
|
|
1309
|
+
def _maybe_init_pbar(self, n):
|
|
1310
|
+
if self.progbar:
|
|
1311
|
+
self._pbar = tqdm.tqdm(total=n)
|
|
1312
|
+
|
|
1313
|
+
def _maybe_update_pbar(self):
|
|
1314
|
+
if self._pbar is not None:
|
|
1315
|
+
self._pbar.update()
|
|
1316
|
+
self.loss_best = min(self.loss_best, self.loss)
|
|
1317
|
+
msg = f"{self.loss:+.12f} [best: {self.loss_best:+.12f}] "
|
|
1318
|
+
self._pbar.set_description(msg)
|
|
1319
|
+
|
|
1320
|
+
def _maybe_close_pbar(self):
|
|
1321
|
+
if self._pbar is not None:
|
|
1322
|
+
self._pbar.close()
|
|
1323
|
+
self._pbar = None
|
|
1324
|
+
|
|
1325
|
+
def _check_loss_target(self):
|
|
1326
|
+
if (self.loss_target is not None) and (self.loss <= self.loss_target):
|
|
1327
|
+
# for scipy terminating optimizer with callback doesn't work
|
|
1328
|
+
raise KeyboardInterrupt
|
|
1329
|
+
|
|
1330
|
+
def _maybe_call_callback(self):
|
|
1331
|
+
if self.callback is not None:
|
|
1332
|
+
self.callback(self)
|
|
1333
|
+
|
|
1334
|
+
def vectorized_value(self, x):
|
|
1335
|
+
"""The value of the loss function at vector ``x``."""
|
|
1336
|
+
self.vectorizer.vector[:] = x
|
|
1337
|
+
arrays = self.vectorizer.unpack()
|
|
1338
|
+
self.loss = self.handler.value(arrays).item()
|
|
1339
|
+
self.losses.append(self.loss)
|
|
1340
|
+
self._n += 1
|
|
1341
|
+
self._maybe_update_pbar()
|
|
1342
|
+
self._check_loss_target()
|
|
1343
|
+
self._maybe_call_callback()
|
|
1344
|
+
return self.loss
|
|
1345
|
+
|
|
1346
|
+
def vectorized_value_and_grad(self, x):
|
|
1347
|
+
"""The value and gradient of the loss function at vector ``x``."""
|
|
1348
|
+
self.vectorizer.vector[:] = x
|
|
1349
|
+
arrays = self.vectorizer.unpack()
|
|
1350
|
+
result, grads = self.handler.value_and_grad(arrays)
|
|
1351
|
+
self._n += 1
|
|
1352
|
+
self.loss = result.item()
|
|
1353
|
+
self.losses.append(self.loss)
|
|
1354
|
+
vec_grad = self.vectorizer.pack(grads, "grad")
|
|
1355
|
+
self._maybe_update_pbar()
|
|
1356
|
+
self._check_loss_target()
|
|
1357
|
+
self._maybe_call_callback()
|
|
1358
|
+
return self.loss, vec_grad
|
|
1359
|
+
|
|
1360
|
+
def vectorized_hessp(self, x, p):
|
|
1361
|
+
"""The action of the hessian at point ``x`` on vector ``p``."""
|
|
1362
|
+
primals = self.vectorizer.unpack(x)
|
|
1363
|
+
tangents = self.vectorizer.unpack(p)
|
|
1364
|
+
hp_arrays = self.handler.hessp(primals, tangents)
|
|
1365
|
+
self._n += 1
|
|
1366
|
+
self.losses.append(self.loss)
|
|
1367
|
+
self._maybe_update_pbar()
|
|
1368
|
+
return self.vectorizer.pack(hp_arrays, "hp")
|
|
1369
|
+
|
|
1370
|
+
    def __repr__(self):
        return (
            f"<TNOptimizer(d={self.d}, " f"backend={self._autodiff_backend})>"
        )

    @property
    def d(self):
        return int(self.vectorizer.d)

    @property
    def nevals(self):
        """The number of gradient evaluations."""
        return self._n

    @property
    def optimizer(self):
        """The underlying optimizer that works with the vectorized functions."""
        return self._optimizer

    @optimizer.setter
    def optimizer(self, x):
        self._optimizer = x
        if self.optimizer in _STOC_GRAD_METHODS:
            self._method = _STOC_GRAD_METHODS[self.optimizer]()
        else:
            self._method = self.optimizer

    @property
    def bounds(self):
        return self._bounds

    @bounds.setter
    def bounds(self, x):
        if x is not None:
            self._bounds = np.array((x,) * self.vectorizer.d)
        else:
            self._bounds = None
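
The ``bounds`` setter tiles a single ``(low, high)`` pair into one row per parameter, producing the ``(d, 2)`` array that scipy's ``bounds`` argument and the column slicing in the nlopt/nevergrad drivers below both expect. A quick check of that shape logic:

import numpy as np

d = 4                                  # stand-in for self.vectorizer.d
bounds = np.array(((-1.0, 1.0),) * d)  # same tiling as the setter above
print(bounds.shape)                    # (4, 2)
print(bounds[:, 0], bounds[:, 1])      # per-parameter lower / upper bounds
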
    def get_tn_opt(self):
        """Extract the optimized tensor network. This is a three-part process:

        1. inject the current optimized vector into the target tensor
           network,
        2. run it through ``norm_fn``,
        3. drop any tags used to identify variables.

        Returns
        -------
        tn_opt : TensorNetwork
        """
        arrays = tree_map(
            self.handler.to_constant,
            self.vectorizer.unpack(),
        )
        tn = inject_variables(arrays, self._tn_opt)
        tn = self.norm_fn(tn)

        if isinstance(tn, TensorNetwork):
            tn.drop_tags(t for t in tn.tags if variable_finder.match(t))

        return tree_map(convert_variables_to_numpy, tn)

    def optimize(
        self, n, tol=None, jac=True, hessp=False, optlib="scipy", **options
    ):
        """Run the optimizer for ``n`` function evaluations, using by default
        :func:`scipy.optimize.minimize` as the driver for the vectorized
        computation. Supplying the gradient and hessian vector product is
        controlled by the ``jac`` and ``hessp`` options respectively.

        Parameters
        ----------
        n : int
            Notionally the maximum number of iterations for the optimizer.
            Note that, depending on the optimizer being used, this may
            correspond to the number of function evaluations rather than just
            iterations.
        tol : None or float, optional
            Tolerance for convergence. Note that various more specific
            tolerances can usually be supplied via ``options``, depending on
            the optimizer being used.
        jac : bool, optional
            Whether to supply the jacobian, i.e. gradient, of the loss
            function.
        hessp : bool, optional
            Whether to supply the hessian vector product of the loss function.
        optlib : {'scipy', 'nlopt'}, optional
            Which optimization library to use.
        options
            Supplied to :func:`scipy.optimize.minimize` or whichever optimizer
            is being used.

        Returns
        -------
        tn_opt : TensorNetwork
        """
        return {
            "scipy": self.optimize_scipy,
            "nlopt": self.optimize_nlopt,
        }[optlib](n=n, tol=tol, jac=jac, hessp=hessp, **options)
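
For context, a hypothetical end-to-end usage sketch of this class, with constructor arguments assumed from quimb's documented ``TNOptimizer`` API rather than shown in this part of the diff:

# Hypothetical usage sketch (constructor arguments assumed, not taken from
# this diff): maximize the overlap of a random MPS with a fixed target state.
import quimb.tensor as qtn

L = 16
psi0 = qtn.MPS_rand_state(L, bond_dim=8)
target = qtn.MPS_rand_state(L, bond_dim=4)

def loss_fn(psi, target):
    # negative squared overlap with the fixed target (real scalar)
    return -abs(psi.H @ target) ** 2

def norm_fn(psi):
    # keep the state normalized during optimization
    return psi / (psi.H @ psi) ** 0.5

tnopt = qtn.TNOptimizer(
    psi0,
    loss_fn=loss_fn,
    norm_fn=norm_fn,
    loss_constants={"target": target},
    autodiff_backend="autograd",
    loss_target=-0.99,
)
psi_opt = tnopt.optimize(100)                    # scipy driver (default)
# psi_opt = tnopt.optimize(100, optlib="nlopt")  # or the nlopt driver
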
    def optimize_scipy(self, n, tol=None, jac=True, hessp=False, **options):
        """Scipy based optimization, see
        :meth:`~quimb.tensor.optimize.TNOptimizer.optimize` for details.
        """
        from scipy.optimize import minimize

        if jac:
            fun = self.vectorized_value_and_grad
        else:
            fun = self.vectorized_value

        if self._method in ("l-bfgs-b", "tnc"):
            options.setdefault("maxfun", n)

        try:
            self._maybe_init_pbar(n)
            self.res = minimize(
                fun=fun,
                jac=jac,
                hessp=self.vectorized_hessp if hessp else None,
                x0=self.vectorizer.vector,
                tol=tol,
                bounds=self.bounds,
                method=self._method,
                options=dict(maxiter=n, **options),
            )
            self.vectorizer.vector[:] = self.res.x
        except KeyboardInterrupt:
            pass
        finally:
            self._maybe_close_pbar()

        return self.get_tn_opt()
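
``optimize_scipy`` stops early by letting ``_check_loss_target`` raise ``KeyboardInterrupt`` from inside the objective (scipy callbacks cannot reliably terminate every method), then catching it and keeping whatever parameters were last written into the vectorizer. A standalone sketch of that pattern with a toy objective:

import numpy as np
from scipy.optimize import minimize

loss_target = 1e-3
best_x = np.ones(5)                  # plays the role of vectorizer.vector

def value_and_grad(x):
    best_x[:] = x                    # record the most recent point
    loss = float(np.sum(x**2))
    if loss <= loss_target:
        raise KeyboardInterrupt      # abort the driver early
    return loss, 2.0 * x

try:
    res = minimize(value_and_grad, x0=best_x.copy(), jac=True, method="L-BFGS-B")
    best_x[:] = res.x
except KeyboardInterrupt:
    pass                             # keep whatever is already in best_x

print(best_x)
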
    def optimize_basinhopping(
        self, n, nhop, temperature=1.0, jac=True, hessp=False, **options
    ):
        """Run the optimizer using :func:`scipy.optimize.basinhopping`
        as the driver for the vectorized computation. This performs ``nhop``
        local optimizations, each with ``n`` iterations.

        Parameters
        ----------
        n : int
            Number of iterations per local optimization.
        nhop : int
            Number of local optimizations to hop between.
        temperature : float, optional
            The 'temperature' controlling the basin hopping acceptance
            criterion, passed to :func:`scipy.optimize.basinhopping` as ``T``.
        options
            Supplied to the inner :func:`scipy.optimize.minimize` call.

        Returns
        -------
        tn_opt : TensorNetwork
        """
        from scipy.optimize import basinhopping

        if jac:
            fun = self.vectorized_value_and_grad
        else:
            fun = self.vectorized_value

        try:
            self._maybe_init_pbar(n * nhop)
            self.res = basinhopping(
                func=fun,
                x0=self.vectorizer.vector,
                niter=nhop,
                minimizer_kwargs=dict(
                    jac=jac,
                    hessp=self.vectorized_hessp if hessp else None,
                    method=self._method,
                    bounds=self.bounds,
                    options=dict(maxiter=n, **options),
                ),
                T=temperature,
            )
            self.vectorizer.vector[:] = self.res.x

        except KeyboardInterrupt:
            pass
        finally:
            self._maybe_close_pbar()

        return self.get_tn_opt()
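
``optimize_basinhopping`` feeds the same ``(value, gradient)`` objective to scipy's global basin-hopping driver via ``minimizer_kwargs``. A self-contained toy on a function with several local minima:

import numpy as np
from scipy.optimize import basinhopping

def value_and_grad(x):
    # toy objective with many local minima, and its gradient
    f = float(np.sum(x**2 + 2.0 * np.cos(3.0 * x)))
    g = 2.0 * x - 6.0 * np.sin(3.0 * x)
    return f, g

res = basinhopping(
    func=value_and_grad,
    x0=np.full(3, 2.0),
    niter=10,                                        # 'nhop' above
    minimizer_kwargs=dict(jac=True, method="L-BFGS-B"),
    T=1.0,                                           # acceptance temperature
)
print(res.fun, res.x)
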
    def optimize_nlopt(
        self,
        n,
        tol=None,
        jac=True,
        hessp=False,
        ftol_rel=None,
        ftol_abs=None,
        xtol_rel=None,
        xtol_abs=None,
    ):
        """Run the optimizer for ``n`` function evaluations, using ``nlopt`` as
        the backend library to run the optimization. Whether the gradient is
        computed depends on which ``optimizer`` is selected; see valid options
        at https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/.

        The following scipy ``optimizer`` options are automatically translated
        to the corresponding ``nlopt`` algorithms: {"l-bfgs-b", "slsqp", "tnc",
        "cobyla"}.

        Parameters
        ----------
        n : int
            The maximum number of iterations for the optimizer.
        tol : None or float, optional
            Tolerance for convergence, here this is taken to be the relative
            tolerance for the loss (``ftol_rel`` below overrides this).
        jac : bool, optional
            Whether to supply the jacobian, i.e. gradient, of the loss
            function.
        hessp : bool, optional
            Whether to supply the hessian vector product of the loss function.
        ftol_rel : float, optional
            Set relative tolerance on function value.
        ftol_abs : float, optional
            Set absolute tolerance on function value.
        xtol_rel : float, optional
            Set relative tolerance on optimization parameters.
        xtol_abs : float, optional
            Set absolute tolerances on optimization parameters.

        Returns
        -------
        tn_opt : TensorNetwork
        """
        import nlopt

        if not (jac is True and hessp is False):
            raise NotImplementedError(
                "Only gradient based optimizers are "
                "supported with nlopt currently."
            )

        if tol == 0.0:
            # assume no stopping criteria: explicitly disable any tolerance
            # not set individually
            ftol_rel = 0.0 if ftol_rel is None else ftol_rel
            ftol_abs = 0.0 if ftol_abs is None else ftol_abs
            xtol_rel = 0.0 if xtol_rel is None else xtol_rel
            xtol_abs = 0.0 if xtol_abs is None else xtol_abs

        elif (tol is not None) and (ftol_rel is None):
            # assume relative loss tolerance is specified
            ftol_rel = tol

        # translate directly comparable algorithms
        optimizer = {
            "l-bfgs-b": "LD_LBFGS",
            "slsqp": "LD_SLSQP",
            "tnc": "LD_TNEWTON_PRECOND_RESTART",
            "cobyla": "LN_COBYLA",
        }.get(self.optimizer.lower(), self.optimizer)

        try:
            self._maybe_init_pbar(n)

            def f(x, grad):
                self.vectorizer.vector[:] = x
                arrays = self.vectorizer.unpack()
                if grad.size > 0:
                    result, grads = self.handler.value_and_grad(arrays)
                    grad[:] = self.vectorizer.pack(grads, "grad")
                else:
                    result = self.handler.value(arrays)
                self._n += 1
                self.loss = result.item()
                self.losses.append(self.loss)
                self._maybe_update_pbar()
                return self.loss

            opt = nlopt.opt(getattr(nlopt, optimizer), self.d)
            opt.set_min_objective(f)
            opt.set_maxeval(n)

            if self.bounds is not None:
                opt.set_lower_bounds(self.bounds[:, 0])
                opt.set_upper_bounds(self.bounds[:, 1])

            if self.loss_target is not None:
                opt.set_stopval(self.loss_target)
            if ftol_rel is not None:
                opt.set_ftol_rel(ftol_rel)
            if ftol_abs is not None:
                opt.set_ftol_abs(ftol_abs)
            if xtol_rel is not None:
                opt.set_xtol_rel(xtol_rel)
            if xtol_abs is not None:
                opt.set_xtol_abs(xtol_abs)

            self.vectorizer.vector[:] = opt.optimize(self.vectorizer.vector)

        except (KeyboardInterrupt, RuntimeError):
            pass
        finally:
            self._maybe_close_pbar()

        return self.get_tn_opt()
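
nlopt objectives use the ``f(x, grad)`` signature seen above: the gradient array is filled in place and arrives empty for derivative-free algorithms. A minimal standalone example, assuming the ``nlopt`` package is installed:

import numpy as np
import nlopt

def f(x, grad):
    if grad.size > 0:        # gradient requested (gradient-based algorithm)
        grad[:] = 2.0 * x    # filled in place, as in the wrapper above
    return float(np.sum(x**2))

opt = nlopt.opt(nlopt.LD_LBFGS, 5)   # algorithm, dimension
opt.set_min_objective(f)
opt.set_maxeval(100)
opt.set_ftol_rel(1e-10)
x_opt = opt.optimize(np.ones(5))
print(opt.last_optimum_value(), x_opt)
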
    def optimize_ipopt(self, n, tol=None, **options):
        """Run the optimizer for ``n`` function evaluations, using ``ipopt`` as
        the backend library to run the optimization via the python package
        ``cyipopt``.

        Parameters
        ----------
        n : int
            The maximum number of iterations for the optimizer.

        Returns
        -------
        tn_opt : TensorNetwork
        """
        from cyipopt import minimize_ipopt

        try:
            self._maybe_init_pbar(n)
            self.res = minimize_ipopt(
                fun=self.vectorized_value_and_grad,
                jac=True,
                x0=self.vectorizer.vector,
                tol=tol,
                bounds=self.bounds,
                method=self._method,
                options=dict(maxiter=n, **options),
            )
            self.vectorizer.vector[:] = self.res.x
        except KeyboardInterrupt:
            pass
        finally:
            self._maybe_close_pbar()

        return self.get_tn_opt()
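
``cyipopt.minimize_ipopt`` mirrors the scipy interface, so the same combined value-and-gradient objective can be passed with ``jac=True``. A toy sketch, assuming ``cyipopt`` (and an Ipopt build) is installed:

import numpy as np
from cyipopt import minimize_ipopt

def value_and_grad(x):
    # toy quadratic centered at 1, with its gradient
    return float(np.sum((x - 1.0) ** 2)), 2.0 * (x - 1.0)

res = minimize_ipopt(fun=value_and_grad, jac=True, x0=np.zeros(4))
print(res.x)  # ~ ones
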
    def optimize_nevergrad(self, n):
        """Run the optimizer for ``n`` function evaluations, using
        ``nevergrad`` as the backend library to run the optimization. As the
        name suggests, the gradient is not required for this method.

        Parameters
        ----------
        n : int
            The maximum number of iterations for the optimizer.

        Returns
        -------
        tn_opt : TensorNetwork
        """
        import nevergrad as ng

        opt = getattr(ng.optimizers, self.optimizer)(
            parametrization=ng.p.Array(
                init=self.vectorizer.vector,
                lower=self.bounds[:, 0] if self.bounds is not None else None,
                upper=self.bounds[:, 1] if self.bounds is not None else None,
            ),
            budget=n,
        )

        try:
            self._maybe_init_pbar(n)
            for _ in range(n):
                x = opt.ask()
                loss = self.vectorized_value(*x.args, **x.kwargs)
                opt.tell(x, loss)
                if self.loss_target is not None:
                    if self.loss < self.loss_target:
                        break

        except KeyboardInterrupt:
            pass
        finally:
            self._maybe_close_pbar()

        # recommendation = opt.minimize(self.vectorized_value)
        recommendation = opt.provide_recommendation()
        self.vectorizer.vector[:] = recommendation.value

        return self.get_tn_opt()
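
The gradient-free nevergrad driver uses the ask/tell interface directly so the loop can bail out once ``loss_target`` is reached. A standalone version of that loop, assuming ``nevergrad`` is installed (``NGOpt`` here is just an example optimizer name):

import numpy as np
import nevergrad as ng

def loss(x):
    # toy loss standing in for vectorized_value
    return float(np.sum(x**2))

budget = 200
opt = ng.optimizers.NGOpt(
    parametrization=ng.p.Array(init=np.full(4, 2.0)),
    budget=budget,
)

loss_target = 1e-4
for _ in range(budget):
    cand = opt.ask()
    value = loss(*cand.args, **cand.kwargs)
    opt.tell(cand, value)
    if value < loss_target:
        break

rec = opt.provide_recommendation()
print(rec.value)  # best parameter array found
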
    @default_to_neutral_style
    def plot(
        self,
        xscale="symlog",
        xscale_linthresh=20,
        zoom="auto",
        hlines=(),
    ):
        """Plot the loss function as a function of the number of iterations.

        Parameters
        ----------
        xscale : str, optional
            The scale of the x-axis. Default is ``"symlog"``, i.e. linear for
            the first part of the plot, and logarithmic for the rest, changing
            at ``xscale_linthresh``.
        xscale_linthresh : int, optional
            The threshold for the change from linear to logarithmic scale,
            if ``xscale`` is ``"symlog"``. Default is ``20``.
        zoom : "auto", None or int, optional
            If not ``None``, show an inset plot of the last ``zoom``
            iterations. If ``"auto"`` (the default), this is chosen as
            ``min(50, len(losses) // 2)``.
        hlines : dict, optional
            A dictionary of horizontal lines to plot. The keys are the labels
            of the lines, and the values are the y-values of the lines.

        Returns
        -------
        fig : matplotlib.figure.Figure
            The figure object.
        ax : matplotlib.axes.Axes
            The axes object.
        """
        import matplotlib.pyplot as plt
        from matplotlib.colors import hsv_to_rgb

        ys = np.array(self.losses)
        xs = np.arange(ys.size)

        fig, ax = plt.subplots()
        ax.plot(xs, ys, ".-")
        if xscale == "symlog":
            ax.set_xscale(xscale, linthresh=xscale_linthresh)
            ax.axvline(xscale_linthresh, color=(0.5, 0.5, 0.5), ls="-", lw=0.5)
        else:
            ax.set_xscale(xscale)
        ax.set_xlabel("Iteration")
        ax.set_ylabel("Loss")

        if hlines:
            hlines = dict(hlines)
            for i, (label, value) in enumerate(hlines.items()):
                color = hsv_to_rgb([(0.1 * i) % 1.0, 0.9, 0.9])
                ax.axhline(value, color=color, ls="--", label=label)
                ax.text(1, value, label, color=color, va="bottom", ha="left")

        if zoom is not None:
            if zoom == "auto":
                zoom = min(50, ys.size // 2)

            iax = ax.inset_axes([0.5, 0.5, 0.5, 0.5])
            iax.plot(xs[-zoom:], ys[-zoom:], ".-")

        return fig, ax
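
Finally, a hypothetical call of this plot helper after a run, reusing the ``tnopt`` instance from the usage sketch further above; the reference value in ``hlines`` is made up for illustration:

# Hypothetical follow-up to the usage sketch above.
fig, ax = tnopt.plot(
    xscale="symlog",
    zoom=50,                    # inset of the last 50 iterations
    hlines={"target": -0.99},   # made-up reference value
)
fig.savefig("tn_optimization_losses.png", dpi=150)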