scs 0.2.2 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +18 -18
- data/README.md +19 -14
- data/lib/scs/ffi.rb +31 -20
- data/lib/scs/solver.rb +32 -9
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +39 -0
- data/vendor/scs/CMakeLists.txt +320 -0
- data/vendor/scs/Makefile +32 -23
- data/vendor/scs/README.md +9 -218
- data/vendor/scs/include/aa.h +67 -23
- data/vendor/scs/include/cones.h +22 -19
- data/vendor/scs/include/glbopts.h +107 -79
- data/vendor/scs/include/linalg.h +3 -4
- data/vendor/scs/include/linsys.h +58 -44
- data/vendor/scs/include/normalize.h +6 -5
- data/vendor/scs/include/rw.h +8 -2
- data/vendor/scs/include/scs.h +257 -141
- data/vendor/scs/include/scs_types.h +34 -0
- data/vendor/scs/include/scs_work.h +83 -0
- data/vendor/scs/include/util.h +3 -15
- data/vendor/scs/linsys/cpu/direct/private.c +241 -232
- data/vendor/scs/linsys/cpu/direct/private.h +13 -7
- data/vendor/scs/linsys/cpu/indirect/private.c +194 -118
- data/vendor/scs/linsys/cpu/indirect/private.h +7 -4
- data/vendor/scs/linsys/csparse.c +87 -0
- data/vendor/scs/linsys/csparse.h +34 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
- data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
- data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
- data/vendor/scs/linsys/external/qdldl/changes +2 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
- data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
- data/vendor/scs/linsys/gpu/gpu.c +58 -21
- data/vendor/scs/linsys/gpu/gpu.h +70 -35
- data/vendor/scs/linsys/gpu/indirect/private.c +394 -157
- data/vendor/scs/linsys/gpu/indirect/private.h +27 -12
- data/vendor/scs/linsys/scs_matrix.c +478 -0
- data/vendor/scs/linsys/scs_matrix.h +70 -0
- data/vendor/scs/scs.mk +14 -10
- data/vendor/scs/src/aa.c +394 -110
- data/vendor/scs/src/cones.c +497 -359
- data/vendor/scs/src/ctrlc.c +15 -5
- data/vendor/scs/src/linalg.c +107 -26
- data/vendor/scs/src/normalize.c +30 -72
- data/vendor/scs/src/rw.c +202 -27
- data/vendor/scs/src/scs.c +769 -571
- data/vendor/scs/src/scs_version.c +11 -3
- data/vendor/scs/src/util.c +37 -106
- data/vendor/scs/test/minunit.h +22 -8
- data/vendor/scs/test/problem_utils.h +180 -25
- data/vendor/scs/test/problems/degenerate.h +130 -0
- data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
- data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
- data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
- data/vendor/scs/test/problems/random_prob +0 -0
- data/vendor/scs/test/problems/random_prob.h +45 -0
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
- data/vendor/scs/test/problems/small_lp.h +14 -13
- data/vendor/scs/test/problems/small_qp.h +352 -0
- data/vendor/scs/test/problems/test_validation.h +43 -0
- data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
- data/vendor/scs/test/random_socp_prob.c +54 -53
- data/vendor/scs/test/rng.h +109 -0
- data/vendor/scs/test/run_from_file.c +20 -11
- data/vendor/scs/test/run_tests.c +35 -2
- metadata +29 -98
- data/vendor/scs/linsys/amatrix.c +0 -305
- data/vendor/scs/linsys/amatrix.h +0 -36
- data/vendor/scs/linsys/amatrix.o +0 -0
- data/vendor/scs/linsys/cpu/direct/private.o +0 -0
- data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
- data/vendor/scs/src/aa.o +0 -0
- data/vendor/scs/src/cones.o +0 -0
- data/vendor/scs/src/ctrlc.o +0 -0
- data/vendor/scs/src/linalg.o +0 -0
- data/vendor/scs/src/normalize.o +0 -0
- data/vendor/scs/src/rw.o +0 -0
- data/vendor/scs/src/scs.o +0 -0
- data/vendor/scs/src/scs_version.o +0 -0
- data/vendor/scs/src/util.o +0 -0
- data/vendor/scs/test/data/small_random_socp +0 -0
- data/vendor/scs/test/problems/small_random_socp.h +0 -33
- data/vendor/scs/test/run_tests +0 -2
data/vendor/scs/src/aa.c
CHANGED
|
@@ -1,49 +1,104 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Anderson acceleration.
|
|
3
|
+
*
|
|
4
|
+
* x: input iterate
|
|
5
|
+
* x_prev: previous input iterate
|
|
6
|
+
* f: f(x) output of map f applied to x
|
|
7
|
+
* g: x - f (error)
|
|
8
|
+
* g_prev: previous error
|
|
9
|
+
* s: x - x_prev
|
|
10
|
+
* y: g - g_prev
|
|
11
|
+
* d: s - y = f - f_prev
|
|
12
|
+
*
|
|
13
|
+
* capital letters are the variables stacked columnwise
|
|
14
|
+
* idx tracks current index where latest quantities written
|
|
15
|
+
* idx cycles from left to right columns in matrix
|
|
16
|
+
*
|
|
17
|
+
* Type-I:
|
|
18
|
+
* return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g )
|
|
19
|
+
*
|
|
20
|
+
* Type-II:
|
|
21
|
+
* return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g )
|
|
22
|
+
*
|
|
23
|
+
*/
|
|
24
|
+
|
|
1
25
|
#include "aa.h"
|
|
2
26
|
#include "scs_blas.h"
|
|
3
27
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
* do this using LAPACK ?gesv.
|
|
8
|
-
*/
|
|
28
|
+
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
|
29
|
+
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
|
30
|
+
#define FILL_MEMORY_BEFORE_SOLVE (1)
|
|
9
31
|
|
|
10
32
|
#ifndef USE_LAPACK
|
|
11
33
|
|
|
12
|
-
typedef void *
|
|
34
|
+
typedef void *ACCEL_WORK;
|
|
13
35
|
|
|
14
|
-
AaWork *aa_init(aa_int dim, aa_int
|
|
15
|
-
|
|
16
|
-
|
|
36
|
+
/* Stub compiled when USE_LAPACK is not defined: no workspace is created and
 * a null pointer is returned, so callers must cope with a null AaWork. */
AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
                aa_float relaxation, aa_float safeguard_factor,
                aa_float max_weight_norm, aa_int verbosity) {
  /* every argument is intentionally ignored in this build */
  (void)dim;
  (void)mem;
  (void)type1;
  (void)regularization;
  (void)relaxation;
  (void)safeguard_factor;
  (void)max_weight_norm;
  (void)verbosity;
  return SCS_NULL;
}
|
|
41
|
+
/* Stub compiled when USE_LAPACK is not defined: f is left untouched and the
 * returned value is always 0. */
aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
  (void)f;
  (void)x;
  (void)a;
  return 0;
}
|
|
44
|
+
/* Stub compiled when USE_LAPACK is not defined: nothing is checked or
 * modified and 0 is always returned. */
aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
  (void)f_new;
  (void)x_new;
  (void)a;
  return 0;
}
|
|
47
|
+
/* Stub compiled when USE_LAPACK is not defined: there is nothing to free. */
void aa_finish(AaWork *a) {
  (void)a;
}
|
|
49
|
+
/* Stub compiled when USE_LAPACK is not defined: there is no state to reset. */
void aa_reset(AaWork *a) {
  (void)a;
}
|
|
17
51
|
|
|
18
52
|
#else
|
|
19
53
|
|
|
20
|
-
|
|
21
|
-
struct ACCEL_WORK {
|
|
22
|
-
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
|
23
|
-
aa_int k; /* aa memory */
|
|
24
|
-
aa_int l; /* variable dimension */
|
|
25
|
-
aa_int iter; /* current iteration */
|
|
54
|
+
#if PROFILING > 0
|
|
26
55
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
56
|
+
#define TIME_TIC \
|
|
57
|
+
timer __t; \
|
|
58
|
+
tic(&__t);
|
|
59
|
+
#define TIME_TOC toc(__func__, &__t);
|
|
30
60
|
|
|
31
|
-
|
|
32
|
-
|
|
61
|
+
#include <time.h>
|
|
62
|
+
/* Pair of timestamps used by the profiling helpers: tic() fills `tic` and
 * tocq() fills `toc`. */
typedef struct timer {
  struct timespec tic, toc;
} timer;
|
|
33
66
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
67
|
+
/* Record the current CLOCK_MONOTONIC reading as the start time in t->tic. */
void tic(timer *t) {
  struct timespec *start = &t->tic;
  clock_gettime(CLOCK_MONOTONIC, start);
}
|
|
37
70
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
|
41
|
-
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
|
71
|
+
/* Quiet toc: stamp t->toc with the current CLOCK_MONOTONIC reading and return
 * the time elapsed since t->tic, in milliseconds, without printing. */
aa_float tocq(timer *t) {
  struct timespec elapsed;

  clock_gettime(CLOCK_MONOTONIC, &t->toc);

  elapsed.tv_sec = t->toc.tv_sec - t->tic.tv_sec;
  elapsed.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec;
  if (elapsed.tv_nsec < 0) {
    /* borrow one second so the nanosecond part is non-negative */
    elapsed.tv_sec -= 1;
    elapsed.tv_nsec += 1000000000L;
  }
  return (aa_float)elapsed.tv_sec * 1e3 + (aa_float)elapsed.tv_nsec / 1e6;
}
|
|
85
|
+
|
|
86
|
+
/* Print the time elapsed since tic(t), labelled with str, and return it in
 * milliseconds. */
aa_float toc(const char *str, timer *t) {
  aa_float elapsed_ms = tocq(t);
  printf("%s - time: %8.4f milli-seconds.\n", str, elapsed_ms);
  return elapsed_ms;
}
|
|
91
|
+
|
|
92
|
+
#else
|
|
93
|
+
|
|
94
|
+
#define TIME_TIC
|
|
95
|
+
#define TIME_TOC
|
|
96
|
+
|
|
97
|
+
#endif
|
|
98
|
+
|
|
99
|
+
#ifdef __cplusplus
|
|
100
|
+
extern "C" {
|
|
101
|
+
#endif
|
|
47
102
|
|
|
48
103
|
/* BLAS functions used */
|
|
49
104
|
aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
|
|
@@ -59,147 +114,363 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
|
|
|
59
114
|
blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
|
|
60
115
|
blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
|
|
61
116
|
aa_float *c, blas_int *ldc);
|
|
117
|
+
void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
|
|
118
|
+
const blas_int *incx);
|
|
119
|
+
|
|
120
|
+
#ifdef __cplusplus
|
|
121
|
+
}
|
|
122
|
+
#endif
|
|
123
|
+
|
|
124
|
+
/* This file uses Anderson acceleration to improve the convergence of
|
|
125
|
+
* a fixed point mapping.
|
|
126
|
+
* At each iteration we need to solve a (small) linear system, we
|
|
127
|
+
* do this using LAPACK ?gesv.
|
|
128
|
+
*/
|
|
129
|
+
|
|
130
|
+
/* contains the necessary parameters to perform aa at each step */
|
|
131
|
+
struct ACCEL_WORK {
|
|
132
|
+
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
|
133
|
+
aa_int mem; /* aa memory */
|
|
134
|
+
aa_int dim; /* variable dimension */
|
|
135
|
+
aa_int iter; /* current iteration */
|
|
136
|
+
aa_int verbosity; /* verbosity level, 0 is no printing */
|
|
137
|
+
aa_int success; /* was the last AA step successful or not */
|
|
138
|
+
|
|
139
|
+
aa_float relaxation; /* relaxation x and f, beta in some papers */
|
|
140
|
+
aa_float regularization; /* regularization */
|
|
141
|
+
aa_float safeguard_factor; /* safeguard tolerance factor */
|
|
142
|
+
aa_float max_weight_norm; /* maximum norm of AA weights */
|
|
143
|
+
|
|
144
|
+
aa_float *x; /* x input to map*/
|
|
145
|
+
aa_float *f; /* f(x) output of map */
|
|
146
|
+
aa_float *g; /* x - f(x) */
|
|
147
|
+
aa_float norm_g; /* ||x - f(x)|| */
|
|
148
|
+
|
|
149
|
+
/* from previous iteration */
|
|
150
|
+
aa_float *g_prev; /* x_prev - f(x_prev) */
|
|
151
|
+
|
|
152
|
+
aa_float *y; /* g - g_prev */
|
|
153
|
+
aa_float *s; /* x - x_prev */
|
|
154
|
+
aa_float *d; /* f - f_prev */
|
|
155
|
+
|
|
156
|
+
aa_float *Y; /* matrix of stacked y values */
|
|
157
|
+
aa_float *S; /* matrix of stacked s values */
|
|
158
|
+
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
|
159
|
+
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
|
160
|
+
|
|
161
|
+
/* workspace variables */
|
|
162
|
+
aa_float *work; /* scratch space */
|
|
163
|
+
blas_int *ipiv; /* permutation variable, not used after solve */
|
|
164
|
+
|
|
165
|
+
aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */
|
|
166
|
+
};
|
|
167
|
+
|
|
168
|
+
/* add regularization dependent on Y and S matrices */
|
|
169
|
+
static aa_float compute_regularization(AaWork *a, aa_int len) {
|
|
170
|
+
/* typically type-I does better with higher regularization than type-II */
|
|
171
|
+
TIME_TIC
|
|
172
|
+
aa_float r, nrm_m;
|
|
173
|
+
blas_int btotal = (blas_int)(len * len), one = 1;
|
|
174
|
+
nrm_m = BLAS(nrm2)(&btotal, a->M, &one);
|
|
175
|
+
r = a->regularization * nrm_m;
|
|
176
|
+
if (a->verbosity > 2) {
|
|
177
|
+
printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r);
|
|
178
|
+
}
|
|
179
|
+
TIME_TOC
|
|
180
|
+
return r;
|
|
181
|
+
}
|
|
62
182
|
|
|
63
183
|
/* sets a->M to S'Y or Y'Y depending on type of aa used */
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
184
|
+
/* M is len x len after this */
|
|
185
|
+
static void set_m(AaWork *a, aa_int len) {
|
|
186
|
+
TIME_TIC
|
|
187
|
+
aa_int i;
|
|
188
|
+
blas_int bdim = (blas_int)(a->dim);
|
|
189
|
+
blas_int blen = (blas_int)len;
|
|
190
|
+
aa_float onef = 1.0, zerof = 0.0, r;
|
|
191
|
+
/* if len < mem this only uses len cols */
|
|
67
192
|
BLAS(gemm)
|
|
68
|
-
("Trans", "No", &
|
|
69
|
-
&zerof, a->M, &
|
|
193
|
+
("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
|
|
194
|
+
a->Y, &bdim, &zerof, a->M, &blen);
|
|
195
|
+
if (a->regularization > 0) {
|
|
196
|
+
r = compute_regularization(a, len);
|
|
197
|
+
for (i = 0; i < len; ++i) {
|
|
198
|
+
a->M[i + len * i] += r;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
TIME_TOC
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/* initialize accel params, in particular x_prev, f_prev, g_prev */
|
|
206
|
+
static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) {
|
|
207
|
+
TIME_TIC
|
|
208
|
+
blas_int bdim = (blas_int)a->dim;
|
|
209
|
+
aa_float neg_onef = -1.0;
|
|
210
|
+
blas_int one = 1;
|
|
211
|
+
/* x_prev = x */
|
|
212
|
+
memcpy(a->x, x, sizeof(aa_float) * a->dim);
|
|
213
|
+
/* f_prev = f */
|
|
214
|
+
memcpy(a->f, f, sizeof(aa_float) * a->dim);
|
|
215
|
+
/* g_prev = x */
|
|
216
|
+
memcpy(a->g_prev, x, sizeof(aa_float) * a->dim);
|
|
217
|
+
/* g_prev = x_prev - f_prev */
|
|
218
|
+
BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one);
|
|
219
|
+
TIME_TOC
|
|
70
220
|
}
|
|
71
221
|
|
|
72
222
|
/* updates the workspace parameters for aa for this iteration */
|
|
73
|
-
static void update_accel_params(const aa_float *x, const aa_float *f,
|
|
74
|
-
|
|
223
|
+
static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
|
|
224
|
+
aa_int len) {
|
|
75
225
|
/* at the start a->x = x_prev and a->f = f_prev */
|
|
76
|
-
|
|
77
|
-
aa_int
|
|
78
|
-
|
|
226
|
+
TIME_TIC
|
|
227
|
+
aa_int idx = (a->iter - 1) % a->mem;
|
|
79
228
|
blas_int one = 1;
|
|
80
|
-
blas_int
|
|
229
|
+
blas_int bdim = (blas_int)a->dim;
|
|
81
230
|
aa_float neg_onef = -1.0;
|
|
82
231
|
|
|
83
232
|
/* g = x */
|
|
84
|
-
memcpy(a->g, x, sizeof(aa_float) *
|
|
233
|
+
memcpy(a->g, x, sizeof(aa_float) * a->dim);
|
|
85
234
|
/* s = x */
|
|
86
|
-
memcpy(a->s, x, sizeof(aa_float) *
|
|
235
|
+
memcpy(a->s, x, sizeof(aa_float) * a->dim);
|
|
87
236
|
/* d = f */
|
|
88
|
-
memcpy(a->d, f, sizeof(aa_float) *
|
|
89
|
-
/* g
|
|
90
|
-
BLAS(axpy)(&
|
|
91
|
-
/* s
|
|
92
|
-
BLAS(axpy)(&
|
|
93
|
-
/* d
|
|
94
|
-
BLAS(axpy)(&
|
|
237
|
+
memcpy(a->d, f, sizeof(aa_float) * a->dim);
|
|
238
|
+
/* g = x - f */
|
|
239
|
+
BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one);
|
|
240
|
+
/* s = x - x_prev */
|
|
241
|
+
BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one);
|
|
242
|
+
/* d = f - f_prev */
|
|
243
|
+
BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one);
|
|
95
244
|
|
|
96
245
|
/* g, s, d correct here */
|
|
97
246
|
|
|
98
247
|
/* y = g */
|
|
99
|
-
memcpy(a->y, a->g, sizeof(aa_float) *
|
|
100
|
-
/* y
|
|
101
|
-
BLAS(axpy)(&
|
|
248
|
+
memcpy(a->y, a->g, sizeof(aa_float) * a->dim);
|
|
249
|
+
/* y = g - g_prev */
|
|
250
|
+
BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one);
|
|
102
251
|
|
|
103
252
|
/* y correct here */
|
|
104
253
|
|
|
105
254
|
/* copy y into idx col of Y */
|
|
106
|
-
memcpy(&(a->Y[idx *
|
|
255
|
+
memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim);
|
|
107
256
|
/* copy s into idx col of S */
|
|
108
|
-
memcpy(&(a->S[idx *
|
|
257
|
+
memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim);
|
|
109
258
|
/* copy d into idx col of D */
|
|
110
|
-
memcpy(&(a->D[idx *
|
|
259
|
+
memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim);
|
|
260
|
+
|
|
261
|
+
/* Y, S, D correct here */
|
|
111
262
|
|
|
112
|
-
/*
|
|
263
|
+
/* set a->f and a->x for next iter (x_prev and f_prev) */
|
|
264
|
+
memcpy(a->f, f, sizeof(aa_float) * a->dim);
|
|
265
|
+
memcpy(a->x, x, sizeof(aa_float) * a->dim);
|
|
113
266
|
|
|
114
|
-
|
|
115
|
-
|
|
267
|
+
/* workspace for when relaxation != 1.0 */
|
|
268
|
+
if (a->x_work) {
|
|
269
|
+
memcpy(a->x_work, x, sizeof(aa_float) * a->dim);
|
|
270
|
+
}
|
|
116
271
|
|
|
117
272
|
/* x, f correct here */
|
|
118
273
|
|
|
119
|
-
|
|
120
|
-
|
|
274
|
+
memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim);
|
|
275
|
+
/* g_prev set for next iter here */
|
|
121
276
|
|
|
122
|
-
/*
|
|
277
|
+
/* compute ||g|| = ||f - x|| */
|
|
278
|
+
a->norm_g = BLAS(nrm2)(&bdim, a->g, &one);
|
|
123
279
|
|
|
124
|
-
|
|
280
|
+
TIME_TOC
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
125
283
|
|
|
126
|
-
|
|
284
|
+
/* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
|
|
285
|
+
static void relax(aa_float *f, AaWork *a, aa_int len) {
|
|
286
|
+
TIME_TIC
|
|
287
|
+
/* x_work = x initially */
|
|
288
|
+
blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
|
289
|
+
aa_float onef = 1.0, neg_onef = -1.0;
|
|
290
|
+
aa_float one_m_relaxation = 1. - a->relaxation;
|
|
291
|
+
/* x_work = x - S * work */
|
|
292
|
+
BLAS(gemv)
|
|
293
|
+
("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
|
|
294
|
+
a->x_work, &one);
|
|
295
|
+
/* f = relaxation * f */
|
|
296
|
+
BLAS(scal)(&bdim, &a->relaxation, f, &one);
|
|
297
|
+
/* f += (1 - relaxation) * x_work */
|
|
298
|
+
BLAS(axpy)(&bdim, &one_m_relaxation, a->x_work, &one, f, &one);
|
|
299
|
+
TIME_TOC
|
|
127
300
|
}
|
|
128
301
|
|
|
129
|
-
/* solves the system of equations to perform the
|
|
302
|
+
/* solves the system of equations to perform the AA update
|
|
130
303
|
* at the end f contains the next iterate to be returned
|
|
131
304
|
*/
|
|
132
|
-
static
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
aa_float
|
|
305
|
+
static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
|
|
306
|
+
TIME_TIC
|
|
307
|
+
blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
|
308
|
+
aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
|
|
309
|
+
|
|
136
310
|
/* work = S'g or Y'g */
|
|
137
311
|
BLAS(gemv)
|
|
138
|
-
("Trans", &
|
|
139
|
-
a->work, &one);
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
a->type1 ? 1 : 2, (int)a->iter, (int)info,
|
|
147
|
-
#endif
|
|
148
|
-
return -1;
|
|
312
|
+
("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
|
|
313
|
+
&zerof, a->work, &one);
|
|
314
|
+
|
|
315
|
+
/* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */
|
|
316
|
+
BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
|
|
317
|
+
aa_norm = BLAS(nrm2)(&blen, a->work, &one);
|
|
318
|
+
if (a->verbosity > 1) {
|
|
319
|
+
printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
|
|
320
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
|
|
149
321
|
}
|
|
150
|
-
|
|
322
|
+
|
|
323
|
+
/* info < 0 input error, input > 0 matrix is singular */
|
|
324
|
+
if (info != 0 || aa_norm >= a->max_weight_norm) {
|
|
325
|
+
if (a->verbosity > 0) {
|
|
326
|
+
printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
|
|
327
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
|
|
328
|
+
}
|
|
329
|
+
a->success = 0;
|
|
330
|
+
/* reset aa for stability */
|
|
331
|
+
aa_reset(a);
|
|
332
|
+
TIME_TOC
|
|
333
|
+
return -aa_norm;
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
/* here work = gamma, ie, the correct AA shifted weights */
|
|
337
|
+
/* if solve was successful compute new point */
|
|
338
|
+
|
|
339
|
+
/* first set f -= D * work */
|
|
151
340
|
BLAS(gemv)
|
|
152
|
-
("NoTrans", &
|
|
153
|
-
|
|
341
|
+
("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
|
|
342
|
+
&one);
|
|
343
|
+
|
|
344
|
+
/* if relaxation is not 1 then need to incorporate */
|
|
345
|
+
if (a->relaxation != 1.0) {
|
|
346
|
+
relax(f, a, len);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
a->success = 1; /* this should be the only place we set success = 1 */
|
|
350
|
+
TIME_TOC
|
|
351
|
+
return aa_norm;
|
|
154
352
|
}
|
|
155
353
|
|
|
156
354
|
/*
|
|
157
355
|
* API functions below this line, see aa.h for descriptions.
|
|
158
356
|
*/
|
|
159
|
-
AaWork *aa_init(aa_int
|
|
357
|
+
/* Allocates and initialises an Anderson-acceleration workspace.
 *
 * dim is the iterate length and mem the requested history size (capped at
 * dim); the remaining arguments are stored verbatim in the workspace.
 *
 * Returns a null pointer if any allocation fails; nothing is leaked in that
 * case. When the capped memory is <= 0 a workspace without any buffers is
 * returned, which aa_apply treats as "acceleration disabled".
 */
AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
                aa_float relaxation, aa_float safeguard_factor,
                aa_float max_weight_norm, aa_int verbosity) {
  TIME_TIC
  size_t n, m;
  AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
  if (!a) {
    printf("Failed to allocate memory for AA.\n");
    return (AaWork *)0;
  }
  a->type1 = type1;
  a->iter = 0;
  a->dim = dim;
  a->mem = MIN(mem, dim); /* for rank stability */
  a->regularization = regularization;
  a->relaxation = relaxation;
  a->safeguard_factor = safeguard_factor;
  a->max_weight_norm = max_weight_norm;
  a->success = 0;
  a->verbosity = verbosity;
  if (a->mem <= 0) {
    return a;
  }

  /* here 0 < mem <= dim; do the size products in size_t so dim * mem cannot
   * overflow aa_int (calloc itself guards the count * size product) */
  n = (size_t)a->dim;
  m = (size_t)a->mem;

  a->x = (aa_float *)calloc(n, sizeof(aa_float));
  a->f = (aa_float *)calloc(n, sizeof(aa_float));
  a->g = (aa_float *)calloc(n, sizeof(aa_float));

  a->g_prev = (aa_float *)calloc(n, sizeof(aa_float));

  a->y = (aa_float *)calloc(n, sizeof(aa_float));
  a->s = (aa_float *)calloc(n, sizeof(aa_float));
  a->d = (aa_float *)calloc(n, sizeof(aa_float));

  a->Y = (aa_float *)calloc(n * m, sizeof(aa_float));
  a->S = (aa_float *)calloc(n * m, sizeof(aa_float));
  a->D = (aa_float *)calloc(n * m, sizeof(aa_float));

  a->M = (aa_float *)calloc(m * m, sizeof(aa_float));
  a->work = (aa_float *)calloc(MAX(m, n), sizeof(aa_float));
  a->ipiv = (blas_int *)calloc(m, sizeof(blas_int));

  if (relaxation != 1.0) {
    a->x_work = (aa_float *)calloc(n, sizeof(aa_float));
  } else {
    a->x_work = 0;
  }

  /* a half-built workspace would be dereferenced by aa_apply, so fail as a
   * whole; free() accepts the null pointers left by failed allocations */
  if (!a->x || !a->f || !a->g || !a->g_prev || !a->y || !a->s || !a->d ||
      !a->Y || !a->S || !a->D || !a->M || !a->work || !a->ipiv ||
      (relaxation != 1.0 && !a->x_work)) {
    printf("Failed to allocate memory for AA.\n");
    free(a->x);
    free(a->f);
    free(a->g);
    free(a->g_prev);
    free(a->y);
    free(a->s);
    free(a->d);
    free(a->Y);
    free(a->S);
    free(a->D);
    free(a->M);
    free(a->work);
    free(a->ipiv);
    free(a->x_work);
    free(a);
    return (AaWork *)0;
  }
  TIME_TOC
  return a;
}
|
|
192
406
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
407
|
+
/* Performs one Anderson-acceleration step for the pair (x, f).
 *
 * Returns 0 when no solve is attempted (acceleration disabled, first
 * iteration, or history not yet full); otherwise returns whatever solve()
 * returns. f is overwritten only when solve() succeeds. */
aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
  TIME_TIC
  aa_float weight_norm = 0;
  aa_int ncols = MIN(a->iter, a->mem); /* history columns currently in use */

  a->success = 0; /* solve() flips this to 1 if an AA step is made */

  if (a->mem <= 0) {
    /* acceleration disabled */
    TIME_TOC
    return weight_norm; /* 0 */
  }

  if (a->iter == 0) {
    /* first call: only seed the "previous" quantities */
    init_accel_params(x, f, a);
    a->iter++;
    TIME_TOC
    return weight_norm; /* 0 */
  }

  /* refresh g, s, d, y and the history matrices */
  update_accel_params(x, f, a, ncols);

  /* only perform solve steps when the memory is full */
  if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) {
    /* M = S'Y or Y'Y depending on type of aa used */
    set_m(a, ncols);
    /* new point overwrites f if the solve succeeds */
    weight_norm = solve(f, a, ncols);
  }

  a->iter++;
  TIME_TOC
  return weight_norm;
}
|
|
437
|
+
|
|
438
|
+
/* Checks an accepted AA step against the stored residual norm.
 *
 * If the last aa_apply did not take an AA step, returns 0 immediately.
 * Otherwise computes ||x_new - f_new||; when it exceeds
 * safeguard_factor * norm_g the step is rejected: f_new and x_new are
 * overwritten with the stored a->f and a->x, the workspace is reset, and -1
 * is returned. Returns 0 when the step is kept. */
aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
  TIME_TIC
  const size_t vec_bytes = a->dim * sizeof(aa_float);
  blas_int n = (blas_int)a->dim;
  blas_int stride = 1;
  aa_float minus_one = -1.0;
  aa_float new_resid_norm;

  if (!a->success) {
    /* last AA update was not successful, no need for safeguarding */
    TIME_TOC
    return 0;
  }

  /* clear the flag so a repeated call does nothing */
  a->success = 0;

  /* work = x_new - f_new */
  memcpy(a->work, x_new, vec_bytes);
  BLAS(axpy)(&n, &minus_one, f_new, &stride, a->work, &stride);
  new_resid_norm = BLAS(nrm2)(&n, a->work, &stride);

  /* written as !(>) so the comparison behaves the same for NaN */
  if (!(new_resid_norm > a->safeguard_factor * a->norm_g)) {
    TIME_TOC
    return 0;
  }

  /* reject the AA step: hand back the stored point and reset */
  memcpy(f_new, a->f, vec_bytes);
  memcpy(x_new, a->x, vec_bytes);
  if (a->verbosity > 0) {
    printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n",
           (int)a->iter, new_resid_norm, a->norm_g);
  }
  aa_reset(a);
  TIME_TOC
  return -1;
}
|
|
204
475
|
|
|
205
476
|
void aa_finish(AaWork *a) {
|
|
@@ -217,8 +488,21 @@ void aa_finish(AaWork *a) {
|
|
|
217
488
|
free(a->M);
|
|
218
489
|
free(a->work);
|
|
219
490
|
free(a->ipiv);
|
|
491
|
+
if (a->x_work) {
|
|
492
|
+
free(a->x_work);
|
|
493
|
+
}
|
|
220
494
|
free(a);
|
|
221
495
|
}
|
|
496
|
+
return;
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
/* Restarts acceleration: the iteration counter goes back to 0, so the next
 * aa_apply call re-seeds the workspace. Buffers are left as they are. */
void aa_reset(AaWork *a) {
  const int announce = a->verbosity > 0;
  if (announce) {
    printf("AA reset.\n");
  }
  a->iter = 0;
}
|
|
223
507
|
|
|
224
508
|
#endif
|