scs 0.2.2 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +18 -18
- data/README.md +19 -14
- data/lib/scs/ffi.rb +31 -20
- data/lib/scs/solver.rb +32 -9
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +39 -0
- data/vendor/scs/CMakeLists.txt +320 -0
- data/vendor/scs/Makefile +32 -23
- data/vendor/scs/README.md +9 -218
- data/vendor/scs/include/aa.h +67 -23
- data/vendor/scs/include/cones.h +22 -19
- data/vendor/scs/include/glbopts.h +107 -79
- data/vendor/scs/include/linalg.h +3 -4
- data/vendor/scs/include/linsys.h +58 -44
- data/vendor/scs/include/normalize.h +6 -5
- data/vendor/scs/include/rw.h +8 -2
- data/vendor/scs/include/scs.h +257 -141
- data/vendor/scs/include/scs_types.h +34 -0
- data/vendor/scs/include/scs_work.h +83 -0
- data/vendor/scs/include/util.h +3 -15
- data/vendor/scs/linsys/cpu/direct/private.c +241 -232
- data/vendor/scs/linsys/cpu/direct/private.h +13 -7
- data/vendor/scs/linsys/cpu/indirect/private.c +194 -118
- data/vendor/scs/linsys/cpu/indirect/private.h +7 -4
- data/vendor/scs/linsys/csparse.c +87 -0
- data/vendor/scs/linsys/csparse.h +34 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
- data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
- data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
- data/vendor/scs/linsys/external/qdldl/changes +2 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
- data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
- data/vendor/scs/linsys/gpu/gpu.c +58 -21
- data/vendor/scs/linsys/gpu/gpu.h +70 -35
- data/vendor/scs/linsys/gpu/indirect/private.c +394 -157
- data/vendor/scs/linsys/gpu/indirect/private.h +27 -12
- data/vendor/scs/linsys/scs_matrix.c +478 -0
- data/vendor/scs/linsys/scs_matrix.h +70 -0
- data/vendor/scs/scs.mk +14 -10
- data/vendor/scs/src/aa.c +394 -110
- data/vendor/scs/src/cones.c +497 -359
- data/vendor/scs/src/ctrlc.c +15 -5
- data/vendor/scs/src/linalg.c +107 -26
- data/vendor/scs/src/normalize.c +30 -72
- data/vendor/scs/src/rw.c +202 -27
- data/vendor/scs/src/scs.c +769 -571
- data/vendor/scs/src/scs_version.c +11 -3
- data/vendor/scs/src/util.c +37 -106
- data/vendor/scs/test/minunit.h +22 -8
- data/vendor/scs/test/problem_utils.h +180 -25
- data/vendor/scs/test/problems/degenerate.h +130 -0
- data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
- data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
- data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
- data/vendor/scs/test/problems/random_prob +0 -0
- data/vendor/scs/test/problems/random_prob.h +45 -0
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
- data/vendor/scs/test/problems/small_lp.h +14 -13
- data/vendor/scs/test/problems/small_qp.h +352 -0
- data/vendor/scs/test/problems/test_validation.h +43 -0
- data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
- data/vendor/scs/test/random_socp_prob.c +54 -53
- data/vendor/scs/test/rng.h +109 -0
- data/vendor/scs/test/run_from_file.c +20 -11
- data/vendor/scs/test/run_tests.c +35 -2
- metadata +29 -98
- data/vendor/scs/linsys/amatrix.c +0 -305
- data/vendor/scs/linsys/amatrix.h +0 -36
- data/vendor/scs/linsys/amatrix.o +0 -0
- data/vendor/scs/linsys/cpu/direct/private.o +0 -0
- data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
- data/vendor/scs/src/aa.o +0 -0
- data/vendor/scs/src/cones.o +0 -0
- data/vendor/scs/src/ctrlc.o +0 -0
- data/vendor/scs/src/linalg.o +0 -0
- data/vendor/scs/src/normalize.o +0 -0
- data/vendor/scs/src/rw.o +0 -0
- data/vendor/scs/src/scs.o +0 -0
- data/vendor/scs/src/scs_version.o +0 -0
- data/vendor/scs/src/util.o +0 -0
- data/vendor/scs/test/data/small_random_socp +0 -0
- data/vendor/scs/test/problems/small_random_socp.h +0 -33
- data/vendor/scs/test/run_tests +0 -2
data/vendor/scs/src/aa.c
CHANGED
@@ -1,49 +1,104 @@
|
|
1
|
+
/*
|
2
|
+
* Anderson acceleration.
|
3
|
+
*
|
4
|
+
* x: input iterate
|
5
|
+
* x_prev: previous input iterate
|
6
|
+
* f: f(x) output of map f applied to x
|
7
|
+
* g: x - f (error)
|
8
|
+
* g_prev: previous error
|
9
|
+
* s: x - x_prev
|
10
|
+
* y: g - g_prev
|
11
|
+
* d: s - y = f - f_prev
|
12
|
+
*
|
13
|
+
* capital letters are the variables stacked columnwise
|
14
|
+
* idx tracks current index where latest quantities written
|
15
|
+
* idx cycles from left to right columns in matrix
|
16
|
+
*
|
17
|
+
* Type-I:
|
18
|
+
* return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g )
|
19
|
+
*
|
20
|
+
* Type-II:
|
21
|
+
* return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g )
|
22
|
+
*
|
23
|
+
*/
|
24
|
+
|
1
25
|
#include "aa.h"
|
2
26
|
#include "scs_blas.h"
|
3
27
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
* do this using LAPACK ?gesv.
|
8
|
-
*/
|
28
|
+
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
29
|
+
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
30
|
+
#define FILL_MEMORY_BEFORE_SOLVE (1)
|
9
31
|
|
10
32
|
#ifndef USE_LAPACK
|
11
33
|
|
12
|
-
typedef void *
|
34
|
+
typedef void *ACCEL_WORK;
|
13
35
|
|
14
|
-
AaWork *aa_init(aa_int dim, aa_int
|
15
|
-
|
16
|
-
|
36
|
+
AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
|
37
|
+
aa_float relaxation, aa_float safeguard_factor,
|
38
|
+
aa_float max_weight_norm, aa_int verbosity) {
|
39
|
+
return SCS_NULL;
|
40
|
+
}
|
41
|
+
aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
|
42
|
+
return 0;
|
43
|
+
}
|
44
|
+
aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
|
45
|
+
return 0;
|
46
|
+
}
|
47
|
+
void aa_finish(AaWork *a) {
|
48
|
+
}
|
49
|
+
void aa_reset(AaWork *a) {
|
50
|
+
}
|
17
51
|
|
18
52
|
#else
|
19
53
|
|
20
|
-
|
21
|
-
struct ACCEL_WORK {
|
22
|
-
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
23
|
-
aa_int k; /* aa memory */
|
24
|
-
aa_int l; /* variable dimension */
|
25
|
-
aa_int iter; /* current iteration */
|
54
|
+
#if PROFILING > 0
|
26
55
|
|
27
|
-
|
28
|
-
|
29
|
-
|
56
|
+
#define TIME_TIC \
|
57
|
+
timer __t; \
|
58
|
+
tic(&__t);
|
59
|
+
#define TIME_TOC toc(__func__, &__t);
|
30
60
|
|
31
|
-
|
32
|
-
|
61
|
+
#include <time.h>
|
62
|
+
typedef struct timer {
|
63
|
+
struct timespec tic;
|
64
|
+
struct timespec toc;
|
65
|
+
} timer;
|
33
66
|
|
34
|
-
|
35
|
-
|
36
|
-
|
67
|
+
void tic(timer *t) {
|
68
|
+
clock_gettime(CLOCK_MONOTONIC, &t->tic);
|
69
|
+
}
|
37
70
|
|
38
|
-
|
39
|
-
|
40
|
-
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
41
|
-
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
71
|
+
aa_float tocq(timer *t) {
|
72
|
+
struct timespec temp;
|
42
73
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
74
|
+
clock_gettime(CLOCK_MONOTONIC, &t->toc);
|
75
|
+
|
76
|
+
if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) {
|
77
|
+
temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1;
|
78
|
+
temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec;
|
79
|
+
} else {
|
80
|
+
temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec;
|
81
|
+
temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec;
|
82
|
+
}
|
83
|
+
return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6;
|
84
|
+
}
|
85
|
+
|
86
|
+
aa_float toc(const char *str, timer *t) {
|
87
|
+
aa_float time = tocq(t);
|
88
|
+
printf("%s - time: %8.4f milli-seconds.\n", str, time);
|
89
|
+
return time;
|
90
|
+
}
|
91
|
+
|
92
|
+
#else
|
93
|
+
|
94
|
+
#define TIME_TIC
|
95
|
+
#define TIME_TOC
|
96
|
+
|
97
|
+
#endif
|
98
|
+
|
99
|
+
#ifdef __cplusplus
|
100
|
+
extern "C" {
|
101
|
+
#endif
|
47
102
|
|
48
103
|
/* BLAS functions used */
|
49
104
|
aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
|
@@ -59,147 +114,363 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
|
|
59
114
|
blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
|
60
115
|
blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
|
61
116
|
aa_float *c, blas_int *ldc);
|
117
|
+
void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
|
118
|
+
const blas_int *incx);
|
119
|
+
|
120
|
+
#ifdef __cplusplus
|
121
|
+
}
|
122
|
+
#endif
|
123
|
+
|
124
|
+
/* This file uses Anderson acceleration to improve the convergence of
|
125
|
+
* a fixed point mapping.
|
126
|
+
* At each iteration we need to solve a (small) linear system, we
|
127
|
+
* do this using LAPACK ?gesv.
|
128
|
+
*/
|
129
|
+
|
130
|
+
/* contains the necessary parameters to perform aa at each step */
|
131
|
+
struct ACCEL_WORK {
|
132
|
+
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
133
|
+
aa_int mem; /* aa memory */
|
134
|
+
aa_int dim; /* variable dimension */
|
135
|
+
aa_int iter; /* current iteration */
|
136
|
+
aa_int verbosity; /* verbosity level, 0 is no printing */
|
137
|
+
aa_int success; /* was the last AA step successful or not */
|
138
|
+
|
139
|
+
aa_float relaxation; /* relaxation x and f, beta in some papers */
|
140
|
+
aa_float regularization; /* regularization */
|
141
|
+
aa_float safeguard_factor; /* safeguard tolerance factor */
|
142
|
+
aa_float max_weight_norm; /* maximum norm of AA weights */
|
143
|
+
|
144
|
+
aa_float *x; /* x input to map*/
|
145
|
+
aa_float *f; /* f(x) output of map */
|
146
|
+
aa_float *g; /* x - f(x) */
|
147
|
+
aa_float norm_g; /* ||x - f(x)|| */
|
148
|
+
|
149
|
+
/* from previous iteration */
|
150
|
+
aa_float *g_prev; /* x_prev - f(x_prev) */
|
151
|
+
|
152
|
+
aa_float *y; /* g - g_prev */
|
153
|
+
aa_float *s; /* x - x_prev */
|
154
|
+
aa_float *d; /* f - f_prev */
|
155
|
+
|
156
|
+
aa_float *Y; /* matrix of stacked y values */
|
157
|
+
aa_float *S; /* matrix of stacked s values */
|
158
|
+
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
159
|
+
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
160
|
+
|
161
|
+
/* workspace variables */
|
162
|
+
aa_float *work; /* scratch space */
|
163
|
+
blas_int *ipiv; /* permutation variable, not used after solve */
|
164
|
+
|
165
|
+
aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */
|
166
|
+
};
|
167
|
+
|
168
|
+
/* add regularization dependent on Y and S matrices */
|
169
|
+
static aa_float compute_regularization(AaWork *a, aa_int len) {
|
170
|
+
/* typically type-I does better with higher regularization than type-II */
|
171
|
+
TIME_TIC
|
172
|
+
aa_float r, nrm_m;
|
173
|
+
blas_int btotal = (blas_int)(len * len), one = 1;
|
174
|
+
nrm_m = BLAS(nrm2)(&btotal, a->M, &one);
|
175
|
+
r = a->regularization * nrm_m;
|
176
|
+
if (a->verbosity > 2) {
|
177
|
+
printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r);
|
178
|
+
}
|
179
|
+
TIME_TOC
|
180
|
+
return r;
|
181
|
+
}
|
62
182
|
|
63
183
|
/* sets a->M to S'Y or Y'Y depending on type of aa used */
|
64
|
-
|
65
|
-
|
66
|
-
|
184
|
+
/* M is len x len after this */
|
185
|
+
static void set_m(AaWork *a, aa_int len) {
|
186
|
+
TIME_TIC
|
187
|
+
aa_int i;
|
188
|
+
blas_int bdim = (blas_int)(a->dim);
|
189
|
+
blas_int blen = (blas_int)len;
|
190
|
+
aa_float onef = 1.0, zerof = 0.0, r;
|
191
|
+
/* if len < mem this only uses len cols */
|
67
192
|
BLAS(gemm)
|
68
|
-
("Trans", "No", &
|
69
|
-
&zerof, a->M, &
|
193
|
+
("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
|
194
|
+
a->Y, &bdim, &zerof, a->M, &blen);
|
195
|
+
if (a->regularization > 0) {
|
196
|
+
r = compute_regularization(a, len);
|
197
|
+
for (i = 0; i < len; ++i) {
|
198
|
+
a->M[i + len * i] += r;
|
199
|
+
}
|
200
|
+
}
|
201
|
+
TIME_TOC
|
202
|
+
return;
|
203
|
+
}
|
204
|
+
|
205
|
+
/* initialize accel params, in particular x_prev, f_prev, g_prev */
|
206
|
+
static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) {
|
207
|
+
TIME_TIC
|
208
|
+
blas_int bdim = (blas_int)a->dim;
|
209
|
+
aa_float neg_onef = -1.0;
|
210
|
+
blas_int one = 1;
|
211
|
+
/* x_prev = x */
|
212
|
+
memcpy(a->x, x, sizeof(aa_float) * a->dim);
|
213
|
+
/* f_prev = f */
|
214
|
+
memcpy(a->f, f, sizeof(aa_float) * a->dim);
|
215
|
+
/* g_prev = x */
|
216
|
+
memcpy(a->g_prev, x, sizeof(aa_float) * a->dim);
|
217
|
+
/* g_prev = x_prev - f_prev */
|
218
|
+
BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one);
|
219
|
+
TIME_TOC
|
70
220
|
}
|
71
221
|
|
72
222
|
/* updates the workspace parameters for aa for this iteration */
|
73
|
-
static void update_accel_params(const aa_float *x, const aa_float *f,
|
74
|
-
|
223
|
+
static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
|
224
|
+
aa_int len) {
|
75
225
|
/* at the start a->x = x_prev and a->f = f_prev */
|
76
|
-
|
77
|
-
aa_int
|
78
|
-
|
226
|
+
TIME_TIC
|
227
|
+
aa_int idx = (a->iter - 1) % a->mem;
|
79
228
|
blas_int one = 1;
|
80
|
-
blas_int
|
229
|
+
blas_int bdim = (blas_int)a->dim;
|
81
230
|
aa_float neg_onef = -1.0;
|
82
231
|
|
83
232
|
/* g = x */
|
84
|
-
memcpy(a->g, x, sizeof(aa_float) *
|
233
|
+
memcpy(a->g, x, sizeof(aa_float) * a->dim);
|
85
234
|
/* s = x */
|
86
|
-
memcpy(a->s, x, sizeof(aa_float) *
|
235
|
+
memcpy(a->s, x, sizeof(aa_float) * a->dim);
|
87
236
|
/* d = f */
|
88
|
-
memcpy(a->d, f, sizeof(aa_float) *
|
89
|
-
/* g
|
90
|
-
BLAS(axpy)(&
|
91
|
-
/* s
|
92
|
-
BLAS(axpy)(&
|
93
|
-
/* d
|
94
|
-
BLAS(axpy)(&
|
237
|
+
memcpy(a->d, f, sizeof(aa_float) * a->dim);
|
238
|
+
/* g = x - f */
|
239
|
+
BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one);
|
240
|
+
/* s = x - x_prev */
|
241
|
+
BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one);
|
242
|
+
/* d = f - f_prev */
|
243
|
+
BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one);
|
95
244
|
|
96
245
|
/* g, s, d correct here */
|
97
246
|
|
98
247
|
/* y = g */
|
99
|
-
memcpy(a->y, a->g, sizeof(aa_float) *
|
100
|
-
/* y
|
101
|
-
BLAS(axpy)(&
|
248
|
+
memcpy(a->y, a->g, sizeof(aa_float) * a->dim);
|
249
|
+
/* y = g - g_prev */
|
250
|
+
BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one);
|
102
251
|
|
103
252
|
/* y correct here */
|
104
253
|
|
105
254
|
/* copy y into idx col of Y */
|
106
|
-
memcpy(&(a->Y[idx *
|
255
|
+
memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim);
|
107
256
|
/* copy s into idx col of S */
|
108
|
-
memcpy(&(a->S[idx *
|
257
|
+
memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim);
|
109
258
|
/* copy d into idx col of D */
|
110
|
-
memcpy(&(a->D[idx *
|
259
|
+
memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim);
|
260
|
+
|
261
|
+
/* Y, S, D correct here */
|
111
262
|
|
112
|
-
/*
|
263
|
+
/* set a->f and a->x for next iter (x_prev and f_prev) */
|
264
|
+
memcpy(a->f, f, sizeof(aa_float) * a->dim);
|
265
|
+
memcpy(a->x, x, sizeof(aa_float) * a->dim);
|
113
266
|
|
114
|
-
|
115
|
-
|
267
|
+
/* workspace for when relaxation != 1.0 */
|
268
|
+
if (a->x_work) {
|
269
|
+
memcpy(a->x_work, x, sizeof(aa_float) * a->dim);
|
270
|
+
}
|
116
271
|
|
117
272
|
/* x, f correct here */
|
118
273
|
|
119
|
-
|
120
|
-
|
274
|
+
memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim);
|
275
|
+
/* g_prev set for next iter here */
|
121
276
|
|
122
|
-
/*
|
277
|
+
/* compute ||g|| = ||x - f|| */
|
278
|
+
a->norm_g = BLAS(nrm2)(&bdim, a->g, &one);
|
123
279
|
|
124
|
-
|
280
|
+
TIME_TOC
|
281
|
+
return;
|
282
|
+
}
|
125
283
|
|
126
|
-
|
284
|
+
/* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
|
285
|
+
static void relax(aa_float *f, AaWork *a, aa_int len) {
|
286
|
+
TIME_TIC
|
287
|
+
/* x_work = x initially */
|
288
|
+
blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
289
|
+
aa_float onef = 1.0, neg_onef = -1.0;
|
290
|
+
aa_float one_m_relaxation = 1. - a->relaxation;
|
291
|
+
/* x_work = x - S * work */
|
292
|
+
BLAS(gemv)
|
293
|
+
("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
|
294
|
+
a->x_work, &one);
|
295
|
+
/* f = relaxation * f */
|
296
|
+
BLAS(scal)(&bdim, &a->relaxation, f, &one);
|
297
|
+
/* f += (1 - relaxation) * x_work */
|
298
|
+
BLAS(axpy)(&bdim, &one_m_relaxation, a->x_work, &one, f, &one);
|
299
|
+
TIME_TOC
|
127
300
|
}
|
128
301
|
|
129
|
-
/* solves the system of equations to perform the
|
302
|
+
/* solves the system of equations to perform the AA update
|
130
303
|
* at the end f contains the next iterate to be returned
|
131
304
|
*/
|
132
|
-
static
|
133
|
-
|
134
|
-
|
135
|
-
aa_float
|
305
|
+
static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
|
306
|
+
TIME_TIC
|
307
|
+
blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
308
|
+
aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
|
309
|
+
|
136
310
|
/* work = S'g or Y'g */
|
137
311
|
BLAS(gemv)
|
138
|
-
("Trans", &
|
139
|
-
a->work, &one);
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
a->type1 ? 1 : 2, (int)a->iter, (int)info,
|
147
|
-
#endif
|
148
|
-
return -1;
|
312
|
+
("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
|
313
|
+
&zerof, a->work, &one);
|
314
|
+
|
315
|
+
/* work = M \ work, where set_m has set M = S'Y or M = Y'Y (plus r * I when regularization > 0) */
|
316
|
+
BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
|
317
|
+
aa_norm = BLAS(nrm2)(&blen, a->work, &one);
|
318
|
+
if (a->verbosity > 1) {
|
319
|
+
printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
|
320
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
|
149
321
|
}
|
150
|
-
|
322
|
+
|
323
|
+
/* info < 0: invalid input argument, info > 0: matrix is singular */
|
324
|
+
if (info != 0 || aa_norm >= a->max_weight_norm) {
|
325
|
+
if (a->verbosity > 0) {
|
326
|
+
printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
|
327
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
|
328
|
+
}
|
329
|
+
a->success = 0;
|
330
|
+
/* reset aa for stability */
|
331
|
+
aa_reset(a);
|
332
|
+
TIME_TOC
|
333
|
+
return -aa_norm;
|
334
|
+
}
|
335
|
+
|
336
|
+
/* here work = gamma, ie, the correct AA shifted weights */
|
337
|
+
/* if solve was successful compute new point */
|
338
|
+
|
339
|
+
/* first set f -= D * work */
|
151
340
|
BLAS(gemv)
|
152
|
-
("NoTrans", &
|
153
|
-
|
341
|
+
("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
|
342
|
+
&one);
|
343
|
+
|
344
|
+
/* if relaxation is not 1 then need to incorporate */
|
345
|
+
if (a->relaxation != 1.0) {
|
346
|
+
relax(f, a, len);
|
347
|
+
}
|
348
|
+
|
349
|
+
a->success = 1; /* this should be the only place we set success = 1 */
|
350
|
+
TIME_TOC
|
351
|
+
return aa_norm;
|
154
352
|
}
|
155
353
|
|
156
354
|
/*
|
157
355
|
* API functions below this line, see aa.h for descriptions.
|
158
356
|
*/
|
159
|
-
AaWork *aa_init(aa_int
|
357
|
+
AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
|
358
|
+
aa_float relaxation, aa_float safeguard_factor,
|
359
|
+
aa_float max_weight_norm, aa_int verbosity) {
|
360
|
+
TIME_TIC
|
160
361
|
AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
|
161
362
|
if (!a) {
|
162
|
-
|
163
|
-
return (
|
363
|
+
printf("Failed to allocate memory for AA.\n");
|
364
|
+
return (AaWork *)0;
|
164
365
|
}
|
165
366
|
a->type1 = type1;
|
166
367
|
a->iter = 0;
|
167
|
-
a->
|
168
|
-
a->
|
169
|
-
|
368
|
+
a->dim = dim;
|
369
|
+
a->mem = MIN(mem, dim); /* for rank stability */
|
370
|
+
a->regularization = regularization;
|
371
|
+
a->relaxation = relaxation;
|
372
|
+
a->safeguard_factor = safeguard_factor;
|
373
|
+
a->max_weight_norm = max_weight_norm;
|
374
|
+
a->success = 0;
|
375
|
+
a->verbosity = verbosity;
|
376
|
+
if (a->mem <= 0) {
|
170
377
|
return a;
|
171
378
|
}
|
172
379
|
|
173
|
-
a->x = (aa_float *)calloc(a->
|
174
|
-
a->f = (aa_float *)calloc(a->
|
175
|
-
a->g = (aa_float *)calloc(a->
|
380
|
+
a->x = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
381
|
+
a->f = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
382
|
+
a->g = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
176
383
|
|
177
|
-
a->g_prev = (aa_float *)calloc(a->
|
384
|
+
a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
178
385
|
|
179
|
-
a->y = (aa_float *)calloc(a->
|
180
|
-
a->s = (aa_float *)calloc(a->
|
181
|
-
a->d = (aa_float *)calloc(a->
|
386
|
+
a->y = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
387
|
+
a->s = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
388
|
+
a->d = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
182
389
|
|
183
|
-
a->Y = (aa_float *)calloc(a->
|
184
|
-
a->S = (aa_float *)calloc(a->
|
185
|
-
a->D = (aa_float *)calloc(a->
|
390
|
+
a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
|
391
|
+
a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
|
392
|
+
a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
|
186
393
|
|
187
|
-
a->M = (aa_float *)calloc(a->
|
188
|
-
a->work = (aa_float *)calloc(a->
|
189
|
-
a->ipiv = (blas_int *)calloc(a->
|
394
|
+
a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float));
|
395
|
+
a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float));
|
396
|
+
a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int));
|
397
|
+
|
398
|
+
if (relaxation != 1.0) {
|
399
|
+
a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
400
|
+
} else {
|
401
|
+
a->x_work = 0;
|
402
|
+
}
|
403
|
+
TIME_TOC
|
190
404
|
return a;
|
191
405
|
}
|
192
406
|
|
193
|
-
|
194
|
-
|
195
|
-
|
407
|
+
aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
|
408
|
+
TIME_TIC
|
409
|
+
aa_float aa_norm = 0;
|
410
|
+
aa_int len = MIN(a->iter, a->mem);
|
411
|
+
a->success = 0; /* if we make an AA step we set this to 1 later */
|
412
|
+
if (a->mem <= 0) {
|
413
|
+
TIME_TOC
|
414
|
+
return aa_norm; /* 0 */
|
196
415
|
}
|
197
|
-
|
198
|
-
|
416
|
+
if (a->iter == 0) {
|
417
|
+
/* if first iteration then seed params for next iter */
|
418
|
+
init_accel_params(x, f, a);
|
419
|
+
a->iter++;
|
420
|
+
TIME_TOC
|
421
|
+
return aa_norm; /* 0 */
|
422
|
+
}
|
423
|
+
/* set various accel quantities */
|
424
|
+
update_accel_params(x, f, a, len);
|
425
|
+
|
426
|
+
/* only perform solve steps when the memory is full */
|
427
|
+
if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) {
|
428
|
+
/* set M = S'Y or Y'Y depending on type of aa used */
|
429
|
+
set_m(a, len);
|
430
|
+
/* solve linear system, new point overwrites f if successful */
|
431
|
+
aa_norm = solve(f, a, len);
|
432
|
+
}
|
433
|
+
a->iter++;
|
434
|
+
TIME_TOC
|
435
|
+
return aa_norm;
|
436
|
+
}
|
437
|
+
|
438
|
+
aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
|
439
|
+
TIME_TIC
|
440
|
+
blas_int bdim = (blas_int)a->dim;
|
441
|
+
blas_int one = 1;
|
442
|
+
aa_float neg_onef = -1.0;
|
443
|
+
aa_float norm_diff;
|
444
|
+
if (!a->success) {
|
445
|
+
/* last AA update was not successful, no need for safeguarding */
|
446
|
+
TIME_TOC
|
199
447
|
return 0;
|
200
448
|
}
|
201
|
-
|
202
|
-
|
449
|
+
|
450
|
+
/* reset success indicator in case safeguarding called multiple times */
|
451
|
+
a->success = 0;
|
452
|
+
|
453
|
+
/* work = x_new */
|
454
|
+
memcpy(a->work, x_new, a->dim * sizeof(aa_float));
|
455
|
+
/* work = x_new - f_new */
|
456
|
+
BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one);
|
457
|
+
/* norm_diff = || f_new - x_new || */
|
458
|
+
norm_diff = BLAS(nrm2)(&bdim, a->work, &one);
|
459
|
+
/* compare against safeguard_factor * ||g||, where g = x - f was stored by the last aa_apply */
|
460
|
+
if (norm_diff > a->safeguard_factor * a->norm_g) {
|
461
|
+
/* in this case we reject the AA step and reset */
|
462
|
+
memcpy(f_new, a->f, a->dim * sizeof(aa_float));
|
463
|
+
memcpy(x_new, a->x, a->dim * sizeof(aa_float));
|
464
|
+
if (a->verbosity > 0) {
|
465
|
+
printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n",
|
466
|
+
(int)a->iter, norm_diff, a->norm_g);
|
467
|
+
}
|
468
|
+
aa_reset(a);
|
469
|
+
TIME_TOC
|
470
|
+
return -1;
|
471
|
+
}
|
472
|
+
TIME_TOC
|
473
|
+
return 0;
|
203
474
|
}
|
204
475
|
|
205
476
|
void aa_finish(AaWork *a) {
|
@@ -217,8 +488,21 @@ void aa_finish(AaWork *a) {
|
|
217
488
|
free(a->M);
|
218
489
|
free(a->work);
|
219
490
|
free(a->ipiv);
|
491
|
+
if (a->x_work) {
|
492
|
+
free(a->x_work);
|
493
|
+
}
|
220
494
|
free(a);
|
221
495
|
}
|
496
|
+
return;
|
497
|
+
}
|
498
|
+
|
499
|
+
void aa_reset(AaWork *a) {
|
500
|
+
/* to reset we simply set a->iter = 0 */
|
501
|
+
if (a->verbosity > 0) {
|
502
|
+
printf("AA reset.\n");
|
503
|
+
}
|
504
|
+
a->iter = 0;
|
505
|
+
return;
|
222
506
|
}
|
223
507
|
|
224
508
|
#endif
|