scs 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +11 -6
- data/lib/scs/ffi.rb +30 -13
- data/lib/scs/solver.rb +32 -9
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +39 -0
- data/vendor/scs/CMakeLists.txt +7 -8
- data/vendor/scs/Makefile +24 -15
- data/vendor/scs/README.md +5 -263
- data/vendor/scs/include/aa.h +67 -23
- data/vendor/scs/include/cones.h +17 -17
- data/vendor/scs/include/glbopts.h +98 -32
- data/vendor/scs/include/linalg.h +2 -4
- data/vendor/scs/include/linsys.h +58 -44
- data/vendor/scs/include/normalize.h +3 -3
- data/vendor/scs/include/rw.h +8 -2
- data/vendor/scs/include/scs.h +293 -133
- data/vendor/scs/include/util.h +3 -15
- data/vendor/scs/linsys/cpu/direct/private.c +220 -224
- data/vendor/scs/linsys/cpu/direct/private.h +13 -7
- data/vendor/scs/linsys/cpu/direct/private.o +0 -0
- data/vendor/scs/linsys/cpu/indirect/private.c +177 -110
- data/vendor/scs/linsys/cpu/indirect/private.h +8 -4
- data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
- data/vendor/scs/linsys/csparse.c +87 -0
- data/vendor/scs/linsys/csparse.h +34 -0
- data/vendor/scs/linsys/csparse.o +0 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +1 -1
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
- data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
- data/vendor/scs/linsys/external/qdldl/changes +2 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
- data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
- data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
- data/vendor/scs/linsys/gpu/gpu.c +31 -33
- data/vendor/scs/linsys/gpu/gpu.h +48 -31
- data/vendor/scs/linsys/gpu/indirect/private.c +338 -232
- data/vendor/scs/linsys/gpu/indirect/private.h +23 -14
- data/vendor/scs/linsys/scs_matrix.c +498 -0
- data/vendor/scs/linsys/scs_matrix.h +70 -0
- data/vendor/scs/linsys/scs_matrix.o +0 -0
- data/vendor/scs/scs.mk +13 -9
- data/vendor/scs/src/aa.c +384 -109
- data/vendor/scs/src/aa.o +0 -0
- data/vendor/scs/src/cones.c +440 -353
- data/vendor/scs/src/cones.o +0 -0
- data/vendor/scs/src/ctrlc.c +15 -5
- data/vendor/scs/src/ctrlc.o +0 -0
- data/vendor/scs/src/linalg.c +84 -28
- data/vendor/scs/src/linalg.o +0 -0
- data/vendor/scs/src/normalize.c +22 -64
- data/vendor/scs/src/normalize.o +0 -0
- data/vendor/scs/src/rw.c +160 -21
- data/vendor/scs/src/rw.o +0 -0
- data/vendor/scs/src/scs.c +767 -563
- data/vendor/scs/src/scs.o +0 -0
- data/vendor/scs/src/scs_indir.o +0 -0
- data/vendor/scs/src/scs_version.c +9 -3
- data/vendor/scs/src/scs_version.o +0 -0
- data/vendor/scs/src/util.c +37 -106
- data/vendor/scs/src/util.o +0 -0
- data/vendor/scs/test/minunit.h +17 -8
- data/vendor/scs/test/problem_utils.h +176 -14
- data/vendor/scs/test/problems/degenerate.h +130 -0
- data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
- data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
- data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
- data/vendor/scs/test/problems/random_prob +0 -0
- data/vendor/scs/test/problems/random_prob.h +45 -0
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
- data/vendor/scs/test/problems/small_lp.h +13 -14
- data/vendor/scs/test/problems/test_fails.h +43 -0
- data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
- data/vendor/scs/test/random_socp_prob.c +54 -53
- data/vendor/scs/test/rng.h +109 -0
- data/vendor/scs/test/run_from_file.c +19 -10
- data/vendor/scs/test/run_tests.c +27 -3
- metadata +20 -8
- data/vendor/scs/linsys/amatrix.c +0 -305
- data/vendor/scs/linsys/amatrix.h +0 -36
- data/vendor/scs/linsys/amatrix.o +0 -0
- data/vendor/scs/test/data/small_random_socp +0 -0
- data/vendor/scs/test/problems/small_random_socp.h +0 -33
- data/vendor/scs/test/run_tests +0 -2
data/vendor/scs/src/aa.c
CHANGED
@@ -1,49 +1,100 @@
|
|
1
|
+
/*
|
2
|
+
* Anderson acceleration.
|
3
|
+
*
|
4
|
+
* x: input iterate
|
5
|
+
* x_prev: previous input iterate
|
6
|
+
* f: f(x) output of map f applied to x
|
7
|
+
* g: x - f (error)
|
8
|
+
* g_prev: previous error
|
9
|
+
* s: x - x_prev
|
10
|
+
* y: g - g_prev
|
11
|
+
* d: s - y = f - f_prev
|
12
|
+
*
|
13
|
+
* capital letters are the variables stacked columnwise
|
14
|
+
* idx tracks current index where latest quantities written
|
15
|
+
* idx cycles from left to right columns in matrix
|
16
|
+
*
|
17
|
+
* Type-I:
|
18
|
+
* return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g )
|
19
|
+
*
|
20
|
+
* Type-II:
|
21
|
+
* return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g )
|
22
|
+
*
|
23
|
+
*/
|
24
|
+
|
1
25
|
#include "aa.h"
|
2
26
|
#include "scs_blas.h"
|
3
27
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
* do this using LAPACK ?gesv.
|
8
|
-
*/
|
28
|
+
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
29
|
+
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
30
|
+
#define FILL_MEMORY_BEFORE_SOLVE (1)
|
9
31
|
|
10
32
|
#ifndef USE_LAPACK
|
11
33
|
|
12
|
-
typedef void *
|
34
|
+
typedef void *ACCEL_WORK;
|
13
35
|
|
14
|
-
AaWork *aa_init(aa_int dim, aa_int
|
15
|
-
|
16
|
-
|
36
|
+
AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
|
37
|
+
aa_float relaxation, aa_float safeguard_factor,
|
38
|
+
aa_float max_weight_norm, aa_int verbosity) {
|
39
|
+
return SCS_NULL;
|
40
|
+
}
|
41
|
+
aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
|
42
|
+
return 0;
|
43
|
+
}
|
44
|
+
aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
|
45
|
+
return 0;
|
46
|
+
}
|
47
|
+
void aa_finish(AaWork *a) {
|
48
|
+
}
|
49
|
+
void aa_reset(AaWork *a) {
|
50
|
+
}
|
17
51
|
|
18
52
|
#else
|
19
53
|
|
20
|
-
|
21
|
-
struct ACCEL_WORK {
|
22
|
-
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
23
|
-
aa_int k; /* aa memory */
|
24
|
-
aa_int l; /* variable dimension */
|
25
|
-
aa_int iter; /* current iteration */
|
54
|
+
#if PROFILING > 0
|
26
55
|
|
27
|
-
|
28
|
-
|
29
|
-
|
56
|
+
#define TIME_TIC \
|
57
|
+
timer __t; \
|
58
|
+
tic(&__t);
|
59
|
+
#define TIME_TOC toc(__func__, &__t);
|
30
60
|
|
31
|
-
|
32
|
-
|
61
|
+
#include <time.h>
|
62
|
+
typedef struct timer {
|
63
|
+
struct timespec tic;
|
64
|
+
struct timespec toc;
|
65
|
+
} timer;
|
33
66
|
|
34
|
-
|
35
|
-
|
36
|
-
|
67
|
+
void tic(timer *t) {
|
68
|
+
clock_gettime(CLOCK_MONOTONIC, &t->tic);
|
69
|
+
}
|
37
70
|
|
38
|
-
|
39
|
-
|
40
|
-
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
41
|
-
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
71
|
+
aa_float tocq(timer *t) {
|
72
|
+
struct timespec temp;
|
42
73
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
74
|
+
clock_gettime(CLOCK_MONOTONIC, &t->toc);
|
75
|
+
|
76
|
+
if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) {
|
77
|
+
temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1;
|
78
|
+
temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec;
|
79
|
+
} else {
|
80
|
+
temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec;
|
81
|
+
temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec;
|
82
|
+
}
|
83
|
+
return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6;
|
84
|
+
}
|
85
|
+
|
86
|
+
aa_float toc(const char *str, timer *t) {
|
87
|
+
aa_float time = tocq(t);
|
88
|
+
printf("%s - time: %8.4f milli-seconds.\n", str, time);
|
89
|
+
return time;
|
90
|
+
}
|
91
|
+
|
92
|
+
#else
|
93
|
+
|
94
|
+
#define TIME_TIC
|
95
|
+
#define TIME_TOC
|
96
|
+
|
97
|
+
#endif
|
47
98
|
|
48
99
|
/* BLAS functions used */
|
49
100
|
aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
|
@@ -59,147 +110,358 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
|
|
59
110
|
blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
|
60
111
|
blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
|
61
112
|
aa_float *c, blas_int *ldc);
|
113
|
+
void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
|
114
|
+
const blas_int *incx);
|
115
|
+
|
116
|
+
/* This file uses Anderson acceleration to improve the convergence of
|
117
|
+
* a fixed point mapping.
|
118
|
+
* At each iteration we need to solve a (small) linear system, we
|
119
|
+
* do this using LAPACK ?gesv.
|
120
|
+
*/
|
121
|
+
|
122
|
+
/* contains the necessary parameters to perform aa at each step */
|
123
|
+
struct ACCEL_WORK {
|
124
|
+
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
125
|
+
aa_int mem; /* aa memory */
|
126
|
+
aa_int dim; /* variable dimension */
|
127
|
+
aa_int iter; /* current iteration */
|
128
|
+
aa_int verbosity; /* verbosity level, 0 is no printing */
|
129
|
+
aa_int success; /* was the last AA step successful or not */
|
130
|
+
|
131
|
+
aa_float relaxation; /* relaxation x and f, beta in some papers */
|
132
|
+
aa_float regularization; /* regularization */
|
133
|
+
aa_float safeguard_factor; /* safeguard tolerance factor */
|
134
|
+
aa_float max_weight_norm; /* maximum norm of AA weights */
|
135
|
+
|
136
|
+
aa_float *x; /* x input to map*/
|
137
|
+
aa_float *f; /* f(x) output of map */
|
138
|
+
aa_float *g; /* x - f(x) */
|
139
|
+
aa_float norm_g; /* ||x - f(x)|| */
|
140
|
+
|
141
|
+
/* from previous iteration */
|
142
|
+
aa_float *g_prev; /* x_prev - f(x_prev) */
|
143
|
+
|
144
|
+
aa_float *y; /* g - g_prev */
|
145
|
+
aa_float *s; /* x - x_prev */
|
146
|
+
aa_float *d; /* f - f_prev */
|
147
|
+
|
148
|
+
aa_float *Y; /* matrix of stacked y values */
|
149
|
+
aa_float *S; /* matrix of stacked s values */
|
150
|
+
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
151
|
+
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
152
|
+
|
153
|
+
/* workspace variables */
|
154
|
+
aa_float *work; /* scratch space */
|
155
|
+
blas_int *ipiv; /* permutation variable, not used after solve */
|
156
|
+
|
157
|
+
aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */
|
158
|
+
};
|
159
|
+
|
160
|
+
/* add regularization dependent on Y and S matrices */
|
161
|
+
static aa_float compute_regularization(AaWork *a, aa_int len) {
|
162
|
+
/* typically type-I does better with higher regularization than type-II */
|
163
|
+
TIME_TIC
|
164
|
+
aa_float r, nrm_m;
|
165
|
+
blas_int btotal = (blas_int)(len * len), one = 1;
|
166
|
+
nrm_m = BLAS(nrm2)(&btotal, a->M, &one);
|
167
|
+
r = a->regularization * nrm_m;
|
168
|
+
if (a->verbosity > 2) {
|
169
|
+
printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r);
|
170
|
+
}
|
171
|
+
TIME_TOC
|
172
|
+
return r;
|
173
|
+
}
|
62
174
|
|
63
175
|
/* sets a->M to S'Y or Y'Y depending on type of aa used */
|
64
|
-
|
65
|
-
|
66
|
-
|
176
|
+
/* M is len x len after this */
|
177
|
+
static void set_m(AaWork *a, aa_int len) {
|
178
|
+
TIME_TIC
|
179
|
+
aa_int i;
|
180
|
+
blas_int bdim = (blas_int)(a->dim);
|
181
|
+
blas_int blen = (blas_int)len;
|
182
|
+
aa_float onef = 1.0, zerof = 0.0, r;
|
183
|
+
/* if len < mem this only uses len cols */
|
67
184
|
BLAS(gemm)
|
68
|
-
("Trans", "No", &
|
69
|
-
&zerof, a->M, &
|
185
|
+
("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
|
186
|
+
a->Y, &bdim, &zerof, a->M, &blen);
|
187
|
+
if (a->regularization > 0) {
|
188
|
+
r = compute_regularization(a, len);
|
189
|
+
for (i = 0; i < len; ++i) {
|
190
|
+
a->M[i + len * i] += r;
|
191
|
+
}
|
192
|
+
}
|
193
|
+
TIME_TOC
|
194
|
+
return;
|
195
|
+
}
|
196
|
+
|
197
|
+
/* initialize accel params, in particular x_prev, f_prev, g_prev */
|
198
|
+
static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) {
|
199
|
+
TIME_TIC
|
200
|
+
blas_int bdim = (blas_int)a->dim;
|
201
|
+
aa_float neg_onef = -1.0;
|
202
|
+
blas_int one = 1;
|
203
|
+
/* x_prev = x */
|
204
|
+
memcpy(a->x, x, sizeof(aa_float) * a->dim);
|
205
|
+
/* f_prev = f */
|
206
|
+
memcpy(a->f, f, sizeof(aa_float) * a->dim);
|
207
|
+
/* g_prev = x */
|
208
|
+
memcpy(a->g_prev, x, sizeof(aa_float) * a->dim);
|
209
|
+
/* g_prev = x_prev - f_prev */
|
210
|
+
BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one);
|
211
|
+
TIME_TOC
|
70
212
|
}
|
71
213
|
|
72
214
|
/* updates the workspace parameters for aa for this iteration */
|
73
|
-
static void update_accel_params(const aa_float *x, const aa_float *f,
|
74
|
-
|
215
|
+
static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
|
216
|
+
aa_int len) {
|
75
217
|
/* at the start a->x = x_prev and a->f = f_prev */
|
76
|
-
|
77
|
-
aa_int
|
78
|
-
|
218
|
+
TIME_TIC
|
219
|
+
aa_int idx = (a->iter - 1) % a->mem;
|
79
220
|
blas_int one = 1;
|
80
|
-
blas_int
|
221
|
+
blas_int bdim = (blas_int)a->dim;
|
81
222
|
aa_float neg_onef = -1.0;
|
82
223
|
|
83
224
|
/* g = x */
|
84
|
-
memcpy(a->g, x, sizeof(aa_float) *
|
225
|
+
memcpy(a->g, x, sizeof(aa_float) * a->dim);
|
85
226
|
/* s = x */
|
86
|
-
memcpy(a->s, x, sizeof(aa_float) *
|
227
|
+
memcpy(a->s, x, sizeof(aa_float) * a->dim);
|
87
228
|
/* d = f */
|
88
|
-
memcpy(a->d, f, sizeof(aa_float) *
|
89
|
-
/* g
|
90
|
-
BLAS(axpy)(&
|
91
|
-
/* s
|
92
|
-
BLAS(axpy)(&
|
93
|
-
/* d
|
94
|
-
BLAS(axpy)(&
|
229
|
+
memcpy(a->d, f, sizeof(aa_float) * a->dim);
|
230
|
+
/* g = x - f */
|
231
|
+
BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one);
|
232
|
+
/* s = x - x_prev */
|
233
|
+
BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one);
|
234
|
+
/* d = f - f_prev */
|
235
|
+
BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one);
|
95
236
|
|
96
237
|
/* g, s, d correct here */
|
97
238
|
|
98
239
|
/* y = g */
|
99
|
-
memcpy(a->y, a->g, sizeof(aa_float) *
|
100
|
-
/* y
|
101
|
-
BLAS(axpy)(&
|
240
|
+
memcpy(a->y, a->g, sizeof(aa_float) * a->dim);
|
241
|
+
/* y = g - g_prev */
|
242
|
+
BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one);
|
102
243
|
|
103
244
|
/* y correct here */
|
104
245
|
|
105
246
|
/* copy y into idx col of Y */
|
106
|
-
memcpy(&(a->Y[idx *
|
247
|
+
memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim);
|
107
248
|
/* copy s into idx col of S */
|
108
|
-
memcpy(&(a->S[idx *
|
249
|
+
memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim);
|
109
250
|
/* copy d into idx col of D */
|
110
|
-
memcpy(&(a->D[idx *
|
251
|
+
memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim);
|
111
252
|
|
112
|
-
/* Y, S,D correct here */
|
253
|
+
/* Y, S, D correct here */
|
113
254
|
|
114
|
-
|
115
|
-
memcpy(a->
|
255
|
+
/* set a->f and a->x for next iter (x_prev and f_prev) */
|
256
|
+
memcpy(a->f, f, sizeof(aa_float) * a->dim);
|
257
|
+
memcpy(a->x, x, sizeof(aa_float) * a->dim);
|
258
|
+
|
259
|
+
/* workspace for when relaxation != 1.0 */
|
260
|
+
if (a->x_work) {
|
261
|
+
memcpy(a->x_work, x, sizeof(aa_float) * a->dim);
|
262
|
+
}
|
116
263
|
|
117
264
|
/* x, f correct here */
|
118
265
|
|
119
|
-
|
120
|
-
|
266
|
+
memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim);
|
267
|
+
/* g_prev set for next iter here */
|
121
268
|
|
122
|
-
/*
|
269
|
+
/* compute ||g|| = ||x - f|| */
|
270
|
+
a->norm_g = BLAS(nrm2)(&bdim, a->g, &one);
|
123
271
|
|
124
|
-
|
272
|
+
TIME_TOC
|
273
|
+
return;
|
274
|
+
}
|
125
275
|
|
126
|
-
|
276
|
+
/* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
|
277
|
+
static void relax(aa_float *f, AaWork *a, aa_int len) {
|
278
|
+
TIME_TIC
|
279
|
+
/* x_work = x - S * work */
|
280
|
+
blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
281
|
+
aa_float onef = 1.0, neg_onef = -1.0;
|
282
|
+
aa_float one_m_relaxation = 1. - a->relaxation;
|
283
|
+
BLAS(gemv)
|
284
|
+
("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
|
285
|
+
a->x_work, &one);
|
286
|
+
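/* f = relaxation * f -- NOTE(review): the scal/axpy below use length blen (= len), not bdim (= dim), so only the first len entries of f are relaxed; confirm whether bdim was intended */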
|
287
|
+
BLAS(scal)(&blen, &a->relaxation, f, &one);
|
288
|
+
/* f += (1 - relaxation) * x_work */
|
289
|
+
BLAS(axpy)(&blen, &one_m_relaxation, a->x_work, &one, f, &one);
|
290
|
+
TIME_TOC
|
127
291
|
}
|
128
292
|
|
129
|
-
/* solves the system of equations to perform the
|
293
|
+
/* solves the system of equations to perform the AA update
|
130
294
|
* at the end f contains the next iterate to be returned
|
131
295
|
*/
|
132
|
-
static
|
133
|
-
|
134
|
-
|
135
|
-
aa_float
|
296
|
+
static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
|
297
|
+
TIME_TIC
|
298
|
+
blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
299
|
+
aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
|
300
|
+
|
136
301
|
/* work = S'g or Y'g */
|
137
302
|
BLAS(gemv)
|
138
|
-
("Trans", &
|
139
|
-
a->work, &one);
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
a->type1 ? 1 : 2, (int)a->iter, (int)info,
|
147
|
-
|
148
|
-
|
303
|
+
("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
|
304
|
+
&zerof, a->work, &one);
|
305
|
+
|
306
|
+
/* work = M \ work, where set_m has set M = S'Y or M = Y'Y */
|
307
|
+
BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
|
308
|
+
aa_norm = BLAS(nrm2)(&blen, a->work, &one);
|
309
|
+
if (a->verbosity > 1) {
|
310
|
+
printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
|
311
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
|
312
|
+
}
|
313
|
+
|
314
|
+
/* info < 0 input error, info > 0 matrix is singular */
|
315
|
+
if (info != 0 || aa_norm >= a->max_weight_norm) {
|
316
|
+
if (a->verbosity > 0) {
|
317
|
+
printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
|
318
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
|
319
|
+
}
|
320
|
+
a->success = 0;
|
321
|
+
/* reset aa for stability */
|
322
|
+
aa_reset(a);
|
323
|
+
TIME_TOC
|
324
|
+
return -aa_norm;
|
149
325
|
}
|
150
|
-
|
326
|
+
|
327
|
+
/* here work = gamma, ie, the correct AA shifted weights */
|
328
|
+
/* if solve was successful compute new point */
|
329
|
+
|
330
|
+
/* first set f -= D * work */
|
151
331
|
BLAS(gemv)
|
152
|
-
("NoTrans", &
|
153
|
-
|
332
|
+
("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
|
333
|
+
&one);
|
334
|
+
|
335
|
+
/* if relaxation is not 1 then need to incorporate */
|
336
|
+
if (a->relaxation != 1.0) {
|
337
|
+
relax(f, a, len);
|
338
|
+
}
|
339
|
+
|
340
|
+
a->success = 1; /* this should be the only place we set success = 1 */
|
341
|
+
TIME_TOC
|
342
|
+
return aa_norm;
|
154
343
|
}
|
155
344
|
|
156
345
|
/*
|
157
346
|
* API functions below this line, see aa.h for descriptions.
|
158
347
|
*/
|
159
|
-
AaWork *aa_init(aa_int
|
348
|
+
AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
|
349
|
+
aa_float relaxation, aa_float safeguard_factor,
|
350
|
+
aa_float max_weight_norm, aa_int verbosity) {
|
351
|
+
TIME_TIC
|
160
352
|
AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
|
161
353
|
if (!a) {
|
162
|
-
|
354
|
+
printf("Failed to allocate memory for AA.\n");
|
163
355
|
return (void *)0;
|
164
356
|
}
|
165
357
|
a->type1 = type1;
|
166
358
|
a->iter = 0;
|
167
|
-
a->
|
168
|
-
a->
|
169
|
-
|
359
|
+
a->dim = dim;
|
360
|
+
a->mem = MIN(mem, dim); /* for rank stability */
|
361
|
+
a->regularization = regularization;
|
362
|
+
a->relaxation = relaxation;
|
363
|
+
a->safeguard_factor = safeguard_factor;
|
364
|
+
a->max_weight_norm = max_weight_norm;
|
365
|
+
a->success = 0;
|
366
|
+
a->verbosity = verbosity;
|
367
|
+
if (a->mem <= 0) {
|
170
368
|
return a;
|
171
369
|
}
|
172
370
|
|
173
|
-
a->x = (aa_float *)calloc(a->
|
174
|
-
a->f = (aa_float *)calloc(a->
|
175
|
-
a->g = (aa_float *)calloc(a->
|
371
|
+
a->x = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
372
|
+
a->f = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
373
|
+
a->g = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
374
|
+
|
375
|
+
a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
176
376
|
|
177
|
-
a->
|
377
|
+
a->y = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
378
|
+
a->s = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
379
|
+
a->d = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
178
380
|
|
179
|
-
a->
|
180
|
-
a->
|
181
|
-
a->
|
381
|
+
a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
|
382
|
+
a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
|
383
|
+
a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
|
182
384
|
|
183
|
-
a->
|
184
|
-
a->
|
185
|
-
a->
|
385
|
+
a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float));
|
386
|
+
a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float));
|
387
|
+
a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int));
|
186
388
|
|
187
|
-
|
188
|
-
|
189
|
-
|
389
|
+
if (relaxation != 1.0) {
|
390
|
+
a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float));
|
391
|
+
} else {
|
392
|
+
a->x_work = 0;
|
393
|
+
}
|
394
|
+
TIME_TOC
|
190
395
|
return a;
|
191
396
|
}
|
192
397
|
|
193
|
-
|
194
|
-
|
195
|
-
|
398
|
+
aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
|
399
|
+
TIME_TIC
|
400
|
+
aa_float aa_norm = 0;
|
401
|
+
aa_int len = MIN(a->iter, a->mem);
|
402
|
+
a->success = 0; /* if we make an AA step we set this to 1 later */
|
403
|
+
if (a->mem <= 0) {
|
404
|
+
TIME_TOC
|
405
|
+
return aa_norm; /* 0 */
|
406
|
+
}
|
407
|
+
if (a->iter == 0) {
|
408
|
+
/* if first iteration then seed params for next iter */
|
409
|
+
init_accel_params(x, f, a);
|
410
|
+
a->iter++;
|
411
|
+
TIME_TOC
|
412
|
+
return aa_norm; /* 0 */
|
196
413
|
}
|
197
|
-
|
198
|
-
|
414
|
+
/* set various accel quantities */
|
415
|
+
update_accel_params(x, f, a, len);
|
416
|
+
|
417
|
+
/* only perform solve steps when the memory is full */
|
418
|
+
if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) {
|
419
|
+
/* set M = S'Y or Y'Y depending on type of aa used */
|
420
|
+
set_m(a, len);
|
421
|
+
/* solve linear system, new point overwrites f if successful */
|
422
|
+
aa_norm = solve(f, a, len);
|
423
|
+
}
|
424
|
+
a->iter++;
|
425
|
+
TIME_TOC
|
426
|
+
return aa_norm;
|
427
|
+
}
|
428
|
+
|
429
|
+
aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
|
430
|
+
TIME_TIC
|
431
|
+
blas_int bdim = (blas_int)a->dim;
|
432
|
+
blas_int one = 1;
|
433
|
+
aa_float neg_onef = -1.0;
|
434
|
+
aa_float norm_diff;
|
435
|
+
if (!a->success) {
|
436
|
+
/* last AA update was not successful, no need for safeguarding */
|
437
|
+
TIME_TOC
|
199
438
|
return 0;
|
200
439
|
}
|
201
|
-
|
202
|
-
|
440
|
+
|
441
|
+
/* reset success indicator in case safeguarding called multiple times */
|
442
|
+
a->success = 0;
|
443
|
+
|
444
|
+
/* work = x_new */
|
445
|
+
memcpy(a->work, x_new, a->dim * sizeof(aa_float));
|
446
|
+
/* work = x_new - f_new */
|
447
|
+
BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one);
|
448
|
+
/* norm_diff = || f_new - x_new || */
|
449
|
+
norm_diff = BLAS(nrm2)(&bdim, a->work, &one);
|
450
|
+
/* g = x - f, so a->norm_g = ||x - f|| as stored by update_accel_params */
|
451
|
+
if (norm_diff > a->safeguard_factor * a->norm_g) {
|
452
|
+
/* in this case we reject the AA step and reset */
|
453
|
+
memcpy(f_new, a->f, a->dim * sizeof(aa_float));
|
454
|
+
memcpy(x_new, a->x, a->dim * sizeof(aa_float));
|
455
|
+
if (a->verbosity > 0) {
|
456
|
+
printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n",
|
457
|
+
(int)a->iter, norm_diff, a->norm_g);
|
458
|
+
}
|
459
|
+
aa_reset(a);
|
460
|
+
TIME_TOC
|
461
|
+
return -1;
|
462
|
+
}
|
463
|
+
TIME_TOC
|
464
|
+
return 0;
|
203
465
|
}
|
204
466
|
|
205
467
|
void aa_finish(AaWork *a) {
|
@@ -217,8 +479,21 @@ void aa_finish(AaWork *a) {
|
|
217
479
|
free(a->M);
|
218
480
|
free(a->work);
|
219
481
|
free(a->ipiv);
|
482
|
+
if (a->x_work) {
|
483
|
+
free(a->x_work);
|
484
|
+
}
|
220
485
|
free(a);
|
221
486
|
}
|
487
|
+
return;
|
488
|
+
}
|
489
|
+
|
490
|
+
void aa_reset(AaWork *a) {
|
491
|
+
/* to reset we simply set a->iter = 0 */
|
492
|
+
if (a->verbosity > 0) {
|
493
|
+
printf("AA reset.\n");
|
494
|
+
}
|
495
|
+
a->iter = 0;
|
496
|
+
return;
|
222
497
|
}
|
223
498
|
|
224
499
|
#endif
|
data/vendor/scs/src/aa.o
CHANGED
Binary file
|