scs 0.2.2 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +18 -18
  4. data/README.md +19 -14
  5. data/lib/scs/ffi.rb +31 -20
  6. data/lib/scs/solver.rb +32 -9
  7. data/lib/scs/version.rb +1 -1
  8. data/vendor/scs/CITATION.cff +39 -0
  9. data/vendor/scs/CMakeLists.txt +320 -0
  10. data/vendor/scs/Makefile +32 -23
  11. data/vendor/scs/README.md +9 -218
  12. data/vendor/scs/include/aa.h +67 -23
  13. data/vendor/scs/include/cones.h +22 -19
  14. data/vendor/scs/include/glbopts.h +107 -79
  15. data/vendor/scs/include/linalg.h +3 -4
  16. data/vendor/scs/include/linsys.h +58 -44
  17. data/vendor/scs/include/normalize.h +6 -5
  18. data/vendor/scs/include/rw.h +8 -2
  19. data/vendor/scs/include/scs.h +257 -141
  20. data/vendor/scs/include/scs_types.h +34 -0
  21. data/vendor/scs/include/scs_work.h +83 -0
  22. data/vendor/scs/include/util.h +3 -15
  23. data/vendor/scs/linsys/cpu/direct/private.c +241 -232
  24. data/vendor/scs/linsys/cpu/direct/private.h +13 -7
  25. data/vendor/scs/linsys/cpu/indirect/private.c +194 -118
  26. data/vendor/scs/linsys/cpu/indirect/private.h +7 -4
  27. data/vendor/scs/linsys/csparse.c +87 -0
  28. data/vendor/scs/linsys/csparse.h +34 -0
  29. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
  30. data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
  31. data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
  32. data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
  33. data/vendor/scs/linsys/external/qdldl/changes +2 -0
  34. data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
  35. data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
  36. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
  37. data/vendor/scs/linsys/gpu/gpu.c +58 -21
  38. data/vendor/scs/linsys/gpu/gpu.h +70 -35
  39. data/vendor/scs/linsys/gpu/indirect/private.c +394 -157
  40. data/vendor/scs/linsys/gpu/indirect/private.h +27 -12
  41. data/vendor/scs/linsys/scs_matrix.c +478 -0
  42. data/vendor/scs/linsys/scs_matrix.h +70 -0
  43. data/vendor/scs/scs.mk +14 -10
  44. data/vendor/scs/src/aa.c +394 -110
  45. data/vendor/scs/src/cones.c +497 -359
  46. data/vendor/scs/src/ctrlc.c +15 -5
  47. data/vendor/scs/src/linalg.c +107 -26
  48. data/vendor/scs/src/normalize.c +30 -72
  49. data/vendor/scs/src/rw.c +202 -27
  50. data/vendor/scs/src/scs.c +769 -571
  51. data/vendor/scs/src/scs_version.c +11 -3
  52. data/vendor/scs/src/util.c +37 -106
  53. data/vendor/scs/test/minunit.h +22 -8
  54. data/vendor/scs/test/problem_utils.h +180 -25
  55. data/vendor/scs/test/problems/degenerate.h +130 -0
  56. data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
  57. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
  58. data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
  59. data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
  60. data/vendor/scs/test/problems/random_prob +0 -0
  61. data/vendor/scs/test/problems/random_prob.h +45 -0
  62. data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
  63. data/vendor/scs/test/problems/small_lp.h +14 -13
  64. data/vendor/scs/test/problems/small_qp.h +352 -0
  65. data/vendor/scs/test/problems/test_validation.h +43 -0
  66. data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
  67. data/vendor/scs/test/random_socp_prob.c +54 -53
  68. data/vendor/scs/test/rng.h +109 -0
  69. data/vendor/scs/test/run_from_file.c +20 -11
  70. data/vendor/scs/test/run_tests.c +35 -2
  71. metadata +29 -98
  72. data/vendor/scs/linsys/amatrix.c +0 -305
  73. data/vendor/scs/linsys/amatrix.h +0 -36
  74. data/vendor/scs/linsys/amatrix.o +0 -0
  75. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  76. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  77. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  78. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  79. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  80. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  81. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  82. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  83. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  84. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  85. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  86. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  87. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  88. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  89. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  90. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  91. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  92. data/vendor/scs/src/aa.o +0 -0
  93. data/vendor/scs/src/cones.o +0 -0
  94. data/vendor/scs/src/ctrlc.o +0 -0
  95. data/vendor/scs/src/linalg.o +0 -0
  96. data/vendor/scs/src/normalize.o +0 -0
  97. data/vendor/scs/src/rw.o +0 -0
  98. data/vendor/scs/src/scs.o +0 -0
  99. data/vendor/scs/src/scs_version.o +0 -0
  100. data/vendor/scs/src/util.o +0 -0
  101. data/vendor/scs/test/data/small_random_socp +0 -0
  102. data/vendor/scs/test/problems/small_random_socp.h +0 -33
  103. data/vendor/scs/test/run_tests +0 -2
data/vendor/scs/src/aa.c CHANGED
@@ -1,49 +1,104 @@
1
+ /*
2
+ * Anderson acceleration.
3
+ *
4
+ * x: input iterate
5
+ * x_prev: previous input iterate
6
+ * f: f(x) output of map f applied to x
7
+ * g: x - f (error)
8
+ * g_prev: previous error
9
+ * s: x - x_prev
10
+ * y: g - g_prev
11
+ * d: s - y = f - f_prev
12
+ *
13
+ * capital letters are the variables stacked columnwise
14
+ * idx tracks current index where latest quantities written
15
+ * idx cycles from left to right columns in matrix
16
+ *
17
+ * Type-I:
18
+ * return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g )
19
+ *
20
+ * Type-II:
21
+ * return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g )
22
+ *
23
+ */
24
+
1
25
  #include "aa.h"
2
26
  #include "scs_blas.h"
3
27
 
4
- /* This file uses Anderson acceleration to improve the convergence of
5
- * a fixed point mapping.
6
- * At each iteration we need to solve a (small) linear system, we
7
- * do this using LAPACK ?gesv.
8
- */
28
+ #define MAX(a, b) (((a) > (b)) ? (a) : (b))
29
+ #define MIN(a, b) (((a) < (b)) ? (a) : (b))
30
+ #define FILL_MEMORY_BEFORE_SOLVE (1)
9
31
 
10
32
  #ifndef USE_LAPACK
11
33
 
12
- typedef void * ACCEL_WORK;
34
+ typedef void *ACCEL_WORK;
13
35
 
14
- AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) { return SCS_NULL; }
15
- aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { return 0; }
16
- void aa_finish(AaWork *a) {}
36
+ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
37
+ aa_float relaxation, aa_float safeguard_factor,
38
+ aa_float max_weight_norm, aa_int verbosity) {
39
+ return SCS_NULL;
40
+ }
41
+ aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
42
+ return 0;
43
+ }
44
+ aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
45
+ return 0;
46
+ }
47
+ void aa_finish(AaWork *a) {
48
+ }
49
+ void aa_reset(AaWork *a) {
50
+ }
17
51
 
18
52
  #else
19
53
 
20
- /* contains the necessary parameters to perform aa at each step */
21
- struct ACCEL_WORK {
22
- aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
23
- aa_int k; /* aa memory */
24
- aa_int l; /* variable dimension */
25
- aa_int iter; /* current iteration */
54
+ #if PROFILING > 0
26
55
 
27
- aa_float *x; /* x input to map*/
28
- aa_float *f; /* f(x) output of map */
29
- aa_float *g; /* x - f(x) */
56
+ #define TIME_TIC \
57
+ timer __t; \
58
+ tic(&__t);
59
+ #define TIME_TOC toc(__func__, &__t);
30
60
 
31
- /* from previous iteration */
32
- aa_float *g_prev; /* x - f(x) */
61
+ #include <time.h>
62
+ typedef struct timer {
63
+ struct timespec tic;
64
+ struct timespec toc;
65
+ } timer;
33
66
 
34
- aa_float *y; /* g - g_prev */
35
- aa_float *s; /* x - x_prev */
36
- aa_float *d; /* f - f_prev */
67
+ void tic(timer *t) {
68
+ clock_gettime(CLOCK_MONOTONIC, &t->tic);
69
+ }
37
70
 
38
- aa_float *Y; /* matrix of stacked y values */
39
- aa_float *S; /* matrix of stacked s values */
40
- aa_float *D; /* matrix of stacked d values = (S-Y) */
41
- aa_float *M; /* S'Y or Y'Y depending on type of aa */
71
+ aa_float tocq(timer *t) {
72
+ struct timespec temp;
42
73
 
43
- /* workspace variables */
44
- aa_float *work;
45
- blas_int *ipiv;
46
- };
74
+ clock_gettime(CLOCK_MONOTONIC, &t->toc);
75
+
76
+ if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) {
77
+ temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1;
78
+ temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec;
79
+ } else {
80
+ temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec;
81
+ temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec;
82
+ }
83
+ return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6;
84
+ }
85
+
86
+ aa_float toc(const char *str, timer *t) {
87
+ aa_float time = tocq(t);
88
+ printf("%s - time: %8.4f milli-seconds.\n", str, time);
89
+ return time;
90
+ }
91
+
92
+ #else
93
+
94
+ #define TIME_TIC
95
+ #define TIME_TOC
96
+
97
+ #endif
98
+
99
+ #ifdef __cplusplus
100
+ extern "C" {
101
+ #endif
47
102
 
48
103
  /* BLAS functions used */
49
104
  aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
@@ -59,147 +114,363 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
59
114
  blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
60
115
  blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
61
116
  aa_float *c, blas_int *ldc);
117
+ void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
118
+ const blas_int *incx);
119
+
120
+ #ifdef __cplusplus
121
+ }
122
+ #endif
123
+
124
+ /* This file uses Anderson acceleration to improve the convergence of
125
+ * a fixed point mapping.
126
+ * At each iteration we need to solve a (small) linear system, we
127
+ * do this using LAPACK ?gesv.
128
+ */
129
+
130
+ /* contains the necessary parameters to perform aa at each step */
131
+ struct ACCEL_WORK {
132
+ aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
133
+ aa_int mem; /* aa memory */
134
+ aa_int dim; /* variable dimension */
135
+ aa_int iter; /* current iteration */
136
+ aa_int verbosity; /* verbosity level, 0 is no printing */
137
+ aa_int success; /* was the last AA step successful or not */
138
+
139
+ aa_float relaxation; /* relaxation x and f, beta in some papers */
140
+ aa_float regularization; /* regularization */
141
+ aa_float safeguard_factor; /* safeguard tolerance factor */
142
+ aa_float max_weight_norm; /* maximum norm of AA weights */
143
+
144
+ aa_float *x; /* x input to map*/
145
+ aa_float *f; /* f(x) output of map */
146
+ aa_float *g; /* x - f(x) */
147
+ aa_float norm_g; /* ||x - f(x)|| */
148
+
149
+ /* from previous iteration */
150
+ aa_float *g_prev; /* x_prev - f(x_prev) */
151
+
152
+ aa_float *y; /* g - g_prev */
153
+ aa_float *s; /* x - x_prev */
154
+ aa_float *d; /* f - f_prev */
155
+
156
+ aa_float *Y; /* matrix of stacked y values */
157
+ aa_float *S; /* matrix of stacked s values */
158
+ aa_float *D; /* matrix of stacked d values = (S-Y) */
159
+ aa_float *M; /* S'Y or Y'Y depending on type of aa */
160
+
161
+ /* workspace variables */
162
+ aa_float *work; /* scratch space */
163
+ blas_int *ipiv; /* permutation variable, not used after solve */
164
+
165
+ aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */
166
+ };
167
+
168
+ /* add regularization dependent on Y and S matrices */
169
+ static aa_float compute_regularization(AaWork *a, aa_int len) {
170
+ /* typically type-I does better with higher regularization than type-II */
171
+ TIME_TIC
172
+ aa_float r, nrm_m;
173
+ blas_int btotal = (blas_int)(len * len), one = 1;
174
+ nrm_m = BLAS(nrm2)(&btotal, a->M, &one);
175
+ r = a->regularization * nrm_m;
176
+ if (a->verbosity > 2) {
177
+ printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r);
178
+ }
179
+ TIME_TOC
180
+ return r;
181
+ }
62
182
 
63
183
  /* sets a->M to S'Y or Y'Y depending on type of aa used */
64
- static void set_m(AaWork *a) {
65
- blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k;
66
- aa_float onef = 1.0, zerof = 0.0;
184
+ /* M is len x len after this */
185
+ static void set_m(AaWork *a, aa_int len) {
186
+ TIME_TIC
187
+ aa_int i;
188
+ blas_int bdim = (blas_int)(a->dim);
189
+ blas_int blen = (blas_int)len;
190
+ aa_float onef = 1.0, zerof = 0.0, r;
191
+ /* if len < mem this only uses len cols */
67
192
  BLAS(gemm)
68
- ("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl,
69
- &zerof, a->M, &bk);
193
+ ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
194
+ a->Y, &bdim, &zerof, a->M, &blen);
195
+ if (a->regularization > 0) {
196
+ r = compute_regularization(a, len);
197
+ for (i = 0; i < len; ++i) {
198
+ a->M[i + len * i] += r;
199
+ }
200
+ }
201
+ TIME_TOC
202
+ return;
203
+ }
204
+
205
+ /* initialize accel params, in particular x_prev, f_prev, g_prev */
206
+ static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) {
207
+ TIME_TIC
208
+ blas_int bdim = (blas_int)a->dim;
209
+ aa_float neg_onef = -1.0;
210
+ blas_int one = 1;
211
+ /* x_prev = x */
212
+ memcpy(a->x, x, sizeof(aa_float) * a->dim);
213
+ /* f_prev = f */
214
+ memcpy(a->f, f, sizeof(aa_float) * a->dim);
215
+ /* g_prev = x */
216
+ memcpy(a->g_prev, x, sizeof(aa_float) * a->dim);
217
+ /* g_prev = x_prev - f_prev */
218
+ BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one);
219
+ TIME_TOC
70
220
  }
71
221
 
72
222
  /* updates the workspace parameters for aa for this iteration */
73
- static void update_accel_params(const aa_float *x, const aa_float *f,
74
- AaWork *a) {
223
+ static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
224
+ aa_int len) {
75
225
  /* at the start a->x = x_prev and a->f = f_prev */
76
- aa_int idx = a->iter % a->k;
77
- aa_int l = a->l;
78
-
226
+ TIME_TIC
227
+ aa_int idx = (a->iter - 1) % a->mem;
79
228
  blas_int one = 1;
80
- blas_int bl = (blas_int)l;
229
+ blas_int bdim = (blas_int)a->dim;
81
230
  aa_float neg_onef = -1.0;
82
231
 
83
232
  /* g = x */
84
- memcpy(a->g, x, sizeof(aa_float) * l);
233
+ memcpy(a->g, x, sizeof(aa_float) * a->dim);
85
234
  /* s = x */
86
- memcpy(a->s, x, sizeof(aa_float) * l);
235
+ memcpy(a->s, x, sizeof(aa_float) * a->dim);
87
236
  /* d = f */
88
- memcpy(a->d, f, sizeof(aa_float) * l);
89
- /* g -= f */
90
- BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one);
91
- /* s -= x_prev */
92
- BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one);
93
- /* d -= f_prev */
94
- BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one);
237
+ memcpy(a->d, f, sizeof(aa_float) * a->dim);
238
+ /* g = x - f */
239
+ BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one);
240
+ /* s = x - x_prev */
241
+ BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one);
242
+ /* d = f - f_prev */
243
+ BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one);
95
244
 
96
245
  /* g, s, d correct here */
97
246
 
98
247
  /* y = g */
99
- memcpy(a->y, a->g, sizeof(aa_float) * l);
100
- /* y -= g_prev */
101
- BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one);
248
+ memcpy(a->y, a->g, sizeof(aa_float) * a->dim);
249
+ /* y = g - g_prev */
250
+ BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one);
102
251
 
103
252
  /* y correct here */
104
253
 
105
254
  /* copy y into idx col of Y */
106
- memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l);
255
+ memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim);
107
256
  /* copy s into idx col of S */
108
- memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l);
257
+ memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim);
109
258
  /* copy d into idx col of D */
110
- memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l);
259
+ memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim);
260
+
261
+ /* Y, S, D correct here */
111
262
 
112
- /* Y, S,D correct here */
263
+ /* set a->f and a->x for next iter (x_prev and f_prev) */
264
+ memcpy(a->f, f, sizeof(aa_float) * a->dim);
265
+ memcpy(a->x, x, sizeof(aa_float) * a->dim);
113
266
 
114
- memcpy(a->f, f, sizeof(aa_float) * l);
115
- memcpy(a->x, x, sizeof(aa_float) * l);
267
+ /* workspace for when relaxation != 1.0 */
268
+ if (a->x_work) {
269
+ memcpy(a->x_work, x, sizeof(aa_float) * a->dim);
270
+ }
116
271
 
117
272
  /* x, f correct here */
118
273
 
119
- /* set M = S'*Y */
120
- set_m(a);
274
+ memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim);
275
+ /* g_prev set for next iter here */
121
276
 
122
- /* M correct here */
277
+ /* compute ||g|| = ||f - x|| */
278
+ a->norm_g = BLAS(nrm2)(&bdim, a->g, &one);
123
279
 
124
- memcpy(a->g_prev, a->g, sizeof(aa_float) * l);
280
+ TIME_TOC
281
+ return;
282
+ }
125
283
 
126
- /* g_prev set for next iter here */
284
+ /* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
285
+ static void relax(aa_float *f, AaWork *a, aa_int len) {
286
+ TIME_TIC
287
+ /* x_work = x initially */
288
+ blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
289
+ aa_float onef = 1.0, neg_onef = -1.0;
290
+ aa_float one_m_relaxation = 1. - a->relaxation;
291
+ /* x_work = x - S * work */
292
+ BLAS(gemv)
293
+ ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
294
+ a->x_work, &one);
295
+ /* f = relaxation * f */
296
+ BLAS(scal)(&bdim, &a->relaxation, f, &one);
297
+ /* f += (1 - relaxation) * x_work */
298
+ BLAS(axpy)(&bdim, &one_m_relaxation, a->x_work, &one, f, &one);
299
+ TIME_TOC
127
300
  }
128
301
 
129
- /* solves the system of equations to perform the aa update
302
+ /* solves the system of equations to perform the AA update
130
303
  * at the end f contains the next iterate to be returned
131
304
  */
132
- static aa_int solve(aa_float *f, AaWork *a, aa_int len) {
133
- blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len,
134
- bk = (blas_int)a->k;
135
- aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm;
305
+ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
306
+ TIME_TIC
307
+ blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
308
+ aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
309
+
136
310
  /* work = S'g or Y'g */
137
311
  BLAS(gemv)
138
- ("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof,
139
- a->work, &one);
140
- /* work = M \ work, where M = S'Y or M = Y'Y */
141
- BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info);
142
- nrm = BLAS(nrm2)(&bk, a->work, &one);
143
- if (info < 0 || nrm >= MAX_AA_NRM) {
144
- #if EXTRA_VERBOSE > 0
145
- scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n",
146
- a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm);
147
- #endif
148
- return -1;
312
+ ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
313
+ &zerof, a->work, &one);
314
+
315
+ /* work = M \ work, where set_m has set M = S'Y or M = Y'Y */
316
+ BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
317
+ aa_norm = BLAS(nrm2)(&blen, a->work, &one);
318
+ if (a->verbosity > 1) {
319
+ printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
320
+ a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
149
321
  }
150
- /* if solve was successful then set f -= D * work */
322
+
323
+ /* info < 0: input error, info > 0: matrix is singular */
324
+ if (info != 0 || aa_norm >= a->max_weight_norm) {
325
+ if (a->verbosity > 0) {
326
+ printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
327
+ a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
328
+ }
329
+ a->success = 0;
330
+ /* reset aa for stability */
331
+ aa_reset(a);
332
+ TIME_TOC
333
+ return -aa_norm;
334
+ }
335
+
336
+ /* here work = gamma, ie, the correct AA shifted weights */
337
+ /* if solve was successful compute new point */
338
+
339
+ /* first set f -= D * work */
151
340
  BLAS(gemv)
152
- ("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one);
153
- return (aa_int)info;
341
+ ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
342
+ &one);
343
+
344
+ /* if relaxation is not 1 then need to incorporate */
345
+ if (a->relaxation != 1.0) {
346
+ relax(f, a, len);
347
+ }
348
+
349
+ a->success = 1; /* this should be the only place we set success = 1 */
350
+ TIME_TOC
351
+ return aa_norm;
154
352
  }
155
353
 
156
354
  /*
157
355
  * API functions below this line, see aa.h for descriptions.
158
356
  */
159
- AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) {
357
+ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
358
+ aa_float relaxation, aa_float safeguard_factor,
359
+ aa_float max_weight_norm, aa_int verbosity) {
360
+ TIME_TIC
160
361
  AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
161
362
  if (!a) {
162
- scs_printf("Failed to allocate memory for AA.\n");
163
- return (void *)0;
363
+ printf("Failed to allocate memory for AA.\n");
364
+ return (AaWork *)0;
164
365
  }
165
366
  a->type1 = type1;
166
367
  a->iter = 0;
167
- a->l = l;
168
- a->k = aa_mem;
169
- if (a->k <= 0) {
368
+ a->dim = dim;
369
+ a->mem = MIN(mem, dim); /* for rank stability */
370
+ a->regularization = regularization;
371
+ a->relaxation = relaxation;
372
+ a->safeguard_factor = safeguard_factor;
373
+ a->max_weight_norm = max_weight_norm;
374
+ a->success = 0;
375
+ a->verbosity = verbosity;
376
+ if (a->mem <= 0) {
170
377
  return a;
171
378
  }
172
379
 
173
- a->x = (aa_float *)calloc(a->l, sizeof(aa_float));
174
- a->f = (aa_float *)calloc(a->l, sizeof(aa_float));
175
- a->g = (aa_float *)calloc(a->l, sizeof(aa_float));
380
+ a->x = (aa_float *)calloc(a->dim, sizeof(aa_float));
381
+ a->f = (aa_float *)calloc(a->dim, sizeof(aa_float));
382
+ a->g = (aa_float *)calloc(a->dim, sizeof(aa_float));
176
383
 
177
- a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float));
384
+ a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float));
178
385
 
179
- a->y = (aa_float *)calloc(a->l, sizeof(aa_float));
180
- a->s = (aa_float *)calloc(a->l, sizeof(aa_float));
181
- a->d = (aa_float *)calloc(a->l, sizeof(aa_float));
386
+ a->y = (aa_float *)calloc(a->dim, sizeof(aa_float));
387
+ a->s = (aa_float *)calloc(a->dim, sizeof(aa_float));
388
+ a->d = (aa_float *)calloc(a->dim, sizeof(aa_float));
182
389
 
183
- a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
184
- a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
185
- a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
390
+ a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
391
+ a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
392
+ a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
186
393
 
187
- a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float));
188
- a->work = (aa_float *)calloc(a->k, sizeof(aa_float));
189
- a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int));
394
+ a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float));
395
+ a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float));
396
+ a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int));
397
+
398
+ if (relaxation != 1.0) {
399
+ a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float));
400
+ } else {
401
+ a->x_work = 0;
402
+ }
403
+ TIME_TOC
190
404
  return a;
191
405
  }
192
406
 
193
- aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
194
- if (a->k <= 0) {
195
- return 0;
407
+ aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
408
+ TIME_TIC
409
+ aa_float aa_norm = 0;
410
+ aa_int len = MIN(a->iter, a->mem);
411
+ a->success = 0; /* if we make an AA step we set this to 1 later */
412
+ if (a->mem <= 0) {
413
+ TIME_TOC
414
+ return aa_norm; /* 0 */
196
415
  }
197
- update_accel_params(x, f, a);
198
- if (a->iter++ == 0) {
416
+ if (a->iter == 0) {
417
+ /* if first iteration then seed params for next iter */
418
+ init_accel_params(x, f, a);
419
+ a->iter++;
420
+ TIME_TOC
421
+ return aa_norm; /* 0 */
422
+ }
423
+ /* set various accel quantities */
424
+ update_accel_params(x, f, a, len);
425
+
426
+ /* only perform solve steps when the memory is full */
427
+ if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) {
428
+ /* set M = S'Y or Y'Y depending on type of aa used */
429
+ set_m(a, len);
430
+ /* solve linear system, new point overwrites f if successful */
431
+ aa_norm = solve(f, a, len);
432
+ }
433
+ a->iter++;
434
+ TIME_TOC
435
+ return aa_norm;
436
+ }
437
+
438
+ aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
439
+ TIME_TIC
440
+ blas_int bdim = (blas_int)a->dim;
441
+ blas_int one = 1;
442
+ aa_float neg_onef = -1.0;
443
+ aa_float norm_diff;
444
+ if (!a->success) {
445
+ /* last AA update was not successful, no need for safeguarding */
446
+ TIME_TOC
199
447
  return 0;
200
448
  }
201
- /* solve linear system, new point overwrites f if successful */
202
- return solve(f, a, MIN(a->iter - 1, a->k));
449
+
450
+ /* reset success indicator in case safeguarding called multiple times */
451
+ a->success = 0;
452
+
453
+ /* work = x_new */
454
+ memcpy(a->work, x_new, a->dim * sizeof(aa_float));
455
+ /* work = x_new - f_new */
456
+ BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one);
457
+ /* norm_diff = || f_new - x_new || */
458
+ norm_diff = BLAS(nrm2)(&bdim, a->work, &one);
459
+ /* g = f - x */
460
+ if (norm_diff > a->safeguard_factor * a->norm_g) {
461
+ /* in this case we reject the AA step and reset */
462
+ memcpy(f_new, a->f, a->dim * sizeof(aa_float));
463
+ memcpy(x_new, a->x, a->dim * sizeof(aa_float));
464
+ if (a->verbosity > 0) {
465
+ printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n",
466
+ (int)a->iter, norm_diff, a->norm_g);
467
+ }
468
+ aa_reset(a);
469
+ TIME_TOC
470
+ return -1;
471
+ }
472
+ TIME_TOC
473
+ return 0;
203
474
  }
204
475
 
205
476
  void aa_finish(AaWork *a) {
@@ -217,8 +488,21 @@ void aa_finish(AaWork *a) {
217
488
  free(a->M);
218
489
  free(a->work);
219
490
  free(a->ipiv);
491
+ if (a->x_work) {
492
+ free(a->x_work);
493
+ }
220
494
  free(a);
221
495
  }
496
+ return;
497
+ }
498
+
499
+ void aa_reset(AaWork *a) {
500
+ /* to reset we simply set a->iter = 0 */
501
+ if (a->verbosity > 0) {
502
+ printf("AA reset.\n");
503
+ }
504
+ a->iter = 0;
505
+ return;
222
506
  }
223
507
 
224
508
  #endif