scs 0.2.2 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +18 -18
  4. data/README.md +19 -14
  5. data/lib/scs/ffi.rb +31 -20
  6. data/lib/scs/solver.rb +32 -9
  7. data/lib/scs/version.rb +1 -1
  8. data/vendor/scs/CITATION.cff +39 -0
  9. data/vendor/scs/CMakeLists.txt +320 -0
  10. data/vendor/scs/Makefile +32 -23
  11. data/vendor/scs/README.md +9 -218
  12. data/vendor/scs/include/aa.h +67 -23
  13. data/vendor/scs/include/cones.h +22 -19
  14. data/vendor/scs/include/glbopts.h +107 -79
  15. data/vendor/scs/include/linalg.h +3 -4
  16. data/vendor/scs/include/linsys.h +58 -44
  17. data/vendor/scs/include/normalize.h +6 -5
  18. data/vendor/scs/include/rw.h +8 -2
  19. data/vendor/scs/include/scs.h +257 -141
  20. data/vendor/scs/include/scs_types.h +34 -0
  21. data/vendor/scs/include/scs_work.h +83 -0
  22. data/vendor/scs/include/util.h +3 -15
  23. data/vendor/scs/linsys/cpu/direct/private.c +241 -232
  24. data/vendor/scs/linsys/cpu/direct/private.h +13 -7
  25. data/vendor/scs/linsys/cpu/indirect/private.c +194 -118
  26. data/vendor/scs/linsys/cpu/indirect/private.h +7 -4
  27. data/vendor/scs/linsys/csparse.c +87 -0
  28. data/vendor/scs/linsys/csparse.h +34 -0
  29. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
  30. data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
  31. data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
  32. data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
  33. data/vendor/scs/linsys/external/qdldl/changes +2 -0
  34. data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
  35. data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
  36. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
  37. data/vendor/scs/linsys/gpu/gpu.c +58 -21
  38. data/vendor/scs/linsys/gpu/gpu.h +70 -35
  39. data/vendor/scs/linsys/gpu/indirect/private.c +394 -157
  40. data/vendor/scs/linsys/gpu/indirect/private.h +27 -12
  41. data/vendor/scs/linsys/scs_matrix.c +478 -0
  42. data/vendor/scs/linsys/scs_matrix.h +70 -0
  43. data/vendor/scs/scs.mk +14 -10
  44. data/vendor/scs/src/aa.c +394 -110
  45. data/vendor/scs/src/cones.c +497 -359
  46. data/vendor/scs/src/ctrlc.c +15 -5
  47. data/vendor/scs/src/linalg.c +107 -26
  48. data/vendor/scs/src/normalize.c +30 -72
  49. data/vendor/scs/src/rw.c +202 -27
  50. data/vendor/scs/src/scs.c +769 -571
  51. data/vendor/scs/src/scs_version.c +11 -3
  52. data/vendor/scs/src/util.c +37 -106
  53. data/vendor/scs/test/minunit.h +22 -8
  54. data/vendor/scs/test/problem_utils.h +180 -25
  55. data/vendor/scs/test/problems/degenerate.h +130 -0
  56. data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
  57. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
  58. data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
  59. data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
  60. data/vendor/scs/test/problems/random_prob +0 -0
  61. data/vendor/scs/test/problems/random_prob.h +45 -0
  62. data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
  63. data/vendor/scs/test/problems/small_lp.h +14 -13
  64. data/vendor/scs/test/problems/small_qp.h +352 -0
  65. data/vendor/scs/test/problems/test_validation.h +43 -0
  66. data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
  67. data/vendor/scs/test/random_socp_prob.c +54 -53
  68. data/vendor/scs/test/rng.h +109 -0
  69. data/vendor/scs/test/run_from_file.c +20 -11
  70. data/vendor/scs/test/run_tests.c +35 -2
  71. metadata +29 -98
  72. data/vendor/scs/linsys/amatrix.c +0 -305
  73. data/vendor/scs/linsys/amatrix.h +0 -36
  74. data/vendor/scs/linsys/amatrix.o +0 -0
  75. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  76. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  77. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  78. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  79. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  80. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  81. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  82. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  83. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  84. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  85. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  86. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  87. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  88. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  89. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  90. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  91. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  92. data/vendor/scs/src/aa.o +0 -0
  93. data/vendor/scs/src/cones.o +0 -0
  94. data/vendor/scs/src/ctrlc.o +0 -0
  95. data/vendor/scs/src/linalg.o +0 -0
  96. data/vendor/scs/src/normalize.o +0 -0
  97. data/vendor/scs/src/rw.o +0 -0
  98. data/vendor/scs/src/scs.o +0 -0
  99. data/vendor/scs/src/scs_version.o +0 -0
  100. data/vendor/scs/src/util.o +0 -0
  101. data/vendor/scs/test/data/small_random_socp +0 -0
  102. data/vendor/scs/test/problems/small_random_socp.h +0 -33
  103. data/vendor/scs/test/run_tests +0 -2
data/vendor/scs/src/aa.c CHANGED
@@ -1,49 +1,104 @@
1
+ /*
2
+ * Anderson acceleration.
3
+ *
4
+ * x: input iterate
5
+ * x_prev: previous input iterate
6
+ * f: f(x) output of map f applied to x
7
+ * g: x - f (error)
8
+ * g_prev: previous error
9
+ * s: x - x_prev
10
+ * y: g - g_prev
11
+ * d: s - y = f - f_prev
12
+ *
13
+ * capital letters are the variables stacked columnwise
14
+ * idx tracks current index where latest quantities written
15
+ * idx cycles from left to right columns in matrix
16
+ *
17
+ * Type-I:
18
+ * return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g )
19
+ *
20
+ * Type-II:
21
+ * return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g )
22
+ *
23
+ */
24
+
1
25
  #include "aa.h"
2
26
  #include "scs_blas.h"
3
27
 
4
- /* This file uses Anderson acceleration to improve the convergence of
5
- * a fixed point mapping.
6
- * At each iteration we need to solve a (small) linear system, we
7
- * do this using LAPACK ?gesv.
8
- */
28
+ #define MAX(a, b) (((a) > (b)) ? (a) : (b))
29
+ #define MIN(a, b) (((a) < (b)) ? (a) : (b))
30
+ #define FILL_MEMORY_BEFORE_SOLVE (1)
9
31
 
10
32
  #ifndef USE_LAPACK
11
33
 
12
- typedef void * ACCEL_WORK;
34
+ typedef void *ACCEL_WORK;
13
35
 
14
- AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) { return SCS_NULL; }
15
- aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { return 0; }
16
- void aa_finish(AaWork *a) {}
36
+ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
37
+ aa_float relaxation, aa_float safeguard_factor,
38
+ aa_float max_weight_norm, aa_int verbosity) {
39
+ return SCS_NULL;
40
+ }
41
+ aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
42
+ return 0;
43
+ }
44
+ aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
45
+ return 0;
46
+ }
47
+ void aa_finish(AaWork *a) {
48
+ }
49
+ void aa_reset(AaWork *a) {
50
+ }
17
51
 
18
52
  #else
19
53
 
20
- /* contains the necessary parameters to perform aa at each step */
21
- struct ACCEL_WORK {
22
- aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
23
- aa_int k; /* aa memory */
24
- aa_int l; /* variable dimension */
25
- aa_int iter; /* current iteration */
54
+ #if PROFILING > 0
26
55
 
27
- aa_float *x; /* x input to map*/
28
- aa_float *f; /* f(x) output of map */
29
- aa_float *g; /* x - f(x) */
56
+ #define TIME_TIC \
57
+ timer __t; \
58
+ tic(&__t);
59
+ #define TIME_TOC toc(__func__, &__t);
30
60
 
31
- /* from previous iteration */
32
- aa_float *g_prev; /* x - f(x) */
61
+ #include <time.h>
62
+ typedef struct timer {
63
+ struct timespec tic;
64
+ struct timespec toc;
65
+ } timer;
33
66
 
34
- aa_float *y; /* g - g_prev */
35
- aa_float *s; /* x - x_prev */
36
- aa_float *d; /* f - f_prev */
67
+ void tic(timer *t) {
68
+ clock_gettime(CLOCK_MONOTONIC, &t->tic);
69
+ }
37
70
 
38
- aa_float *Y; /* matrix of stacked y values */
39
- aa_float *S; /* matrix of stacked s values */
40
- aa_float *D; /* matrix of stacked d values = (S-Y) */
41
- aa_float *M; /* S'Y or Y'Y depending on type of aa */
71
+ aa_float tocq(timer *t) {
72
+ struct timespec temp;
42
73
 
43
- /* workspace variables */
44
- aa_float *work;
45
- blas_int *ipiv;
46
- };
74
+ clock_gettime(CLOCK_MONOTONIC, &t->toc);
75
+
76
+ if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) {
77
+ temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1;
78
+ temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec;
79
+ } else {
80
+ temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec;
81
+ temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec;
82
+ }
83
+ return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6;
84
+ }
85
+
86
+ aa_float toc(const char *str, timer *t) {
87
+ aa_float time = tocq(t);
88
+ printf("%s - time: %8.4f milli-seconds.\n", str, time);
89
+ return time;
90
+ }
91
+
92
+ #else
93
+
94
+ #define TIME_TIC
95
+ #define TIME_TOC
96
+
97
+ #endif
98
+
99
+ #ifdef __cplusplus
100
+ extern "C" {
101
+ #endif
47
102
 
48
103
  /* BLAS functions used */
49
104
  aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
@@ -59,147 +114,363 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
59
114
  blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
60
115
  blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
61
116
  aa_float *c, blas_int *ldc);
117
+ void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
118
+ const blas_int *incx);
119
+
120
+ #ifdef __cplusplus
121
+ }
122
+ #endif
123
+
124
+ /* This file uses Anderson acceleration to improve the convergence of
125
+ * a fixed point mapping.
126
+ * At each iteration we need to solve a (small) linear system, we
127
+ * do this using LAPACK ?gesv.
128
+ */
129
+
130
+ /* contains the necessary parameters to perform aa at each step */
131
+ struct ACCEL_WORK {
132
+ aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
133
+ aa_int mem; /* aa memory */
134
+ aa_int dim; /* variable dimension */
135
+ aa_int iter; /* current iteration */
136
+ aa_int verbosity; /* verbosity level, 0 is no printing */
137
+ aa_int success; /* was the last AA step successful or not */
138
+
139
+ aa_float relaxation; /* relaxation x and f, beta in some papers */
140
+ aa_float regularization; /* regularization */
141
+ aa_float safeguard_factor; /* safeguard tolerance factor */
142
+ aa_float max_weight_norm; /* maximum norm of AA weights */
143
+
144
+ aa_float *x; /* x input to map*/
145
+ aa_float *f; /* f(x) output of map */
146
+ aa_float *g; /* x - f(x) */
147
+ aa_float norm_g; /* ||x - f(x)|| */
148
+
149
+ /* from previous iteration */
150
+ aa_float *g_prev; /* x_prev - f(x_prev) */
151
+
152
+ aa_float *y; /* g - g_prev */
153
+ aa_float *s; /* x - x_prev */
154
+ aa_float *d; /* f - f_prev */
155
+
156
+ aa_float *Y; /* matrix of stacked y values */
157
+ aa_float *S; /* matrix of stacked s values */
158
+ aa_float *D; /* matrix of stacked d values = (S-Y) */
159
+ aa_float *M; /* S'Y or Y'Y depending on type of aa */
160
+
161
+ /* workspace variables */
162
+ aa_float *work; /* scratch space */
163
+ blas_int *ipiv; /* permutation variable, not used after solve */
164
+
165
+ aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */
166
+ };
167
+
168
+ /* add regularization dependent on Y and S matrices */
169
+ static aa_float compute_regularization(AaWork *a, aa_int len) {
170
+ /* typically type-I does better with higher regularization than type-II */
171
+ TIME_TIC
172
+ aa_float r, nrm_m;
173
+ blas_int btotal = (blas_int)(len * len), one = 1;
174
+ nrm_m = BLAS(nrm2)(&btotal, a->M, &one);
175
+ r = a->regularization * nrm_m;
176
+ if (a->verbosity > 2) {
177
+ printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r);
178
+ }
179
+ TIME_TOC
180
+ return r;
181
+ }
62
182
 
63
183
  /* sets a->M to S'Y or Y'Y depending on type of aa used */
64
- static void set_m(AaWork *a) {
65
- blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k;
66
- aa_float onef = 1.0, zerof = 0.0;
184
+ /* M is len x len after this */
185
+ static void set_m(AaWork *a, aa_int len) {
186
+ TIME_TIC
187
+ aa_int i;
188
+ blas_int bdim = (blas_int)(a->dim);
189
+ blas_int blen = (blas_int)len;
190
+ aa_float onef = 1.0, zerof = 0.0, r;
191
+ /* if len < mem this only uses len cols */
67
192
  BLAS(gemm)
68
- ("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl,
69
- &zerof, a->M, &bk);
193
+ ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
194
+ a->Y, &bdim, &zerof, a->M, &blen);
195
+ if (a->regularization > 0) {
196
+ r = compute_regularization(a, len);
197
+ for (i = 0; i < len; ++i) {
198
+ a->M[i + len * i] += r;
199
+ }
200
+ }
201
+ TIME_TOC
202
+ return;
203
+ }
204
+
205
+ /* initialize accel params, in particular x_prev, f_prev, g_prev */
206
+ static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) {
207
+ TIME_TIC
208
+ blas_int bdim = (blas_int)a->dim;
209
+ aa_float neg_onef = -1.0;
210
+ blas_int one = 1;
211
+ /* x_prev = x */
212
+ memcpy(a->x, x, sizeof(aa_float) * a->dim);
213
+ /* f_prev = f */
214
+ memcpy(a->f, f, sizeof(aa_float) * a->dim);
215
+ /* g_prev = x */
216
+ memcpy(a->g_prev, x, sizeof(aa_float) * a->dim);
217
+ /* g_prev = x_prev - f_prev */
218
+ BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one);
219
+ TIME_TOC
70
220
  }
71
221
 
72
222
  /* updates the workspace parameters for aa for this iteration */
73
- static void update_accel_params(const aa_float *x, const aa_float *f,
74
- AaWork *a) {
223
+ static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
224
+ aa_int len) {
75
225
  /* at the start a->x = x_prev and a->f = f_prev */
76
- aa_int idx = a->iter % a->k;
77
- aa_int l = a->l;
78
-
226
+ TIME_TIC
227
+ aa_int idx = (a->iter - 1) % a->mem;
79
228
  blas_int one = 1;
80
- blas_int bl = (blas_int)l;
229
+ blas_int bdim = (blas_int)a->dim;
81
230
  aa_float neg_onef = -1.0;
82
231
 
83
232
  /* g = x */
84
- memcpy(a->g, x, sizeof(aa_float) * l);
233
+ memcpy(a->g, x, sizeof(aa_float) * a->dim);
85
234
  /* s = x */
86
- memcpy(a->s, x, sizeof(aa_float) * l);
235
+ memcpy(a->s, x, sizeof(aa_float) * a->dim);
87
236
  /* d = f */
88
- memcpy(a->d, f, sizeof(aa_float) * l);
89
- /* g -= f */
90
- BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one);
91
- /* s -= x_prev */
92
- BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one);
93
- /* d -= f_prev */
94
- BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one);
237
+ memcpy(a->d, f, sizeof(aa_float) * a->dim);
238
+ /* g = x - f */
239
+ BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one);
240
+ /* s = x - x_prev */
241
+ BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one);
242
+ /* d = f - f_prev */
243
+ BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one);
95
244
 
96
245
  /* g, s, d correct here */
97
246
 
98
247
  /* y = g */
99
- memcpy(a->y, a->g, sizeof(aa_float) * l);
100
- /* y -= g_prev */
101
- BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one);
248
+ memcpy(a->y, a->g, sizeof(aa_float) * a->dim);
249
+ /* y = g - g_prev */
250
+ BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one);
102
251
 
103
252
  /* y correct here */
104
253
 
105
254
  /* copy y into idx col of Y */
106
- memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l);
255
+ memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim);
107
256
  /* copy s into idx col of S */
108
- memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l);
257
+ memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim);
109
258
  /* copy d into idx col of D */
110
- memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l);
259
+ memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim);
260
+
261
+ /* Y, S, D correct here */
111
262
 
112
- /* Y, S,D correct here */
263
+ /* set a->f and a->x for next iter (x_prev and f_prev) */
264
+ memcpy(a->f, f, sizeof(aa_float) * a->dim);
265
+ memcpy(a->x, x, sizeof(aa_float) * a->dim);
113
266
 
114
- memcpy(a->f, f, sizeof(aa_float) * l);
115
- memcpy(a->x, x, sizeof(aa_float) * l);
267
+ /* workspace for when relaxation != 1.0 */
268
+ if (a->x_work) {
269
+ memcpy(a->x_work, x, sizeof(aa_float) * a->dim);
270
+ }
116
271
 
117
272
  /* x, f correct here */
118
273
 
119
- /* set M = S'*Y */
120
- set_m(a);
274
+ memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim);
275
+ /* g_prev set for next iter here */
121
276
 
122
- /* M correct here */
277
+ /* compute ||g|| = ||f - x|| */
278
+ a->norm_g = BLAS(nrm2)(&bdim, a->g, &one);
123
279
 
124
- memcpy(a->g_prev, a->g, sizeof(aa_float) * l);
280
+ TIME_TOC
281
+ return;
282
+ }
125
283
 
126
- /* g_prev set for next iter here */
284
+ /* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
285
+ static void relax(aa_float *f, AaWork *a, aa_int len) {
286
+ TIME_TIC
287
+ /* x_work = x initially */
288
+ blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
289
+ aa_float onef = 1.0, neg_onef = -1.0;
290
+ aa_float one_m_relaxation = 1. - a->relaxation;
291
+ /* x_work = x - S * work */
292
+ BLAS(gemv)
293
+ ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
294
+ a->x_work, &one);
295
+ /* f = relaxation * f */
296
+ BLAS(scal)(&bdim, &a->relaxation, f, &one);
297
+ /* f += (1 - relaxation) * x_work */
298
+ BLAS(axpy)(&bdim, &one_m_relaxation, a->x_work, &one, f, &one);
299
+ TIME_TOC
127
300
  }
128
301
 
129
- /* solves the system of equations to perform the aa update
302
+ /* solves the system of equations to perform the AA update
130
303
  * at the end f contains the next iterate to be returned
131
304
  */
132
- static aa_int solve(aa_float *f, AaWork *a, aa_int len) {
133
- blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len,
134
- bk = (blas_int)a->k;
135
- aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm;
305
+ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
306
+ TIME_TIC
307
+ blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
308
+ aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
309
+
136
310
  /* work = S'g or Y'g */
137
311
  BLAS(gemv)
138
- ("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof,
139
- a->work, &one);
140
- /* work = M \ work, where M = S'Y or M = Y'Y */
141
- BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info);
142
- nrm = BLAS(nrm2)(&bk, a->work, &one);
143
- if (info < 0 || nrm >= MAX_AA_NRM) {
144
- #if EXTRA_VERBOSE > 0
145
- scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n",
146
- a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm);
147
- #endif
148
- return -1;
312
+ ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
313
+ &zerof, a->work, &one);
314
+
315
+ /* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */
316
+ BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
317
+ aa_norm = BLAS(nrm2)(&blen, a->work, &one);
318
+ if (a->verbosity > 1) {
319
+ printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
320
+ a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
149
321
  }
150
- /* if solve was successful then set f -= D * work */
322
+
323
+ /* info < 0 input error, input > 0 matrix is singular */
324
+ if (info != 0 || aa_norm >= a->max_weight_norm) {
325
+ if (a->verbosity > 0) {
326
+ printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
327
+ a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
328
+ }
329
+ a->success = 0;
330
+ /* reset aa for stability */
331
+ aa_reset(a);
332
+ TIME_TOC
333
+ return -aa_norm;
334
+ }
335
+
336
+ /* here work = gamma, ie, the correct AA shifted weights */
337
+ /* if solve was successful compute new point */
338
+
339
+ /* first set f -= D * work */
151
340
  BLAS(gemv)
152
- ("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one);
153
- return (aa_int)info;
341
+ ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
342
+ &one);
343
+
344
+ /* if relaxation is not 1 then need to incorporate */
345
+ if (a->relaxation != 1.0) {
346
+ relax(f, a, len);
347
+ }
348
+
349
+ a->success = 1; /* this should be the only place we set success = 1 */
350
+ TIME_TOC
351
+ return aa_norm;
154
352
  }
155
353
 
156
354
  /*
157
355
  * API functions below this line, see aa.h for descriptions.
158
356
  */
159
- AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) {
357
+ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
358
+ aa_float relaxation, aa_float safeguard_factor,
359
+ aa_float max_weight_norm, aa_int verbosity) {
360
+ TIME_TIC
160
361
  AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
161
362
  if (!a) {
162
- scs_printf("Failed to allocate memory for AA.\n");
163
- return (void *)0;
363
+ printf("Failed to allocate memory for AA.\n");
364
+ return (AaWork *)0;
164
365
  }
165
366
  a->type1 = type1;
166
367
  a->iter = 0;
167
- a->l = l;
168
- a->k = aa_mem;
169
- if (a->k <= 0) {
368
+ a->dim = dim;
369
+ a->mem = MIN(mem, dim); /* for rank stability */
370
+ a->regularization = regularization;
371
+ a->relaxation = relaxation;
372
+ a->safeguard_factor = safeguard_factor;
373
+ a->max_weight_norm = max_weight_norm;
374
+ a->success = 0;
375
+ a->verbosity = verbosity;
376
+ if (a->mem <= 0) {
170
377
  return a;
171
378
  }
172
379
 
173
- a->x = (aa_float *)calloc(a->l, sizeof(aa_float));
174
- a->f = (aa_float *)calloc(a->l, sizeof(aa_float));
175
- a->g = (aa_float *)calloc(a->l, sizeof(aa_float));
380
+ a->x = (aa_float *)calloc(a->dim, sizeof(aa_float));
381
+ a->f = (aa_float *)calloc(a->dim, sizeof(aa_float));
382
+ a->g = (aa_float *)calloc(a->dim, sizeof(aa_float));
176
383
 
177
- a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float));
384
+ a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float));
178
385
 
179
- a->y = (aa_float *)calloc(a->l, sizeof(aa_float));
180
- a->s = (aa_float *)calloc(a->l, sizeof(aa_float));
181
- a->d = (aa_float *)calloc(a->l, sizeof(aa_float));
386
+ a->y = (aa_float *)calloc(a->dim, sizeof(aa_float));
387
+ a->s = (aa_float *)calloc(a->dim, sizeof(aa_float));
388
+ a->d = (aa_float *)calloc(a->dim, sizeof(aa_float));
182
389
 
183
- a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
184
- a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
185
- a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
390
+ a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
391
+ a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
392
+ a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
186
393
 
187
- a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float));
188
- a->work = (aa_float *)calloc(a->k, sizeof(aa_float));
189
- a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int));
394
+ a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float));
395
+ a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float));
396
+ a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int));
397
+
398
+ if (relaxation != 1.0) {
399
+ a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float));
400
+ } else {
401
+ a->x_work = 0;
402
+ }
403
+ TIME_TOC
190
404
  return a;
191
405
  }
192
406
 
193
- aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
194
- if (a->k <= 0) {
195
- return 0;
407
+ aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
408
+ TIME_TIC
409
+ aa_float aa_norm = 0;
410
+ aa_int len = MIN(a->iter, a->mem);
411
+ a->success = 0; /* if we make an AA step we set this to 1 later */
412
+ if (a->mem <= 0) {
413
+ TIME_TOC
414
+ return aa_norm; /* 0 */
196
415
  }
197
- update_accel_params(x, f, a);
198
- if (a->iter++ == 0) {
416
+ if (a->iter == 0) {
417
+ /* if first iteration then seed params for next iter */
418
+ init_accel_params(x, f, a);
419
+ a->iter++;
420
+ TIME_TOC
421
+ return aa_norm; /* 0 */
422
+ }
423
+ /* set various accel quantities */
424
+ update_accel_params(x, f, a, len);
425
+
426
+ /* only perform solve steps when the memory is full */
427
+ if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) {
428
+ /* set M = S'Y or Y'Y depending on type of aa used */
429
+ set_m(a, len);
430
+ /* solve linear system, new point overwrites f if successful */
431
+ aa_norm = solve(f, a, len);
432
+ }
433
+ a->iter++;
434
+ TIME_TOC
435
+ return aa_norm;
436
+ }
437
+
438
+ aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
439
+ TIME_TIC
440
+ blas_int bdim = (blas_int)a->dim;
441
+ blas_int one = 1;
442
+ aa_float neg_onef = -1.0;
443
+ aa_float norm_diff;
444
+ if (!a->success) {
445
+ /* last AA update was not successful, no need for safeguarding */
446
+ TIME_TOC
199
447
  return 0;
200
448
  }
201
- /* solve linear system, new point overwrites f if successful */
202
- return solve(f, a, MIN(a->iter - 1, a->k));
449
+
450
+ /* reset success indicator in case safeguarding called multiple times */
451
+ a->success = 0;
452
+
453
+ /* work = x_new */
454
+ memcpy(a->work, x_new, a->dim * sizeof(aa_float));
455
+ /* work = x_new - f_new */
456
+ BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one);
457
+ /* norm_diff = || f_new - x_new || */
458
+ norm_diff = BLAS(nrm2)(&bdim, a->work, &one);
459
+ /* g = f - x */
460
+ if (norm_diff > a->safeguard_factor * a->norm_g) {
461
+ /* in this case we reject the AA step and reset */
462
+ memcpy(f_new, a->f, a->dim * sizeof(aa_float));
463
+ memcpy(x_new, a->x, a->dim * sizeof(aa_float));
464
+ if (a->verbosity > 0) {
465
+ printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n",
466
+ (int)a->iter, norm_diff, a->norm_g);
467
+ }
468
+ aa_reset(a);
469
+ TIME_TOC
470
+ return -1;
471
+ }
472
+ TIME_TOC
473
+ return 0;
203
474
  }
204
475
 
205
476
  void aa_finish(AaWork *a) {
@@ -217,8 +488,21 @@ void aa_finish(AaWork *a) {
217
488
  free(a->M);
218
489
  free(a->work);
219
490
  free(a->ipiv);
491
+ if (a->x_work) {
492
+ free(a->x_work);
493
+ }
220
494
  free(a);
221
495
  }
496
+ return;
497
+ }
498
+
499
+ void aa_reset(AaWork *a) {
500
+ /* to reset we simply set a->iter = 0 */
501
+ if (a->verbosity > 0) {
502
+ printf("AA reset.\n");
503
+ }
504
+ a->iter = 0;
505
+ return;
222
506
  }
223
507
 
224
508
  #endif