scs 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +11 -6
  4. data/lib/scs/ffi.rb +30 -13
  5. data/lib/scs/solver.rb +32 -9
  6. data/lib/scs/version.rb +1 -1
  7. data/vendor/scs/CITATION.cff +39 -0
  8. data/vendor/scs/CMakeLists.txt +7 -8
  9. data/vendor/scs/Makefile +24 -15
  10. data/vendor/scs/README.md +5 -263
  11. data/vendor/scs/include/aa.h +67 -23
  12. data/vendor/scs/include/cones.h +17 -17
  13. data/vendor/scs/include/glbopts.h +98 -32
  14. data/vendor/scs/include/linalg.h +2 -4
  15. data/vendor/scs/include/linsys.h +58 -44
  16. data/vendor/scs/include/normalize.h +3 -3
  17. data/vendor/scs/include/rw.h +8 -2
  18. data/vendor/scs/include/scs.h +293 -133
  19. data/vendor/scs/include/util.h +3 -15
  20. data/vendor/scs/linsys/cpu/direct/private.c +220 -224
  21. data/vendor/scs/linsys/cpu/direct/private.h +13 -7
  22. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  23. data/vendor/scs/linsys/cpu/indirect/private.c +177 -110
  24. data/vendor/scs/linsys/cpu/indirect/private.h +8 -4
  25. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  26. data/vendor/scs/linsys/csparse.c +87 -0
  27. data/vendor/scs/linsys/csparse.h +34 -0
  28. data/vendor/scs/linsys/csparse.o +0 -0
  29. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +1 -1
  30. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  31. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  32. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  33. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  34. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  35. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  36. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  37. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  38. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  39. data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
  40. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  41. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  42. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  43. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  44. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  45. data/vendor/scs/linsys/external/qdldl/changes +2 -0
  46. data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
  47. data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
  48. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  49. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
  50. data/vendor/scs/linsys/gpu/gpu.c +31 -33
  51. data/vendor/scs/linsys/gpu/gpu.h +48 -31
  52. data/vendor/scs/linsys/gpu/indirect/private.c +338 -232
  53. data/vendor/scs/linsys/gpu/indirect/private.h +23 -14
  54. data/vendor/scs/linsys/scs_matrix.c +498 -0
  55. data/vendor/scs/linsys/scs_matrix.h +70 -0
  56. data/vendor/scs/linsys/scs_matrix.o +0 -0
  57. data/vendor/scs/scs.mk +13 -9
  58. data/vendor/scs/src/aa.c +384 -109
  59. data/vendor/scs/src/aa.o +0 -0
  60. data/vendor/scs/src/cones.c +440 -353
  61. data/vendor/scs/src/cones.o +0 -0
  62. data/vendor/scs/src/ctrlc.c +15 -5
  63. data/vendor/scs/src/ctrlc.o +0 -0
  64. data/vendor/scs/src/linalg.c +84 -28
  65. data/vendor/scs/src/linalg.o +0 -0
  66. data/vendor/scs/src/normalize.c +22 -64
  67. data/vendor/scs/src/normalize.o +0 -0
  68. data/vendor/scs/src/rw.c +160 -21
  69. data/vendor/scs/src/rw.o +0 -0
  70. data/vendor/scs/src/scs.c +767 -563
  71. data/vendor/scs/src/scs.o +0 -0
  72. data/vendor/scs/src/scs_indir.o +0 -0
  73. data/vendor/scs/src/scs_version.c +9 -3
  74. data/vendor/scs/src/scs_version.o +0 -0
  75. data/vendor/scs/src/util.c +37 -106
  76. data/vendor/scs/src/util.o +0 -0
  77. data/vendor/scs/test/minunit.h +17 -8
  78. data/vendor/scs/test/problem_utils.h +176 -14
  79. data/vendor/scs/test/problems/degenerate.h +130 -0
  80. data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
  81. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
  82. data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
  83. data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
  84. data/vendor/scs/test/problems/random_prob +0 -0
  85. data/vendor/scs/test/problems/random_prob.h +45 -0
  86. data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
  87. data/vendor/scs/test/problems/small_lp.h +13 -14
  88. data/vendor/scs/test/problems/test_fails.h +43 -0
  89. data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
  90. data/vendor/scs/test/random_socp_prob.c +54 -53
  91. data/vendor/scs/test/rng.h +109 -0
  92. data/vendor/scs/test/run_from_file.c +19 -10
  93. data/vendor/scs/test/run_tests.c +27 -3
  94. metadata +20 -8
  95. data/vendor/scs/linsys/amatrix.c +0 -305
  96. data/vendor/scs/linsys/amatrix.h +0 -36
  97. data/vendor/scs/linsys/amatrix.o +0 -0
  98. data/vendor/scs/test/data/small_random_socp +0 -0
  99. data/vendor/scs/test/problems/small_random_socp.h +0 -33
  100. data/vendor/scs/test/run_tests +0 -2
data/vendor/scs/src/aa.c CHANGED
@@ -1,49 +1,100 @@
1
+ /*
2
+ * Anderson acceleration.
3
+ *
4
+ * x: input iterate
5
+ * x_prev: previous input iterate
6
+ * f: f(x) output of map f applied to x
7
+ * g: x - f (error)
8
+ * g_prev: previous error
9
+ * s: x - x_prev
10
+ * y: g - g_prev
11
+ * d: s - y = f - f_prev
12
+ *
13
+ * capital letters are the variables stacked columnwise
14
+ * idx tracks current index where latest quantities written
15
+ * idx cycles from left to right columns in matrix
16
+ *
17
+ * Type-I:
18
+ * return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g )
19
+ *
20
+ * Type-II:
21
+ * return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g )
22
+ *
23
+ */
24
+
1
25
  #include "aa.h"
2
26
  #include "scs_blas.h"
3
27
 
4
- /* This file uses Anderson acceleration to improve the convergence of
5
- * a fixed point mapping.
6
- * At each iteration we need to solve a (small) linear system, we
7
- * do this using LAPACK ?gesv.
8
- */
28
+ #define MAX(a, b) (((a) > (b)) ? (a) : (b))
29
+ #define MIN(a, b) (((a) < (b)) ? (a) : (b))
30
+ #define FILL_MEMORY_BEFORE_SOLVE (1)
9
31
 
10
32
  #ifndef USE_LAPACK
11
33
 
12
- typedef void * ACCEL_WORK;
34
+ typedef void *ACCEL_WORK;
13
35
 
14
- AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) { return SCS_NULL; }
15
- aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { return 0; }
16
- void aa_finish(AaWork *a) {}
36
+ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
37
+ aa_float relaxation, aa_float safeguard_factor,
38
+ aa_float max_weight_norm, aa_int verbosity) {
39
+ return SCS_NULL;
40
+ }
41
+ aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
42
+ return 0;
43
+ }
44
+ aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
45
+ return 0;
46
+ }
47
+ void aa_finish(AaWork *a) {
48
+ }
49
+ void aa_reset(AaWork *a) {
50
+ }
17
51
 
18
52
  #else
19
53
 
20
- /* contains the necessary parameters to perform aa at each step */
21
- struct ACCEL_WORK {
22
- aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
23
- aa_int k; /* aa memory */
24
- aa_int l; /* variable dimension */
25
- aa_int iter; /* current iteration */
54
+ #if PROFILING > 0
26
55
 
27
- aa_float *x; /* x input to map*/
28
- aa_float *f; /* f(x) output of map */
29
- aa_float *g; /* x - f(x) */
56
+ #define TIME_TIC \
57
+ timer __t; \
58
+ tic(&__t);
59
+ #define TIME_TOC toc(__func__, &__t);
30
60
 
31
- /* from previous iteration */
32
- aa_float *g_prev; /* x - f(x) */
61
+ #include <time.h>
62
+ typedef struct timer {
63
+ struct timespec tic;
64
+ struct timespec toc;
65
+ } timer;
33
66
 
34
- aa_float *y; /* g - g_prev */
35
- aa_float *s; /* x - x_prev */
36
- aa_float *d; /* f - f_prev */
67
+ void tic(timer *t) {
68
+ clock_gettime(CLOCK_MONOTONIC, &t->tic);
69
+ }
37
70
 
38
- aa_float *Y; /* matrix of stacked y values */
39
- aa_float *S; /* matrix of stacked s values */
40
- aa_float *D; /* matrix of stacked d values = (S-Y) */
41
- aa_float *M; /* S'Y or Y'Y depending on type of aa */
71
+ aa_float tocq(timer *t) {
72
+ struct timespec temp;
42
73
 
43
- /* workspace variables */
44
- aa_float *work;
45
- blas_int *ipiv;
46
- };
74
+ clock_gettime(CLOCK_MONOTONIC, &t->toc);
75
+
76
+ if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) {
77
+ temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1;
78
+ temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec;
79
+ } else {
80
+ temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec;
81
+ temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec;
82
+ }
83
+ return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6;
84
+ }
85
+
86
+ aa_float toc(const char *str, timer *t) {
87
+ aa_float time = tocq(t);
88
+ printf("%s - time: %8.4f milli-seconds.\n", str, time);
89
+ return time;
90
+ }
91
+
92
+ #else
93
+
94
+ #define TIME_TIC
95
+ #define TIME_TOC
96
+
97
+ #endif
47
98
 
48
99
  /* BLAS functions used */
49
100
  aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
@@ -59,147 +110,358 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
59
110
  blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
60
111
  blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
61
112
  aa_float *c, blas_int *ldc);
113
+ void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
114
+ const blas_int *incx);
115
+
116
+ /* This file uses Anderson acceleration to improve the convergence of
117
+ * a fixed point mapping.
118
+ * At each iteration we need to solve a (small) linear system, we
119
+ * do this using LAPACK ?gesv.
120
+ */
121
+
122
+ /* contains the necessary parameters to perform aa at each step */
123
+ struct ACCEL_WORK {
124
+ aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
125
+ aa_int mem; /* aa memory */
126
+ aa_int dim; /* variable dimension */
127
+ aa_int iter; /* current iteration */
128
+ aa_int verbosity; /* verbosity level, 0 is no printing */
129
+ aa_int success; /* was the last AA step successful or not */
130
+
131
+ aa_float relaxation; /* relaxation x and f, beta in some papers */
132
+ aa_float regularization; /* regularization */
133
+ aa_float safeguard_factor; /* safeguard tolerance factor */
134
+ aa_float max_weight_norm; /* maximum norm of AA weights */
135
+
136
+ aa_float *x; /* x input to map*/
137
+ aa_float *f; /* f(x) output of map */
138
+ aa_float *g; /* x - f(x) */
139
+ aa_float norm_g; /* ||x - f(x)|| */
140
+
141
+ /* from previous iteration */
142
+ aa_float *g_prev; /* x_prev - f(x_prev) */
143
+
144
+ aa_float *y; /* g - g_prev */
145
+ aa_float *s; /* x - x_prev */
146
+ aa_float *d; /* f - f_prev */
147
+
148
+ aa_float *Y; /* matrix of stacked y values */
149
+ aa_float *S; /* matrix of stacked s values */
150
+ aa_float *D; /* matrix of stacked d values = (S-Y) */
151
+ aa_float *M; /* S'Y or Y'Y depending on type of aa */
152
+
153
+ /* workspace variables */
154
+ aa_float *work; /* scratch space */
155
+ blas_int *ipiv; /* permutation variable, not used after solve */
156
+
157
+ aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */
158
+ };
159
+
160
+ /* add regularization dependent on Y and S matrices */
161
+ static aa_float compute_regularization(AaWork *a, aa_int len) {
162
+ /* typically type-I does better with higher regularization than type-II */
163
+ TIME_TIC
164
+ aa_float r, nrm_m;
165
+ blas_int btotal = (blas_int)(len * len), one = 1;
166
+ nrm_m = BLAS(nrm2)(&btotal, a->M, &one);
167
+ r = a->regularization * nrm_m;
168
+ if (a->verbosity > 2) {
169
+ printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r);
170
+ }
171
+ TIME_TOC
172
+ return r;
173
+ }
62
174
 
63
175
  /* sets a->M to S'Y or Y'Y depending on type of aa used */
64
- static void set_m(AaWork *a) {
65
- blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k;
66
- aa_float onef = 1.0, zerof = 0.0;
176
+ /* M is len x len after this */
177
+ static void set_m(AaWork *a, aa_int len) {
178
+ TIME_TIC
179
+ aa_int i;
180
+ blas_int bdim = (blas_int)(a->dim);
181
+ blas_int blen = (blas_int)len;
182
+ aa_float onef = 1.0, zerof = 0.0, r;
183
+ /* if len < mem this only uses len cols */
67
184
  BLAS(gemm)
68
- ("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl,
69
- &zerof, a->M, &bk);
185
+ ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
186
+ a->Y, &bdim, &zerof, a->M, &blen);
187
+ if (a->regularization > 0) {
188
+ r = compute_regularization(a, len);
189
+ for (i = 0; i < len; ++i) {
190
+ a->M[i + len * i] += r;
191
+ }
192
+ }
193
+ TIME_TOC
194
+ return;
195
+ }
196
+
197
+ /* initialize accel params, in particular x_prev, f_prev, g_prev */
198
+ static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) {
199
+ TIME_TIC
200
+ blas_int bdim = (blas_int)a->dim;
201
+ aa_float neg_onef = -1.0;
202
+ blas_int one = 1;
203
+ /* x_prev = x */
204
+ memcpy(a->x, x, sizeof(aa_float) * a->dim);
205
+ /* f_prev = f */
206
+ memcpy(a->f, f, sizeof(aa_float) * a->dim);
207
+ /* g_prev = x */
208
+ memcpy(a->g_prev, x, sizeof(aa_float) * a->dim);
209
+ /* g_prev = x_prev - f_prev */
210
+ BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one);
211
+ TIME_TOC
70
212
  }
71
213
 
72
214
  /* updates the workspace parameters for aa for this iteration */
73
- static void update_accel_params(const aa_float *x, const aa_float *f,
74
- AaWork *a) {
215
+ static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
216
+ aa_int len) {
75
217
  /* at the start a->x = x_prev and a->f = f_prev */
76
- aa_int idx = a->iter % a->k;
77
- aa_int l = a->l;
78
-
218
+ TIME_TIC
219
+ aa_int idx = (a->iter - 1) % a->mem;
79
220
  blas_int one = 1;
80
- blas_int bl = (blas_int)l;
221
+ blas_int bdim = (blas_int)a->dim;
81
222
  aa_float neg_onef = -1.0;
82
223
 
83
224
  /* g = x */
84
- memcpy(a->g, x, sizeof(aa_float) * l);
225
+ memcpy(a->g, x, sizeof(aa_float) * a->dim);
85
226
  /* s = x */
86
- memcpy(a->s, x, sizeof(aa_float) * l);
227
+ memcpy(a->s, x, sizeof(aa_float) * a->dim);
87
228
  /* d = f */
88
- memcpy(a->d, f, sizeof(aa_float) * l);
89
- /* g -= f */
90
- BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one);
91
- /* s -= x_prev */
92
- BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one);
93
- /* d -= f_prev */
94
- BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one);
229
+ memcpy(a->d, f, sizeof(aa_float) * a->dim);
230
+ /* g = x - f */
231
+ BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one);
232
+ /* s = x - x_prev */
233
+ BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one);
234
+ /* d = f - f_prev */
235
+ BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one);
95
236
 
96
237
  /* g, s, d correct here */
97
238
 
98
239
  /* y = g */
99
- memcpy(a->y, a->g, sizeof(aa_float) * l);
100
- /* y -= g_prev */
101
- BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one);
240
+ memcpy(a->y, a->g, sizeof(aa_float) * a->dim);
241
+ /* y = g - g_prev */
242
+ BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one);
102
243
 
103
244
  /* y correct here */
104
245
 
105
246
  /* copy y into idx col of Y */
106
- memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l);
247
+ memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim);
107
248
  /* copy s into idx col of S */
108
- memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l);
249
+ memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim);
109
250
  /* copy d into idx col of D */
110
- memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l);
251
+ memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim);
111
252
 
112
- /* Y, S,D correct here */
253
+ /* Y, S, D correct here */
113
254
 
114
- memcpy(a->f, f, sizeof(aa_float) * l);
115
- memcpy(a->x, x, sizeof(aa_float) * l);
255
+ /* set a->f and a->x for next iter (x_prev and f_prev) */
256
+ memcpy(a->f, f, sizeof(aa_float) * a->dim);
257
+ memcpy(a->x, x, sizeof(aa_float) * a->dim);
258
+
259
+ /* workspace for when relaxation != 1.0 */
260
+ if (a->x_work) {
261
+ memcpy(a->x_work, x, sizeof(aa_float) * a->dim);
262
+ }
116
263
 
117
264
  /* x, f correct here */
118
265
 
119
- /* set M = S'*Y */
120
- set_m(a);
266
+ memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim);
267
+ /* g_prev set for next iter here */
121
268
 
122
- /* M correct here */
269
+ /* compute ||g|| = ||x - f|| */
270
+ a->norm_g = BLAS(nrm2)(&bdim, a->g, &one);
123
271
 
124
- memcpy(a->g_prev, a->g, sizeof(aa_float) * l);
272
+ TIME_TOC
273
+ return;
274
+ }
125
275
 
126
- /* g_prev set for next iter here */
276
+ /* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
277
+ static void relax(aa_float *f, AaWork *a, aa_int len) {
278
+ TIME_TIC
279
+ /* x_work = x - S * work */
280
+ blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
281
+ aa_float onef = 1.0, neg_onef = -1.0;
282
+ aa_float one_m_relaxation = 1. - a->relaxation;
283
+ BLAS(gemv)
284
+ ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
285
+ a->x_work, &one);
286
+ /* f = relaxation * f */
287
+ BLAS(scal)(&blen, &a->relaxation, f, &one);
288
+ /* f += (1 - relaxation) * x_work */
289
+ BLAS(axpy)(&blen, &one_m_relaxation, a->x_work, &one, f, &one);
290
+ TIME_TOC
127
291
  }
128
292
 
129
- /* solves the system of equations to perform the aa update
293
+ /* solves the system of equations to perform the AA update
130
294
  * at the end f contains the next iterate to be returned
131
295
  */
132
- static aa_int solve(aa_float *f, AaWork *a, aa_int len) {
133
- blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len,
134
- bk = (blas_int)a->k;
135
- aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm;
296
+ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
297
+ TIME_TIC
298
+ blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
299
+ aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
300
+
136
301
  /* work = S'g or Y'g */
137
302
  BLAS(gemv)
138
- ("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof,
139
- a->work, &one);
140
- /* work = M \ work, where M = S'Y or M = Y'Y */
141
- BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info);
142
- nrm = BLAS(nrm2)(&bk, a->work, &one);
143
- if (info < 0 || nrm >= MAX_AA_NRM) {
144
- #if EXTRA_VERBOSE > 0
145
- scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n",
146
- a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm);
147
- #endif
148
- return -1;
303
+ ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
304
+ &zerof, a->work, &one);
305
+
306
+ /* work = M \ work, where set_m has set M = S'Y or M = Y'Y (plus r I) */
307
+ BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
308
+ aa_norm = BLAS(nrm2)(&blen, a->work, &one);
309
+ if (a->verbosity > 1) {
310
+ printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
311
+ a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
312
+ }
313
+
314
+ /* info < 0: invalid argument to gesv, info > 0: matrix is singular */
315
+ if (info != 0 || aa_norm >= a->max_weight_norm) {
316
+ if (a->verbosity > 0) {
317
+ printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n",
318
+ a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm);
319
+ }
320
+ a->success = 0;
321
+ /* reset aa for stability */
322
+ aa_reset(a);
323
+ TIME_TOC
324
+ return -aa_norm;
149
325
  }
150
- /* if solve was successful then set f -= D * work */
326
+
327
+ /* here work = gamma, ie, the correct AA shifted weights */
328
+ /* if solve was successful compute new point */
329
+
330
+ /* first set f -= D * work */
151
331
  BLAS(gemv)
152
- ("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one);
153
- return (aa_int)info;
332
+ ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
333
+ &one);
334
+
335
+ /* if relaxation is not 1 then need to incorporate */
336
+ if (a->relaxation != 1.0) {
337
+ relax(f, a, len);
338
+ }
339
+
340
+ a->success = 1; /* this should be the only place we set success = 1 */
341
+ TIME_TOC
342
+ return aa_norm;
154
343
  }
155
344
 
156
345
  /*
157
346
  * API functions below this line, see aa.h for descriptions.
158
347
  */
159
- AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) {
348
+ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
349
+ aa_float relaxation, aa_float safeguard_factor,
350
+ aa_float max_weight_norm, aa_int verbosity) {
351
+ TIME_TIC
160
352
  AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
161
353
  if (!a) {
162
- scs_printf("Failed to allocate memory for AA.\n");
354
+ printf("Failed to allocate memory for AA.\n");
163
355
  return (void *)0;
164
356
  }
165
357
  a->type1 = type1;
166
358
  a->iter = 0;
167
- a->l = l;
168
- a->k = aa_mem;
169
- if (a->k <= 0) {
359
+ a->dim = dim;
360
+ a->mem = MIN(mem, dim); /* for rank stability */
361
+ a->regularization = regularization;
362
+ a->relaxation = relaxation;
363
+ a->safeguard_factor = safeguard_factor;
364
+ a->max_weight_norm = max_weight_norm;
365
+ a->success = 0;
366
+ a->verbosity = verbosity;
367
+ if (a->mem <= 0) {
170
368
  return a;
171
369
  }
172
370
 
173
- a->x = (aa_float *)calloc(a->l, sizeof(aa_float));
174
- a->f = (aa_float *)calloc(a->l, sizeof(aa_float));
175
- a->g = (aa_float *)calloc(a->l, sizeof(aa_float));
371
+ a->x = (aa_float *)calloc(a->dim, sizeof(aa_float));
372
+ a->f = (aa_float *)calloc(a->dim, sizeof(aa_float));
373
+ a->g = (aa_float *)calloc(a->dim, sizeof(aa_float));
374
+
375
+ a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float));
176
376
 
177
- a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float));
377
+ a->y = (aa_float *)calloc(a->dim, sizeof(aa_float));
378
+ a->s = (aa_float *)calloc(a->dim, sizeof(aa_float));
379
+ a->d = (aa_float *)calloc(a->dim, sizeof(aa_float));
178
380
 
179
- a->y = (aa_float *)calloc(a->l, sizeof(aa_float));
180
- a->s = (aa_float *)calloc(a->l, sizeof(aa_float));
181
- a->d = (aa_float *)calloc(a->l, sizeof(aa_float));
381
+ a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
382
+ a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
383
+ a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float));
182
384
 
183
- a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
184
- a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
185
- a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
385
+ a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float));
386
+ a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float));
387
+ a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int));
186
388
 
187
- a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float));
188
- a->work = (aa_float *)calloc(a->k, sizeof(aa_float));
189
- a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int));
389
+ if (relaxation != 1.0) {
390
+ a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float));
391
+ } else {
392
+ a->x_work = 0;
393
+ }
394
+ TIME_TOC
190
395
  return a;
191
396
  }
192
397
 
193
- aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
194
- if (a->k <= 0) {
195
- return 0;
398
+ aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
399
+ TIME_TIC
400
+ aa_float aa_norm = 0;
401
+ aa_int len = MIN(a->iter, a->mem);
402
+ a->success = 0; /* if we make an AA step we set this to 1 later */
403
+ if (a->mem <= 0) {
404
+ TIME_TOC
405
+ return aa_norm; /* 0 */
406
+ }
407
+ if (a->iter == 0) {
408
+ /* if first iteration then seed params for next iter */
409
+ init_accel_params(x, f, a);
410
+ a->iter++;
411
+ TIME_TOC
412
+ return aa_norm; /* 0 */
196
413
  }
197
- update_accel_params(x, f, a);
198
- if (a->iter++ == 0) {
414
+ /* set various accel quantities */
415
+ update_accel_params(x, f, a, len);
416
+
417
+ /* only perform solve steps when the memory is full */
418
+ if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) {
419
+ /* set M = S'Y or Y'Y depending on type of aa used */
420
+ set_m(a, len);
421
+ /* solve linear system, new point overwrites f if successful */
422
+ aa_norm = solve(f, a, len);
423
+ }
424
+ a->iter++;
425
+ TIME_TOC
426
+ return aa_norm;
427
+ }
428
+
429
+ aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) {
430
+ TIME_TIC
431
+ blas_int bdim = (blas_int)a->dim;
432
+ blas_int one = 1;
433
+ aa_float neg_onef = -1.0;
434
+ aa_float norm_diff;
435
+ if (!a->success) {
436
+ /* last AA update was not successful, no need for safeguarding */
437
+ TIME_TOC
199
438
  return 0;
200
439
  }
201
- /* solve linear system, new point overwrites f if successful */
202
- return solve(f, a, MIN(a->iter - 1, a->k));
440
+
441
+ /* reset success indicator in case safeguarding called multiple times */
442
+ a->success = 0;
443
+
444
+ /* work = x_new */
445
+ memcpy(a->work, x_new, a->dim * sizeof(aa_float));
446
+ /* work = x_new - f_new */
447
+ BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one);
448
+ /* norm_diff = || f_new - x_new || */
449
+ norm_diff = BLAS(nrm2)(&bdim, a->work, &one);
450
+ /* compare against safeguard_factor * ||g||, g = x - f from the last update */
451
+ if (norm_diff > a->safeguard_factor * a->norm_g) {
452
+ /* in this case we reject the AA step and reset */
453
+ memcpy(f_new, a->f, a->dim * sizeof(aa_float));
454
+ memcpy(x_new, a->x, a->dim * sizeof(aa_float));
455
+ if (a->verbosity > 0) {
456
+ printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n",
457
+ (int)a->iter, norm_diff, a->norm_g);
458
+ }
459
+ aa_reset(a);
460
+ TIME_TOC
461
+ return -1;
462
+ }
463
+ TIME_TOC
464
+ return 0;
203
465
  }
204
466
 
205
467
  void aa_finish(AaWork *a) {
@@ -217,8 +479,21 @@ void aa_finish(AaWork *a) {
217
479
  free(a->M);
218
480
  free(a->work);
219
481
  free(a->ipiv);
482
+ if (a->x_work) {
483
+ free(a->x_work);
484
+ }
220
485
  free(a);
221
486
  }
487
+ return;
488
+ }
489
+
490
+ void aa_reset(AaWork *a) {
491
+ /* to reset we simply set a->iter = 0 */
492
+ if (a->verbosity > 0) {
493
+ printf("AA reset.\n");
494
+ }
495
+ a->iter = 0;
496
+ return;
222
497
  }
223
498
 
224
499
  #endif
data/vendor/scs/src/aa.o CHANGED
Binary file