scs 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +8 -8
- data/lib/scs/ffi.rb +1 -7
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +1 -1
- data/vendor/scs/CMakeLists.txt +55 -7
- data/vendor/scs/Makefile +9 -9
- data/vendor/scs/README.md +2 -1
- data/vendor/scs/include/aa.h +1 -1
- data/vendor/scs/include/cones.h +14 -11
- data/vendor/scs/include/glbopts.h +26 -64
- data/vendor/scs/include/linalg.h +2 -1
- data/vendor/scs/include/linsys.h +13 -13
- data/vendor/scs/include/normalize.h +6 -5
- data/vendor/scs/include/scs.h +43 -87
- data/vendor/scs/include/scs_types.h +34 -0
- data/vendor/scs/include/scs_work.h +83 -0
- data/vendor/scs/linsys/cpu/direct/private.c +86 -73
- data/vendor/scs/linsys/cpu/direct/private.h +2 -2
- data/vendor/scs/linsys/cpu/indirect/private.c +42 -33
- data/vendor/scs/linsys/cpu/indirect/private.h +1 -2
- data/vendor/scs/linsys/csparse.c +3 -3
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
- data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
- data/vendor/scs/linsys/gpu/gpu.h +8 -11
- data/vendor/scs/linsys/gpu/indirect/private.c +72 -49
- data/vendor/scs/linsys/gpu/indirect/private.h +14 -13
- data/vendor/scs/linsys/scs_matrix.c +26 -46
- data/vendor/scs/linsys/scs_matrix.h +4 -4
- data/vendor/scs/scs.mk +1 -1
- data/vendor/scs/src/aa.c +13 -4
- data/vendor/scs/src/cones.c +143 -92
- data/vendor/scs/src/linalg.c +25 -0
- data/vendor/scs/src/normalize.c +26 -26
- data/vendor/scs/src/rw.c +48 -12
- data/vendor/scs/src/scs.c +104 -110
- data/vendor/scs/src/scs_version.c +8 -6
- data/vendor/scs/src/util.c +1 -1
- data/vendor/scs/test/minunit.h +6 -1
- data/vendor/scs/test/problem_utils.h +28 -35
- data/vendor/scs/test/problems/degenerate.h +1 -1
- data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +1 -1
- data/vendor/scs/test/problems/infeasible_tiny_qp.h +1 -1
- data/vendor/scs/test/problems/qafiro_tiny_qp.h +3 -3
- data/vendor/scs/test/problems/random_prob.h +1 -1
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +1 -1
- data/vendor/scs/test/problems/small_lp.h +3 -1
- data/vendor/scs/test/problems/small_qp.h +352 -0
- data/vendor/scs/test/problems/{test_fails.h → test_validation.h} +3 -3
- data/vendor/scs/test/problems/unbounded_tiny_qp.h +1 -1
- data/vendor/scs/test/random_socp_prob.c +1 -1
- data/vendor/scs/test/run_from_file.c +1 -1
- data/vendor/scs/test/run_tests.c +23 -14
- metadata +8 -5
@@ -15,19 +15,20 @@ struct SCS_LIN_SYS_WORK {
|
|
15
15
|
scs_int n, m; /* linear system dimensions */
|
16
16
|
/* reporting */
|
17
17
|
scs_int tot_cg_its;
|
18
|
+
scs_float *M; /* preconditioner on cpu */
|
18
19
|
/* ALL BELOW HOSTED ON THE GPU */
|
19
|
-
scs_float *p;
|
20
|
-
scs_float *r;
|
21
|
-
scs_float *Gp;
|
22
|
-
scs_float *bg;
|
23
|
-
scs_float *tmp_m;
|
24
|
-
scs_float *z;
|
25
|
-
scs_float *
|
20
|
+
scs_float *p; /* cg iterate, n */
|
21
|
+
scs_float *r; /* cg residual, n */
|
22
|
+
scs_float *Gp; /* G * p, n */
|
23
|
+
scs_float *bg; /* b, n */
|
24
|
+
scs_float *tmp_m; /* m, used in mat_vec */
|
25
|
+
scs_float *z; /* preconditioned */
|
26
|
+
scs_float *M_gpu; /* preconditioner */
|
26
27
|
const ScsMatrix *A; /* does *not* own this memory */
|
27
28
|
const ScsMatrix *P; /* does *not* own this memory */
|
28
|
-
ScsGpuMatrix *Ag;
|
29
|
-
ScsGpuMatrix *Agt;
|
30
|
-
ScsGpuMatrix *Pg;
|
29
|
+
ScsGpuMatrix *Ag; /* A matrix on GPU */
|
30
|
+
ScsGpuMatrix *Agt; /* A trans matrix on GPU */
|
31
|
+
ScsGpuMatrix *Pg; /* P matrix on GPU */
|
31
32
|
/* CUDA */
|
32
33
|
cublasHandle_t cublas_handle;
|
33
34
|
cusparseHandle_t cusparse_handle;
|
@@ -39,9 +40,9 @@ struct SCS_LIN_SYS_WORK {
|
|
39
40
|
cusparseDnVecDescr_t dn_vec_n_p; /* Dense vector of length n */
|
40
41
|
|
41
42
|
/* rho terms */
|
42
|
-
scs_float
|
43
|
-
scs_float *
|
44
|
-
scs_float *
|
43
|
+
scs_float *r_x_gpu;
|
44
|
+
scs_float *inv_r_y; /* inverse R_y */
|
45
|
+
scs_float *inv_r_y_gpu; /* inverse R_y on GPU */
|
45
46
|
};
|
46
47
|
|
47
48
|
#ifdef __cplusplus
|
@@ -18,11 +18,11 @@ scs_int SCS(copy_matrix)(ScsMatrix **dstp, const ScsMatrix *src) {
|
|
18
18
|
A->n = src->n;
|
19
19
|
A->m = src->m;
|
20
20
|
/* A values, size: NNZ A */
|
21
|
-
A->x = (scs_float *)
|
21
|
+
A->x = (scs_float *)scs_calloc(Anz, sizeof(scs_float));
|
22
22
|
/* A row index, size: NNZ A */
|
23
|
-
A->i = (scs_int *)
|
23
|
+
A->i = (scs_int *)scs_calloc(Anz, sizeof(scs_int));
|
24
24
|
/* A column pointer, size: n+1 */
|
25
|
-
A->p = (scs_int *)
|
25
|
+
A->p = (scs_int *)scs_calloc(src->n + 1, sizeof(scs_int));
|
26
26
|
if (!A->x || !A->i || !A->p) {
|
27
27
|
return 0;
|
28
28
|
}
|
@@ -108,9 +108,8 @@ static inline scs_float apply_limit(scs_float x) {
|
|
108
108
|
|
109
109
|
static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
110
110
|
scs_float *c, scs_float *Dt, scs_float *Et,
|
111
|
-
scs_float *s,
|
112
|
-
|
113
|
-
scs_int i, j, kk, count, delta;
|
111
|
+
scs_float *s, ScsConeWork *cone) {
|
112
|
+
scs_int i, j, kk;
|
114
113
|
scs_float wrk;
|
115
114
|
|
116
115
|
/**************************** D ****************************/
|
@@ -129,16 +128,9 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
|
129
128
|
}
|
130
129
|
|
131
130
|
/* accumulate D across each cone */
|
132
|
-
|
133
|
-
for (i = 1; i < cone_boundaries_len; ++i) {
|
134
|
-
delta = boundaries[i];
|
135
|
-
wrk = SCS(norm_inf)(&(Dt[count]), delta);
|
136
|
-
for (j = count; j < count + delta; ++j) {
|
137
|
-
Dt[j] = wrk;
|
138
|
-
}
|
139
|
-
count += delta;
|
140
|
-
}
|
131
|
+
SCS(enforce_cone_boundaries)(cone, Dt, &SCS(norm_inf));
|
141
132
|
|
133
|
+
/* invert temporary vec to form D */
|
142
134
|
for (i = 0; i < A->m; ++i) {
|
143
135
|
Dt[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Dt[i])));
|
144
136
|
}
|
@@ -182,9 +174,8 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
|
182
174
|
|
183
175
|
static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
184
176
|
scs_float *c, scs_float *Dt, scs_float *Et,
|
185
|
-
scs_float *s,
|
186
|
-
|
187
|
-
scs_int i, j, kk, count, delta;
|
177
|
+
scs_float *s, ScsConeWork *cone) {
|
178
|
+
scs_int i, j, kk;
|
188
179
|
scs_float wrk, norm_c, norm_b;
|
189
180
|
|
190
181
|
/**************************** D ****************************/
|
@@ -206,19 +197,7 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
|
206
197
|
}
|
207
198
|
|
208
199
|
/* accumulate D across each cone */
|
209
|
-
|
210
|
-
for (i = 1; i < cone_boundaries_len; ++i) {
|
211
|
-
delta = boundaries[i];
|
212
|
-
wrk = 0.;
|
213
|
-
for (j = count; j < count + delta; ++j) {
|
214
|
-
wrk += Dt[j];
|
215
|
-
}
|
216
|
-
wrk /= delta;
|
217
|
-
for (j = count; j < count + delta; ++j) {
|
218
|
-
Dt[j] = wrk;
|
219
|
-
}
|
220
|
-
count += delta;
|
221
|
-
}
|
200
|
+
SCS(enforce_cone_boundaries)(cone, Dt, &SCS(mean));
|
222
201
|
|
223
202
|
for (i = 0; i < A->m; ++i) {
|
224
203
|
Dt[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Dt[i])));
|
@@ -265,7 +244,7 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
|
265
244
|
|
266
245
|
static void rescale(ScsMatrix *P, ScsMatrix *A, scs_float *b, scs_float *c,
|
267
246
|
scs_float *Dt, scs_float *Et, scs_float s, ScsScaling *scal,
|
268
|
-
|
247
|
+
ScsConeWork *cone) {
|
269
248
|
scs_int i, j;
|
270
249
|
/* scale the rows of A with D */
|
271
250
|
for (i = 0; i < A->n; ++i) {
|
@@ -352,15 +331,15 @@ static void rescale(ScsMatrix *P, ScsMatrix *A, scs_float *b, scs_float *c,
|
|
352
331
|
* The main complication is that D has to respect cone boundaries.
|
353
332
|
*
|
354
333
|
*/
|
355
|
-
|
356
|
-
|
357
|
-
scs_int cone_boundaries_len) {
|
334
|
+
ScsScaling *SCS(normalize_a_p)(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
335
|
+
scs_float *c, ScsConeWork *cone) {
|
358
336
|
scs_int i;
|
359
337
|
scs_float s;
|
360
|
-
|
361
|
-
scs_float *
|
362
|
-
|
363
|
-
scal->
|
338
|
+
ScsScaling *scal = (ScsScaling *)scs_calloc(1, sizeof(ScsScaling));
|
339
|
+
scs_float *Dt = (scs_float *)scs_calloc(A->m, sizeof(scs_float));
|
340
|
+
scs_float *Et = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
|
341
|
+
scal->D = (scs_float *)scs_calloc(A->m, sizeof(scs_float));
|
342
|
+
scal->E = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
|
364
343
|
|
365
344
|
#if VERBOSITY > 5
|
366
345
|
SCS(timer) normalize_timer;
|
@@ -369,23 +348,23 @@ void SCS(normalize)(ScsMatrix *P, ScsMatrix *A, scs_float *b, scs_float *c,
|
|
369
348
|
#endif
|
370
349
|
|
371
350
|
/* init D, E */
|
351
|
+
scal->m = A->m;
|
372
352
|
for (i = 0; i < A->m; ++i) {
|
373
353
|
scal->D[i] = 1.;
|
374
354
|
}
|
355
|
+
scal->n = A->n;
|
375
356
|
for (i = 0; i < A->n; ++i) {
|
376
357
|
scal->E[i] = 1.;
|
377
358
|
}
|
378
359
|
scal->primal_scale = 1.;
|
379
360
|
scal->dual_scale = 1.;
|
380
361
|
for (i = 0; i < NUM_RUIZ_PASSES; ++i) {
|
381
|
-
compute_ruiz_mats(P, A, b, c, Dt, Et, &s,
|
382
|
-
|
383
|
-
rescale(P, A, b, c, Dt, Et, s, scal, cone_boundaries, cone_boundaries_len);
|
362
|
+
compute_ruiz_mats(P, A, b, c, Dt, Et, &s, cone);
|
363
|
+
rescale(P, A, b, c, Dt, Et, s, scal, cone);
|
384
364
|
}
|
385
365
|
for (i = 0; i < NUM_L2_PASSES; ++i) {
|
386
|
-
compute_l2_mats(P, A, b, c, Dt, Et, &s,
|
387
|
-
|
388
|
-
rescale(P, A, b, c, Dt, Et, s, scal, cone_boundaries, cone_boundaries_len);
|
366
|
+
compute_l2_mats(P, A, b, c, Dt, Et, &s, cone);
|
367
|
+
rescale(P, A, b, c, Dt, Et, s, scal, cone);
|
389
368
|
}
|
390
369
|
scs_free(Dt);
|
391
370
|
scs_free(Et);
|
@@ -404,9 +383,10 @@ void SCS(normalize)(ScsMatrix *P, ScsMatrix *A, scs_float *b, scs_float *c,
|
|
404
383
|
scs_printf("norm D %g\n", SCS(norm_inf)(scal->D, A->m));
|
405
384
|
scs_printf("norm E %g\n", SCS(norm_inf)(scal->E, A->n));
|
406
385
|
#endif
|
386
|
+
return scal;
|
407
387
|
}
|
408
388
|
|
409
|
-
void SCS(
|
389
|
+
void SCS(un_normalize_a_p)(ScsMatrix *A, ScsMatrix *P, const ScsScaling *scal) {
|
410
390
|
scs_int i, j;
|
411
391
|
scs_float *D = scal->D;
|
412
392
|
scs_float *E = scal->E;
|
@@ -7,17 +7,17 @@ extern "C" {
|
|
7
7
|
|
8
8
|
#include "glbopts.h"
|
9
9
|
#include "scs.h"
|
10
|
+
#include "scs_work.h"
|
10
11
|
|
11
12
|
/* Normalization routines, used if d->NORMALIZE is true */
|
12
13
|
/* normalizes A matrix, sets scal->E and scal->D diagonal scaling matrices,
|
13
14
|
* A -> D*A*E. D and E must be all positive entries, D must satisfy cone
|
14
15
|
* boundaries */
|
15
|
-
|
16
|
-
|
17
|
-
scs_int cone_boundaries_len);
|
16
|
+
ScsScaling *SCS(normalize_a_p)(ScsMatrix *P, ScsMatrix *A, scs_float *b,
|
17
|
+
scs_float *c, ScsConeWork *cone);
|
18
18
|
|
19
19
|
/* unnormalizes A matrix, unnormalizes by scal->D and scal->E */
|
20
|
-
void SCS(
|
20
|
+
void SCS(un_normalize_a_p)(ScsMatrix *A, ScsMatrix *P, const ScsScaling *scal);
|
21
21
|
|
22
22
|
/* to free the memory allocated in a ScsMatrix (called on A and P at finish) */
|
23
23
|
void SCS(free_scs_matrix)(ScsMatrix *A);
|
data/vendor/scs/scs.mk
CHANGED
data/vendor/scs/src/aa.c
CHANGED
@@ -96,6 +96,10 @@ aa_float toc(const char *str, timer *t) {
|
|
96
96
|
|
97
97
|
#endif
|
98
98
|
|
99
|
+
#ifdef __cplusplus
|
100
|
+
extern "C" {
|
101
|
+
#endif
|
102
|
+
|
99
103
|
/* BLAS functions used */
|
100
104
|
aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
|
101
105
|
void BLAS(axpy)(blas_int *n, aa_float *a, const aa_float *x, blas_int *incx,
|
@@ -113,6 +117,10 @@ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
|
|
113
117
|
void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x,
|
114
118
|
const blas_int *incx);
|
115
119
|
|
120
|
+
#ifdef __cplusplus
|
121
|
+
}
|
122
|
+
#endif
|
123
|
+
|
116
124
|
/* This file uses Anderson acceleration to improve the convergence of
|
117
125
|
* a fixed point mapping.
|
118
126
|
* At each iteration we need to solve a (small) linear system, we
|
@@ -276,17 +284,18 @@ static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a,
|
|
276
284
|
/* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */
|
277
285
|
static void relax(aa_float *f, AaWork *a, aa_int len) {
|
278
286
|
TIME_TIC
|
279
|
-
/* x_work = x
|
287
|
+
/* x_work = x initially */
|
280
288
|
blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len;
|
281
289
|
aa_float onef = 1.0, neg_onef = -1.0;
|
282
290
|
aa_float one_m_relaxation = 1. - a->relaxation;
|
291
|
+
/* x_work = x - S * work */
|
283
292
|
BLAS(gemv)
|
284
293
|
("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
|
285
294
|
a->x_work, &one);
|
286
295
|
/* f = relaxation * f */
|
287
|
-
BLAS(scal)(&
|
296
|
+
BLAS(scal)(&bdim, &a->relaxation, f, &one);
|
288
297
|
/* f += (1 - relaxation) * x_work */
|
289
|
-
BLAS(axpy)(&
|
298
|
+
BLAS(axpy)(&bdim, &one_m_relaxation, a->x_work, &one, f, &one);
|
290
299
|
TIME_TOC
|
291
300
|
}
|
292
301
|
|
@@ -352,7 +361,7 @@ AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization,
|
|
352
361
|
AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
|
353
362
|
if (!a) {
|
354
363
|
printf("Failed to allocate memory for AA.\n");
|
355
|
-
return (
|
364
|
+
return (AaWork *)0;
|
356
365
|
}
|
357
366
|
a->type1 = type1;
|
358
367
|
a->iter = 0;
|