scs 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/scs/ffi.rb +2 -2
- data/lib/scs/version.rb +1 -1
- data/lib/scs.rb +3 -3
- data/vendor/scs/CITATION.cff +2 -2
- data/vendor/scs/CMakeLists.txt +305 -171
- data/vendor/scs/Makefile +44 -19
- data/vendor/scs/README.md +1 -1
- data/vendor/scs/include/glbopts.h +34 -14
- data/vendor/scs/include/linsys.h +8 -8
- data/vendor/scs/include/scs.h +6 -2
- data/vendor/scs/include/scs_blas.h +4 -0
- data/vendor/scs/include/scs_types.h +3 -1
- data/vendor/scs/include/scs_work.h +9 -8
- data/vendor/scs/include/util.h +1 -1
- data/vendor/scs/linsys/cpu/direct/private.c +32 -153
- data/vendor/scs/linsys/cpu/direct/private.h +6 -6
- data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
- data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
- data/vendor/scs/linsys/csparse.c +140 -12
- data/vendor/scs/linsys/csparse.h +10 -17
- data/vendor/scs/linsys/gpu/gpu.c +4 -4
- data/vendor/scs/linsys/gpu/gpu.h +1 -1
- data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
- data/vendor/scs/linsys/mkl/direct/private.c +182 -0
- data/vendor/scs/linsys/mkl/direct/private.h +38 -0
- data/vendor/scs/linsys/scs_matrix.c +11 -5
- data/vendor/scs/scs.mk +40 -27
- data/vendor/scs/src/cones.c +17 -161
- data/vendor/scs/src/exp_cone.c +399 -0
- data/vendor/scs/src/linalg.c +17 -3
- data/vendor/scs/src/normalize.c +4 -2
- data/vendor/scs/src/rw.c +107 -38
- data/vendor/scs/src/scs.c +103 -69
- data/vendor/scs/src/util.c +12 -3
- data/vendor/scs/test/minunit.h +2 -1
- data/vendor/scs/test/problem_utils.h +2 -1
- data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
- data/vendor/scs/test/problems/max_ent +0 -0
- data/vendor/scs/test/problems/max_ent.h +8 -0
- data/vendor/scs/test/problems/mpc_bug.h +19 -0
- data/vendor/scs/test/problems/mpc_bug1 +0 -0
- data/vendor/scs/test/problems/mpc_bug2 +0 -0
- data/vendor/scs/test/problems/mpc_bug3 +0 -0
- data/vendor/scs/test/problems/random_prob.h +2 -43
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
- data/vendor/scs/test/problems/test_exp_cone.h +84 -0
- data/vendor/scs/test/problems/test_prob_from_data_file.h +73 -0
- data/vendor/scs/test/run_from_file.c +7 -1
- data/vendor/scs/test/run_tests.c +25 -9
- metadata +14 -3
@@ -21,20 +21,10 @@ static scs_float cg_gpu_norm(cublasHandle_t cublas_handle, scs_float *r,
|
|
21
21
|
return nrm;
|
22
22
|
}
|
23
23
|
|
24
|
-
const char *
|
24
|
+
/* Returns a static, human-readable name for the linear-system backend built
 * into this translation unit. The caller must not free the returned string. */
const char *scs_get_lin_sys_method(void) {
  return "sparse-indirect GPU";
}
|
27
27
|
|
28
|
-
/*
|
29
|
-
char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
|
30
|
-
char *str = (char *)scs_malloc(sizeof(char) * 128);
|
31
|
-
sprintf(str, "lin-sys: avg cg its: %2.2f\n",
|
32
|
-
(scs_float)p->tot_cg_its / (info->iter + 1));
|
33
|
-
p->tot_cg_its = 0;
|
34
|
-
return str;
|
35
|
-
}
|
36
|
-
*/
|
37
|
-
|
38
28
|
/* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
|
39
29
|
/* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
|
40
30
|
static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
|
@@ -76,7 +66,7 @@ static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
|
|
76
66
|
}
|
77
67
|
|
78
68
|
/* no need to update anything in this case */
|
79
|
-
void
|
69
|
+
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
|
80
70
|
scs_int i;
|
81
71
|
|
82
72
|
/* R_x to gpu */
|
@@ -93,7 +83,7 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
|
|
93
83
|
set_preconditioner(p, diag_r);
|
94
84
|
}
|
95
85
|
|
96
|
-
void
|
86
|
+
void scs_free_lin_sys_work(ScsLinSysWork *p) {
|
97
87
|
if (p) {
|
98
88
|
scs_free(p->M);
|
99
89
|
scs_free(p->inv_r_y);
|
@@ -182,13 +172,13 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
|
|
182
172
|
}
|
183
173
|
|
184
174
|
/* P comes in upper triangular, expand to full
|
185
|
-
* First compute triplet version of full matrix, then compress to
|
175
|
+
* First compute triplet version of full matrix, then compress to CSC
|
186
176
|
* */
|
187
|
-
static
|
177
|
+
static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
|
188
178
|
scs_int i, j, k, kk;
|
189
179
|
scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
|
190
|
-
|
191
|
-
|
180
|
+
ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
|
181
|
+
ScsMatrix *P_full;
|
192
182
|
kk = 0;
|
193
183
|
for (j = 0; j < P->n; j++) { /* cols */
|
194
184
|
for (k = P->p[j]; k < P->p[j + 1]; k++) {
|
@@ -209,16 +199,15 @@ static csc *fill_p_matrix(const ScsMatrix *P) {
|
|
209
199
|
kk++;
|
210
200
|
}
|
211
201
|
}
|
212
|
-
|
213
|
-
P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
|
202
|
+
P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
|
214
203
|
SCS(cs_spfree)(P_tmp);
|
215
204
|
return P_full;
|
216
205
|
}
|
217
206
|
|
218
|
-
ScsLinSysWork *
|
219
|
-
|
207
|
+
ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
|
208
|
+
const scs_float *diag_r) {
|
220
209
|
cudaError_t err;
|
221
|
-
|
210
|
+
ScsMatrix *P_full;
|
222
211
|
ScsLinSysWork *p = SCS_NULL;
|
223
212
|
ScsGpuMatrix *Ag = SCS_NULL;
|
224
213
|
ScsGpuMatrix *Pg = SCS_NULL;
|
@@ -324,7 +313,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
|
|
324
313
|
cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
|
325
314
|
|
326
315
|
/* Form preconditioner and copy R_x, 1/R_y to gpu */
|
327
|
-
|
316
|
+
scs_update_lin_sys_diag_r(p, diag_r);
|
328
317
|
|
329
318
|
#if GPU_TRANSPOSE_MAT > 0
|
330
319
|
p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
|
@@ -367,7 +356,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
|
|
367
356
|
if (err != cudaSuccess) {
|
368
357
|
printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
|
369
358
|
cudaGetErrorString(err));
|
370
|
-
|
359
|
+
scs_free_lin_sys_work(p);
|
371
360
|
return SCS_NULL;
|
372
361
|
}
|
373
362
|
return p;
|
@@ -466,8 +455,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *bg,
|
|
466
455
|
* y = R_y^{-1} (Ax - ry)
|
467
456
|
*
|
468
457
|
*/
|
469
|
-
scs_int
|
470
|
-
|
458
|
+
scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
|
459
|
+
scs_float tol) {
|
471
460
|
scs_int cg_its, max_iters;
|
472
461
|
scs_float neg_onef = -1.0;
|
473
462
|
|
@@ -0,0 +1,182 @@
|
|
1
|
+
#include "private.h"
|
2
|
+
|
3
|
+
#define PARDISO_SYMBOLIC (11)
|
4
|
+
#define PARDISO_NUMERIC (22)
|
5
|
+
#define PARDISO_SOLVE (33)
|
6
|
+
#define PARDISO_CLEANUP (-1)
|
7
|
+
|
8
|
+
/* TODO: is it necessary to use pardiso_64 and MKL_Set_Interface_Layer ? */
|
9
|
+
/*
|
10
|
+
#define MKL_INTERFACE_LP64 0
|
11
|
+
#define MKL_INTERFACE_ILP64 1
|
12
|
+
*/
|
13
|
+
#ifdef DLONG
|
14
|
+
#define _PARDISO pardiso_64
|
15
|
+
#else
|
16
|
+
#define _PARDISO pardiso
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* Prototypes for Pardiso functions */
|
20
|
+
void _PARDISO(void **pt, const scs_int *maxfct, const scs_int *mnum,
|
21
|
+
const scs_int *mtype, const scs_int *phase, const scs_int *n,
|
22
|
+
const scs_float *a, const scs_int *ia, const scs_int *ja,
|
23
|
+
scs_int *perm, const scs_int *nrhs, scs_int *iparm,
|
24
|
+
const scs_int *msglvl, scs_float *b, scs_float *x,
|
25
|
+
scs_int *error);
|
26
|
+
/* scs_int MKL_Set_Interface_Layer(scs_int); */
|
27
|
+
|
28
|
+
/* Returns a static, human-readable name for the linear-system backend built
 * into this translation unit. The caller must not free the returned string. */
const char *scs_get_lin_sys_method(void) {
  return "sparse-direct-mkl-pardiso";
}
|
31
|
+
|
32
|
+
/*
 * Releases the Pardiso internal memory and every host buffer owned by the
 * workspace, then the workspace itself.
 *
 * Accepts SCS_NULL and partially constructed workspaces (e.g. the one handed
 * over by scs_init_lin_sys_work when forming the KKT matrix failed).
 */
void scs_free_lin_sys_work(ScsLinSysWork *p) {
  if (!p) {
    return;
  }

  /* The cleanup call reads p->kkt->p and p->kkt->i, so it may only be issued
   * once the KKT matrix exists. In this file the KKT matrix is formed before
   * the first Pardiso call, so a workspace without one has no solver memory
   * to release. */
  if (p->kkt) {
    p->phase = PARDISO_CLEANUP;
    _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
             &(p->n_plus_m), SCS_NULL, p->kkt->p, p->kkt->i, SCS_NULL,
             &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL,
             &(p->error));
    if (p->error != 0) {
      scs_printf("Error during MKL Pardiso cleanup: %d", (int)p->error);
    }
    SCS(cs_spfree)(p->kkt);
  }

  if (p->sol) {
    scs_free(p->sol);
  }
  if (p->diag_r_idxs) {
    scs_free(p->diag_r_idxs);
  }
  if (p->diag_p) {
    scs_free(p->diag_p);
  }
  scs_free(p);
}
|
53
|
+
|
54
|
+
ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
|
55
|
+
const scs_float *diag_r) {
|
56
|
+
scs_int i;
|
57
|
+
ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
|
58
|
+
|
59
|
+
/* TODO: is this necessary with pardiso_64? */
|
60
|
+
/* Set MKL interface layer */
|
61
|
+
/*
|
62
|
+
#ifdef DLONG
|
63
|
+
MKL_Set_Interface_Layer(MKL_INTERFACE_ILP64);
|
64
|
+
#else
|
65
|
+
MKL_Set_Interface_Layer(MKL_INTERFACE_LP64);
|
66
|
+
#endif
|
67
|
+
*/
|
68
|
+
p->n = A->n;
|
69
|
+
p->m = A->m;
|
70
|
+
p->n_plus_m = p->n + p->m;
|
71
|
+
|
72
|
+
/* Even though we overwrite rhs with sol pardiso requires the memory */
|
73
|
+
p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
|
74
|
+
p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
|
75
|
+
p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
|
76
|
+
|
77
|
+
/* MKL pardiso requires upper triangular CSR matrices. The KKT matrix stuffed
|
78
|
+
* as CSC lower triangular is equivalent. Pass upper=0. */
|
79
|
+
p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 0);
|
80
|
+
if (!(p->kkt)) {
|
81
|
+
scs_printf("Error in forming KKT matrix");
|
82
|
+
scs_free_lin_sys_work(p);
|
83
|
+
return SCS_NULL;
|
84
|
+
}
|
85
|
+
|
86
|
+
for (i = 0; i < 64; i++) {
|
87
|
+
p->iparm[i] = 0; /* Setup Pardiso control parameters */
|
88
|
+
p->pt[i] = 0; /* Initialize the internal solver memory pointer */
|
89
|
+
}
|
90
|
+
|
91
|
+
/* Set Pardiso variables */
|
92
|
+
p->mtype = -2; /* Real symmetric indefinite matrix */
|
93
|
+
p->nrhs = 1; /* Number of right hand sides */
|
94
|
+
p->maxfct = 1; /* Maximum number of numerical factorizations */
|
95
|
+
p->mnum = 1; /* Which factorization to use */
|
96
|
+
p->error = 0; /* Initialize error flag */
|
97
|
+
p->msglvl = VERBOSITY; /* Printing information */
|
98
|
+
|
99
|
+
/* For all iparm vars see MKL documentation */
|
100
|
+
p->iparm[0] = 1; /* Parsido must inspect iparm */
|
101
|
+
p->iparm[1] = 3; /* Fill-in reordering from OpenMP */
|
102
|
+
p->iparm[5] = 1; /* Write solution into b */
|
103
|
+
p->iparm[7] = 0; /* Automatic iterative refinement calculation */
|
104
|
+
p->iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */
|
105
|
+
p->iparm[34] = 1; /* Use C-style indexing for indices */
|
106
|
+
/* p->iparm[36] = -80; */ /* Form block sparse matrices */
|
107
|
+
|
108
|
+
#ifdef SFLOAT
|
109
|
+
p->iparm[27] = 1; /* 1 is single precision, 0 is double */
|
110
|
+
#endif
|
111
|
+
|
112
|
+
/* Permutation and symbolic factorization */
|
113
|
+
scs_int phase = PARDISO_SYMBOLIC;
|
114
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &phase, &(p->n_plus_m),
|
115
|
+
p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL, &(p->nrhs), p->iparm,
|
116
|
+
&(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
|
117
|
+
|
118
|
+
if (p->error != 0) {
|
119
|
+
scs_printf("Error during symbolic factorization: %d", (int)p->error);
|
120
|
+
scs_free_lin_sys_work(p);
|
121
|
+
return SCS_NULL;
|
122
|
+
}
|
123
|
+
|
124
|
+
/* Numerical factorization */
|
125
|
+
p->phase = PARDISO_NUMERIC;
|
126
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
|
127
|
+
&(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
|
128
|
+
&(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
|
129
|
+
|
130
|
+
if (p->error) {
|
131
|
+
scs_printf("Error during numerical factorization: %d", (int)p->error);
|
132
|
+
scs_free_lin_sys_work(p);
|
133
|
+
return SCS_NULL;
|
134
|
+
}
|
135
|
+
|
136
|
+
if (p->iparm[21] < p->n) {
|
137
|
+
scs_printf("KKT matrix has < n positive eigenvalues. P not PSD.");
|
138
|
+
return SCS_NULL;
|
139
|
+
}
|
140
|
+
|
141
|
+
return p;
|
142
|
+
}
|
143
|
+
|
144
|
+
/* Returns solution to linear system Ax = b with solution stored in b */
|
145
|
+
scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
|
146
|
+
scs_float tol) {
|
147
|
+
/* Back substitution and iterative refinement */
|
148
|
+
p->phase = PARDISO_SOLVE;
|
149
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
|
150
|
+
&(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
|
151
|
+
&(p->nrhs), p->iparm, &(p->msglvl), b, p->sol, &(p->error));
|
152
|
+
if (p->error != 0) {
|
153
|
+
scs_printf("Error during linear system solution: %d", (int)p->error);
|
154
|
+
}
|
155
|
+
return p->error;
|
156
|
+
}
|
157
|
+
|
158
|
+
/* Update factorization when R changes */
|
159
|
+
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
|
160
|
+
scs_int i;
|
161
|
+
|
162
|
+
for (i = 0; i < p->n; ++i) {
|
163
|
+
/* top left is R_x + P, bottom right is -R_y */
|
164
|
+
p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
|
165
|
+
}
|
166
|
+
for (i = p->n; i < p->n + p->m; ++i) {
|
167
|
+
/* top left is R_x + P, bottom right is -R_y */
|
168
|
+
p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Perform numerical factorization */
|
172
|
+
p->phase = PARDISO_NUMERIC;
|
173
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
|
174
|
+
&(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
|
175
|
+
&(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
|
176
|
+
|
177
|
+
if (p->error != 0) {
|
178
|
+
scs_printf("Error in PARDISO factorization when updating: %d.\n",
|
179
|
+
(int)p->error);
|
180
|
+
scs_free_lin_sys_work(p);
|
181
|
+
}
|
182
|
+
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#ifndef PRIV_H_GUARD
|
2
|
+
#define PRIV_H_GUARD
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
#include "csparse.h"
|
9
|
+
#include "linsys.h"
|
10
|
+
|
11
|
+
struct SCS_LIN_SYS_WORK {
|
12
|
+
ScsMatrix *kkt; /* Upper triangular KKT matrix (in CSR format) */
|
13
|
+
scs_float *sol; /* solution to the KKT system */
|
14
|
+
scs_int n; /* number of QP variables */
|
15
|
+
scs_int m; /* number of QP constraints */
|
16
|
+
|
17
|
+
/* Pardiso variables */
|
18
|
+
void *pt[64]; /* internal solver memory pointer pt */
|
19
|
+
scs_int iparm[64]; /* Pardiso control parameters */
|
20
|
+
scs_int n_plus_m; /* dimension of the linear system */
|
21
|
+
scs_int mtype; /* matrix type (-2 for real and symmetric indefinite) */
|
22
|
+
scs_int nrhs; /* number of right-hand sides (1) */
|
23
|
+
scs_int maxfct; /* maximum number of factors (1) */
|
24
|
+
scs_int mnum; /* indicates matrix for the solution phase (1) */
|
25
|
+
scs_int phase; /* control the execution phases of the solver */
|
26
|
+
scs_int error; /* the error indicator (0 for no error) */
|
27
|
+
scs_int msglvl; /* Message level information (0 for no output) */
|
28
|
+
|
29
|
+
/* These are required for matrix updates */
|
30
|
+
scs_int *diag_r_idxs; /* indices where R appears */
|
31
|
+
scs_float *diag_p; /* Diagonal values of P */
|
32
|
+
};
|
33
|
+
|
34
|
+
#ifdef __cplusplus
|
35
|
+
}
|
36
|
+
#endif
|
37
|
+
|
38
|
+
#endif
|
@@ -117,6 +117,7 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
|
|
117
117
|
scs_float *Et, ScsConeWork *cone) {
|
118
118
|
scs_int i, j, kk;
|
119
119
|
scs_float wrk;
|
120
|
+
scs_float nm_a_col;
|
120
121
|
|
121
122
|
/**************************** D ****************************/
|
122
123
|
|
@@ -138,7 +139,8 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
|
|
138
139
|
|
139
140
|
/* invert temporary vec to form D */
|
140
141
|
for (i = 0; i < A->m; ++i) {
|
141
|
-
Dt[i] =
|
142
|
+
Dt[i] = SQRTF(apply_limit(Dt[i]));
|
143
|
+
Dt[i] = SAFEDIV_POS(1.0, Dt[i]);
|
142
144
|
}
|
143
145
|
|
144
146
|
/**************************** E ****************************/
|
@@ -169,8 +171,10 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
|
|
169
171
|
|
170
172
|
/* calculate col norms, E */
|
171
173
|
for (i = 0; i < A->n; ++i) {
|
172
|
-
|
173
|
-
Et[i] =
|
174
|
+
nm_a_col = SCS(norm_inf)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]);
|
175
|
+
Et[i] = MAX(Et[i], nm_a_col);
|
176
|
+
Et[i] = SQRTF(apply_limit(Et[i]));
|
177
|
+
Et[i] = SAFEDIV_POS(1.0, Et[i]);
|
174
178
|
}
|
175
179
|
}
|
176
180
|
|
@@ -201,7 +205,8 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
|
|
201
205
|
SCS(enforce_cone_boundaries)(cone, Dt, &SCS(mean));
|
202
206
|
|
203
207
|
for (i = 0; i < A->m; ++i) {
|
204
|
-
Dt[i] =
|
208
|
+
Dt[i] = SQRTF(apply_limit(Dt[i]));
|
209
|
+
Dt[i] = SAFEDIV_POS(1.0, Dt[i]);
|
205
210
|
}
|
206
211
|
|
207
212
|
/**************************** E ****************************/
|
@@ -233,7 +238,8 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
|
|
233
238
|
/* calculate col norms, E */
|
234
239
|
for (i = 0; i < A->n; ++i) {
|
235
240
|
Et[i] += SCS(norm_sq)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]);
|
236
|
-
Et[i] =
|
241
|
+
Et[i] = SQRTF(apply_limit(SQRTF(Et[i])));
|
242
|
+
Et[i] = SAFEDIV_POS(1.0, Et[i]);
|
237
243
|
}
|
238
244
|
}
|
239
245
|
|
data/vendor/scs/scs.mk
CHANGED
@@ -69,6 +69,7 @@ DIRSRC = $(LINSYS)/cpu/direct
|
|
69
69
|
INDIRSRC = $(LINSYS)/cpu/indirect
|
70
70
|
GPUDIR = $(LINSYS)/gpu/direct
|
71
71
|
GPUINDIR = $(LINSYS)/gpu/indirect
|
72
|
+
MKLSRC = $(LINSYS)/mkl/direct
|
72
73
|
|
73
74
|
EXTSRC = $(LINSYS)/external
|
74
75
|
|
@@ -83,44 +84,56 @@ ifeq ($(PREFIX),)
|
|
83
84
|
PREFIX = /usr/local
|
84
85
|
endif
|
85
86
|
|
86
|
-
|
87
|
-
########### OPTIONAL FLAGS ##########
|
87
|
+
########### CUSTOM FLAGS ##########
|
88
88
|
# these can all be override from the command line
|
89
|
+
CUSTOM_FLAGS =
|
89
90
|
# e.g. make DLONG=1 will override the setting below
|
90
91
|
DLONG = 0
|
91
92
|
ifneq ($(DLONG), 0)
|
92
|
-
|
93
|
+
CUSTOM_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
|
93
94
|
endif
|
94
95
|
CTRLC = 1
|
95
96
|
ifneq ($(CTRLC), 0)
|
96
|
-
|
97
|
+
CUSTOM_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
|
97
98
|
endif
|
98
99
|
SFLOAT = 0
|
99
100
|
ifneq ($(SFLOAT), 0)
|
100
|
-
|
101
|
-
endif
|
102
|
-
NOTIMER = 0
|
103
|
-
ifneq ($(NOTIMER), 0)
|
104
|
-
OPT_FLAGS += -DNOTIMER=$(NOTIMER) # no timing, times reported as nan
|
101
|
+
CUSTOM_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
|
105
102
|
endif
|
106
103
|
GPU_TRANSPOSE_MAT = 1
|
107
104
|
ifneq ($(GPU_TRANSPOSE_MAT), 0)
|
108
|
-
|
105
|
+
CUSTOM_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # transpose A mat in GPU memory
|
106
|
+
endif
|
107
|
+
NO_TIMER = 0
|
108
|
+
ifneq ($(NO_TIMER), 0)
|
109
|
+
CUSTOM_FLAGS += -DNO_TIMER=$(NO_TIMER) # no timing, times reported as nan
|
110
|
+
endif
|
111
|
+
NO_VALIDATE = 0
|
112
|
+
ifneq ($(NO_VALIDATE), 0)
|
113
|
+
CUSTOM_FLAGS += -DNO_VALIDATE=$(NO_VALIDATE) # perform problem validation or skip
|
114
|
+
endif
|
115
|
+
NO_PRINTING = 0
|
116
|
+
ifneq ($(NO_PRINTING), 0)
|
117
|
+
CUSTOM_FLAGS += -DNO_PRINTING=$(NO_PRINTING) # disable printing
|
109
118
|
endif
|
110
|
-
|
111
|
-
ifneq ($(
|
112
|
-
|
119
|
+
NO_READ_WRITE = 0
|
120
|
+
ifneq ($(NO_READ_WRITE), 0)
|
121
|
+
CUSTOM_FLAGS += -DNO_READ_WRITE=$(NO_READ_WRITE) # disable read/write functionality
|
113
122
|
endif
|
114
123
|
### VERBOSITY LEVELS: 0,1,2,...
|
115
124
|
VERBOSITY = 0
|
116
125
|
ifneq ($(VERBOSITY), 0)
|
117
|
-
|
126
|
+
CUSTOM_FLAGS += -DVERBOSITY=$(VERBOSITY) # verbosity level
|
118
127
|
endif
|
119
128
|
COVERAGE = 0
|
120
129
|
ifneq ($(COVERAGE), 0)
|
121
|
-
|
130
|
+
CUSTOM_FLAGS += --coverage # generate test coverage data
|
122
131
|
endif
|
123
132
|
|
133
|
+
# See: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-link-line-advisor.html
|
134
|
+
# This is probably not correct for other systems. TODO: update this
|
135
|
+
# to work for all combinations of platform / compiler / threading options.
|
136
|
+
MKLFLAGS = -L$(MKLROOT) -L$(MKLROOT)/lib -Wl,--no-as-needed -lmkl_rt -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -ldl
|
124
137
|
|
125
138
|
############ OPENMP: ############
|
126
139
|
# set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
|
@@ -130,7 +143,7 @@ endif
|
|
130
143
|
USE_OPENMP = 0
|
131
144
|
ifneq ($(USE_OPENMP), 0)
|
132
145
|
override CFLAGS += -fopenmp
|
133
|
-
LDFLAGS += -
|
146
|
+
LDFLAGS += -fopenmp
|
134
147
|
endif
|
135
148
|
|
136
149
|
############ SDPS: BLAS + LAPACK ############
|
@@ -138,44 +151,44 @@ endif
|
|
138
151
|
# NB: point the libraries to the locations where
|
139
152
|
# you have blas and lapack installed
|
140
153
|
|
154
|
+
BLASLDFLAGS =
|
141
155
|
USE_LAPACK = 1
|
142
156
|
ifneq ($(USE_LAPACK), 0)
|
143
157
|
# edit these for your setup:
|
144
|
-
BLASLDFLAGS
|
145
|
-
|
146
|
-
OPT_FLAGS += -DUSE_LAPACK
|
158
|
+
BLASLDFLAGS += -llapack -lblas # -lgfortran
|
159
|
+
CUSTOM_FLAGS += -DUSE_LAPACK
|
147
160
|
|
148
161
|
BLAS64 = 0
|
149
162
|
ifneq ($(BLAS64), 0)
|
150
|
-
|
163
|
+
CUSTOM_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
|
151
164
|
endif
|
152
165
|
|
153
166
|
NOBLASSUFFIX = 0
|
154
167
|
ifneq ($(NOBLASSUFFIX), 0)
|
155
|
-
|
168
|
+
CUSTOM_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
|
156
169
|
endif
|
157
170
|
|
158
171
|
BLASSUFFIX = "_"
|
159
172
|
ifneq ($(BLASSUFFIX), "_")
|
160
|
-
|
173
|
+
CUSTOM_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
|
161
174
|
endif
|
162
175
|
endif
|
163
176
|
|
164
177
|
MATLAB_MEX_FILE = 0
|
165
178
|
ifneq ($(MATLAB_MEX_FILE), 0)
|
166
|
-
|
179
|
+
CUSTOM_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
|
167
180
|
endif
|
168
181
|
PYTHON = 0
|
169
182
|
ifneq ($(PYTHON), 0)
|
170
|
-
|
183
|
+
CUSTOM_FLAGS += -DPYTHON=$(PYTHON) # python extension
|
171
184
|
endif
|
172
185
|
USING_R = 0
|
173
186
|
ifneq ($(USING_R), 0)
|
174
|
-
|
187
|
+
CUSTOM_FLAGS += -DUSING_R=$(USING_R) # R extension
|
175
188
|
endif
|
176
189
|
|
177
190
|
# debug to see var values, e.g. 'make print-OBJECTS' shows OBJECTS value
|
178
191
|
print-%: ; @echo $*=$($*)
|
179
192
|
|
180
|
-
override CFLAGS += $(
|
181
|
-
CUDAFLAGS += $(
|
193
|
+
override CFLAGS += $(CUSTOM_FLAGS)
|
194
|
+
CUDAFLAGS += $(CUSTOM_FLAGS)
|