scs 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/lib/scs/version.rb +1 -1
  6. data/vendor/scs/CITATION.cff +2 -2
  7. data/vendor/scs/CMakeLists.txt +284 -168
  8. data/vendor/scs/Makefile +43 -18
  9. data/vendor/scs/README.md +1 -1
  10. data/vendor/scs/include/glbopts.h +32 -13
  11. data/vendor/scs/include/linsys.h +8 -8
  12. data/vendor/scs/include/scs.h +6 -2
  13. data/vendor/scs/include/scs_types.h +3 -1
  14. data/vendor/scs/include/scs_work.h +9 -8
  15. data/vendor/scs/include/util.h +1 -1
  16. data/vendor/scs/linsys/cpu/direct/private.c +32 -153
  17. data/vendor/scs/linsys/cpu/direct/private.h +6 -6
  18. data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
  19. data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
  20. data/vendor/scs/linsys/csparse.c +140 -12
  21. data/vendor/scs/linsys/csparse.h +10 -17
  22. data/vendor/scs/linsys/gpu/gpu.c +4 -4
  23. data/vendor/scs/linsys/gpu/gpu.h +1 -1
  24. data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
  25. data/vendor/scs/linsys/mkl/direct/private.c +182 -0
  26. data/vendor/scs/linsys/mkl/direct/private.h +38 -0
  27. data/vendor/scs/linsys/scs_matrix.c +11 -5
  28. data/vendor/scs/scs.mk +39 -26
  29. data/vendor/scs/src/cones.c +15 -159
  30. data/vendor/scs/src/exp_cone.c +399 -0
  31. data/vendor/scs/src/normalize.c +4 -2
  32. data/vendor/scs/src/rw.c +93 -38
  33. data/vendor/scs/src/scs.c +83 -52
  34. data/vendor/scs/src/util.c +12 -3
  35. data/vendor/scs/test/minunit.h +2 -1
  36. data/vendor/scs/test/problem_utils.h +2 -1
  37. data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
  38. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
  39. data/vendor/scs/test/problems/max_ent +0 -0
  40. data/vendor/scs/test/problems/max_ent.h +8 -0
  41. data/vendor/scs/test/problems/random_prob.h +2 -43
  42. data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
  43. data/vendor/scs/test/problems/test_exp_cone.h +84 -0
  44. data/vendor/scs/test/problems/test_prob_from_data_file.h +57 -0
  45. data/vendor/scs/test/run_from_file.c +7 -1
  46. data/vendor/scs/test/run_tests.c +22 -9
  47. metadata +10 -3
@@ -9,15 +9,15 @@ extern "C" {
9
9
  #include "external/amd/amd.h"
10
10
  #include "external/qdldl/qdldl.h"
11
11
  #include "glbopts.h"
12
- #include "scs.h"
12
+ #include "linsys.h"
13
13
  #include "scs_matrix.h"
14
14
 
15
15
  struct SCS_LIN_SYS_WORK {
16
- scs_int m, n; /* linear system dimensions */
17
- csc *kkt, *L; /* KKT, and factorization matrix L resp. */
18
- scs_float *Dinv; /* inverse diagonal matrix of factorization */
19
- scs_int *perm; /* permutation of KKT matrix for factorization */
20
- scs_float *bp; /* workspace memory for solves */
16
+ scs_int m, n; /* linear system dimensions */
17
+ ScsMatrix *kkt, *L; /* KKT, and factorization matrix L resp. */
18
+ scs_float *Dinv; /* inverse diagonal matrix of factorization */
19
+ scs_int *perm; /* permutation of KKT matrix for factorization */
20
+ scs_float *bp; /* workspace memory for solves */
21
21
  scs_int *diag_r_idxs;
22
22
  scs_int factorizations;
23
23
  /* ldl factorization workspace */
@@ -1,22 +1,9 @@
1
1
  #include "private.h"
2
- #include "linsys.h"
3
- #include "util.h"
4
- #include <limits.h>
5
2
 
6
- const char *SCS(get_lin_sys_method)() {
7
- return "sparse-indirect";
3
+ const char *scs_get_lin_sys_method() {
4
+ return "sparse-indirect-scs";
8
5
  }
9
6
 
10
- /*
11
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
12
- char *str = (char *)scs_malloc(sizeof(char) * 128);
13
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
14
- (scs_float)p->tot_cg_its / (info->iter + 1));
15
- p->tot_cg_its = 0;
16
- return str;
17
- }
18
- */
19
-
20
7
  /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
21
8
  /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
22
9
  static void set_preconditioner(ScsLinSysWork *p) {
@@ -97,7 +84,7 @@ static void transpose(const ScsMatrix *A, ScsLinSysWork *p) {
97
84
  #endif
98
85
  }
99
86
 
100
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
87
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
101
88
  if (p) {
102
89
  scs_free(p->p);
103
90
  scs_free(p->r);
@@ -162,13 +149,13 @@ static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n,
162
149
  }
163
150
 
164
151
  /* no need to update anything in this case */
165
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
152
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
166
153
  p->diag_r = diag_r; /* this isn't needed but do it to be safe */
167
154
  set_preconditioner(p);
168
155
  }
169
156
 
170
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
171
- const scs_float *diag_r) {
157
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
158
+ const scs_float *diag_r) {
172
159
  ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
173
160
  p->A = A;
174
161
  p->P = P;
@@ -198,7 +185,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
198
185
  p->tot_cg_its = 0;
199
186
  if (!p->p || !p->r || !p->Gp || !p->tmp || !p->At || !p->At->i || !p->At->p ||
200
187
  !p->At->x) {
201
- SCS(free_lin_sys_work)(p);
188
+ scs_free_lin_sys_work(p);
202
189
  return SCS_NULL;
203
190
  }
204
191
  return p;
@@ -288,8 +275,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b,
288
275
  * y = R_y^{-1} (Ax - ry)
289
276
  *
290
277
  */
291
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
292
- scs_float tol) {
278
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
279
+ scs_float tol) {
293
280
  scs_int cg_its, max_iters;
294
281
 
295
282
  if (tol <= 0.) {
@@ -5,11 +5,13 @@
5
5
  extern "C" {
6
6
  #endif
7
7
 
8
+ #include "csparse.h"
8
9
  #include "glbopts.h"
9
10
  #include "linalg.h"
10
- #include "scs.h"
11
+ #include "linsys.h"
11
12
  #include "scs_matrix.h"
12
- #include <math.h>
13
+ #include "util.h" /* timer */
14
+ #include <string.h>
13
15
 
14
16
  struct SCS_LIN_SYS_WORK {
15
17
  scs_int n, m; /* linear system dimensions */
@@ -2,39 +2,37 @@
2
2
 
3
3
  #include "csparse.h"
4
4
 
5
- csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
6
- scs_int triplet) {
7
- csc *A = (csc *)scs_calloc(1, sizeof(csc)); /* allocate the csc struct */
5
+ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
6
+ scs_int triplet) {
7
+ ScsMatrix *A = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
8
8
  if (!A) {
9
9
  return SCS_NULL;
10
10
  } /* out of memory */
11
11
  A->m = m; /* define dimensions and nzmax */
12
12
  A->n = n;
13
- A->nzmax = nzmax = MAX(nzmax, 1);
14
- A->nz = triplet ? 0 : -1; /* allocate triplet or comp.col */
15
13
  A->p = (scs_int *)scs_calloc((triplet ? nzmax : n + 1), sizeof(scs_int));
16
14
  A->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int));
17
15
  A->x = values ? (scs_float *)scs_calloc(nzmax, sizeof(scs_float)) : SCS_NULL;
18
16
  return (!A->p || !A->i || (values && !A->x)) ? SCS(cs_spfree)(A) : A;
19
17
  }
20
18
 
21
- csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok) {
19
+ ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok) {
22
20
  scs_free(w); /* free workspace */
23
21
  scs_free(x);
24
22
  return ok ? C : SCS(cs_spfree)(C); /* return result if OK, else free it */
25
23
  }
26
24
 
27
25
  /* C = compressed-column form of a triplet matrix T */
28
- csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping) {
29
- scs_int m, n, nz, p, k, *Cp, *Ci, *w, *Ti, *Tj;
26
+ ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
27
+ scs_int *idx_mapping) {
28
+ scs_int m, n, p, k, *Cp, *Ci, *w, *Ti, *Tj;
30
29
  scs_float *Cx, *Tx;
31
- csc *C;
30
+ ScsMatrix *C;
32
31
  m = T->m;
33
32
  n = T->n;
34
33
  Ti = T->i;
35
34
  Tj = T->p;
36
35
  Tx = T->x;
37
- nz = T->nz;
38
36
  C = SCS(cs_spalloc)(m, n, nz, Tx != SCS_NULL, 0); /* allocate result */
39
37
  w = (scs_int *)scs_calloc(n, sizeof(scs_int)); /* get workspace */
40
38
  if (!C || !w) {
@@ -75,7 +73,7 @@ scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n) {
75
73
  return nz2; /* return sum (c [0..n-1]) */
76
74
  }
77
75
 
78
- csc *SCS(cs_spfree)(csc *A) {
76
+ ScsMatrix *SCS(cs_spfree)(ScsMatrix *A) {
79
77
  if (!A) {
80
78
  return SCS_NULL;
81
79
  } /* do nothing if A already SCS_NULL */
@@ -83,5 +81,135 @@ csc *SCS(cs_spfree)(csc *A) {
83
81
  scs_free(A->i);
84
82
  scs_free(A->x);
85
83
  scs_free(A);
86
- return (csc *)SCS_NULL; /* free the csc struct and return SCS_NULL */
84
+ /* free the ScsMatrix struct and return SCS_NULL */
85
+ return (ScsMatrix *)SCS_NULL;
86
+ }
87
+
88
+ /* Build the KKT matrix */
89
+ ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
90
+ scs_float *diag_p, const scs_float *diag_r,
91
+ scs_int *diag_r_idxs, scs_int upper) {
92
+ /*
93
+ * Forms column compressed KKT matrix assumes column compressed A,P matrices.
94
+ * Only upper OR lower triangular part is stuffed, depending on `upper` flag.
95
+ *
96
+ * Forms upper/lower triangular part of [(R_x + P) A'; A -R_y]
97
+ * Shapes: P : n x n, A: m x n.
98
+ *
99
+ * Output: `diag_p` will contain values of P diagonal upon completion,
100
+ * and `diag_r_idxs` will contain the indices corresponding to the entries
101
+ * in the returned matrix corresponding to the entries of R.
102
+ *
103
+ */
104
+ scs_int h, i, j, count;
105
+ ScsMatrix *Kcsc, *K;
106
+ scs_int n = A->n;
107
+ scs_int m = A->m;
108
+ scs_int Anz = A->p[n];
109
+ scs_int Knzmax;
110
+ scs_int *idx_mapping;
111
+ if (P) {
112
+ /* Upper bound P + I triangular component NNZs as Pnz + n */
113
+ Knzmax = n + m + Anz + P->p[n];
114
+ } else {
115
+ Knzmax = n + m + Anz;
116
+ }
117
+ K = SCS(cs_spalloc)(m + n, m + n, Knzmax, 1, 1);
118
+
119
+ #if VERBOSITY > 0
120
+ scs_printf("forming kkt\n");
121
+ #endif
122
+ /* Here we generate a triplet matrix and then compress to CSC */
123
+ if (!K) {
124
+ return SCS_NULL;
125
+ }
126
+
127
+ count = 0; /* element counter */
128
+ if (P) {
129
+ /* R_x + P in top left */
130
+ for (j = 0; j < n; j++) { /* cols */
131
+ diag_p[j] = 0.;
132
+ /* empty column, add diagonal */
133
+ if (P->p[j] == P->p[j + 1]) {
134
+ K->i[count] = j;
135
+ K->p[count] = j;
136
+ K->x[count] = diag_r[j];
137
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
138
+ count++;
139
+ }
140
+ for (h = P->p[j]; h < P->p[j + 1]; h++) {
141
+ i = P->i[h]; /* row */
142
+ if (i > j) { /* only upper triangular needed */
143
+ break;
144
+ }
145
+ if (upper) {
146
+ K->i[count] = i;
147
+ K->p[count] = j;
148
+ } else { /* lower triangular */
149
+ /* P is passed in upper triangular, need to flip that here */
150
+ K->i[count] = j; /* col -> row */
151
+ K->p[count] = i; /* row -> col */
152
+ }
153
+ K->x[count] = P->x[h];
154
+ if (i == j) {
155
+ /* P has diagonal element */
156
+ diag_p[j] = P->x[h];
157
+ K->x[count] += diag_r[j];
158
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
159
+ }
160
+ count++;
161
+ /* reached the end without adding diagonal, do it now */
162
+ if ((i < j) && (h + 1 == P->p[j + 1] || P->i[h + 1] > j)) {
163
+ K->i[count] = j;
164
+ K->p[count] = j;
165
+ K->x[count] = diag_r[j];
166
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
167
+ count++;
168
+ }
169
+ }
170
+ }
171
+ } else {
172
+ /* R_x in top left */
173
+ for (j = 0; j < n; j++) {
174
+ diag_p[j] = 0.;
175
+ K->i[count] = j;
176
+ K->p[count] = j;
177
+ K->x[count] = diag_r[j];
178
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
179
+ count++;
180
+ }
181
+ }
182
+
183
+ /* A in bottom left or A^T top right */
184
+ for (j = 0; j < n; j++) { /* column */
185
+ for (h = A->p[j]; h < A->p[j + 1]; h++) {
186
+ if (upper) {
187
+ K->p[count] = A->i[h] + n; /* column */
188
+ K->i[count] = j; /*row */
189
+ } else { /* lower triangular */
190
+ K->p[count] = j; /* column */
191
+ K->i[count] = A->i[h] + n; /* row */
192
+ }
193
+ K->x[count] = A->x[h];
194
+ count++;
195
+ }
196
+ }
197
+
198
+ /* -R_y at bottom right */
199
+ for (j = 0; j < m; j++) {
200
+ K->i[count] = j + n;
201
+ K->p[count] = j + n;
202
+ K->x[count] = -diag_r[j + n];
203
+ diag_r_idxs[j + n] = count; /* store the indices where diag_r occurs */
204
+ count++;
205
+ }
206
+
207
+ idx_mapping = (scs_int *)scs_calloc(count, sizeof(scs_int));
208
+ Kcsc = SCS(cs_compress)(K, count, idx_mapping);
209
+ for (i = 0; i < m + n; i++) {
210
+ diag_r_idxs[i] = idx_mapping[diag_r_idxs[i]];
211
+ }
212
+ SCS(cs_spfree)(K);
213
+ scs_free(idx_mapping);
214
+ return Kcsc;
87
215
  }
@@ -10,24 +10,17 @@ extern "C" {
10
10
  #include "glbopts.h"
11
11
  #include "scs.h"
12
12
 
13
- /* matrix in compressed-column or triplet form */
14
- typedef struct SPARSE_MATRIX {
15
- scs_int nzmax; /* maximum number of entries */
16
- scs_int m; /* number of rows */
17
- scs_int n; /* number of columns */
18
- scs_int *p; /* column pointers (size n+1) or col indices (size nzmax) */
19
- scs_int *i; /* row indices, size nzmax */
20
- scs_float *x; /* numerical values, size nzmax */
21
- scs_int nz; /* # of entries in triplet matrix, -1 for compressed-col */
22
- } csc;
23
-
24
- csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
25
- scs_int triplet);
26
- csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok);
27
- csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping);
13
+ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
14
+ scs_int triplet);
15
+ ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok);
16
+ ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
17
+ scs_int *idx_mapping);
18
+ ScsMatrix *SCS(cs_spfree)(ScsMatrix *A);
28
19
  scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n);
29
- csc *SCS(cs_spfree)(csc *A);
30
-
20
+ /* Forms KKT matrix */
21
+ ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
22
+ scs_float *diag_p, const scs_float *diag_r,
23
+ scs_int *diag_r_idxs, scs_int upper);
31
24
  #ifdef __cplusplus
32
25
  }
33
26
  #endif
@@ -19,13 +19,13 @@ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
19
19
  if (*buffer != SCS_NULL) {
20
20
  cudaFree(*buffer);
21
21
  }
22
- cudaMalloc(buffer, *buffer_size);
22
+ cudaMalloc(buffer, new_buffer_size);
23
23
  *buffer_size = new_buffer_size;
24
24
  }
25
25
 
26
26
  CUSPARSE_GEN(SpMV)
27
27
  (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28
- &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
28
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
29
29
  }
30
30
 
31
31
  /* this is slow, use trans routine if possible */
@@ -48,13 +48,13 @@ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
48
48
  if (*buffer != SCS_NULL) {
49
49
  cudaFree(*buffer);
50
50
  }
51
- cudaMalloc(buffer, *buffer_size);
51
+ cudaMalloc(buffer, new_buffer_size);
52
52
  *buffer_size = new_buffer_size;
53
53
  }
54
54
 
55
55
  CUSPARSE_GEN(SpMV)
56
56
  (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
57
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
58
58
  }
59
59
 
60
60
  /* This assumes that P has been made full (ie not triangular) and uses the
@@ -74,7 +74,7 @@ extern "C" {
74
74
  #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
75
75
  #endif
76
76
 
77
- #define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
77
+ #define SCS_CSRMV_ALG CUSPARSE_SPMV_CSR_ALG1
78
78
  #define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
79
79
 
80
80
  /*
@@ -21,20 +21,10 @@ static scs_float cg_gpu_norm(cublasHandle_t cublas_handle, scs_float *r,
21
21
  return nrm;
22
22
  }
23
23
 
24
- const char *SCS(get_lin_sys_method)() {
24
+ const char *scs_get_lin_sys_method() {
25
25
  return "sparse-indirect GPU";
26
26
  }
27
27
 
28
- /*
29
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
30
- char *str = (char *)scs_malloc(sizeof(char) * 128);
31
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
32
- (scs_float)p->tot_cg_its / (info->iter + 1));
33
- p->tot_cg_its = 0;
34
- return str;
35
- }
36
- */
37
-
38
28
  /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
39
29
  /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
40
30
  static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
@@ -76,7 +66,7 @@ static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
76
66
  }
77
67
 
78
68
  /* no need to update anything in this case */
79
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
69
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
80
70
  scs_int i;
81
71
 
82
72
  /* R_x to gpu */
@@ -93,7 +83,7 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
93
83
  set_preconditioner(p, diag_r);
94
84
  }
95
85
 
96
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
86
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
97
87
  if (p) {
98
88
  scs_free(p->M);
99
89
  scs_free(p->inv_r_y);
@@ -182,13 +172,13 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
182
172
  }
183
173
 
184
174
  /* P comes in upper triangular, expand to full
185
- * First compute triplet version of full matrix, then compress to csc
175
+ * First compute triplet version of full matrix, then compress to CSC
186
176
  * */
187
- static csc *fill_p_matrix(const ScsMatrix *P) {
177
+ static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
188
178
  scs_int i, j, k, kk;
189
179
  scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
190
- csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
191
- csc *P_full;
180
+ ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
181
+ ScsMatrix *P_full;
192
182
  kk = 0;
193
183
  for (j = 0; j < P->n; j++) { /* cols */
194
184
  for (k = P->p[j]; k < P->p[j + 1]; k++) {
@@ -209,16 +199,15 @@ static csc *fill_p_matrix(const ScsMatrix *P) {
209
199
  kk++;
210
200
  }
211
201
  }
212
- P_tmp->nz = kk; /* set number of nonzeros */
213
- P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
202
+ P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
214
203
  SCS(cs_spfree)(P_tmp);
215
204
  return P_full;
216
205
  }
217
206
 
218
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
219
- const scs_float *diag_r) {
207
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
208
+ const scs_float *diag_r) {
220
209
  cudaError_t err;
221
- csc *P_full;
210
+ ScsMatrix *P_full;
222
211
  ScsLinSysWork *p = SCS_NULL;
223
212
  ScsGpuMatrix *Ag = SCS_NULL;
224
213
  ScsGpuMatrix *Pg = SCS_NULL;
@@ -324,7 +313,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
324
313
  cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
325
314
 
326
315
  /* Form preconditioner and copy R_x, 1/R_y to gpu */
327
- SCS(update_lin_sys_diag_r)(p, diag_r);
316
+ scs_update_lin_sys_diag_r(p, diag_r);
328
317
 
329
318
  #if GPU_TRANSPOSE_MAT > 0
330
319
  p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
@@ -367,7 +356,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
367
356
  if (err != cudaSuccess) {
368
357
  printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
369
358
  cudaGetErrorString(err));
370
- SCS(free_lin_sys_work)(p);
359
+ scs_free_lin_sys_work(p);
371
360
  return SCS_NULL;
372
361
  }
373
362
  return p;
@@ -466,8 +455,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *bg,
466
455
  * y = R_y^{-1} (Ax - ry)
467
456
  *
468
457
  */
469
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
470
- scs_float tol) {
458
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
459
+ scs_float tol) {
471
460
  scs_int cg_its, max_iters;
472
461
  scs_float neg_onef = -1.0;
473
462
 
@@ -0,0 +1,182 @@
1
+ #include "private.h"
2
+
3
+ #define PARDISO_SYMBOLIC (11)
4
+ #define PARDISO_NUMERIC (22)
5
+ #define PARDISO_SOLVE (33)
6
+ #define PARDISO_CLEANUP (-1)
7
+
8
+ /* TODO: is it necessary to use pardiso_64 and MKL_Set_Interface_Layer ? */
9
+ /*
10
+ #define MKL_INTERFACE_LP64 0
11
+ #define MKL_INTERFACE_ILP64 1
12
+ */
13
+ #ifdef DLONG
14
+ #define _PARDISO pardiso_64
15
+ #else
16
+ #define _PARDISO pardiso
17
+ #endif
18
+
19
+ /* Prototypes for Pardiso functions */
20
+ void _PARDISO(void **pt, const scs_int *maxfct, const scs_int *mnum,
21
+ const scs_int *mtype, const scs_int *phase, const scs_int *n,
22
+ const scs_float *a, const scs_int *ia, const scs_int *ja,
23
+ scs_int *perm, const scs_int *nrhs, scs_int *iparm,
24
+ const scs_int *msglvl, scs_float *b, scs_float *x,
25
+ scs_int *error);
26
+ /* scs_int MKL_Set_Interface_Layer(scs_int); */
27
+
28
+ const char *scs_get_lin_sys_method() {
29
+ return "sparse-direct-mkl-pardiso";
30
+ }
31
+
32
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
33
+ if (p) {
34
+ p->phase = PARDISO_CLEANUP;
35
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
36
+ &(p->n_plus_m), SCS_NULL, p->kkt->p, p->kkt->i, SCS_NULL,
37
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL,
38
+ &(p->error));
39
+ if (p->error != 0) {
40
+ scs_printf("Error during MKL Pardiso cleanup: %d", (int)p->error);
41
+ }
42
+ if (p->kkt)
43
+ SCS(cs_spfree)(p->kkt);
44
+ if (p->sol)
45
+ scs_free(p->sol);
46
+ if (p->diag_r_idxs)
47
+ scs_free(p->diag_r_idxs);
48
+ if (p->diag_p)
49
+ scs_free(p->diag_p);
50
+ scs_free(p);
51
+ }
52
+ }
53
+
54
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
55
+ const scs_float *diag_r) {
56
+ scs_int i;
57
+ ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
58
+
59
+ /* TODO: is this necessary with pardiso_64? */
60
+ /* Set MKL interface layer */
61
+ /*
62
+ #ifdef DLONG
63
+ MKL_Set_Interface_Layer(MKL_INTERFACE_ILP64);
64
+ #else
65
+ MKL_Set_Interface_Layer(MKL_INTERFACE_LP64);
66
+ #endif
67
+ */
68
+ p->n = A->n;
69
+ p->m = A->m;
70
+ p->n_plus_m = p->n + p->m;
71
+
72
+ /* Even though we overwrite rhs with sol pardiso requires the memory */
73
+ p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
74
+ p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
75
+ p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
76
+
77
+ /* MKL pardiso requires upper triangular CSR matrices. The KKT matrix stuffed
78
+ * as CSC lower triangular is equivalent. Pass upper=0. */
79
+ p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 0);
80
+ if (!(p->kkt)) {
81
+ scs_printf("Error in forming KKT matrix");
82
+ scs_free_lin_sys_work(p);
83
+ return SCS_NULL;
84
+ }
85
+
86
+ for (i = 0; i < 64; i++) {
87
+ p->iparm[i] = 0; /* Setup Pardiso control parameters */
88
+ p->pt[i] = 0; /* Initialize the internal solver memory pointer */
89
+ }
90
+
91
+ /* Set Pardiso variables */
92
+ p->mtype = -2; /* Real symmetric indefinite matrix */
93
+ p->nrhs = 1; /* Number of right hand sides */
94
+ p->maxfct = 1; /* Maximum number of numerical factorizations */
95
+ p->mnum = 1; /* Which factorization to use */
96
+ p->error = 0; /* Initialize error flag */
97
+ p->msglvl = VERBOSITY; /* Printing information */
98
+
99
+ /* For all iparm vars see MKL documentation */
100
+ p->iparm[0] = 1; /* Parsido must inspect iparm */
101
+ p->iparm[1] = 3; /* Fill-in reordering from OpenMP */
102
+ p->iparm[5] = 1; /* Write solution into b */
103
+ p->iparm[7] = 0; /* Automatic iterative refinement calculation */
104
+ p->iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */
105
+ p->iparm[34] = 1; /* Use C-style indexing for indices */
106
+ /* p->iparm[36] = -80; */ /* Form block sparse matrices */
107
+
108
+ #ifdef SFLOAT
109
+ p->iparm[27] = 1; /* 1 is single precision, 0 is double */
110
+ #endif
111
+
112
+ /* Permutation and symbolic factorization */
113
+ scs_int phase = PARDISO_SYMBOLIC;
114
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &phase, &(p->n_plus_m),
115
+ p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL, &(p->nrhs), p->iparm,
116
+ &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
117
+
118
+ if (p->error != 0) {
119
+ scs_printf("Error during symbolic factorization: %d", (int)p->error);
120
+ scs_free_lin_sys_work(p);
121
+ return SCS_NULL;
122
+ }
123
+
124
+ /* Numerical factorization */
125
+ p->phase = PARDISO_NUMERIC;
126
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
127
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
128
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
129
+
130
+ if (p->error) {
131
+ scs_printf("Error during numerical factorization: %d", (int)p->error);
132
+ scs_free_lin_sys_work(p);
133
+ return SCS_NULL;
134
+ }
135
+
136
+ if (p->iparm[21] < p->n) {
137
+ scs_printf("KKT matrix has < n positive eigenvalues. P not PSD.");
138
+ return SCS_NULL;
139
+ }
140
+
141
+ return p;
142
+ }
143
+
144
+ /* Returns solution to linear system Ax = b with solution stored in b */
145
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
146
+ scs_float tol) {
147
+ /* Back substitution and iterative refinement */
148
+ p->phase = PARDISO_SOLVE;
149
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
150
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
151
+ &(p->nrhs), p->iparm, &(p->msglvl), b, p->sol, &(p->error));
152
+ if (p->error != 0) {
153
+ scs_printf("Error during linear system solution: %d", (int)p->error);
154
+ }
155
+ return p->error;
156
+ }
157
+
158
+ /* Update factorization when R changes */
159
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
160
+ scs_int i;
161
+
162
+ for (i = 0; i < p->n; ++i) {
163
+ /* top left is R_x + P, bottom right is -R_y */
164
+ p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
165
+ }
166
+ for (i = p->n; i < p->n + p->m; ++i) {
167
+ /* top left is R_x + P, bottom right is -R_y */
168
+ p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
169
+ }
170
+
171
+ /* Perform numerical factorization */
172
+ p->phase = PARDISO_NUMERIC;
173
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
174
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
175
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
176
+
177
+ if (p->error != 0) {
178
+ scs_printf("Error in PARDISO factorization when updating: %d.\n",
179
+ (int)p->error);
180
+ scs_free_lin_sys_work(p);
181
+ }
182
+ }