scs 0.2.2 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +18 -18
  4. data/README.md +19 -14
  5. data/lib/scs/ffi.rb +31 -20
  6. data/lib/scs/solver.rb +32 -9
  7. data/lib/scs/version.rb +1 -1
  8. data/vendor/scs/CITATION.cff +39 -0
  9. data/vendor/scs/CMakeLists.txt +320 -0
  10. data/vendor/scs/Makefile +32 -23
  11. data/vendor/scs/README.md +9 -218
  12. data/vendor/scs/include/aa.h +67 -23
  13. data/vendor/scs/include/cones.h +22 -19
  14. data/vendor/scs/include/glbopts.h +107 -79
  15. data/vendor/scs/include/linalg.h +3 -4
  16. data/vendor/scs/include/linsys.h +58 -44
  17. data/vendor/scs/include/normalize.h +6 -5
  18. data/vendor/scs/include/rw.h +8 -2
  19. data/vendor/scs/include/scs.h +257 -141
  20. data/vendor/scs/include/scs_types.h +34 -0
  21. data/vendor/scs/include/scs_work.h +83 -0
  22. data/vendor/scs/include/util.h +3 -15
  23. data/vendor/scs/linsys/cpu/direct/private.c +241 -232
  24. data/vendor/scs/linsys/cpu/direct/private.h +13 -7
  25. data/vendor/scs/linsys/cpu/indirect/private.c +194 -118
  26. data/vendor/scs/linsys/cpu/indirect/private.h +7 -4
  27. data/vendor/scs/linsys/csparse.c +87 -0
  28. data/vendor/scs/linsys/csparse.h +34 -0
  29. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
  30. data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
  31. data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
  32. data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
  33. data/vendor/scs/linsys/external/qdldl/changes +2 -0
  34. data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
  35. data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
  36. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
  37. data/vendor/scs/linsys/gpu/gpu.c +58 -21
  38. data/vendor/scs/linsys/gpu/gpu.h +70 -35
  39. data/vendor/scs/linsys/gpu/indirect/private.c +394 -157
  40. data/vendor/scs/linsys/gpu/indirect/private.h +27 -12
  41. data/vendor/scs/linsys/scs_matrix.c +478 -0
  42. data/vendor/scs/linsys/scs_matrix.h +70 -0
  43. data/vendor/scs/scs.mk +14 -10
  44. data/vendor/scs/src/aa.c +394 -110
  45. data/vendor/scs/src/cones.c +497 -359
  46. data/vendor/scs/src/ctrlc.c +15 -5
  47. data/vendor/scs/src/linalg.c +107 -26
  48. data/vendor/scs/src/normalize.c +30 -72
  49. data/vendor/scs/src/rw.c +202 -27
  50. data/vendor/scs/src/scs.c +769 -571
  51. data/vendor/scs/src/scs_version.c +11 -3
  52. data/vendor/scs/src/util.c +37 -106
  53. data/vendor/scs/test/minunit.h +22 -8
  54. data/vendor/scs/test/problem_utils.h +180 -25
  55. data/vendor/scs/test/problems/degenerate.h +130 -0
  56. data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
  57. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
  58. data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
  59. data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
  60. data/vendor/scs/test/problems/random_prob +0 -0
  61. data/vendor/scs/test/problems/random_prob.h +45 -0
  62. data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
  63. data/vendor/scs/test/problems/small_lp.h +14 -13
  64. data/vendor/scs/test/problems/small_qp.h +352 -0
  65. data/vendor/scs/test/problems/test_validation.h +43 -0
  66. data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
  67. data/vendor/scs/test/random_socp_prob.c +54 -53
  68. data/vendor/scs/test/rng.h +109 -0
  69. data/vendor/scs/test/run_from_file.c +20 -11
  70. data/vendor/scs/test/run_tests.c +35 -2
  71. metadata +29 -98
  72. data/vendor/scs/linsys/amatrix.c +0 -305
  73. data/vendor/scs/linsys/amatrix.h +0 -36
  74. data/vendor/scs/linsys/amatrix.o +0 -0
  75. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  76. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  77. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  78. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  79. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  80. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  81. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  82. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  83. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  84. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  85. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  86. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  87. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  88. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  89. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  90. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  91. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  92. data/vendor/scs/src/aa.o +0 -0
  93. data/vendor/scs/src/cones.o +0 -0
  94. data/vendor/scs/src/ctrlc.o +0 -0
  95. data/vendor/scs/src/linalg.o +0 -0
  96. data/vendor/scs/src/normalize.o +0 -0
  97. data/vendor/scs/src/rw.o +0 -0
  98. data/vendor/scs/src/scs.o +0 -0
  99. data/vendor/scs/src/scs_version.o +0 -0
  100. data/vendor/scs/src/util.o +0 -0
  101. data/vendor/scs/test/data/small_random_socp +0 -0
  102. data/vendor/scs/test/problems/small_random_socp.h +0 -33
  103. data/vendor/scs/test/run_tests +0 -2
@@ -1,3 +1,5 @@
1
+ Last qdldl commit: a00d500906621fbf014b39e42a3304d1143eb65f
2
+
1
3
  flatten into one dir
2
4
  create qdldl_types.h from template file
3
5
  add 'include "glbopts.h"' to qdldl_types.h
@@ -1,35 +1,9 @@
1
1
  #include "qdldl.h"
2
- #include "ctrlc.h"
3
2
 
4
3
  #define QDLDL_UNKNOWN (-1)
5
4
  #define QDLDL_USED (1)
6
5
  #define QDLDL_UNUSED (0)
7
6
 
8
- // //DEBUG
9
- // #include <stdio.h>
10
- // void qdprint_arrayi(const QDLDL_int* data, QDLDL_int n,char* varName){
11
-
12
- // QDLDL_int i;
13
- // printf("%s = [",varName);
14
- // for(i=0; i< n; i++){
15
- // printf("%lli,",data[i]);
16
- // }
17
- // printf("]\n");
18
-
19
- // }
20
-
21
- // void qdprint_arrayf(const QDLDL_float* data, QDLDL_int n, char* varName){
22
-
23
- // QDLDL_int i;
24
- // printf("%s = [",varName);
25
- // for(i=0; i< n; i++){
26
- // printf("%.3g,",data[i]);
27
- // }
28
- // printf("]\n");
29
-
30
- // }
31
- // // END DEBUG
32
-
33
7
  /* Compute the elimination tree for a quasidefinite matrix
34
8
  in compressed sparse column form.
35
9
  */
@@ -41,7 +15,7 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
41
15
  QDLDL_int* Lnz,
42
16
  QDLDL_int* etree){
43
17
 
44
- QDLDL_int sumLnz = 0;
18
+ QDLDL_int sumLnz;
45
19
  QDLDL_int i,j,p;
46
20
 
47
21
 
@@ -76,8 +50,19 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
76
50
  }
77
51
 
78
52
  //compute the total nonzeros in L. This much
79
- //space is required to store Li and Lx
80
- for(i = 0; i < n; i++){sumLnz += Lnz[i];}
53
+ //space is required to store Li and Lx. Return
54
+ //error code -2 if the nonzero count will overflow
55
+ //its unteger type.
56
+ sumLnz = 0;
57
+ for(i = 0; i < n; i++){
58
+ if(sumLnz > QDLDL_INT_MAX - Lnz[i]){
59
+ sumLnz = -2;
60
+ break;
61
+ }
62
+ else{
63
+ sumLnz += Lnz[i];
64
+ }
65
+ }
81
66
 
82
67
  return sumLnz;
83
68
  }
@@ -139,10 +124,6 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
139
124
  //Start from 1 here. The upper LH corner is trivially 0
140
125
  //in L b/c we are only computing the subdiagonal elements
141
126
  for(k = 1; k < n; k++){
142
- if(scs_is_interrupted()) {
143
- scs_printf("interrupt detected in factorization\n");
144
- return -1;
145
- }
146
127
 
147
128
  //NB : For each k, we compute a solution to
148
129
  //y = L(0:(k-1),0:k-1))\b, where b is the kth
@@ -258,11 +239,12 @@ void QDLDL_Lsolve(const QDLDL_int n,
258
239
  const QDLDL_float* Lx,
259
240
  QDLDL_float* x){
260
241
 
261
- QDLDL_int i,j;
242
+ QDLDL_int i,j;
262
243
  for(i = 0; i < n; i++){
263
- for(j = Lp[i]; j < Lp[i+1]; j++){
264
- x[Li[j]] -= Lx[j]*x[i];
265
- }
244
+ QDLDL_float val = x[i];
245
+ for(j = Lp[i]; j < Lp[i+1]; j++){
246
+ x[Li[j]] -= Lx[j]*val;
247
+ }
266
248
  }
267
249
  }
268
250
 
@@ -273,11 +255,13 @@ void QDLDL_Ltsolve(const QDLDL_int n,
273
255
  const QDLDL_float* Lx,
274
256
  QDLDL_float* x){
275
257
 
276
- QDLDL_int i,j;
258
+ QDLDL_int i,j;
277
259
  for(i = n-1; i>=0; i--){
278
- for(j = Lp[i]; j < Lp[i+1]; j++){
279
- x[i] -= Lx[j]*x[Li[j]];
280
- }
260
+ QDLDL_float val = x[i];
261
+ for(j = Lp[i]; j < Lp[i+1]; j++){
262
+ val -= Lx[j]*x[Li[j]];
263
+ }
264
+ x[i] = val;
281
265
  }
282
266
  }
283
267
 
@@ -289,10 +273,9 @@ void QDLDL_solve(const QDLDL_int n,
289
273
  const QDLDL_float* Dinv,
290
274
  QDLDL_float* x){
291
275
 
292
- QDLDL_int i;
293
-
294
- QDLDL_Lsolve(n,Lp,Li,Lx,x);
295
- for(i = 0; i < n; i++) x[i] *= Dinv[i];
296
- QDLDL_Ltsolve(n,Lp,Li,Lx,x);
276
+ QDLDL_int i;
297
277
 
278
+ QDLDL_Lsolve(n,Lp,Li,Lx,x);
279
+ for(i = 0; i < n; i++) x[i] *= Dinv[i];
280
+ QDLDL_Ltsolve(n,Lp,Li,Lx,x);
298
281
  }
@@ -32,19 +32,17 @@ extern "C" {
32
32
  * this function will *not* return an error, as it may still be possible to factor
33
33
  * such a matrix in LDL form. No promises are made in this case though...
34
34
  *
35
- * @param n number of columns in CSC matrix A (assumed square)
35
+ * @param n number of columns in CSC matrix A (assumed square)
36
36
  * @param Ap column pointers (size n+1) for columns of A
37
37
  * @param Ai row indices of A. Has Ap[n] elements
38
38
  * @param work work vector (size n) (no meaning on return)
39
39
  * @param Lnz count of nonzeros in each column of L (size n) below diagonal
40
40
  * @param etree elimination tree (size n)
41
- * @return total sum of Lnz (i.e. total nonzeros in L below diagonal). Returns
42
- * -1 if the input does not have triu structure or has an empty
43
- * column.
44
- *
41
+ * @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
42
+ * Returns -1 if the input is not triu or has an empty column.
43
+ * Returns -2 if the return value overflows QDLDL_int.
45
44
  *
46
45
  */
47
-
48
46
  QDLDL_int QDLDL_etree(const QDLDL_int n,
49
47
  const QDLDL_int* Ap,
50
48
  const QDLDL_int* Ai,
@@ -52,6 +50,7 @@ extern "C" {
52
50
  QDLDL_int* Lnz,
53
51
  QDLDL_int* etree);
54
52
 
53
+
55
54
  /**
56
55
  * Compute an LDL decomposition for a quasidefinite matrix
57
56
  * in compressed sparse column form, where the input matrix is
@@ -61,21 +60,22 @@ extern "C" {
61
60
  * Returns factors L, D and Dinv = 1./D.
62
61
  *
63
62
  * Does not use MALLOC. It is assumed that L will be a compressed
64
- * sparse column matrix with data (Ln,Lp,Li) with sufficient space
63
+ * sparse column matrix with data (n,Lp,Li,Lx) with sufficient space
65
64
  * allocated, with a number of nonzeros equal to the count given
66
- * as a return value by osqp_ldl_etree
67
- *
68
- * @param n number of columns in L and A (both square)
69
- * @param Ap column pointers (size n+1) for columns of A
70
- * @param Ai row indices of A. Has Ap[n] elements
71
- * @param Ln number of columns in CSC matrix L
72
- * @param Lp column pointers (size Ln+1) for columns of L
73
- * @param Li row indices of L. Has Lp[Ln] elements
65
+ * as a return value by QDLDL_etree
66
+ *
67
+ * @param n number of columns in L and A (both square)
68
+ * @param Ap column pointers (size n+1) for columns of A (not modified)
69
+ * @param Ai row indices of A. Has Ap[n] elements (not modified)
70
+ * @param Ax data of A. Has Ap[n] elements (not modified)
71
+ * @param Lp column pointers (size n+1) for columns of L
72
+ * @param Li row indices of L. Has Lp[n] elements
73
+ * @param Lx data of L. Has Lp[n] elements
74
74
  * @param D vectorized factor D. Length is n
75
75
  * @param Dinv reciprocal of D. Length is n
76
76
  * @param Lnz count of nonzeros in each column of L below diagonal,
77
- * as given by osqp_ldl_etree (not modified)
78
- * @param etree elimination tree as as given by osqp_ldl_etree (not modified)
77
+ * as given by QDLDL_etree (not modified)
78
+ * @param etree elimination tree as given by QDLDL_etree (not modified)
79
79
  * @param bwork working array of bools. Length is n
80
80
  * @param iwork working array of integers. Length is 3*n
81
81
  * @param fwork working array of floats. Length is n
@@ -85,8 +85,6 @@ extern "C" {
85
85
  * or otherwise LDL factorisable)
86
86
  *
87
87
  */
88
-
89
-
90
88
  QDLDL_int QDLDL_factor(const QDLDL_int n,
91
89
  const QDLDL_int* Ap,
92
90
  const QDLDL_int* Ai,
@@ -107,16 +105,15 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
107
105
  * Solves LDL'x = b
108
106
  *
109
107
  * It is assumed that L will be a compressed
110
- * sparse column matrix with data (Ln,Lp,Li).
108
+ * sparse column matrix with data (n,Lp,Li,Lx).
111
109
  *
112
- * @param n number of columns in L (both square)
113
- * @param Ln number of columns in CSC matrix L
114
- * @param Lp column pointers (size Ln+1) for columns of L
115
- * @param Li row indices of L. Has Lp[Ln] elements
110
+ * @param n number of columns in L
111
+ * @param Lp column pointers (size n+1) for columns of L
112
+ * @param Li row indices of L. Has Lp[n] elements
113
+ * @param Lx data of L. Has Lp[n] elements
116
114
  * @param Dinv reciprocal of D. Length is n
117
115
  * @param x initialized to b. Equal to x on return
118
116
  *
119
- *
120
117
  */
121
118
  void QDLDL_solve(const QDLDL_int n,
122
119
  const QDLDL_int* Lp,
@@ -130,40 +127,35 @@ void QDLDL_solve(const QDLDL_int n,
130
127
  * Solves (L+I)x = b
131
128
  *
132
129
  * It is assumed that L will be a compressed
133
- * sparse column matrix with data (Ln,Lp,Li).
130
+ * sparse column matrix with data (n,Lp,Li,Lx).
134
131
  *
135
- * @param n number of columns in L (both square)
136
- * @param Ln number of columns in CSC matrix L
137
- * @param Lp column pointers (size Ln+1) for columns of L
138
- * @param Li row indices of L. Has Lp[Ln] elements
139
- * @param Dinv reciprocal of D. Length is n
132
+ * @param n number of columns in L
133
+ * @param Lp column pointers (size n+1) for columns of L
134
+ * @param Li row indices of L. Has Lp[n] elements
135
+ * @param Lx data of L. Has Lp[n] elements
140
136
  * @param x initialized to b. Equal to x on return
141
137
  *
142
- *
143
138
  */
144
-
145
139
  void QDLDL_Lsolve(const QDLDL_int n,
146
140
  const QDLDL_int* Lp,
147
141
  const QDLDL_int* Li,
148
142
  const QDLDL_float* Lx,
149
143
  QDLDL_float* x);
150
144
 
145
+
151
146
  /**
152
147
  * Solves (L+I)'x = b
153
148
  *
154
149
  * It is assumed that L will be a compressed
155
- * sparse column matrix with data (Ln,Lp,Li).
150
+ * sparse column matrix with data (n,Lp,Li,Lx).
156
151
  *
157
- * @param n number of columns in L (both square)
158
- * @param Ln number of columns in CSC matrix L
159
- * @param Lp column pointers (size Ln+1) for columns of L
160
- * @param Li row indices of L. Has Lp[Ln] elements
161
- * @param Dinv reciprocal of D. Length is n
152
+ * @param n number of columns in L
153
+ * @param Lp column pointers (size n+1) for columns of L
154
+ * @param Li row indices of L. Has Lp[n] elements
155
+ * @param Lx data of L. Has Lp[n] elements
162
156
  * @param x initialized to b. Equal to x on return
163
157
  *
164
- *
165
158
  */
166
-
167
159
  void QDLDL_Ltsolve(const QDLDL_int n,
168
160
  const QDLDL_int* Lp,
169
161
  const QDLDL_int* Li,
@@ -1,18 +1,26 @@
1
1
  #ifndef QDLDL_TYPES_H
2
2
  # define QDLDL_TYPES_H
3
3
 
4
- #include "glbopts.h"
5
-
6
4
  # ifdef __cplusplus
7
5
  extern "C" {
8
6
  # endif /* ifdef __cplusplus */
9
7
 
10
- // QDLDL integer and float types
8
+ #include "glbopts.h"
9
+ #include <limits.h> //for INT_MAX / LLONG_MAX, used to define QDLDL_INT_MAX
10
+
11
+ /* QDLDL integer and float types */
11
12
 
12
13
  #define QDLDL_int scs_int
13
14
  #define QDLDL_float scs_float
14
15
  #define QDLDL_bool scs_int
15
16
 
17
+ /* Maximum value of the signed type QDLDL_int */
18
+ #ifdef DLONG
19
+ #define QDLDL_INT_MAX LLONG_MAX
20
+ #else
21
+ #define QDLDL_INT_MAX INT_MAX
22
+ #endif
23
+
16
24
  # ifdef __cplusplus
17
25
  }
18
26
  # endif /* ifdef __cplusplus */
@@ -1,41 +1,78 @@
1
1
  #include "gpu.h"
2
2
 
3
- void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
4
- scs_float *y, cusparseHandle_t cusparse_handle) {
3
+ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
4
+ const cusparseDnVecDescr_t x,
5
+ cusparseDnVecDescr_t y,
6
+ cusparseHandle_t cusparse_handle,
7
+ size_t *buffer_size, void **buffer) {
5
8
  /* y += A'*x
6
9
  x and y MUST be on GPU already; *buffer / *buffer_size describe the caller-owned SpMV workspace, which is grown here on demand
7
10
  */
8
11
  const scs_float onef = 1.0;
9
- CUSPARSE(csrmv)
10
- (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, Ag->n, Ag->m, Ag->Annz,
11
- &onef, Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
12
+ size_t new_buffer_size = 0; /* workspace size required by this SpMV call */
13
+
14
+ CUSPARSE_GEN(SpMV_bufferSize)
15
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
16
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
17
+
18
+ if (new_buffer_size > *buffer_size) { /* current workspace is too small: reallocate */
19
+ if (*buffer != SCS_NULL) {
20
+ cudaFree(*buffer);
21
+ }
22
+ cudaMalloc(buffer, new_buffer_size); /* allocate the size just queried, not the stale *buffer_size */
23
+ *buffer_size = new_buffer_size;
24
+ }
25
+
26
+ CUSPARSE_GEN(SpMV)
27
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer); /* pass the device workspace itself, not the address of the pointer */
12
29
  }
13
30
 
14
- void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
15
- scs_float *y, cusparseHandle_t cusparse_handle) {
31
+ /* this is slow, use trans routine if possible */
32
+ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
33
+ cusparseDnVecDescr_t y,
34
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
35
+ void **buffer) {
16
36
  /* y += A*x
17
37
  x and y MUST be on GPU already; *buffer / *buffer_size describe the caller-owned SpMV workspace, which is grown here on demand
18
38
  */
19
39
  const scs_float onef = 1.0;
40
+ size_t new_buffer_size = 0; /* workspace size required by this SpMV call */
41
+
20
42
  /* The A matrix idx pointers must be ORDERED */
21
- CUSPARSE(csrmv)
22
- (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, Ag->n, Ag->m, Ag->Annz, &onef,
23
- Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
43
+ CUSPARSE_GEN(SpMV_bufferSize)
44
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
45
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
46
+
47
+ if (new_buffer_size > *buffer_size) { /* current workspace is too small: reallocate */
48
+ if (*buffer != SCS_NULL) {
49
+ cudaFree(*buffer);
50
+ }
51
+ cudaMalloc(buffer, new_buffer_size); /* allocate the size just queried, not the stale *buffer_size */
52
+ *buffer_size = new_buffer_size;
53
+ }
54
+
55
+ CUSPARSE_GEN(SpMV)
56
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer); /* pass the device workspace itself, not the address of the pointer */
58
+ }
59
+
60
+ /* This assumes that P has been made full (ie not triangular) and uses the
61
+ * fact that the GPU is faster for general sparse matrices than for symmetric
62
+ */
63
+ /* y += P*x
64
+ x and y MUST be on GPU already; buffer_size / buffer are forwarded unchanged as the SpMV workspace
65
+ */
66
+ void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
67
+ cusparseDnVecDescr_t y,
68
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
69
+ void **buffer) {
70
+ SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer); /* NOTE(review): reuses the A'*x routine for P*x, which presumably relies on the full P being symmetric - confirm */
24
71
  }
25
72
 
26
73
  void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
27
74
  cudaFree(A->x);
28
75
  cudaFree(A->i);
29
76
  cudaFree(A->p);
30
- cusparseDestroyMatDescr(A->descr);
31
- }
32
-
33
- void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
34
- ScsScaling *scal) {
35
- SCS(_normalize_a)(A, stgs, k, scal);
36
- }
37
-
38
- void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs,
39
- const ScsScaling *scal) {
40
- SCS(_un_normalize_a)(A, stgs, scal);
77
+ cusparseDestroySpMat(A->descr);
41
78
  }
@@ -1,57 +1,82 @@
1
- #ifndef SCSGPU_H_GUARD
2
- #define SCSGPU_H_GUARD
1
+ #ifndef SCS_GPU_H_GUARD
2
+ #define SCS_GPU_H_GUARD
3
3
 
4
4
  #ifdef __cplusplus
5
5
  extern "C" {
6
6
  #endif
7
7
 
8
- #include <cublas_v2.h>
8
+ /* TODO: Do we need this?
9
+
9
10
  #include <cuda.h>
11
+
12
+ */
13
+
14
+ #include <cublas_v2.h>
10
15
  #include <cuda_runtime_api.h>
11
16
  #include <cusparse.h>
12
17
 
13
- #include "amatrix.h"
14
18
  #include "glbopts.h"
15
19
  #include "linalg.h"
16
20
  #include "linsys.h"
17
21
  #include "scs.h"
22
+ #include "scs_matrix.h"
18
23
  #include "util.h"
19
24
 
20
- #define CUDA_CHECK_ERR \
21
- do { \
22
- cudaError_t err = cudaGetLastError(); \
23
- if (err != cudaSuccess) { \
24
- printf("%s:%d:%s\n ERROR_CUDA: %s\n", __FILE__, __LINE__, __func__, \
25
- cudaGetErrorString(err)); \
26
- } \
25
+ #define CUDA_CHECK_ERR \
26
+ do { \
27
+ cudaDeviceSynchronize(); \
28
+ cudaError_t err = cudaGetLastError(); \
29
+ if (err != cudaSuccess) { \
30
+ scs_printf("%s:%d:%s\n ERROR_CUDA (#): %s\n", __FILE__, __LINE__, \
31
+ __func__, cudaGetErrorString(err)); \
32
+ } \
27
33
  } while (0)
28
34
 
29
- #ifndef EXTRA_VERBOSE
35
+ #if VERBOSITY == 0
30
36
  #ifndef SFLOAT
31
37
  #define CUBLAS(x) cublasD##x
32
- #define CUSPARSE(x) cusparseD##x
38
+ #define CUBLASI(x) cublasId##x
33
39
  #else
34
40
  #define CUBLAS(x) cublasS##x
35
- #define CUSPARSE(x) cusparseS##x
41
+ #define CUBLASI(x) cublasIs##x
36
42
  #endif
43
+ #define CUSPARSE_GEN(x) cusparse##x
37
44
  #else
38
45
  #ifndef SFLOAT
39
- #define CUBLAS(x) \
40
- CUDA_CHECK_ERR; \
46
+ #define CUBLAS(x) \
47
+ CUDA_CHECK_ERR; \
41
48
  cublasD##x
42
- #define CUSPARSE(x) \
43
- CUDA_CHECK_ERR; \
44
- cusparseD##x
49
+ #define CUBLASI(x) \
50
+ CUDA_CHECK_ERR; \
51
+ cublasId##x
45
52
  #else
46
- #define CUBLAS(x) \
47
- CUDA_CHECK_ERR; \
53
+ #define CUBLAS(x) \
54
+ CUDA_CHECK_ERR; \
48
55
  cublasS##x
49
- #define CUSPARSE(x) \
50
- CUDA_CHECK_ERR; \
51
- cusparseS##x
56
+ #define CUBLASI(x) \
57
+ CUDA_CHECK_ERR; \
58
+ cublasIs##x
52
59
  #endif
60
+ #define CUSPARSE_GEN(x) \
61
+ CUDA_CHECK_ERR; \
62
+ cusparse##x
53
63
  #endif
54
64
 
65
+ #ifndef SFLOAT
66
+ #define SCS_CUDA_FLOAT CUDA_R_64F
67
+ #else
68
+ #define SCS_CUDA_FLOAT CUDA_R_32F
69
+ #endif
70
+
71
+ #ifndef DLONG
72
+ #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_32I
73
+ #else
74
+ #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
75
+ #endif
76
+
77
+ #define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
78
+ #define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
79
+
55
80
  /*
56
81
  CUDA matrix routines only for CSR, not CSC matrices:
57
82
  CSC CSR GPU Mult
@@ -59,23 +84,33 @@ extern "C" {
59
84
  A'(n x m) A (m x n) Agt accum_by_a_gpu
60
85
  */
61
86
 
62
- /* this struct defines the data matrix A on GPU */
63
- typedef struct SCS_GPU_A_DATA_MATRIX {
87
+ /* this struct defines the data matrix on GPU */
88
+ typedef struct SCS_GPU_DATA_MATRIX {
64
89
  /* A is supplied in column compressed format */
65
- scs_float *x; /* A values, size: NNZ A */
66
- scs_int *i; /* A row index, size: NNZ A */
67
- scs_int *p; /* A column pointer, size: n+1 */
90
+ scs_float *x; /* values, size: NNZ */
91
+ scs_int *i; /* row index, size: NNZ */
92
+ scs_int *p; /* column pointer, size: n+1 */
68
93
  scs_int m, n; /* m rows, n cols */
69
- scs_int Annz; /* num non-zeros in A matrix */
94
+ scs_int nnz; /* num non-zeros in matrix */
70
95
  /* CUDA */
71
- cusparseMatDescr_t descr;
96
+ cusparseSpMatDescr_t descr;
72
97
  } ScsGpuMatrix;
73
98
 
74
- void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *A, const scs_float *x,
75
- scs_float *y, cusparseHandle_t cusparse_handle);
99
+ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *A,
100
+ const cusparseDnVecDescr_t x,
101
+ cusparseDnVecDescr_t y,
102
+ cusparseHandle_t cusparse_handle,
103
+ size_t *buffer_size, void **buffer);
104
+
105
+ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
106
+ cusparseDnVecDescr_t y,
107
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
108
+ void **buffer);
76
109
 
77
- void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *A, const scs_float *x,
78
- scs_float *y, cusparseHandle_t cusparse_handle);
110
+ void SCS(accum_by_p_gpu)(const ScsGpuMatrix *P, const cusparseDnVecDescr_t x,
111
+ cusparseDnVecDescr_t y,
112
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
113
+ void **buffer);
79
114
 
80
115
  void SCS(free_gpu_matrix)(ScsGpuMatrix *A);
81
116