scs 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE.txt +18 -18
  4. data/README.md +28 -9
  5. data/ext/scs/extconf.rb +29 -0
  6. data/lib/scs/ffi.rb +30 -13
  7. data/lib/scs/solver.rb +32 -14
  8. data/lib/scs/version.rb +1 -1
  9. data/vendor/scs/CITATION.cff +39 -0
  10. data/vendor/scs/CMakeLists.txt +272 -0
  11. data/vendor/scs/Makefile +24 -15
  12. data/vendor/scs/README.md +8 -216
  13. data/vendor/scs/include/aa.h +67 -23
  14. data/vendor/scs/include/cones.h +17 -17
  15. data/vendor/scs/include/glbopts.h +98 -32
  16. data/vendor/scs/include/linalg.h +2 -4
  17. data/vendor/scs/include/linsys.h +58 -44
  18. data/vendor/scs/include/normalize.h +3 -3
  19. data/vendor/scs/include/rw.h +8 -2
  20. data/vendor/scs/include/scs.h +293 -133
  21. data/vendor/scs/include/util.h +3 -15
  22. data/vendor/scs/linsys/cpu/direct/private.c +220 -224
  23. data/vendor/scs/linsys/cpu/direct/private.h +13 -7
  24. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  25. data/vendor/scs/linsys/cpu/indirect/private.c +177 -110
  26. data/vendor/scs/linsys/cpu/indirect/private.h +8 -4
  27. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  28. data/vendor/scs/linsys/csparse.c +87 -0
  29. data/vendor/scs/linsys/csparse.h +34 -0
  30. data/vendor/scs/linsys/csparse.o +0 -0
  31. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +1 -1
  32. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  33. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  34. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  35. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  36. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  37. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  38. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  39. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  40. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  41. data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
  42. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  43. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  44. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  45. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  46. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  47. data/vendor/scs/linsys/external/qdldl/changes +2 -0
  48. data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
  49. data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
  50. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  51. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
  52. data/vendor/scs/linsys/gpu/gpu.c +58 -21
  53. data/vendor/scs/linsys/gpu/gpu.h +66 -28
  54. data/vendor/scs/linsys/gpu/indirect/private.c +368 -154
  55. data/vendor/scs/linsys/gpu/indirect/private.h +26 -12
  56. data/vendor/scs/linsys/scs_matrix.c +498 -0
  57. data/vendor/scs/linsys/scs_matrix.h +70 -0
  58. data/vendor/scs/linsys/scs_matrix.o +0 -0
  59. data/vendor/scs/scs.mk +13 -9
  60. data/vendor/scs/src/aa.c +384 -109
  61. data/vendor/scs/src/aa.o +0 -0
  62. data/vendor/scs/src/cones.c +440 -353
  63. data/vendor/scs/src/cones.o +0 -0
  64. data/vendor/scs/src/ctrlc.c +15 -5
  65. data/vendor/scs/src/ctrlc.o +0 -0
  66. data/vendor/scs/src/linalg.c +84 -28
  67. data/vendor/scs/src/linalg.o +0 -0
  68. data/vendor/scs/src/normalize.c +22 -64
  69. data/vendor/scs/src/normalize.o +0 -0
  70. data/vendor/scs/src/rw.c +161 -22
  71. data/vendor/scs/src/rw.o +0 -0
  72. data/vendor/scs/src/scs.c +768 -561
  73. data/vendor/scs/src/scs.o +0 -0
  74. data/vendor/scs/src/scs_indir.o +0 -0
  75. data/vendor/scs/src/scs_version.c +9 -3
  76. data/vendor/scs/src/scs_version.o +0 -0
  77. data/vendor/scs/src/util.c +37 -106
  78. data/vendor/scs/src/util.o +0 -0
  79. data/vendor/scs/test/minunit.h +17 -8
  80. data/vendor/scs/test/problem_utils.h +176 -14
  81. data/vendor/scs/test/problems/degenerate.h +130 -0
  82. data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
  83. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
  84. data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
  85. data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
  86. data/vendor/scs/test/problems/random_prob +0 -0
  87. data/vendor/scs/test/problems/random_prob.h +45 -0
  88. data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
  89. data/vendor/scs/test/problems/small_lp.h +13 -14
  90. data/vendor/scs/test/problems/test_fails.h +43 -0
  91. data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
  92. data/vendor/scs/test/random_socp_prob.c +54 -53
  93. data/vendor/scs/test/rng.h +109 -0
  94. data/vendor/scs/test/run_from_file.c +19 -10
  95. data/vendor/scs/test/run_tests.c +27 -3
  96. metadata +30 -73
  97. data/ext/scs/Rakefile +0 -11
  98. data/vendor/scs/linsys/amatrix.c +0 -305
  99. data/vendor/scs/linsys/amatrix.h +0 -36
  100. data/vendor/scs/linsys/amatrix.o +0 -0
  101. data/vendor/scs/test/data/small_random_socp +0 -0
  102. data/vendor/scs/test/problems/small_random_socp.h +0 -33
  103. data/vendor/scs/test/run_tests +0 -2
@@ -1,35 +1,9 @@
1
1
  #include "qdldl.h"
2
- #include "ctrlc.h"
3
2
 
4
3
  #define QDLDL_UNKNOWN (-1)
5
4
  #define QDLDL_USED (1)
6
5
  #define QDLDL_UNUSED (0)
7
6
 
8
- // //DEBUG
9
- // #include <stdio.h>
10
- // void qdprint_arrayi(const QDLDL_int* data, QDLDL_int n,char* varName){
11
-
12
- // QDLDL_int i;
13
- // printf("%s = [",varName);
14
- // for(i=0; i< n; i++){
15
- // printf("%lli,",data[i]);
16
- // }
17
- // printf("]\n");
18
-
19
- // }
20
-
21
- // void qdprint_arrayf(const QDLDL_float* data, QDLDL_int n, char* varName){
22
-
23
- // QDLDL_int i;
24
- // printf("%s = [",varName);
25
- // for(i=0; i< n; i++){
26
- // printf("%.3g,",data[i]);
27
- // }
28
- // printf("]\n");
29
-
30
- // }
31
- // // END DEBUG
32
-
33
7
  /* Compute the elimination tree for a quasidefinite matrix
34
8
  in compressed sparse column form.
35
9
  */
@@ -41,7 +15,7 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
41
15
  QDLDL_int* Lnz,
42
16
  QDLDL_int* etree){
43
17
 
44
- QDLDL_int sumLnz = 0;
18
+ QDLDL_int sumLnz;
45
19
  QDLDL_int i,j,p;
46
20
 
47
21
 
@@ -76,8 +50,19 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
76
50
  }
77
51
 
78
52
  //compute the total nonzeros in L. This much
79
- //space is required to store Li and Lx
80
- for(i = 0; i < n; i++){sumLnz += Lnz[i];}
53
+ //space is required to store Li and Lx. Return
54
+ //error code -2 if the nonzero count will overflow
55
+ //its integer type.
56
+ sumLnz = 0;
57
+ for(i = 0; i < n; i++){
58
+ if(sumLnz > QDLDL_INT_MAX - Lnz[i]){
59
+ sumLnz = -2;
60
+ break;
61
+ }
62
+ else{
63
+ sumLnz += Lnz[i];
64
+ }
65
+ }
81
66
 
82
67
  return sumLnz;
83
68
  }
@@ -139,10 +124,6 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
139
124
  //Start from 1 here. The upper LH corner is trivially 0
140
125
  //in L b/c we are only computing the subdiagonal elements
141
126
  for(k = 1; k < n; k++){
142
- if(scs_is_interrupted()) {
143
- scs_printf("interrupt detected in factorization\n");
144
- return -1;
145
- }
146
127
 
147
128
  //NB : For each k, we compute a solution to
148
129
  //y = L(0:(k-1),0:k-1))\b, where b is the kth
@@ -258,11 +239,12 @@ void QDLDL_Lsolve(const QDLDL_int n,
258
239
  const QDLDL_float* Lx,
259
240
  QDLDL_float* x){
260
241
 
261
- QDLDL_int i,j;
242
+ QDLDL_int i,j;
262
243
  for(i = 0; i < n; i++){
263
- for(j = Lp[i]; j < Lp[i+1]; j++){
264
- x[Li[j]] -= Lx[j]*x[i];
265
- }
244
+ QDLDL_float val = x[i];
245
+ for(j = Lp[i]; j < Lp[i+1]; j++){
246
+ x[Li[j]] -= Lx[j]*val;
247
+ }
266
248
  }
267
249
  }
268
250
 
@@ -273,11 +255,13 @@ void QDLDL_Ltsolve(const QDLDL_int n,
273
255
  const QDLDL_float* Lx,
274
256
  QDLDL_float* x){
275
257
 
276
- QDLDL_int i,j;
258
+ QDLDL_int i,j;
277
259
  for(i = n-1; i>=0; i--){
278
- for(j = Lp[i]; j < Lp[i+1]; j++){
279
- x[i] -= Lx[j]*x[Li[j]];
280
- }
260
+ QDLDL_float val = x[i];
261
+ for(j = Lp[i]; j < Lp[i+1]; j++){
262
+ val -= Lx[j]*x[Li[j]];
263
+ }
264
+ x[i] = val;
281
265
  }
282
266
  }
283
267
 
@@ -289,10 +273,9 @@ void QDLDL_solve(const QDLDL_int n,
289
273
  const QDLDL_float* Dinv,
290
274
  QDLDL_float* x){
291
275
 
292
- QDLDL_int i;
293
-
294
- QDLDL_Lsolve(n,Lp,Li,Lx,x);
295
- for(i = 0; i < n; i++) x[i] *= Dinv[i];
296
- QDLDL_Ltsolve(n,Lp,Li,Lx,x);
276
+ QDLDL_int i;
297
277
 
278
+ QDLDL_Lsolve(n,Lp,Li,Lx,x);
279
+ for(i = 0; i < n; i++) x[i] *= Dinv[i];
280
+ QDLDL_Ltsolve(n,Lp,Li,Lx,x);
298
281
  }
@@ -32,19 +32,17 @@ extern "C" {
32
32
  * this function will *not* return an error, as it may still be possible to factor
33
33
  * such a matrix in LDL form. No promises are made in this case though...
34
34
  *
35
- * @param n number of columns in CSC matrix A (assumed square)
35
+ * @param n number of columns in CSC matrix A (assumed square)
36
36
  * @param Ap column pointers (size n+1) for columns of A
37
37
  * @param Ai row indices of A. Has Ap[n] elements
38
38
  * @param work work vector (size n) (no meaning on return)
39
39
  * @param Lnz count of nonzeros in each column of L (size n) below diagonal
40
40
  * @param etree elimination tree (size n)
41
- * @return total sum of Lnz (i.e. total nonzeros in L below diagonal). Returns
42
- * -1 if the input does not have triu structure or has an empty
43
- * column.
44
- *
41
+ * @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
42
+ * Returns -1 if the input is not triu or has an empty column.
43
+ * Returns -2 if the return value overflows QDLDL_int.
45
44
  *
46
45
  */
47
-
48
46
  QDLDL_int QDLDL_etree(const QDLDL_int n,
49
47
  const QDLDL_int* Ap,
50
48
  const QDLDL_int* Ai,
@@ -52,6 +50,7 @@ extern "C" {
52
50
  QDLDL_int* Lnz,
53
51
  QDLDL_int* etree);
54
52
 
53
+
55
54
  /**
56
55
  * Compute an LDL decomposition for a quasidefinite matrix
57
56
  * in compressed sparse column form, where the input matrix is
@@ -61,21 +60,22 @@ extern "C" {
61
60
  * Returns factors L, D and Dinv = 1./D.
62
61
  *
63
62
  * Does not use MALLOC. It is assumed that L will be a compressed
64
- * sparse column matrix with data (Ln,Lp,Li) with sufficient space
63
+ * sparse column matrix with data (n,Lp,Li,Lx) with sufficient space
65
64
  * allocated, with a number of nonzeros equal to the count given
66
- * as a return value by osqp_ldl_etree
67
- *
68
- * @param n number of columns in L and A (both square)
69
- * @param Ap column pointers (size n+1) for columns of A
70
- * @param Ai row indices of A. Has Ap[n] elements
71
- * @param Ln number of columns in CSC matrix L
72
- * @param Lp column pointers (size Ln+1) for columns of L
73
- * @param Li row indices of L. Has Lp[Ln] elements
65
+ * as a return value by QDLDL_etree
66
+ *
67
+ * @param n number of columns in L and A (both square)
68
+ * @param Ap column pointers (size n+1) for columns of A (not modified)
69
+ * @param Ai row indices of A. Has Ap[n] elements (not modified)
70
+ * @param Ax data of A. Has Ap[n] elements (not modified)
71
+ * @param Lp column pointers (size n+1) for columns of L
72
+ * @param Li row indices of L. Has Lp[n] elements
73
+ * @param Lx data of L. Has Lp[n] elements
74
74
  * @param D vectorized factor D. Length is n
75
75
  * @param Dinv reciprocal of D. Length is n
76
76
  * @param Lnz count of nonzeros in each column of L below diagonal,
77
- * as given by osqp_ldl_etree (not modified)
78
- * @param etree elimination tree as as given by osqp_ldl_etree (not modified)
77
+ * as given by QDLDL_etree (not modified)
78
+ * @param etree elimination tree as given by QDLDL_etree (not modified)
79
79
  * @param bwork working array of bools. Length is n
80
80
  * @param iwork working array of integers. Length is 3*n
81
81
  * @param fwork working array of floats. Length is n
@@ -85,8 +85,6 @@ extern "C" {
85
85
  * or otherwise LDL factorisable)
86
86
  *
87
87
  */
88
-
89
-
90
88
  QDLDL_int QDLDL_factor(const QDLDL_int n,
91
89
  const QDLDL_int* Ap,
92
90
  const QDLDL_int* Ai,
@@ -107,16 +105,15 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
107
105
  * Solves LDL'x = b
108
106
  *
109
107
  * It is assumed that L will be a compressed
110
- * sparse column matrix with data (Ln,Lp,Li).
108
+ * sparse column matrix with data (n,Lp,Li,Lx).
111
109
  *
112
- * @param n number of columns in L (both square)
113
- * @param Ln number of columns in CSC matrix L
114
- * @param Lp column pointers (size Ln+1) for columns of L
115
- * @param Li row indices of L. Has Lp[Ln] elements
110
+ * @param n number of columns in L
111
+ * @param Lp column pointers (size n+1) for columns of L
112
+ * @param Li row indices of L. Has Lp[n] elements
113
+ * @param Lx data of L. Has Lp[n] elements
116
114
  * @param Dinv reciprocal of D. Length is n
117
115
  * @param x initialized to b. Equal to x on return
118
116
  *
119
- *
120
117
  */
121
118
  void QDLDL_solve(const QDLDL_int n,
122
119
  const QDLDL_int* Lp,
@@ -130,40 +127,35 @@ void QDLDL_solve(const QDLDL_int n,
130
127
  * Solves (L+I)x = b
131
128
  *
132
129
  * It is assumed that L will be a compressed
133
- * sparse column matrix with data (Ln,Lp,Li).
130
+ * sparse column matrix with data (n,Lp,Li,Lx).
134
131
  *
135
- * @param n number of columns in L (both square)
136
- * @param Ln number of columns in CSC matrix L
137
- * @param Lp column pointers (size Ln+1) for columns of L
138
- * @param Li row indices of L. Has Lp[Ln] elements
139
- * @param Dinv reciprocal of D. Length is n
132
+ * @param n number of columns in L
133
+ * @param Lp column pointers (size n+1) for columns of L
134
+ * @param Li row indices of L. Has Lp[n] elements
135
+ * @param Lx data of L. Has Lp[n] elements
140
136
  * @param x initialized to b. Equal to x on return
141
137
  *
142
- *
143
138
  */
144
-
145
139
  void QDLDL_Lsolve(const QDLDL_int n,
146
140
  const QDLDL_int* Lp,
147
141
  const QDLDL_int* Li,
148
142
  const QDLDL_float* Lx,
149
143
  QDLDL_float* x);
150
144
 
145
+
151
146
  /**
152
147
  * Solves (L+I)'x = b
153
148
  *
154
149
  * It is assumed that L will be a compressed
155
- * sparse column matrix with data (Ln,Lp,Li).
150
+ * sparse column matrix with data (n,Lp,Li,Lx).
156
151
  *
157
- * @param n number of columns in L (both square)
158
- * @param Ln number of columns in CSC matrix L
159
- * @param Lp column pointers (size Ln+1) for columns of L
160
- * @param Li row indices of L. Has Lp[Ln] elements
161
- * @param Dinv reciprocal of D. Length is n
152
+ * @param n number of columns in L
153
+ * @param Lp column pointers (size n+1) for columns of L
154
+ * @param Li row indices of L. Has Lp[n] elements
155
+ * @param Lx data of L. Has Lp[n] elements
162
156
  * @param x initialized to b. Equal to x on return
163
157
  *
164
- *
165
158
  */
166
-
167
159
  void QDLDL_Ltsolve(const QDLDL_int n,
168
160
  const QDLDL_int* Lp,
169
161
  const QDLDL_int* Li,
@@ -1,18 +1,26 @@
1
1
  #ifndef QDLDL_TYPES_H
2
2
  # define QDLDL_TYPES_H
3
3
 
4
- #include "glbopts.h"
5
-
6
4
  # ifdef __cplusplus
7
5
  extern "C" {
8
6
  # endif /* ifdef __cplusplus */
9
7
 
10
- // QDLDL integer and float types
8
+ #include "glbopts.h"
9
+ #include <limits.h> //for INT_MAX / LLONG_MAX, used to define QDLDL_INT_MAX
10
+
11
+ /* QDLDL integer and float types */
11
12
 
12
13
  #define QDLDL_int scs_int
13
14
  #define QDLDL_float scs_float
14
15
  #define QDLDL_bool scs_int
15
16
 
17
+ /* Maximum value of the signed type QDLDL_int */
18
+ #ifdef DLONG
19
+ #define QDLDL_INT_MAX LLONG_MAX
20
+ #else
21
+ #define QDLDL_INT_MAX INT_MAX
22
+ #endif
23
+
16
24
  # ifdef __cplusplus
17
25
  }
18
26
  # endif /* ifdef __cplusplus */
@@ -1,41 +1,78 @@
1
1
  #include "gpu.h"
2
2
 
3
- void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
4
- scs_float *y, cusparseHandle_t cusparse_handle) {
3
+ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
4
+ const cusparseDnVecDescr_t x,
5
+ cusparseDnVecDescr_t y,
6
+ cusparseHandle_t cusparse_handle,
7
+ size_t *buffer_size, void **buffer) {
5
8
  /* y += A'*x
6
9
  x and y MUST be on GPU already
7
10
  */
8
11
  const scs_float onef = 1.0;
9
- CUSPARSE(csrmv)
10
- (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, Ag->n, Ag->m, Ag->Annz,
11
- &onef, Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
12
+ size_t new_buffer_size = 0;
13
+
14
+ CUSPARSE_GEN(SpMV_bufferSize)
15
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
16
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
17
+
18
+ if (new_buffer_size > *buffer_size) {
19
+ if (*buffer != SCS_NULL) {
20
+ cudaFree(*buffer);
21
+ }
22
+ cudaMalloc(buffer, *buffer_size);
23
+ *buffer_size = new_buffer_size;
24
+ }
25
+
26
+ CUSPARSE_GEN(SpMV)
27
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
12
29
  }
13
30
 
14
- void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
15
- scs_float *y, cusparseHandle_t cusparse_handle) {
31
+ /* this is slow, use trans routine if possible */
32
+ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
33
+ cusparseDnVecDescr_t y,
34
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
35
+ void **buffer) {
16
36
  /* y += A*x
17
37
  x and y MUST be on GPU already
18
38
  */
19
39
  const scs_float onef = 1.0;
40
+ size_t new_buffer_size = 0;
41
+
20
42
  /* The A matrix idx pointers must be ORDERED */
21
- CUSPARSE(csrmv)
22
- (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, Ag->n, Ag->m, Ag->Annz, &onef,
23
- Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
43
+ CUSPARSE_GEN(SpMV_bufferSize)
44
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
45
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
46
+
47
+ if (new_buffer_size > *buffer_size) {
48
+ if (*buffer != SCS_NULL) {
49
+ cudaFree(*buffer);
50
+ }
51
+ cudaMalloc(buffer, *buffer_size);
52
+ *buffer_size = new_buffer_size;
53
+ }
54
+
55
+ CUSPARSE_GEN(SpMV)
56
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
58
+ }
59
+
60
+ /* This assumes that P has been made full (ie not triangular) and uses the
61
+ * fact that the GPU is faster for general sparse matrices than for symmetric
62
+ */
63
+ /* y += P*x
64
+ x and y MUST be on GPU already
65
+ */
66
+ void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
67
+ cusparseDnVecDescr_t y,
68
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
69
+ void **buffer) {
70
+ SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer);
24
71
  }
25
72
 
26
73
  void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
27
74
  cudaFree(A->x);
28
75
  cudaFree(A->i);
29
76
  cudaFree(A->p);
30
- cusparseDestroyMatDescr(A->descr);
31
- }
32
-
33
- void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
34
- ScsScaling *scal) {
35
- SCS(_normalize_a)(A, stgs, k, scal);
36
- }
37
-
38
- void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs,
39
- const ScsScaling *scal) {
40
- SCS(_un_normalize_a)(A, stgs, scal);
77
+ cusparseDestroySpMat(A->descr);
41
78
  }
@@ -10,48 +10,76 @@ extern "C" {
10
10
  #include <cuda_runtime_api.h>
11
11
  #include <cusparse.h>
12
12
 
13
- #include "amatrix.h"
14
13
  #include "glbopts.h"
15
14
  #include "linalg.h"
16
15
  #include "linsys.h"
17
16
  #include "scs.h"
17
+ #include "scs_matrix.h"
18
18
  #include "util.h"
19
19
 
20
- #define CUDA_CHECK_ERR \
21
- do { \
22
- cudaError_t err = cudaGetLastError(); \
23
- if (err != cudaSuccess) { \
24
- printf("%s:%d:%s\n ERROR_CUDA: %s\n", __FILE__, __LINE__, __func__, \
25
- cudaGetErrorString(err)); \
26
- } \
20
+ #define CUDA_CHECK_ERR \
21
+ do { \
22
+ cudaDeviceSynchronize(); \
23
+ cudaError_t err = cudaGetLastError(); \
24
+ if (err != cudaSuccess) { \
25
+ scs_printf("%s:%d:%s\n ERROR_CUDA (#): %s\n", __FILE__, __LINE__, \
26
+ __func__, cudaGetErrorString(err)); \
27
+ } \
27
28
  } while (0)
28
29
 
29
- #ifndef EXTRA_VERBOSE
30
+ #if VERBOSITY == 0
30
31
  #ifndef SFLOAT
31
32
  #define CUBLAS(x) cublasD##x
33
+ #define CUBLASI(x) cublasId##x
32
34
  #define CUSPARSE(x) cusparseD##x
33
35
  #else
34
36
  #define CUBLAS(x) cublasS##x
37
+ #define CUBLASI(x) cublasIs##x
35
38
  #define CUSPARSE(x) cusparseS##x
36
39
  #endif
40
+ #define CUSPARSE_GEN(x) cusparse##x
37
41
  #else
38
42
  #ifndef SFLOAT
39
- #define CUBLAS(x) \
40
- CUDA_CHECK_ERR; \
43
+ #define CUBLAS(x) \
44
+ CUDA_CHECK_ERR; \
41
45
  cublasD##x
42
- #define CUSPARSE(x) \
43
- CUDA_CHECK_ERR; \
46
+ #define CUBLASI(x) \
47
+ CUDA_CHECK_ERR; \
48
+ cublasId##x
49
+ #define CUSPARSE(x) \
50
+ CUDA_CHECK_ERR; \
44
51
  cusparseD##x
45
52
  #else
46
- #define CUBLAS(x) \
47
- CUDA_CHECK_ERR; \
53
+ #define CUBLAS(x) \
54
+ CUDA_CHECK_ERR; \
48
55
  cublasS##x
49
- #define CUSPARSE(x) \
50
- CUDA_CHECK_ERR; \
56
+ #define CUBLASI(x) \
57
+ CUDA_CHECK_ERR; \
58
+ cublasIs##x
59
+ #define CUSPARSE(x) \
60
+ CUDA_CHECK_ERR; \
51
61
  cusparseS##x
52
62
  #endif
63
+ #define CUSPARSE_GEN(x) \
64
+ CUDA_CHECK_ERR; \
65
+ cusparse##x
53
66
  #endif
54
67
 
68
+ #ifndef SFLOAT
69
+ #define SCS_CUDA_FLOAT CUDA_R_64F
70
+ #else
71
+ #define SCS_CUDA_FLOAT CUDA_R_32F
72
+ #endif
73
+
74
+ #ifndef DLONG
75
+ #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_32I
76
+ #else
77
+ #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
78
+ #endif
79
+
80
+ #define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
81
+ #define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
82
+
55
83
  /*
56
84
  CUDA matrix routines only for CSR, not CSC matrices:
57
85
  CSC CSR GPU Mult
@@ -59,23 +87,33 @@ extern "C" {
59
87
  A'(n x m) A (m x n) Agt accum_by_a_gpu
60
88
  */
61
89
 
62
- /* this struct defines the data matrix A on GPU */
63
- typedef struct SCS_GPU_A_DATA_MATRIX {
90
+ /* this struct defines the data matrix on GPU */
91
+ typedef struct SCS_GPU_DATA_MATRIX {
64
92
  /* A is supplied in column compressed format */
65
- scs_float *x; /* A values, size: NNZ A */
66
- scs_int *i; /* A row index, size: NNZ A */
67
- scs_int *p; /* A column pointer, size: n+1 */
93
+ scs_float *x; /* values, size: NNZ */
94
+ scs_int *i; /* row index, size: NNZ */
95
+ scs_int *p; /* column pointer, size: n+1 */
68
96
  scs_int m, n; /* m rows, n cols */
69
- scs_int Annz; /* num non-zeros in A matrix */
97
+ scs_int nnz; /* num non-zeros in matrix */
70
98
  /* CUDA */
71
- cusparseMatDescr_t descr;
99
+ cusparseSpMatDescr_t descr;
72
100
  } ScsGpuMatrix;
73
101
 
74
- void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *A, const scs_float *x,
75
- scs_float *y, cusparseHandle_t cusparse_handle);
102
+ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *A,
103
+ const cusparseDnVecDescr_t x,
104
+ cusparseDnVecDescr_t y,
105
+ cusparseHandle_t cusparse_handle,
106
+ size_t *buffer_size, void **buffer);
107
+
108
+ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
109
+ cusparseDnVecDescr_t y,
110
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
111
+ void **buffer);
76
112
 
77
- void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *A, const scs_float *x,
78
- scs_float *y, cusparseHandle_t cusparse_handle);
113
+ void SCS(accum_by_p_gpu)(const ScsGpuMatrix *P, const cusparseDnVecDescr_t x,
114
+ cusparseDnVecDescr_t y,
115
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
116
+ void **buffer);
79
117
 
80
118
  void SCS(free_gpu_matrix)(ScsGpuMatrix *A);
81
119