scs 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +11 -6
  4. data/lib/scs/ffi.rb +30 -13
  5. data/lib/scs/solver.rb +32 -9
  6. data/lib/scs/version.rb +1 -1
  7. data/vendor/scs/CITATION.cff +39 -0
  8. data/vendor/scs/CMakeLists.txt +7 -8
  9. data/vendor/scs/Makefile +24 -15
  10. data/vendor/scs/README.md +5 -263
  11. data/vendor/scs/include/aa.h +67 -23
  12. data/vendor/scs/include/cones.h +17 -17
  13. data/vendor/scs/include/glbopts.h +98 -32
  14. data/vendor/scs/include/linalg.h +2 -4
  15. data/vendor/scs/include/linsys.h +58 -44
  16. data/vendor/scs/include/normalize.h +3 -3
  17. data/vendor/scs/include/rw.h +8 -2
  18. data/vendor/scs/include/scs.h +293 -133
  19. data/vendor/scs/include/util.h +3 -15
  20. data/vendor/scs/linsys/cpu/direct/private.c +220 -224
  21. data/vendor/scs/linsys/cpu/direct/private.h +13 -7
  22. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  23. data/vendor/scs/linsys/cpu/indirect/private.c +177 -110
  24. data/vendor/scs/linsys/cpu/indirect/private.h +8 -4
  25. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  26. data/vendor/scs/linsys/csparse.c +87 -0
  27. data/vendor/scs/linsys/csparse.h +34 -0
  28. data/vendor/scs/linsys/csparse.o +0 -0
  29. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +1 -1
  30. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  31. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  32. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  33. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  34. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  35. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  36. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  37. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  38. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  39. data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
  40. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  41. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  42. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  43. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  44. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  45. data/vendor/scs/linsys/external/qdldl/changes +2 -0
  46. data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
  47. data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
  48. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  49. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
  50. data/vendor/scs/linsys/gpu/gpu.c +31 -33
  51. data/vendor/scs/linsys/gpu/gpu.h +48 -31
  52. data/vendor/scs/linsys/gpu/indirect/private.c +338 -232
  53. data/vendor/scs/linsys/gpu/indirect/private.h +23 -14
  54. data/vendor/scs/linsys/scs_matrix.c +498 -0
  55. data/vendor/scs/linsys/scs_matrix.h +70 -0
  56. data/vendor/scs/linsys/scs_matrix.o +0 -0
  57. data/vendor/scs/scs.mk +13 -9
  58. data/vendor/scs/src/aa.c +384 -109
  59. data/vendor/scs/src/aa.o +0 -0
  60. data/vendor/scs/src/cones.c +440 -353
  61. data/vendor/scs/src/cones.o +0 -0
  62. data/vendor/scs/src/ctrlc.c +15 -5
  63. data/vendor/scs/src/ctrlc.o +0 -0
  64. data/vendor/scs/src/linalg.c +84 -28
  65. data/vendor/scs/src/linalg.o +0 -0
  66. data/vendor/scs/src/normalize.c +22 -64
  67. data/vendor/scs/src/normalize.o +0 -0
  68. data/vendor/scs/src/rw.c +160 -21
  69. data/vendor/scs/src/rw.o +0 -0
  70. data/vendor/scs/src/scs.c +767 -563
  71. data/vendor/scs/src/scs.o +0 -0
  72. data/vendor/scs/src/scs_indir.o +0 -0
  73. data/vendor/scs/src/scs_version.c +9 -3
  74. data/vendor/scs/src/scs_version.o +0 -0
  75. data/vendor/scs/src/util.c +37 -106
  76. data/vendor/scs/src/util.o +0 -0
  77. data/vendor/scs/test/minunit.h +17 -8
  78. data/vendor/scs/test/problem_utils.h +176 -14
  79. data/vendor/scs/test/problems/degenerate.h +130 -0
  80. data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
  81. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
  82. data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
  83. data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
  84. data/vendor/scs/test/problems/random_prob +0 -0
  85. data/vendor/scs/test/problems/random_prob.h +45 -0
  86. data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
  87. data/vendor/scs/test/problems/small_lp.h +13 -14
  88. data/vendor/scs/test/problems/test_fails.h +43 -0
  89. data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
  90. data/vendor/scs/test/random_socp_prob.c +54 -53
  91. data/vendor/scs/test/rng.h +109 -0
  92. data/vendor/scs/test/run_from_file.c +19 -10
  93. data/vendor/scs/test/run_tests.c +27 -3
  94. metadata +20 -8
  95. data/vendor/scs/linsys/amatrix.c +0 -305
  96. data/vendor/scs/linsys/amatrix.h +0 -36
  97. data/vendor/scs/linsys/amatrix.o +0 -0
  98. data/vendor/scs/test/data/small_random_socp +0 -0
  99. data/vendor/scs/test/problems/small_random_socp.h +0 -33
  100. data/vendor/scs/test/run_tests +0 -2
@@ -1,35 +1,9 @@
1
1
  #include "qdldl.h"
2
- #include "ctrlc.h"
3
2
 
4
3
  #define QDLDL_UNKNOWN (-1)
5
4
  #define QDLDL_USED (1)
6
5
  #define QDLDL_UNUSED (0)
7
6
 
8
- // //DEBUG
9
- // #include <stdio.h>
10
- // void qdprint_arrayi(const QDLDL_int* data, QDLDL_int n,char* varName){
11
-
12
- // QDLDL_int i;
13
- // printf("%s = [",varName);
14
- // for(i=0; i< n; i++){
15
- // printf("%lli,",data[i]);
16
- // }
17
- // printf("]\n");
18
-
19
- // }
20
-
21
- // void qdprint_arrayf(const QDLDL_float* data, QDLDL_int n, char* varName){
22
-
23
- // QDLDL_int i;
24
- // printf("%s = [",varName);
25
- // for(i=0; i< n; i++){
26
- // printf("%.3g,",data[i]);
27
- // }
28
- // printf("]\n");
29
-
30
- // }
31
- // // END DEBUG
32
-
33
7
  /* Compute the elimination tree for a quasidefinite matrix
34
8
  in compressed sparse column form.
35
9
  */
@@ -41,7 +15,7 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
41
15
  QDLDL_int* Lnz,
42
16
  QDLDL_int* etree){
43
17
 
44
- QDLDL_int sumLnz = 0;
18
+ QDLDL_int sumLnz;
45
19
  QDLDL_int i,j,p;
46
20
 
47
21
 
@@ -76,8 +50,19 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
76
50
  }
77
51
 
78
52
  //compute the total nonzeros in L. This much
79
- //space is required to store Li and Lx
80
- for(i = 0; i < n; i++){sumLnz += Lnz[i];}
53
+ //space is required to store Li and Lx. Return
54
+ //error code -2 if the nonzero count will overflow
55
+ //its unteger type.
56
+ sumLnz = 0;
57
+ for(i = 0; i < n; i++){
58
+ if(sumLnz > QDLDL_INT_MAX - Lnz[i]){
59
+ sumLnz = -2;
60
+ break;
61
+ }
62
+ else{
63
+ sumLnz += Lnz[i];
64
+ }
65
+ }
81
66
 
82
67
  return sumLnz;
83
68
  }
@@ -139,10 +124,6 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
139
124
  //Start from 1 here. The upper LH corner is trivially 0
140
125
  //in L b/c we are only computing the subdiagonal elements
141
126
  for(k = 1; k < n; k++){
142
- if(scs_is_interrupted()) {
143
- scs_printf("interrupt detected in factorization\n");
144
- return -1;
145
- }
146
127
 
147
128
  //NB : For each k, we compute a solution to
148
129
  //y = L(0:(k-1),0:k-1))\b, where b is the kth
@@ -258,11 +239,12 @@ void QDLDL_Lsolve(const QDLDL_int n,
258
239
  const QDLDL_float* Lx,
259
240
  QDLDL_float* x){
260
241
 
261
- QDLDL_int i,j;
242
+ QDLDL_int i,j;
262
243
  for(i = 0; i < n; i++){
263
- for(j = Lp[i]; j < Lp[i+1]; j++){
264
- x[Li[j]] -= Lx[j]*x[i];
265
- }
244
+ QDLDL_float val = x[i];
245
+ for(j = Lp[i]; j < Lp[i+1]; j++){
246
+ x[Li[j]] -= Lx[j]*val;
247
+ }
266
248
  }
267
249
  }
268
250
 
@@ -273,11 +255,13 @@ void QDLDL_Ltsolve(const QDLDL_int n,
273
255
  const QDLDL_float* Lx,
274
256
  QDLDL_float* x){
275
257
 
276
- QDLDL_int i,j;
258
+ QDLDL_int i,j;
277
259
  for(i = n-1; i>=0; i--){
278
- for(j = Lp[i]; j < Lp[i+1]; j++){
279
- x[i] -= Lx[j]*x[Li[j]];
280
- }
260
+ QDLDL_float val = x[i];
261
+ for(j = Lp[i]; j < Lp[i+1]; j++){
262
+ val -= Lx[j]*x[Li[j]];
263
+ }
264
+ x[i] = val;
281
265
  }
282
266
  }
283
267
 
@@ -289,10 +273,9 @@ void QDLDL_solve(const QDLDL_int n,
289
273
  const QDLDL_float* Dinv,
290
274
  QDLDL_float* x){
291
275
 
292
- QDLDL_int i;
293
-
294
- QDLDL_Lsolve(n,Lp,Li,Lx,x);
295
- for(i = 0; i < n; i++) x[i] *= Dinv[i];
296
- QDLDL_Ltsolve(n,Lp,Li,Lx,x);
276
+ QDLDL_int i;
297
277
 
278
+ QDLDL_Lsolve(n,Lp,Li,Lx,x);
279
+ for(i = 0; i < n; i++) x[i] *= Dinv[i];
280
+ QDLDL_Ltsolve(n,Lp,Li,Lx,x);
298
281
  }
@@ -32,19 +32,17 @@ extern "C" {
32
32
  * this function will *not* return an error, as it may still be possible to factor
33
33
  * such a matrix in LDL form. No promises are made in this case though...
34
34
  *
35
- * @param n number of columns in CSC matrix A (assumed square)
35
+ * @param n number of columns in CSC matrix A (assumed square)
36
36
  * @param Ap column pointers (size n+1) for columns of A
37
37
  * @param Ai row indices of A. Has Ap[n] elements
38
38
  * @param work work vector (size n) (no meaning on return)
39
39
  * @param Lnz count of nonzeros in each column of L (size n) below diagonal
40
40
  * @param etree elimination tree (size n)
41
- * @return total sum of Lnz (i.e. total nonzeros in L below diagonal). Returns
42
- * -1 if the input does not have triu structure or has an empty
43
- * column.
44
- *
41
+ * @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
42
+ * Returns -1 if the input is not triu or has an empty column.
43
+ * Returns -2 if the return value overflows QDLDL_int.
45
44
  *
46
45
  */
47
-
48
46
  QDLDL_int QDLDL_etree(const QDLDL_int n,
49
47
  const QDLDL_int* Ap,
50
48
  const QDLDL_int* Ai,
@@ -52,6 +50,7 @@ extern "C" {
52
50
  QDLDL_int* Lnz,
53
51
  QDLDL_int* etree);
54
52
 
53
+
55
54
  /**
56
55
  * Compute an LDL decomposition for a quasidefinite matrix
57
56
  * in compressed sparse column form, where the input matrix is
@@ -61,21 +60,22 @@ extern "C" {
61
60
  * Returns factors L, D and Dinv = 1./D.
62
61
  *
63
62
  * Does not use MALLOC. It is assumed that L will be a compressed
64
- * sparse column matrix with data (Ln,Lp,Li) with sufficient space
63
+ * sparse column matrix with data (n,Lp,Li,Lx) with sufficient space
65
64
  * allocated, with a number of nonzeros equal to the count given
66
- * as a return value by osqp_ldl_etree
67
- *
68
- * @param n number of columns in L and A (both square)
69
- * @param Ap column pointers (size n+1) for columns of A
70
- * @param Ai row indices of A. Has Ap[n] elements
71
- * @param Ln number of columns in CSC matrix L
72
- * @param Lp column pointers (size Ln+1) for columns of L
73
- * @param Li row indices of L. Has Lp[Ln] elements
65
+ * as a return value by QDLDL_etree
66
+ *
67
+ * @param n number of columns in L and A (both square)
68
+ * @param Ap column pointers (size n+1) for columns of A (not modified)
69
+ * @param Ai row indices of A. Has Ap[n] elements (not modified)
70
+ * @param Ax data of A. Has Ap[n] elements (not modified)
71
+ * @param Lp column pointers (size n+1) for columns of L
72
+ * @param Li row indices of L. Has Lp[n] elements
73
+ * @param Lx data of L. Has Lp[n] elements
74
74
  * @param D vectorized factor D. Length is n
75
75
  * @param Dinv reciprocal of D. Length is n
76
76
  * @param Lnz count of nonzeros in each column of L below diagonal,
77
- * as given by osqp_ldl_etree (not modified)
78
- * @param etree elimination tree as as given by osqp_ldl_etree (not modified)
77
+ * as given by QDLDL_etree (not modified)
78
+ * @param etree elimination tree as given by QDLDL_etree (not modified)
79
79
  * @param bwork working array of bools. Length is n
80
80
  * @param iwork working array of integers. Length is 3*n
81
81
  * @param fwork working array of floats. Length is n
@@ -85,8 +85,6 @@ extern "C" {
85
85
  * or otherwise LDL factorisable)
86
86
  *
87
87
  */
88
-
89
-
90
88
  QDLDL_int QDLDL_factor(const QDLDL_int n,
91
89
  const QDLDL_int* Ap,
92
90
  const QDLDL_int* Ai,
@@ -107,16 +105,15 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
107
105
  * Solves LDL'x = b
108
106
  *
109
107
  * It is assumed that L will be a compressed
110
- * sparse column matrix with data (Ln,Lp,Li).
108
+ * sparse column matrix with data (n,Lp,Li,Lx).
111
109
  *
112
- * @param n number of columns in L (both square)
113
- * @param Ln number of columns in CSC matrix L
114
- * @param Lp column pointers (size Ln+1) for columns of L
115
- * @param Li row indices of L. Has Lp[Ln] elements
110
+ * @param n number of columns in L
111
+ * @param Lp column pointers (size n+1) for columns of L
112
+ * @param Li row indices of L. Has Lp[n] elements
113
+ * @param Lx data of L. Has Lp[n] elements
116
114
  * @param Dinv reciprocal of D. Length is n
117
115
  * @param x initialized to b. Equal to x on return
118
116
  *
119
- *
120
117
  */
121
118
  void QDLDL_solve(const QDLDL_int n,
122
119
  const QDLDL_int* Lp,
@@ -130,40 +127,35 @@ void QDLDL_solve(const QDLDL_int n,
130
127
  * Solves (L+I)x = b
131
128
  *
132
129
  * It is assumed that L will be a compressed
133
- * sparse column matrix with data (Ln,Lp,Li).
130
+ * sparse column matrix with data (n,Lp,Li,Lx).
134
131
  *
135
- * @param n number of columns in L (both square)
136
- * @param Ln number of columns in CSC matrix L
137
- * @param Lp column pointers (size Ln+1) for columns of L
138
- * @param Li row indices of L. Has Lp[Ln] elements
139
- * @param Dinv reciprocal of D. Length is n
132
+ * @param n number of columns in L
133
+ * @param Lp column pointers (size n+1) for columns of L
134
+ * @param Li row indices of L. Has Lp[n] elements
135
+ * @param Lx data of L. Has Lp[n] elements
140
136
  * @param x initialized to b. Equal to x on return
141
137
  *
142
- *
143
138
  */
144
-
145
139
  void QDLDL_Lsolve(const QDLDL_int n,
146
140
  const QDLDL_int* Lp,
147
141
  const QDLDL_int* Li,
148
142
  const QDLDL_float* Lx,
149
143
  QDLDL_float* x);
150
144
 
145
+
151
146
  /**
152
147
  * Solves (L+I)'x = b
153
148
  *
154
149
  * It is assumed that L will be a compressed
155
- * sparse column matrix with data (Ln,Lp,Li).
150
+ * sparse column matrix with data (n,Lp,Li,Lx).
156
151
  *
157
- * @param n number of columns in L (both square)
158
- * @param Ln number of columns in CSC matrix L
159
- * @param Lp column pointers (size Ln+1) for columns of L
160
- * @param Li row indices of L. Has Lp[Ln] elements
161
- * @param Dinv reciprocal of D. Length is n
152
+ * @param n number of columns in L
153
+ * @param Lp column pointers (size n+1) for columns of L
154
+ * @param Li row indices of L. Has Lp[n] elements
155
+ * @param Lx data of L. Has Lp[n] elements
162
156
  * @param x initialized to b. Equal to x on return
163
157
  *
164
- *
165
158
  */
166
-
167
159
  void QDLDL_Ltsolve(const QDLDL_int n,
168
160
  const QDLDL_int* Lp,
169
161
  const QDLDL_int* Li,
@@ -1,18 +1,26 @@
1
1
  #ifndef QDLDL_TYPES_H
2
2
  # define QDLDL_TYPES_H
3
3
 
4
- #include "glbopts.h"
5
-
6
4
  # ifdef __cplusplus
7
5
  extern "C" {
8
6
  # endif /* ifdef __cplusplus */
9
7
 
10
- // QDLDL integer and float types
8
+ #include "glbopts.h"
9
+ #include <limits.h> //for INT_MAX / LLONG_MAX, used to define QDLDL_INT_MAX
10
+
11
+ /* QDLDL integer and float types */
11
12
 
12
13
  #define QDLDL_int scs_int
13
14
  #define QDLDL_float scs_float
14
15
  #define QDLDL_bool scs_int
15
16
 
17
+ /* Maximum value of the signed type QDLDL_int */
18
+ #ifdef DLONG
19
+ #define QDLDL_INT_MAX LLONG_MAX
20
+ #else
21
+ #define QDLDL_INT_MAX INT_MAX
22
+ #endif
23
+
16
24
  # ifdef __cplusplus
17
25
  }
18
26
  # endif /* ifdef __cplusplus */
@@ -1,8 +1,10 @@
1
1
  #include "gpu.h"
2
2
 
3
- void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
4
- cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
5
- size_t *buffer_size, void **buffer) {
3
+ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
4
+ const cusparseDnVecDescr_t x,
5
+ cusparseDnVecDescr_t y,
6
+ cusparseHandle_t cusparse_handle,
7
+ size_t *buffer_size, void **buffer) {
6
8
  /* y += A'*x
7
9
  x and y MUST be on GPU already
8
10
  */
@@ -10,10 +12,8 @@ void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_
10
12
  size_t new_buffer_size = 0;
11
13
 
12
14
  CUSPARSE_GEN(SpMV_bufferSize)
13
- (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
14
- &onef, Ag->descr, x, &onef, y,
15
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
16
- &new_buffer_size);
15
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
16
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
17
17
 
18
18
  if (new_buffer_size > *buffer_size) {
19
19
  if (*buffer != SCS_NULL) {
@@ -24,15 +24,15 @@ void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_
24
24
  }
25
25
 
26
26
  CUSPARSE_GEN(SpMV)
27
- (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
28
- &onef, Ag->descr, x, &onef, y,
29
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
30
- buffer);
27
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
31
29
  }
32
30
 
33
- void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
34
- cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
35
- size_t *buffer_size, void **buffer) {
31
+ /* this is slow, use trans routine if possible */
32
+ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
33
+ cusparseDnVecDescr_t y,
34
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
35
+ void **buffer) {
36
36
  /* y += A*x
37
37
  x and y MUST be on GPU already
38
38
  */
@@ -40,12 +40,9 @@ void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
40
40
  size_t new_buffer_size = 0;
41
41
 
42
42
  /* The A matrix idx pointers must be ORDERED */
43
-
44
43
  CUSPARSE_GEN(SpMV_bufferSize)
45
- (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
46
- &onef, Ag->descr, x, &onef, y,
47
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
48
- &new_buffer_size);
44
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
45
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
49
46
 
50
47
  if (new_buffer_size > *buffer_size) {
51
48
  if (*buffer != SCS_NULL) {
@@ -56,10 +53,21 @@ void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
56
53
  }
57
54
 
58
55
  CUSPARSE_GEN(SpMV)
59
- (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
60
- &onef, Ag->descr, x, &onef, y,
61
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
62
- buffer);
56
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
58
+ }
59
+
60
+ /* This assumes that P has been made full (i.e. not triangular) and uses the
61
+ * fact that the GPU is faster for general sparse matrices than for symmetric
62
+ */
63
+ /* y += P*x
64
+ x and y MUST be on GPU already
65
+ */
66
+ void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
67
+ cusparseDnVecDescr_t y,
68
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
69
+ void **buffer) {
70
+ SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer);
63
71
  }
64
72
 
65
73
  void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
@@ -68,13 +76,3 @@ void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
68
76
  cudaFree(A->p);
69
77
  cusparseDestroySpMat(A->descr);
70
78
  }
71
-
72
- void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
73
- ScsScaling *scal) {
74
- SCS(_normalize_a)(A, stgs, k, scal);
75
- }
76
-
77
- void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs,
78
- const ScsScaling *scal) {
79
- SCS(_un_normalize_a)(A, stgs, scal);
80
- }
@@ -10,49 +10,58 @@ extern "C" {
10
10
  #include <cuda_runtime_api.h>
11
11
  #include <cusparse.h>
12
12
 
13
- #include "amatrix.h"
14
13
  #include "glbopts.h"
15
14
  #include "linalg.h"
16
15
  #include "linsys.h"
17
16
  #include "scs.h"
17
+ #include "scs_matrix.h"
18
18
  #include "util.h"
19
19
 
20
- #define CUDA_CHECK_ERR \
21
- do { \
22
- cudaError_t err = cudaGetLastError(); \
23
- if (err != cudaSuccess) { \
24
- printf("%s:%d:%s\n ERROR_CUDA: %s\n", __FILE__, __LINE__, __func__, \
25
- cudaGetErrorString(err)); \
26
- } \
20
+ #define CUDA_CHECK_ERR \
21
+ do { \
22
+ cudaDeviceSynchronize(); \
23
+ cudaError_t err = cudaGetLastError(); \
24
+ if (err != cudaSuccess) { \
25
+ scs_printf("%s:%d:%s\n ERROR_CUDA (#): %s\n", __FILE__, __LINE__, \
26
+ __func__, cudaGetErrorString(err)); \
27
+ } \
27
28
  } while (0)
28
29
 
29
- #ifndef EXTRA_VERBOSE
30
+ #if VERBOSITY == 0
30
31
  #ifndef SFLOAT
31
32
  #define CUBLAS(x) cublasD##x
33
+ #define CUBLASI(x) cublasId##x
32
34
  #define CUSPARSE(x) cusparseD##x
33
35
  #else
34
36
  #define CUBLAS(x) cublasS##x
37
+ #define CUBLASI(x) cublasIs##x
35
38
  #define CUSPARSE(x) cusparseS##x
36
39
  #endif
37
40
  #define CUSPARSE_GEN(x) cusparse##x
38
41
  #else
39
42
  #ifndef SFLOAT
40
- #define CUBLAS(x) \
41
- CUDA_CHECK_ERR; \
43
+ #define CUBLAS(x) \
44
+ CUDA_CHECK_ERR; \
42
45
  cublasD##x
43
- #define CUSPARSE(x) \
44
- CUDA_CHECK_ERR; \
46
+ #define CUBLASI(x) \
47
+ CUDA_CHECK_ERR; \
48
+ cublasId##x
49
+ #define CUSPARSE(x) \
50
+ CUDA_CHECK_ERR; \
45
51
  cusparseD##x
46
52
  #else
47
- #define CUBLAS(x) \
48
- CUDA_CHECK_ERR; \
53
+ #define CUBLAS(x) \
54
+ CUDA_CHECK_ERR; \
49
55
  cublasS##x
50
- #define CUSPARSE(x) \
51
- CUDA_CHECK_ERR; \
56
+ #define CUBLASI(x) \
57
+ CUDA_CHECK_ERR; \
58
+ cublasIs##x
59
+ #define CUSPARSE(x) \
60
+ CUDA_CHECK_ERR; \
52
61
  cusparseS##x
53
62
  #endif
54
- #define CUSPARSE_GEN(x) \
55
- CUDA_CHECK_ERR; \
63
+ #define CUSPARSE_GEN(x) \
64
+ CUDA_CHECK_ERR; \
56
65
  cusparse##x
57
66
  #endif
58
67
 
@@ -78,25 +87,33 @@ extern "C" {
78
87
  A'(n x m) A (m x n) Agt accum_by_a_gpu
79
88
  */
80
89
 
81
- /* this struct defines the data matrix A on GPU */
82
- typedef struct SCS_GPU_A_DATA_MATRIX {
90
+ /* this struct defines the data matrix on GPU */
91
+ typedef struct SCS_GPU_DATA_MATRIX {
83
92
  /* A is supplied in column compressed format */
84
- scs_float *x; /* A values, size: NNZ A */
85
- scs_int *i; /* A row index, size: NNZ A */
86
- scs_int *p; /* A column pointer, size: n+1 */
93
+ scs_float *x; /* values, size: NNZ */
94
+ scs_int *i; /* row index, size: NNZ */
95
+ scs_int *p; /* column pointer, size: n+1 */
87
96
  scs_int m, n; /* m rows, n cols */
88
- scs_int Annz; /* num non-zeros in A matrix */
97
+ scs_int nnz; /* num non-zeros in matrix */
89
98
  /* CUDA */
90
99
  cusparseSpMatDescr_t descr;
91
100
  } ScsGpuMatrix;
92
101
 
93
- void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
94
- cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
95
- size_t *buffer_size, void **buffer);
102
+ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *A,
103
+ const cusparseDnVecDescr_t x,
104
+ cusparseDnVecDescr_t y,
105
+ cusparseHandle_t cusparse_handle,
106
+ size_t *buffer_size, void **buffer);
96
107
 
97
- void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
98
- cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
99
- size_t *buffer_size, void **buffer);
108
+ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
109
+ cusparseDnVecDescr_t y,
110
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
111
+ void **buffer);
112
+
113
+ void SCS(accum_by_p_gpu)(const ScsGpuMatrix *P, const cusparseDnVecDescr_t x,
114
+ cusparseDnVecDescr_t y,
115
+ cusparseHandle_t cusparse_handle, size_t *buffer_size,
116
+ void **buffer);
100
117
 
101
118
  void SCS(free_gpu_matrix)(ScsGpuMatrix *A);
102
119