scs 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/scs/ffi.rb +2 -0
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +2 -2
- data/vendor/scs/CMakeLists.txt +136 -6
- data/vendor/scs/Makefile +53 -3
- data/vendor/scs/README.md +1 -1
- data/vendor/scs/include/cones.h +47 -2
- data/vendor/scs/include/glbopts.h +1 -1
- data/vendor/scs/include/scs.h +29 -0
- data/vendor/scs/include/scs_blas.h +4 -0
- data/vendor/scs/include/scs_types.h +3 -1
- data/vendor/scs/include/util_spectral_cones.h +45 -0
- data/vendor/scs/linsys/cpu/direct/private.c +3 -3
- data/vendor/scs/linsys/cpu/direct/private.h +2 -1
- data/vendor/scs/linsys/csparse.c +1 -1
- data/vendor/scs/linsys/cudss/direct/private.c +279 -0
- data/vendor/scs/linsys/cudss/direct/private.h +63 -0
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +1 -1
- data/vendor/scs/linsys/gpu/indirect/private.c +14 -21
- data/vendor/scs/scs.mk +17 -2
- data/vendor/scs/src/aa.c +8 -12
- data/vendor/scs/src/cones.c +783 -12
- data/vendor/scs/src/rw.c +15 -1
- data/vendor/scs/src/scs.c +4 -0
- data/vendor/scs/src/spectral_cones/logdeterminant/log_cone_IPM.c +660 -0
- data/vendor/scs/src/spectral_cones/logdeterminant/log_cone_Newton.c +279 -0
- data/vendor/scs/src/spectral_cones/logdeterminant/log_cone_wrapper.c +205 -0
- data/vendor/scs/src/spectral_cones/logdeterminant/logdet_cone.c +143 -0
- data/vendor/scs/src/spectral_cones/nuclear/ell1_cone.c +221 -0
- data/vendor/scs/src/spectral_cones/nuclear/nuclear_cone.c +99 -0
- data/vendor/scs/src/spectral_cones/sum-largest/sum_largest_cone.c +196 -0
- data/vendor/scs/src/spectral_cones/sum-largest/sum_largest_eval_cone.c +140 -0
- data/vendor/scs/src/spectral_cones/util_spectral_cones.c +52 -0
- data/vendor/scs/test/problems/complex_PSD.h +83 -0
- data/vendor/scs/test/rng.h +4 -4
- data/vendor/scs/test/run_tests.c +25 -0
- data/vendor/scs/test/spectral_cones_problems/exp_design.h +141 -0
- data/vendor/scs/test/spectral_cones_problems/graph_partitioning.h +275 -0
- data/vendor/scs/test/spectral_cones_problems/robust_pca.h +253 -0
- data/vendor/scs/test/spectral_cones_problems/several_logdet_cones.h +222 -0
- data/vendor/scs/test/spectral_cones_problems/several_nuc_cone.h +285 -0
- data/vendor/scs/test/spectral_cones_problems/several_sum_largest.h +420 -0
- metadata +21 -2
data/vendor/scs/linsys/cudss/direct/private.c
ADDED
@@ -0,0 +1,279 @@
+#include "private.h"
+#include "linsys.h"
+
+/* In case of error abort freeing p */
+#define CUDSS_CHECK_ABORT(call, p, fname)                                   \
+  do {                                                                      \
+    cudssStatus_t status = call;                                            \
+    if (status != CUDSS_STATUS_SUCCESS) {                                   \
+      scs_printf("CUDSS call " #fname " returned status = %d\n", status);   \
+      scs_free_lin_sys_work(p);                                             \
+      return SCS_NULL;                                                      \
+    }                                                                       \
+  } while (0);
+
+/* In case of error abort freeing p */
+#define CUDA_CHECK_ABORT(call, p, fname)                                    \
+  do {                                                                      \
+    cudaError_t status = call;                                              \
+    if (status != cudaSuccess) {                                            \
+      printf("CUDA call " #fname " returned status = %d\n", status);        \
+      scs_free_lin_sys_work(p);                                             \
+      return SCS_NULL;                                                      \
+    }                                                                       \
+  } while (0);
+
+/* Return the linear system method name */
+const char *scs_get_lin_sys_method() {
+  return "sparse-direct-cuDSS";
+}
+
+/* Free allocated resources for the linear system solver */
+void scs_free_lin_sys_work(ScsLinSysWork *p) {
+  if (p) {
+    /* Free GPU resources */
+    if (p->d_kkt_val)
+      cudaFree(p->d_kkt_val);
+    if (p->d_kkt_row_ptr)
+      cudaFree(p->d_kkt_row_ptr);
+    if (p->d_kkt_col_ind)
+      cudaFree(p->d_kkt_col_ind);
+    if (p->d_b)
+      cudaFree(p->d_b);
+    if (p->d_sol)
+      cudaFree(p->d_sol);
+
+    /* Free cuDSS resources */
+    if (p->d_kkt_mat)
+      cudssMatrixDestroy(p->d_kkt_mat);
+    if (p->d_b_mat)
+      cudssMatrixDestroy(p->d_b_mat);
+    if (p->d_sol_mat)
+      cudssMatrixDestroy(p->d_sol_mat);
+
+    if (p->solver_config)
+      cudssConfigDestroy(p->solver_config);
+    if (p->solver_data && p->handle)
+      cudssDataDestroy(p->handle, p->solver_data);
+    if (p->handle)
+      cudssDestroy(p->handle);
+
+    /* Free CPU resources */
+    if (p->kkt)
+      SCS(cs_spfree)(p->kkt);
+    if (p->sol)
+      scs_free(p->sol);
+    if (p->diag_r_idxs)
+      scs_free(p->diag_r_idxs);
+    if (p->diag_p)
+      scs_free(p->diag_p);
+
+    scs_free(p);
+  }
+}
+
+/* Initialize the linear system solver workspace */
+ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+                                     const scs_float *diag_r) {
+  ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
+  if (!p)
+    return SCS_NULL;
+
+  /* Store problem dimensions */
+  p->n = A->n;
+  p->m = A->m;
+  p->n_plus_m = p->n + p->m;
+
+  /* Allocate CPU memory */
+  p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
+  if (!p->sol) {
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+
+  p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
+  if (!p->diag_r_idxs) {
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+
+  p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
+  if (!p->diag_p) {
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+
+  /* Form KKT matrix as upper-triangular, CSC */
+  /* Because of symmetry it is equivalent to lower-triangular, CSR */
+  p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 1);
+  if (!p->kkt) {
+    scs_printf("Error in forming KKT matrix");
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+
+  cudssStatus_t status;
+  cudaError_t cuda_error;
+
+  /* Create cuDSS handle */
+  CUDSS_CHECK_ABORT(cudssCreate(&p->handle), p, "cudssCreate");
+  /* Creating cuDSS solver configuration and data objects */
+
+  CUDSS_CHECK_ABORT(cudssConfigCreate(&p->solver_config), p,
+                    "cudssConfigCreate");
+  CUDSS_CHECK_ABORT(cudssDataCreate(p->handle, &p->solver_data), p,
+                    "cudssDataCreate");
+
+  /* Allocate device memory for KKT matrix */
+  scs_int nnz = p->kkt->p[p->n_plus_m];
+
+  CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_val, nnz * sizeof(scs_float)),
+                   p, "cudaMalloc: kkt_val");
+  CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_row_ptr,
+                              (p->n_plus_m + 1) * sizeof(scs_int)),
+                   p, "cudaMalloc: kkt_row_ptr");
+  CUDA_CHECK_ABORT(
+      cudaMalloc((void **)&p->d_kkt_col_ind, nnz * sizeof(scs_int)), p,
+      "cudaMalloc: kkt_col_ind");
+
+  /* Copy KKT matrix to device */
+  /* Note: we treat column pointers (p->kkt->p) as row pointers on the device */
+  CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_val, p->kkt->x, nnz * sizeof(scs_float),
+                              cudaMemcpyHostToDevice),
+                   p, "cudaMemcpy: kkt_val");
+  CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_row_ptr, p->kkt->p,
+                              (p->kkt->n + 1) * sizeof(scs_int),
+                              cudaMemcpyHostToDevice),
+                   p, "cudaMemcpy: kkt_row_ptr");
+  CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_col_ind, p->kkt->i,
+                              nnz * sizeof(scs_int), cudaMemcpyHostToDevice),
+                   p, "cudaMemcpy: kkt_col_ind");
+
+  /* Create kkt matrix descriptor */
+  /* We pass the kkt matrix as symmetric, lower triangular */
+  cudssMatrixType_t mtype = CUDSS_MTYPE_SYMMETRIC;
+  cudssMatrixViewType_t mview = CUDSS_MVIEW_LOWER;
+  cudssIndexBase_t base = CUDSS_BASE_ZERO;
+  CUDSS_CHECK_ABORT(cudssMatrixCreateCsr(
+                        &p->d_kkt_mat, p->kkt->m, p->kkt->n, nnz,
+                        p->d_kkt_row_ptr, NULL, p->d_kkt_col_ind, p->d_kkt_val,
+                        SCS_CUDA_INDEX, SCS_CUDA_FLOAT, mtype, mview, base),
+                    p, "cudssMatrixCreateCsr");
+
+  /* Allocate device memory for vectors */
+  CUDA_CHECK_ABORT(
+      cudaMalloc((void **)&p->d_b, p->n_plus_m * sizeof(scs_float)), p,
+      "cudaMalloc: b");
+  CUDA_CHECK_ABORT(
+      cudaMalloc((void **)&p->d_sol, p->n_plus_m * sizeof(scs_float)), p,
+      "cudaMalloc: sol");
+
+  /* Create RHS and solution matrix descriptors */
+  scs_int nrhs = 1;
+  CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_b_mat, p->n_plus_m, nrhs,
+                                        p->n_plus_m, p->d_b, SCS_CUDA_FLOAT,
+                                        CUDSS_LAYOUT_COL_MAJOR),
+                    p, "cudssMatrixCreateDn: b");
+  CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_sol_mat, p->n_plus_m, nrhs,
+                                        p->n_plus_m, p->d_sol, SCS_CUDA_FLOAT,
+                                        CUDSS_LAYOUT_COL_MAJOR),
+                    p, "cudssMatrixCreateDn: sol");
+
+  /* Symbolic factorization */
+  CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_ANALYSIS,
+                                 p->solver_config, p->solver_data, p->d_kkt_mat,
+                                 p->d_sol_mat, p->d_b_mat),
+                    p, "cudssExecute: analysis");
+
+  /* Numerical Factorization */
+  CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_FACTORIZATION,
+                                 p->solver_config, p->solver_data, p->d_kkt_mat,
+                                 p->d_sol_mat, p->d_b_mat),
+                    p, "cudssExecute: factorization");
+
+  return p;
+}
+
+/* Solve the linear system for a given RHS b */
+scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
+                          scs_float tol) {
+  /* Copy right-hand side to device */
+  cudaError_t custatus = cudaMemcpy(p->d_b, b, p->n_plus_m * sizeof(scs_float),
+                                    cudaMemcpyHostToDevice);
+  if (custatus != cudaSuccess) {
+    scs_printf("scs_solve_lin_sys: Error copying `b` side to device: %d\n",
+               (int)custatus);
+    return custatus;
+  }
+
+  // is this really needed?
+  cudssMatrixSetValues(p->d_b_mat, p->d_b);
+
+  /* Solve the system */
+  cudssStatus_t status =
+      cudssExecute(p->handle, CUDSS_PHASE_SOLVE, p->solver_config,
+                   p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
+
+  if (status != CUDSS_STATUS_SUCCESS) {
+    scs_printf("scs_solve_lin_sys: Error during solve: %d\n", (int)status);
+    return status;
+  }
+
+  /* Copy solution back to host */
+  custatus = cudaMemcpy(b, p->d_sol, p->n_plus_m * sizeof(scs_float),
+                        cudaMemcpyDeviceToHost);
+  if (custatus != cudaSuccess) {
+    scs_printf("scs_solve_lin_sys: Error copying d_sol to host: %d\n",
+               (int)custatus);
+    return custatus;
+  }
+
+  return 0; /* Success */
+}
+
+/* Update the KKT matrix when R changes */
+void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
+  scs_int i;
+
+  /* Update KKT matrix on CPU */
+  for (i = 0; i < p->n; ++i) {
+    /* top left is R_x + P */
+    p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
+  }
+  for (i = p->n; i < p->n + p->m; ++i) {
+    /* bottom right is -R_y */
+    p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
+  }
+
+  /* Copy updated values to device */
+  cudaError_t custatus = cudaMemcpy(p->d_kkt_val, p->kkt->x,
+                                    p->kkt->p[p->n_plus_m] * sizeof(scs_float),
+                                    cudaMemcpyHostToDevice);
+  if (custatus != cudaSuccess) {
+    scs_printf(
+        "scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n",
+        (int)custatus);
+    return;
+  }
+
+  /* Update the matrix values in cuDSS */
+  cudssStatus_t status;
+  status = cudssMatrixSetCsrPointers(p->d_kkt_mat, p->d_kkt_row_ptr, NULL,
+                                     p->d_kkt_col_ind, p->d_kkt_val);
+  if (status != CUDSS_STATUS_SUCCESS) {
+    scs_printf(
+        "scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n",
+        (int)status);
+    return;
+  }
+
+  /* Perform Refactorization with the updated matrix */
+  status =
+      cudssExecute(p->handle, CUDSS_PHASE_REFACTORIZATION, p->solver_config,
+                   p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
+  if (status != CUDSS_STATUS_SUCCESS) {
+    scs_printf("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n",
+               (int)status);
+    return;
+  }
+}
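The file above builds the KKT matrix once on the CPU as upper-triangular CSC and then registers the very same arrays with cuDSS as a lower-triangular CSR matrix (see the "we treat column pointers (p->kkt->p) as row pointers on the device" comment). That works because, for a symmetric matrix, the CSC arrays of the upper triangle and the CSR arrays of the lower triangle are identical. A minimal standalone sketch of that identity; the 3x3 matrix and the variable names are illustrative only, not taken from the package:

#include <stdio.h>

/* Symmetric A = [4 1 0; 1 3 2; 0 2 5].
 * csc_* stores the upper triangle of A column-by-column (CSC);
 * csr_* stores the lower triangle of A row-by-row (CSR).
 * Because A equals its transpose, the two sets of arrays coincide, which is
 * why an upper-triangular CSC KKT matrix can be described to a CSR-based
 * solver as lower triangular without any conversion. */
int main(void) {
  int csc_col_ptr[4] = {0, 1, 3, 5};
  int csc_row_ind[5] = {0, 0, 1, 1, 2};
  double csc_val[5] = {4, 1, 3, 2, 5};

  int csr_row_ptr[4] = {0, 1, 3, 5};
  int csr_col_ind[5] = {0, 0, 1, 1, 2};
  double csr_val[5] = {4, 1, 3, 2, 5};

  int i, same = 1;
  for (i = 0; i < 4; ++i)
    same = same && (csc_col_ptr[i] == csr_row_ptr[i]);
  for (i = 0; i < 5; ++i)
    same = same && (csc_row_ind[i] == csr_col_ind[i]) &&
           (csc_val[i] == csr_val[i]);
  printf("upper-CSC arrays == lower-CSR arrays: %s\n", same ? "yes" : "no");
  return 0;
}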
data/vendor/scs/linsys/cudss/direct/private.h
ADDED
@@ -0,0 +1,63 @@
+#ifndef PRIV_H_GUARD
+#define PRIV_H_GUARD
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef SFLOAT
+#define SCS_CUDA_FLOAT CUDA_R_64F
+#else
+#define SCS_CUDA_FLOAT CUDA_R_32F
+#endif
+
+#ifndef DLONG
+#define SCS_CUDA_INDEX CUDA_R_32I
+#else
+#define SCS_CUDA_INDEX CUDA_R_64I
+#endif
+
+#include "csparse.h"
+#include "linsys.h"
+#include <cuda_runtime.h>
+#include <cudss.h>
+
+struct SCS_LIN_SYS_WORK {
+  /* General problem dimensions */
+  scs_int n;        /* number of QP variables */
+  scs_int m;        /* number of QP constraints */
+  scs_int n_plus_m; /* dimension of the linear system */
+
+  /* CPU matrices and vectors */
+  ScsMatrix *kkt; /* KKT matrix in CSR format */
+  scs_float *sol; /* solution to the KKT system */
+
+  /* cuDSS handle and descriptors */
+  cudssHandle_t handle;    /* cuDSS library handle */
+  cudssMatrix_t d_kkt_mat; /* cuDSS matrix descriptors */
+  cudssMatrix_t d_b_mat;
+  cudssMatrix_t d_sol_mat;
+
+  /* Device memory for KKT matrix */
+  scs_float *d_kkt_val;   /* device copy of KKT values */
+  scs_int *d_kkt_row_ptr; /* device copy of KKT row pointers */
+  scs_int *d_kkt_col_ind; /* device copy of KKT column indices */
+
+  /* Device memory for vectors */
+  scs_float *d_b;   /* device copy of right-hand side */
+  scs_float *d_sol; /* device copy of solution */
+
+  /* These are required for matrix updates */
+  scs_int *diag_r_idxs; /* indices where R appears in the KKT matrix */
+  scs_float *diag_p;    /* Diagonal values of P */
+
+  /* cuDSS configuration */
+  cudssConfig_t solver_config; /* cuDSS solver handle */
+  cudssData_t solver_data;     /* cuDSS data handle */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
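For context, SCS defines scs_float as double unless SFLOAT is set and scs_int as a 32-bit int unless DLONG is set, which is what the SCS_CUDA_FLOAT/SCS_CUDA_INDEX mappings above mirror. A tiny sanity-check sketch for the default build (uses C11 _Static_assert; purely illustrative, not part of the package):

/* Default build: SFLOAT and DLONG are both undefined. */
typedef double scs_float_default; /* passed to cuDSS as CUDA_R_64F */
typedef int scs_int_default;      /* passed to cuDSS as CUDA_R_32I */

_Static_assert(sizeof(scs_float_default) == 8, "CUDA_R_64F is an 8-byte real");
_Static_assert(sizeof(scs_int_default) == 4, "CUDA_R_32I is a 4-byte index");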
data/vendor/scs/linsys/gpu/indirect/private.c
CHANGED
@@ -147,28 +147,24 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {

   if (p->Pg) {
     /* y = R_x * x + P x */
-    SCS(accum_by_p_gpu)
-    (p->Pg, p->dn_vec_n, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
-     &p->buffer);
+    SCS(accum_by_p_gpu)(p->Pg, p->dn_vec_n, p->dn_vec_n_p, p->cusparse_handle,
+                        &p->buffer_size, &p->buffer);
   }

   /* z = Ax */
 #if GPU_TRANSPOSE_MAT > 0
-  SCS(accum_by_atrans_gpu)
-  (p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
-   &p->buffer);
+  SCS(accum_by_atrans_gpu)(p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+                           &p->buffer_size, &p->buffer);
 #else
-  SCS(accum_by_a_gpu)
-  (p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
-   &p->buffer);
+  SCS(accum_by_a_gpu)(p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+                      &p->buffer_size, &p->buffer);
 #endif
   /* z = R_y^{-1} A x */
   scale_by_diag(p->cublas_handle, p->inv_r_y_gpu, z, p->m);

   /* y += A'z => y = R_x * x + P x + A' R_y^{-1} Ax */
-  SCS(accum_by_atrans_gpu)
-  (p->Ag, p->dn_vec_m, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
-   &p->buffer);
+  SCS(accum_by_atrans_gpu)(p->Ag, p->dn_vec_m, p->dn_vec_n_p,
+                           p->cusparse_handle, &p->buffer_size, &p->buffer);
 }

 /* P comes in upper triangular, expand to full
@@ -488,9 +484,8 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
   cusparseDnVecSetValues(p->dn_vec_m, (void *)tmp_m); /* R * ry */
   cusparseDnVecSetValues(p->dn_vec_n, (void *)bg); /* rx */
   /* bg[:n] = rx + A' R ry */
-  SCS(accum_by_atrans_gpu)
-  (Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle, &p->buffer_size,
-   &p->buffer);
+  SCS(accum_by_atrans_gpu)(Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle,
+                           &p->buffer_size, &p->buffer);

   /* set max_iters to 10 * n (though in theory n is enough for any tol) */
   max_iters = 10 * Ag->n;
@@ -506,13 +501,11 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,

   /* b[n:] = Ax - ry */
 #if GPU_TRANSPOSE_MAT > 0
-  SCS(accum_by_atrans_gpu)
-  (p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
-   &p->buffer);
+  SCS(accum_by_atrans_gpu)(p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+                           &p->buffer_size, &p->buffer);
 #else
-  SCS(accum_by_a_gpu)
-  (Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
-   &p->buffer);
+  SCS(accum_by_a_gpu)(Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+                      &p->buffer_size, &p->buffer);
 #endif

   /* bg[n:] = R_y^{-1} bg[n:] = R_y^{-1} (Ax - ry) = y */
data/vendor/scs/scs.mk
CHANGED
@@ -8,7 +8,6 @@ endif
 # For cross-compiling with mingw use these.
 #CC = i686-w64-mingw32-gcc -m32
 #CC = x86_64-w64-mingw32-gcc-4.8
-CUCC = $(CC) #Don't need to use nvcc, since using cuda blas APIs

 # For GPU must add cuda libs to path, e.g.
 # export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
@@ -53,13 +52,19 @@ endif
 #TODO: check if this works for all platforms:
 ifeq ($(CUDA_PATH), )
 CUDA_PATH=/usr/local/cuda
+CUCC = $(CUDA_PATH)/bin/nvcc
 endif
+
 CULDFLAGS = -L$(CUDA_PATH)/lib -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcusparse
 CUDAFLAGS = $(CFLAGS) -I$(CUDA_PATH)/include -Ilinsys/gpu -Wno-c++11-long-long # turn off annoying long-long warnings in cuda header files

+CUDSS_FLAGS = -I$(CUDSS_PATH)/include -I$(CUDA_PATH)/include
+CUDSS_LDFLAGS = $(CULDFLAGS) -L$(CUDSS_PATH)/lib -lcudss
+
 # Add on default CFLAGS
 OPT = -O3
-override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes -I. -Iinclude -Ilinsys $(OPT)
+INCLUDE = -I. -Iinclude -Ilinsys
+override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes $(INCLUDE) $(OPT) -Werror=incompatible-pointer-types
 ifneq ($(ISWINDOWS), 1)
 override CFLAGS += -fPIC
 endif
@@ -70,6 +75,7 @@ INDIRSRC = $(LINSYS)/cpu/indirect
 GPUDIR = $(LINSYS)/gpu/direct
 GPUINDIR = $(LINSYS)/gpu/indirect
 MKLSRC = $(LINSYS)/mkl/direct
+CUDSSSRC = $(LINSYS)/cudss/direct

 EXTSRC = $(LINSYS)/external

@@ -174,6 +180,15 @@ ifneq ($(USE_LAPACK), 0)
 endif
 endif

+############ SPECTRAL CONES ############
+USE_SPECTRAL_CONES = 0
+ifneq ($(USE_SPECTRAL_CONES), 0)
+ifeq ($(USE_LAPACK), 0)
+$(error USE_SPECTRAL_CONES requires USE_LAPACK to be enabled)
+endif
+CUSTOM_FLAGS += -DUSE_SPECTRAL_CONES
+endif
+
 MATLAB_MEX_FILE = 0
 ifneq ($(MATLAB_MEX_FILE), 0)
 CUSTOM_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
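A note on the USE_SPECTRAL_CONES switch added above: it defaults to 0 and the build errors out if LAPACK is disabled. Assuming the usual make command-line override convention, an invocation such as `make USE_LAPACK=1 USE_SPECTRAL_CONES=1` would add -DUSE_SPECTRAL_CONES to CUSTOM_FLAGS; the accompanying Makefile changes (+53 lines, not shown in this section) are presumably what compile the new src/spectral_cones sources under that flag.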
data/vendor/scs/src/aa.c
CHANGED
@@ -189,9 +189,8 @@ static void set_m(AaWork *a, aa_int len) {
   blas_int blen = (blas_int)len;
   aa_float onef = 1.0, zerof = 0.0, r;
   /* if len < mem this only uses len cols */
-  BLAS(gemm)
-  ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
-   a->Y, &bdim, &zerof, a->M, &blen);
+  BLAS(gemm)("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y,
+             &bdim, a->Y, &bdim, &zerof, a->M, &blen);
   if (a->regularization > 0) {
     r = compute_regularization(a, len);
     for (i = 0; i < len; ++i) {
|
|
287
286
|
aa_float onef = 1.0, neg_onef = -1.0;
|
288
287
|
aa_float one_m_relaxation = 1. - a->relaxation;
|
289
288
|
/* x_work = x - S * work */
|
290
|
-
BLAS(gemv)
|
291
|
-
|
292
|
-
a->x_work, &one);
|
289
|
+
BLAS(gemv)("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one,
|
290
|
+
&onef, a->x_work, &one);
|
293
291
|
/* f = relaxation * f */
|
294
292
|
BLAS(scal)(&bdim, &a->relaxation, f, &one);
|
295
293
|
/* f += (1 - relaxation) * x_work */
|
@@ -306,9 +304,8 @@ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
|
|
306
304
|
aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
|
307
305
|
|
308
306
|
/* work = S'g or Y'g */
|
309
|
-
BLAS(gemv)
|
310
|
-
|
311
|
-
&zerof, a->work, &one);
|
307
|
+
BLAS(gemv)("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g,
|
308
|
+
&one, &zerof, a->work, &one);
|
312
309
|
|
313
310
|
/* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */
|
314
311
|
BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
|
@@ -335,9 +332,8 @@ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
|
|
335
332
|
/* if solve was successful compute new point */
|
336
333
|
|
337
334
|
/* first set f -= D * work */
|
338
|
-
BLAS(gemv)
|
339
|
-
|
340
|
-
&one);
|
335
|
+
BLAS(gemv)("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one,
|
336
|
+
&onef, f, &one);
|
341
337
|
|
342
338
|
/* if relaxation is not 1 then need to incorporate */
|
343
339
|
if (a->relaxation != 1.0) {
|