scs 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +98 -0
  5. data/ext/scs/extconf.rb +29 -0
  6. data/lib/scs.rb +17 -0
  7. data/lib/scs/ffi.rb +117 -0
  8. data/lib/scs/solver.rb +173 -0
  9. data/lib/scs/version.rb +3 -0
  10. data/vendor/scs/LICENSE.txt +21 -0
  11. data/vendor/scs/Makefile +164 -0
  12. data/vendor/scs/README.md +222 -0
  13. data/vendor/scs/include/aa.h +56 -0
  14. data/vendor/scs/include/cones.h +46 -0
  15. data/vendor/scs/include/ctrlc.h +33 -0
  16. data/vendor/scs/include/glbopts.h +177 -0
  17. data/vendor/scs/include/linalg.h +26 -0
  18. data/vendor/scs/include/linsys.h +64 -0
  19. data/vendor/scs/include/normalize.h +18 -0
  20. data/vendor/scs/include/rw.h +17 -0
  21. data/vendor/scs/include/scs.h +161 -0
  22. data/vendor/scs/include/scs_blas.h +51 -0
  23. data/vendor/scs/include/util.h +65 -0
  24. data/vendor/scs/linsys/amatrix.c +305 -0
  25. data/vendor/scs/linsys/amatrix.h +36 -0
  26. data/vendor/scs/linsys/amatrix.o +0 -0
  27. data/vendor/scs/linsys/cpu/direct/private.c +366 -0
  28. data/vendor/scs/linsys/cpu/direct/private.h +26 -0
  29. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  30. data/vendor/scs/linsys/cpu/indirect/private.c +256 -0
  31. data/vendor/scs/linsys/cpu/indirect/private.h +31 -0
  32. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  33. data/vendor/scs/linsys/external/amd/LICENSE.txt +934 -0
  34. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +469 -0
  35. data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +254 -0
  36. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  37. data/vendor/scs/linsys/external/amd/amd.h +400 -0
  38. data/vendor/scs/linsys/external/amd/amd_1.c +180 -0
  39. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  40. data/vendor/scs/linsys/external/amd/amd_2.c +1842 -0
  41. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  42. data/vendor/scs/linsys/external/amd/amd_aat.c +184 -0
  43. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  44. data/vendor/scs/linsys/external/amd/amd_control.c +64 -0
  45. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  46. data/vendor/scs/linsys/external/amd/amd_defaults.c +37 -0
  47. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  48. data/vendor/scs/linsys/external/amd/amd_dump.c +179 -0
  49. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  50. data/vendor/scs/linsys/external/amd/amd_global.c +16 -0
  51. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  52. data/vendor/scs/linsys/external/amd/amd_info.c +119 -0
  53. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  54. data/vendor/scs/linsys/external/amd/amd_internal.h +304 -0
  55. data/vendor/scs/linsys/external/amd/amd_order.c +199 -0
  56. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  57. data/vendor/scs/linsys/external/amd/amd_post_tree.c +120 -0
  58. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  59. data/vendor/scs/linsys/external/amd/amd_postorder.c +206 -0
  60. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  61. data/vendor/scs/linsys/external/amd/amd_preprocess.c +118 -0
  62. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  63. data/vendor/scs/linsys/external/amd/amd_valid.c +92 -0
  64. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  65. data/vendor/scs/linsys/external/amd/changes +11 -0
  66. data/vendor/scs/linsys/external/qdldl/LICENSE +201 -0
  67. data/vendor/scs/linsys/external/qdldl/README.md +120 -0
  68. data/vendor/scs/linsys/external/qdldl/changes +4 -0
  69. data/vendor/scs/linsys/external/qdldl/qdldl.c +298 -0
  70. data/vendor/scs/linsys/external/qdldl/qdldl.h +177 -0
  71. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  72. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +21 -0
  73. data/vendor/scs/linsys/gpu/gpu.c +41 -0
  74. data/vendor/scs/linsys/gpu/gpu.h +85 -0
  75. data/vendor/scs/linsys/gpu/indirect/private.c +304 -0
  76. data/vendor/scs/linsys/gpu/indirect/private.h +36 -0
  77. data/vendor/scs/scs.mk +181 -0
  78. data/vendor/scs/src/aa.c +224 -0
  79. data/vendor/scs/src/aa.o +0 -0
  80. data/vendor/scs/src/cones.c +802 -0
  81. data/vendor/scs/src/cones.o +0 -0
  82. data/vendor/scs/src/ctrlc.c +77 -0
  83. data/vendor/scs/src/ctrlc.o +0 -0
  84. data/vendor/scs/src/linalg.c +84 -0
  85. data/vendor/scs/src/linalg.o +0 -0
  86. data/vendor/scs/src/normalize.c +93 -0
  87. data/vendor/scs/src/normalize.o +0 -0
  88. data/vendor/scs/src/rw.c +167 -0
  89. data/vendor/scs/src/rw.o +0 -0
  90. data/vendor/scs/src/scs.c +978 -0
  91. data/vendor/scs/src/scs.o +0 -0
  92. data/vendor/scs/src/scs_version.c +5 -0
  93. data/vendor/scs/src/scs_version.o +0 -0
  94. data/vendor/scs/src/util.c +196 -0
  95. data/vendor/scs/src/util.o +0 -0
  96. data/vendor/scs/test/data/small_random_socp +0 -0
  97. data/vendor/scs/test/minunit.h +13 -0
  98. data/vendor/scs/test/problem_utils.h +93 -0
  99. data/vendor/scs/test/problems/rob_gauss_cov_est.h +85 -0
  100. data/vendor/scs/test/problems/small_lp.h +50 -0
  101. data/vendor/scs/test/problems/small_random_socp.h +33 -0
  102. data/vendor/scs/test/random_socp_prob.c +171 -0
  103. data/vendor/scs/test/run_from_file.c +69 -0
  104. data/vendor/scs/test/run_tests +2 -0
  105. data/vendor/scs/test/run_tests.c +32 -0
  106. metadata +203 -0
@@ -0,0 +1,36 @@
1
+ #ifndef PRIV_H_GUARD
2
+ #define PRIV_H_GUARD
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "gpu.h"
9
+ #include "glbopts.h"
10
+ #include "linalg.h"
11
+ #include "scs.h"
12
+
13
+
14
+ struct SCS_LIN_SYS_WORK {
15
+ /* reporting */
16
+ scs_int tot_cg_its;
17
+ scs_float total_solve_time;
18
+ /* ALL BELOW HOSTED ON THE GPU */
19
+ scs_float *p; /* cg iterate, n */
20
+ scs_float *r; /* cg residual, n */
21
+ scs_float *Gp; /* G * p, n */
22
+ scs_float *bg; /* b, n */
23
+ scs_float *tmp_m; /* m, used in mat_vec */
24
+ scs_float *z; /* preconditioned */
25
+ scs_float *M; /* preconditioner */
26
+ ScsGpuMatrix *Ag; /* A matrix on GPU */
27
+ ScsGpuMatrix *Agt; /* A trans matrix on GPU */
28
+ /* CUDA */
29
+ cublasHandle_t cublas_handle;
30
+ cusparseHandle_t cusparse_handle;
31
+ };
32
+
33
+ #ifdef __cplusplus
34
+ }
35
+ #endif
36
+ #endif
@@ -0,0 +1,181 @@
1
+ ifeq ($(OS),Windows_NT)
2
+ UNAME = CYGWINorMINGWorMSYS
3
+ else
4
+ UNAME = $(shell uname -s)
5
+ endif
6
+
7
+ #CC = gcc
8
+ # For cross-compiling with mingw use these.
9
+ #CC = i686-w64-mingw32-gcc -m32
10
+ #CC = x86_64-w64-mingw32-gcc-4.8
11
+ CUCC = $(CC) #Don't need to use nvcc, since using cuda blas APIs
12
+
13
+ # For GPU must add cuda libs to path, e.g.
14
+ # export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
15
+
16
+ ifneq (, $(findstring CYGWIN, $(UNAME)))
17
+ ISWINDOWS := 1
18
+ else
19
+ ifneq (, $(findstring MINGW, $(UNAME)))
20
+ ISWINDOWS := 1
21
+ else
22
+ ifneq (, $(findstring MSYS, $(UNAME)))
23
+ ISWINDOWS := 1
24
+ else
25
+ ifneq (, $(findstring mingw, $(CC)))
26
+ ISWINDOWS := 1
27
+ else
28
+ ISWINDOWS := 0
29
+ endif
30
+ endif
31
+ endif
32
+ endif
33
+
34
+ ifeq ($(UNAME), Darwin)
35
+ # we're on apple, no need to link rt library
36
+ LDFLAGS += -lm
37
+ SHARED = dylib
38
+ SONAME = -install_name
39
+ else
40
+ ifeq ($(ISWINDOWS), 1)
41
+ # we're on windows (cygwin or msys)
42
+ LDFLAGS += -lm
43
+ SHARED = dll
44
+ SONAME = -soname
45
+ else
46
+ # we're on a linux system, use accurate timer provided by clock_gettime()
47
+ LDFLAGS += -lm -lrt
48
+ SHARED = so
49
+ SONAME = -soname
50
+ endif
51
+ endif
52
+
53
+ #TODO: check if this works for all platforms:
54
+ ifeq ($(CUDA_PATH), )
55
+ CUDA_PATH=/usr/local/cuda
56
+ endif
57
+ CULDFLAGS = -L$(CUDA_PATH)/lib -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcusparse
58
+ CUDAFLAGS = $(CFLAGS) -I$(CUDA_PATH)/include -Ilinsys/gpu -Wno-c++11-long-long # turn off annoying long-long warnings in cuda header files
59
+
60
+ # Add on default CFLAGS
61
+ OPT = -O3
62
+ override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes -I. -Iinclude -Ilinsys $(OPT)
63
+ ifneq ($(ISWINDOWS), 1)
64
+ override CFLAGS += -fPIC
65
+ endif
66
+
67
+ LINSYS = linsys
68
+ DIRSRC = $(LINSYS)/cpu/direct
69
+ INDIRSRC = $(LINSYS)/cpu/indirect
70
+ GPUDIR = $(LINSYS)/gpu/direct
71
+ GPUINDIR = $(LINSYS)/gpu/indirect
72
+
73
+ EXTSRC = $(LINSYS)/external
74
+
75
+ OUT = out
76
+ AR = ar
77
+ ARFLAGS = rv
78
+ ARCHIVE = $(AR) $(ARFLAGS)
79
+ RANLIB = ranlib
80
+ INSTALL = install
81
+
82
+ ifeq ($(PREFIX),)
83
+ PREFIX = /usr/local
84
+ endif
85
+
86
+ OPT_FLAGS =
87
+ ########### OPTIONAL FLAGS ##########
88
+ # these can all be overridden from the command line
89
+ # e.g. make DLONG=1 will override the setting below
90
+ DLONG = 0
91
+ ifneq ($(DLONG), 0)
92
+ OPT_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
93
+ endif
94
+ CTRLC = 1
95
+ ifneq ($(CTRLC), 0)
96
+ OPT_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
97
+ endif
98
+ SFLOAT = 0
99
+ ifneq ($(SFLOAT), 0)
100
+ OPT_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
101
+ endif
102
# NOVALIDATE: set to a non-zero value (e.g. `make NOVALIDATE=1`) to compile
# with -DNOVALIDATE and remove the data validation step.
NOVALIDATE = 0
ifneq ($(NOVALIDATE), 0)
OPT_FLAGS += -DNOVALIDATE=$(NOVALIDATE) # remove data validation step
endif
106
+ NOTIMER = 0
107
+ ifneq ($(NOTIMER), 0)
108
+ OPT_FLAGS += -DNOTIMER=$(NOTIMER) # no timing, times reported as nan
109
+ endif
110
+ COPYAMATRIX = 1
111
+ ifneq ($(COPYAMATRIX), 0)
112
+ OPT_FLAGS += -DCOPYAMATRIX=$(COPYAMATRIX) # if normalize, copy A
113
+ endif
114
+ GPU_TRANSPOSE_MAT = 1
115
+ ifneq ($(GPU_TRANSPOSE_MAT), 0)
116
+ OPT_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # transpose A mat in GPU memory
117
+ endif
118
+
119
+ ### VERBOSITY LEVELS: 0,1,2
120
+ EXTRA_VERBOSE = 0
121
+ ifneq ($(EXTRA_VERBOSE), 0)
122
+ OPT_FLAGS += -DEXTRA_VERBOSE=$(EXTRA_VERBOSE) # extra verbosity level
123
+ endif
124
+
125
+ ############ OPENMP: ############
126
+ # set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
127
+ # set the number of threads to, for example, 4 by entering the command:
128
+ # export OMP_NUM_THREADS=4
129
+
130
+ USE_OPENMP = 0
131
+ ifneq ($(USE_OPENMP), 0)
132
+ override CFLAGS += -fopenmp
133
+ LDFLAGS += -lgomp
134
+ endif
135
+
136
+ ############ SDPS: BLAS + LAPACK ############
137
+ # set USE_LAPACK = 1 below to enable solving SDPs
138
+ # NB: point the libraries to the locations where
139
+ # you have blas and lapack installed
140
+
141
+ USE_LAPACK = 1
142
+ ifneq ($(USE_LAPACK), 0)
143
+ # edit these for your setup:
144
+ BLASLDFLAGS = -lblas -llapack #-lgfortran
145
+ LDFLAGS += $(BLASLDFLAGS)
146
+ OPT_FLAGS += -DUSE_LAPACK
147
+
148
+ BLAS64 = 0
149
+ ifneq ($(BLAS64), 0)
150
+ OPT_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
151
+ endif
152
+
153
+ NOBLASSUFFIX = 0
154
+ ifneq ($(NOBLASSUFFIX), 0)
155
+ OPT_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
156
+ endif
157
+
158
+ BLASSUFFIX = "_"
159
+ ifneq ($(BLASSUFFIX), "_")
160
+ OPT_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
161
+ endif
162
+ endif
163
+
164
+ MATLAB_MEX_FILE = 0
165
+ ifneq ($(MATLAB_MEX_FILE), 0)
166
+ OPT_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
167
+ endif
168
+ PYTHON = 0
169
+ ifneq ($(PYTHON), 0)
170
+ OPT_FLAGS += -DPYTHON=$(PYTHON) # python extension
171
+ endif
172
+ USING_R = 0
173
+ ifneq ($(USING_R), 0)
174
+ OPT_FLAGS += -DUSING_R=$(USING_R) # R extension
175
+ endif
176
+
177
+ # debug to see var values, e.g. 'make print-OBJECTS' shows OBJECTS value
178
+ print-%: ; @echo $*=$($*)
179
+
180
+ override CFLAGS += $(OPT_FLAGS)
181
+ CUDAFLAGS += $(OPT_FLAGS)
@@ -0,0 +1,224 @@
1
+ #include "aa.h"
2
+ #include "scs_blas.h"
3
+
4
+ /* This file uses Anderson acceleration to improve the convergence of
5
+ * a fixed point mapping.
6
+ * At each iteration we need to solve a (small) linear system, we
7
+ * do this using LAPACK ?gesv.
8
+ */
9
+
10
+ #ifndef USE_LAPACK
11
+
12
+ typedef void * ACCEL_WORK;
13
+
14
+ AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) { return SCS_NULL; }
15
+ aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { return 0; }
16
+ void aa_finish(AaWork *a) {}
17
+
18
+ #else
19
+
20
+ /* contains the necessary parameters to perform aa at each step */
21
+ struct ACCEL_WORK {
22
+ aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
23
+ aa_int k; /* aa memory */
24
+ aa_int l; /* variable dimension */
25
+ aa_int iter; /* current iteration */
26
+
27
+ aa_float *x; /* x input to map*/
28
+ aa_float *f; /* f(x) output of map */
29
+ aa_float *g; /* x - f(x) */
30
+
31
+ /* from previous iteration */
32
+ aa_float *g_prev; /* x - f(x) */
33
+
34
+ aa_float *y; /* g - g_prev */
35
+ aa_float *s; /* x - x_prev */
36
+ aa_float *d; /* f - f_prev */
37
+
38
+ aa_float *Y; /* matrix of stacked y values */
39
+ aa_float *S; /* matrix of stacked s values */
40
+ aa_float *D; /* matrix of stacked d values = (S-Y) */
41
+ aa_float *M; /* S'Y or Y'Y depending on type of aa */
42
+
43
+ /* workspace variables */
44
+ aa_float *work;
45
+ blas_int *ipiv;
46
+ };
47
+
48
+ /* BLAS functions used */
49
+ aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
50
+ void BLAS(axpy)(blas_int *n, aa_float *a, const aa_float *x, blas_int *incx,
51
+ aa_float *y, blas_int *incy);
52
+ void BLAS(gemv)(const char *trans, const blas_int *m, const blas_int *n,
53
+ const aa_float *alpha, const aa_float *a, const blas_int *lda,
54
+ const aa_float *x, const blas_int *incx, const aa_float *beta,
55
+ aa_float *y, const blas_int *incy);
56
+ void BLAS(gesv)(blas_int *n, blas_int *nrhs, aa_float *a, blas_int *lda,
57
+ blas_int *ipiv, aa_float *b, blas_int *ldb, blas_int *info);
58
+ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
59
+ blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
60
+ blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
61
+ aa_float *c, blas_int *ldc);
62
+
63
+ /* sets a->M to S'Y or Y'Y depending on type of aa used */
64
+ static void set_m(AaWork *a) {
65
+ blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k;
66
+ aa_float onef = 1.0, zerof = 0.0;
67
+ BLAS(gemm)
68
+ ("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl,
69
+ &zerof, a->M, &bk);
70
+ }
71
+
72
+ /* updates the workspace parameters for aa for this iteration */
73
+ static void update_accel_params(const aa_float *x, const aa_float *f,
74
+ AaWork *a) {
75
+ /* at the start a->x = x_prev and a->f = f_prev */
76
+ aa_int idx = a->iter % a->k;
77
+ aa_int l = a->l;
78
+
79
+ blas_int one = 1;
80
+ blas_int bl = (blas_int)l;
81
+ aa_float neg_onef = -1.0;
82
+
83
+ /* g = x */
84
+ memcpy(a->g, x, sizeof(aa_float) * l);
85
+ /* s = x */
86
+ memcpy(a->s, x, sizeof(aa_float) * l);
87
+ /* d = f */
88
+ memcpy(a->d, f, sizeof(aa_float) * l);
89
+ /* g -= f */
90
+ BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one);
91
+ /* s -= x_prev */
92
+ BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one);
93
+ /* d -= f_prev */
94
+ BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one);
95
+
96
+ /* g, s, d correct here */
97
+
98
+ /* y = g */
99
+ memcpy(a->y, a->g, sizeof(aa_float) * l);
100
+ /* y -= g_prev */
101
+ BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one);
102
+
103
+ /* y correct here */
104
+
105
+ /* copy y into idx col of Y */
106
+ memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l);
107
+ /* copy s into idx col of S */
108
+ memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l);
109
+ /* copy d into idx col of D */
110
+ memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l);
111
+
112
+ /* Y, S,D correct here */
113
+
114
+ memcpy(a->f, f, sizeof(aa_float) * l);
115
+ memcpy(a->x, x, sizeof(aa_float) * l);
116
+
117
+ /* x, f correct here */
118
+
119
+ /* set M = S'*Y */
120
+ set_m(a);
121
+
122
+ /* M correct here */
123
+
124
+ memcpy(a->g_prev, a->g, sizeof(aa_float) * l);
125
+
126
+ /* g_prev set for next iter here */
127
+ }
128
+
129
+ /* solves the system of equations to perform the aa update
130
+ * at the end f contains the next iterate to be returned
131
+ */
132
+ static aa_int solve(aa_float *f, AaWork *a, aa_int len) {
133
+ blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len,
134
+ bk = (blas_int)a->k;
135
+ aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm;
136
+ /* work = S'g or Y'g */
137
+ BLAS(gemv)
138
+ ("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof,
139
+ a->work, &one);
140
+ /* work = M \ work, where M = S'Y or M = Y'Y */
141
+ BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info);
142
+ nrm = BLAS(nrm2)(&bk, a->work, &one);
143
+ if (info < 0 || nrm >= MAX_AA_NRM) {
144
+ #if EXTRA_VERBOSE > 0
145
+ scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n",
146
+ a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm);
147
+ #endif
148
+ return -1;
149
+ }
150
+ /* if solve was successful then set f -= D * work */
151
+ BLAS(gemv)
152
+ ("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one);
153
+ return (aa_int)info;
154
+ }
155
+
156
+ /*
157
+ * API functions below this line, see aa.h for descriptions.
158
+ */
159
+ AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) {
160
+ AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
161
+ if (!a) {
162
+ scs_printf("Failed to allocate memory for AA.\n");
163
+ return (void *)0;
164
+ }
165
+ a->type1 = type1;
166
+ a->iter = 0;
167
+ a->l = l;
168
+ a->k = aa_mem;
169
+ if (a->k <= 0) {
170
+ return a;
171
+ }
172
+
173
+ a->x = (aa_float *)calloc(a->l, sizeof(aa_float));
174
+ a->f = (aa_float *)calloc(a->l, sizeof(aa_float));
175
+ a->g = (aa_float *)calloc(a->l, sizeof(aa_float));
176
+
177
+ a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float));
178
+
179
+ a->y = (aa_float *)calloc(a->l, sizeof(aa_float));
180
+ a->s = (aa_float *)calloc(a->l, sizeof(aa_float));
181
+ a->d = (aa_float *)calloc(a->l, sizeof(aa_float));
182
+
183
+ a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
184
+ a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
185
+ a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
186
+
187
+ a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float));
188
+ a->work = (aa_float *)calloc(a->k, sizeof(aa_float));
189
+ a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int));
190
+ return a;
191
+ }
192
+
193
+ aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
194
+ if (a->k <= 0) {
195
+ return 0;
196
+ }
197
+ update_accel_params(x, f, a);
198
+ if (a->iter++ == 0) {
199
+ return 0;
200
+ }
201
+ /* solve linear system, new point overwrites f if successful */
202
+ return solve(f, a, MIN(a->iter - 1, a->k));
203
+ }
204
+
205
+ void aa_finish(AaWork *a) {
206
+ if (a) {
207
+ free(a->x);
208
+ free(a->f);
209
+ free(a->g);
210
+ free(a->g_prev);
211
+ free(a->y);
212
+ free(a->s);
213
+ free(a->d);
214
+ free(a->Y);
215
+ free(a->S);
216
+ free(a->D);
217
+ free(a->M);
218
+ free(a->work);
219
+ free(a->ipiv);
220
+ free(a);
221
+ }
222
+ }
223
+
224
+ #endif
Binary file
@@ -0,0 +1,802 @@
1
+ #include "cones.h"
2
+
3
+ #include "linalg.h"
4
+ #include "scs.h"
5
+ #include "scs_blas.h" /* contains BLAS(X) macros and type info */
6
+ #include "util.h"
7
+
8
+ #define CONE_RATE (2)
9
+ #define CONE_TOL (1e-8)
10
+ #define CONE_THRESH (1e-6)
11
+ #define EXP_CONE_MAX_ITERS (100)
12
+ #define POW_CONE_MAX_ITERS (20)
13
+
14
+ #ifdef USE_LAPACK
15
+ void BLAS(syevr)(const char *jobz, const char *range, const char *uplo,
16
+ blas_int *n, scs_float *a, blas_int *lda, scs_float *vl,
17
+ scs_float *vu, blas_int *il, blas_int *iu, scs_float *abstol,
18
+ blas_int *m, scs_float *w, scs_float *z, blas_int *ldz,
19
+ blas_int *isuppz, scs_float *work, blas_int *lwork,
20
+ blas_int *iwork, blas_int *liwork, blas_int *info);
21
+ void BLAS(syr)(const char *uplo, const blas_int *n, const scs_float *alpha,
22
+ const scs_float *x, const blas_int *incx, scs_float *a,
23
+ const blas_int *lda);
24
+ void BLAS(scal)(const blas_int *n, const scs_float *sa, scs_float *sx,
25
+ const blas_int *incx);
26
+ scs_float BLAS(nrm2)(const blas_int *n, scs_float *x, const blas_int *incx);
27
+ #endif
28
+
29
+ static scs_int get_sd_cone_size(scs_int s) { return (s * (s + 1)) / 2; }
30
+
31
+ /*
32
+ * boundaries will contain array of indices of rows of A corresponding to
33
+ * cone boundaries, boundaries[0] is starting index for cones of size strictly
34
+ * larger than 1
35
+ * returns length of boundaries array, boundaries malloc-ed here so should be
36
+ * freed
37
+ */
38
+ scs_int SCS(get_cone_boundaries)(const ScsCone *k, scs_int **boundaries) {
39
+ scs_int i, count = 0;
40
+ scs_int len = 1 + k->qsize + k->ssize + k->ed + k->ep + k->psize;
41
+ scs_int *b = (scs_int *)scs_calloc(len, sizeof(scs_int));
42
+ b[count] = k->f + k->l;
43
+ count += 1;
44
+ if (k->qsize > 0) {
45
+ memcpy(&b[count], k->q, k->qsize * sizeof(scs_int));
46
+ }
47
+ count += k->qsize;
48
+ for (i = 0; i < k->ssize; ++i) {
49
+ b[count + i] = get_sd_cone_size(k->s[i]);
50
+ }
51
+ count += k->ssize;
52
+ for (i = 0; i < k->ep + k->ed; ++i) {
53
+ b[count + i] = 3;
54
+ }
55
+ count += k->ep + k->ed;
56
+ for (i = 0; i < k->psize; ++i) {
57
+ b[count + i] = 3;
58
+ }
59
+ count += k->psize;
60
+ *boundaries = b;
61
+ return len;
62
+ }
63
+
64
+ static scs_int get_full_cone_dims(const ScsCone *k) {
65
+ scs_int i, c = 0;
66
+ if (k->f) {
67
+ c += k->f;
68
+ }
69
+ if (k->l) {
70
+ c += k->l;
71
+ }
72
+ if (k->qsize && k->q) {
73
+ for (i = 0; i < k->qsize; ++i) {
74
+ c += k->q[i];
75
+ }
76
+ }
77
+ if (k->ssize && k->s) {
78
+ for (i = 0; i < k->ssize; ++i) {
79
+ c += get_sd_cone_size(k->s[i]);
80
+ }
81
+ }
82
+ if (k->ed) {
83
+ c += 3 * k->ed;
84
+ }
85
+ if (k->ep) {
86
+ c += 3 * k->ep;
87
+ }
88
+ if (k->p) {
89
+ c += 3 * k->psize;
90
+ }
91
+ return c;
92
+ }
93
+
94
+ scs_int SCS(validate_cones)(const ScsData *d, const ScsCone *k) {
95
+ scs_int i;
96
+ if (get_full_cone_dims(k) != d->m) {
97
+ scs_printf("cone dimensions %li not equal to num rows in A = m = %li\n",
98
+ (long)get_full_cone_dims(k), (long)d->m);
99
+ return -1;
100
+ }
101
+ if (k->f && k->f < 0) {
102
+ scs_printf("free cone error\n");
103
+ return -1;
104
+ }
105
+ if (k->l && k->l < 0) {
106
+ scs_printf("lp cone error\n");
107
+ return -1;
108
+ }
109
+ if (k->qsize && k->q) {
110
+ if (k->qsize < 0) {
111
+ scs_printf("soc cone error\n");
112
+ return -1;
113
+ }
114
+ for (i = 0; i < k->qsize; ++i) {
115
+ if (k->q[i] < 0) {
116
+ scs_printf("soc cone error\n");
117
+ return -1;
118
+ }
119
+ }
120
+ }
121
+ if (k->ssize && k->s) {
122
+ if (k->ssize < 0) {
123
+ scs_printf("sd cone error\n");
124
+ return -1;
125
+ }
126
+ for (i = 0; i < k->ssize; ++i) {
127
+ if (k->s[i] < 0) {
128
+ scs_printf("sd cone error\n");
129
+ return -1;
130
+ }
131
+ }
132
+ }
133
+ if (k->ed && k->ed < 0) {
134
+ scs_printf("ep cone error\n");
135
+ return -1;
136
+ }
137
+ if (k->ep && k->ep < 0) {
138
+ scs_printf("ed cone error\n");
139
+ return -1;
140
+ }
141
+ if (k->psize && k->p) {
142
+ if (k->psize < 0) {
143
+ scs_printf("power cone error\n");
144
+ return -1;
145
+ }
146
+ for (i = 0; i < k->psize; ++i) {
147
+ if (k->p[i] < -1 || k->p[i] > 1) {
148
+ scs_printf("power cone error, values must be in [-1,1]\n");
149
+ return -1;
150
+ }
151
+ }
152
+ }
153
+ return 0;
154
+ }
155
+
156
+ char *SCS(get_cone_summary)(const ScsInfo *info, ScsConeWork *c) {
157
+ char *str = (char *)scs_malloc(sizeof(char) * 64);
158
+ sprintf(str, "\tCones: avg projection time: %1.2es\n",
159
+ c->total_cone_time / (info->iter + 1) / 1e3);
160
+ c->total_cone_time = 0.0;
161
+ return str;
162
+ }
163
+
164
+ void SCS(finish_cone)(ScsConeWork *c) {
165
+ #ifdef USE_LAPACK
166
+ if (c->Xs) {
167
+ scs_free(c->Xs);
168
+ }
169
+ if (c->Z) {
170
+ scs_free(c->Z);
171
+ }
172
+ if (c->e) {
173
+ scs_free(c->e);
174
+ }
175
+ if (c->work) {
176
+ scs_free(c->work);
177
+ }
178
+ if (c->iwork) {
179
+ scs_free(c->iwork);
180
+ }
181
+ #endif
182
+ if (c) {
183
+ scs_free(c);
184
+ }
185
+ }
186
+
187
+ char *SCS(get_cone_header)(const ScsCone *k) {
188
+ char *tmp = (char *)scs_malloc(sizeof(char) * 512);
189
+ scs_int i, soc_vars, soc_blks, sd_vars, sd_blks;
190
+ sprintf(tmp, "Cones:");
191
+ if (k->f) {
192
+ sprintf(tmp + strlen(tmp), "\tprimal zero / dual free vars: %li\n",
193
+ (long)k->f);
194
+ }
195
+ if (k->l) {
196
+ sprintf(tmp + strlen(tmp), "\tlinear vars: %li\n", (long)k->l);
197
+ }
198
+ soc_vars = 0;
199
+ soc_blks = 0;
200
+ if (k->qsize && k->q) {
201
+ soc_blks = k->qsize;
202
+ for (i = 0; i < k->qsize; i++) {
203
+ soc_vars += k->q[i];
204
+ }
205
+ sprintf(tmp + strlen(tmp), "\tsoc vars: %li, soc blks: %li\n",
206
+ (long)soc_vars, (long)soc_blks);
207
+ }
208
+ sd_vars = 0;
209
+ sd_blks = 0;
210
+ if (k->ssize && k->s) {
211
+ sd_blks = k->ssize;
212
+ for (i = 0; i < k->ssize; i++) {
213
+ sd_vars += get_sd_cone_size(k->s[i]);
214
+ }
215
+ sprintf(tmp + strlen(tmp), "\tsd vars: %li, sd blks: %li\n", (long)sd_vars,
216
+ (long)sd_blks);
217
+ }
218
+ if (k->ep || k->ed) {
219
+ sprintf(tmp + strlen(tmp), "\texp vars: %li, dual exp vars: %li\n",
220
+ (long)(3 * k->ep), (long)(3 * k->ed));
221
+ }
222
+ if (k->psize && k->p) {
223
+ sprintf(tmp + strlen(tmp), "\tprimal + dual power vars: %li\n",
224
+ (long)(3 * k->psize));
225
+ }
226
+ return tmp;
227
+ }
228
+
229
+ static scs_int is_simple_semi_definite_cone(scs_int *s, scs_int ssize) {
230
+ scs_int i;
231
+ for (i = 0; i < ssize; i++) {
232
+ if (s[i] > 2) {
233
+ return 0; /* false */
234
+ }
235
+ }
236
+ return 1; /* true */
237
+ }
238
+
239
+ static scs_float exp_newton_one_d(scs_float rho, scs_float y_hat,
240
+ scs_float z_hat) {
241
+ scs_float t = MAX(-z_hat, 1e-6);
242
+ scs_float f, fp;
243
+ scs_int i;
244
+ for (i = 0; i < EXP_CONE_MAX_ITERS; ++i) {
245
+ f = t * (t + z_hat) / rho / rho - y_hat / rho + log(t / rho) + 1;
246
+ fp = (2 * t + z_hat) / rho / rho + 1 / t;
247
+
248
+ t = t - f / fp;
249
+
250
+ if (t <= -z_hat) {
251
+ return 0;
252
+ } else if (t <= 0) {
253
+ return z_hat;
254
+ } else if (ABS(f) < CONE_TOL) {
255
+ break;
256
+ }
257
+ }
258
+ return t + z_hat;
259
+ }
260
+
261
+ static void exp_solve_for_x_with_rho(scs_float *v, scs_float *x,
262
+ scs_float rho) {
263
+ x[2] = exp_newton_one_d(rho, v[1], v[2]);
264
+ x[1] = (x[2] - v[2]) * x[2] / rho;
265
+ x[0] = v[0] - rho;
266
+ }
267
+
268
+ static scs_float exp_calc_grad(scs_float *v, scs_float *x, scs_float rho) {
269
+ exp_solve_for_x_with_rho(v, x, rho);
270
+ if (x[1] <= 1e-12) {
271
+ return x[0];
272
+ }
273
+ return x[0] + x[1] * log(x[1] / x[2]);
274
+ }
275
+
276
+ static void exp_get_rho_ub(scs_float *v, scs_float *x, scs_float *ub,
277
+ scs_float *lb) {
278
+ *lb = 0;
279
+ *ub = 0.125;
280
+ while (exp_calc_grad(v, x, *ub) > 0) {
281
+ *lb = *ub;
282
+ (*ub) *= 2;
283
+ }
284
+ }
285
+
286
+ /* project onto the exponential cone, v has dimension *exactly* 3 */
287
+ static scs_int proj_exp_cone(scs_float *v) {
288
+ scs_int i;
289
+ scs_float ub, lb, rho, g, x[3];
290
+ scs_float r = v[0], s = v[1], t = v[2];
291
+ scs_float tol = CONE_TOL; /* iter < 0 ? CONE_TOL : MAX(CONE_TOL, 1 /
292
+ POWF((iter + 1), CONE_RATE)); */
293
+
294
+ /* v in cl(Kexp) */
295
+ if ((s * exp(r / s) - t <= CONE_THRESH && s > 0) ||
296
+ (r <= 0 && s == 0 && t >= 0)) {
297
+ return 0;
298
+ }
299
+
300
+ /* -v in Kexp^* */
301
+ if ((-r < 0 && r * exp(s / r) + exp(1) * t <= CONE_THRESH) ||
302
+ (-r == 0 && -s >= 0 && -t >= 0)) {
303
+ memset(v, 0, 3 * sizeof(scs_float));
304
+ return 0;
305
+ }
306
+
307
+ /* special case with analytical solution */
308
+ if (r < 0 && s < 0) {
309
+ v[1] = 0.0;
310
+ v[2] = MAX(v[2], 0);
311
+ return 0;
312
+ }
313
+
314
+ /* iterative procedure to find projection, bisects on dual variable: */
315
+ exp_get_rho_ub(v, x, &ub, &lb); /* get starting upper and lower bounds */
316
+ for (i = 0; i < EXP_CONE_MAX_ITERS; ++i) {
317
+ rho = (ub + lb) / 2; /* halfway between upper and lower bounds */
318
+ g = exp_calc_grad(v, x, rho); /* calculates gradient wrt dual var */
319
+ if (g > 0) {
320
+ lb = rho;
321
+ } else {
322
+ ub = rho;
323
+ }
324
+ if (ub - lb < tol) {
325
+ break;
326
+ }
327
+ }
328
+ /*
329
+ #if EXTRA_VERBOSE > 0
330
+ scs_printf("exponential cone proj iters %i\n", i);
331
+ #endif
332
+ */
333
+ v[0] = x[0];
334
+ v[1] = x[1];
335
+ v[2] = x[2];
336
+ return 0;
337
+ }
338
+
339
+ static scs_int set_up_sd_cone_work_space(ScsConeWork *c, const ScsCone *k) {
340
+ #ifdef USE_LAPACK
341
+ scs_int i;
342
+ blas_int n_max = 0;
343
+ scs_float eig_tol = 1e-8;
344
+ blas_int neg_one = -1;
345
+ blas_int m = 0;
346
+ blas_int info = 0;
347
+ scs_float wkopt = 0.0;
348
+ #if EXTRA_VERBOSE > 0
349
+ #define _STR_EXPAND(tok) #tok
350
+ #define _STR(tok) _STR_EXPAND(tok)
351
+ scs_printf("BLAS(func) = '%s'\n", _STR(BLAS(func)));
352
+ #endif
353
+ /* eigenvector decomp workspace */
354
+ for (i = 0; i < k->ssize; ++i) {
355
+ if (k->s[i] > n_max) {
356
+ n_max = (blas_int)k->s[i];
357
+ }
358
+ }
359
+ c->Xs = (scs_float *)scs_calloc(n_max * n_max, sizeof(scs_float));
360
+ c->Z = (scs_float *)scs_calloc(n_max * n_max, sizeof(scs_float));
361
+ c->e = (scs_float *)scs_calloc(n_max, sizeof(scs_float));
362
+ c->liwork = 0;
363
+
364
+ BLAS(syevr)
365
+ ("Vectors", "All", "Lower", &n_max, c->Xs, &n_max, SCS_NULL, SCS_NULL,
366
+ SCS_NULL, SCS_NULL, &eig_tol, &m, c->e, c->Z, &n_max, SCS_NULL, &wkopt,
367
+ &neg_one, &(c->liwork), &neg_one, &info);
368
+
369
+ if (info != 0) {
370
+ scs_printf("FATAL: syevr failure, info = %li\n", (long)info);
371
+ return -1;
372
+ }
373
+ c->lwork = (blas_int)(wkopt + 0.01); /* 0.01 for int casting safety */
374
+ c->work = (scs_float *)scs_calloc(c->lwork, sizeof(scs_float));
375
+ c->iwork = (blas_int *)scs_calloc(c->liwork, sizeof(blas_int));
376
+
377
+ if (!c->Xs || !c->Z || !c->e || !c->work || !c->iwork) {
378
+ return -1;
379
+ }
380
+ return 0;
381
+ #else
382
+ scs_printf(
383
+ "FATAL: Cannot solve SDPs with > 2x2 matrices without linked "
384
+ "blas+lapack libraries\n");
385
+ scs_printf(
386
+ "Install blas+lapack and re-compile SCS with blas+lapack library "
387
+ "locations\n");
388
+ return -1;
389
+ #endif
390
+ }
391
+
392
+ ScsConeWork *SCS(init_cone)(const ScsCone *k) {
393
+ ScsConeWork *c = (ScsConeWork *)scs_calloc(1, sizeof(ScsConeWork));
394
+ #if EXTRA_VERBOSE > 0
395
+ scs_printf("init_cone\n");
396
+ #endif
397
+ c->total_cone_time = 0.0;
398
+ if (k->ssize && k->s) {
399
+ if (!is_simple_semi_definite_cone(k->s, k->ssize) &&
400
+ set_up_sd_cone_work_space(c, k) < 0) {
401
+ SCS(finish_cone)(c);
402
+ return SCS_NULL;
403
+ }
404
+ }
405
+ #if EXTRA_VERBOSE > 0
406
+ scs_printf("init_cone complete\n");
407
+ #ifdef MATLAB_MEX_FILE
408
+ mexEvalString("drawnow;");
409
+ #endif
410
+ #endif
411
+ return c;
412
+ }
413
+
414
+ static scs_int project_2x2_sdc(scs_float *X) {
415
+ scs_float a, b, d, l1, l2, x1, x2, rad;
416
+ scs_float sqrt2 = SQRTF(2.0);
417
+ a = X[0];
418
+ b = X[1] / sqrt2;
419
+ d = X[2];
420
+
421
+ if (ABS(b) < 1e-6) { /* diagonal matrix */
422
+ X[0] = MAX(a, 0);
423
+ X[1] = 0;
424
+ X[2] = MAX(d, 0);
425
+ return 0;
426
+ }
427
+
428
+ rad = SQRTF((a - d) * (a - d) + 4 * b * b);
429
+ /* l1 >= l2 always, since rad >= 0 */
430
+ l1 = 0.5 * (a + d + rad);
431
+ l2 = 0.5 * (a + d - rad);
432
+
433
+ #if EXTRA_VERBOSE > 0
434
+ scs_printf(
435
+ "2x2 SD: a = %4f, b = %4f, (X[1] = %4f, X[2] = %4f), d = %4f, "
436
+ "rad = %4f, l1 = %4f, l2 = %4f\n",
437
+ a, b, X[1], X[2], d, rad, l1, l2);
438
+ #endif
439
+
440
+ if (l2 >= 0) { /* both eigs positive already */
441
+ return 0;
442
+ }
443
+ if (l1 <= 0) { /* both eigs negative, set to 0 */
444
+ X[0] = 0;
445
+ X[1] = 0;
446
+ X[2] = 0;
447
+ return 0;
448
+ }
449
+
450
+ /* l1 pos, l2 neg */
451
+ x1 = 1 / SQRTF(1 + (l1 - a) * (l1 - a) / b / b);
452
+ x2 = x1 * (l1 - a) / b;
453
+
454
+ X[0] = l1 * x1 * x1;
455
+ X[1] = (l1 * x1 * x2) * sqrt2;
456
+ X[2] = l1 * x2 * x2;
457
+ return 0;
458
+ }
459
+
460
+ /* size of X is get_sd_cone_size(n) */
461
+ static scs_int proj_semi_definite_cone(scs_float *X, const scs_int n,
462
+ ScsConeWork *c) {
463
+ /* project onto the positive semi-definite cone */
464
+ #ifdef USE_LAPACK
465
+ scs_int i;
466
+ blas_int one = 1;
467
+ blas_int m = 0;
468
+ blas_int nb = (blas_int)n;
469
+ blas_int nb_plus_one = (blas_int)(n + 1);
470
+ blas_int cone_sz = (blas_int)(get_sd_cone_size(n));
471
+
472
+ scs_float sqrt2 = SQRTF(2.0);
473
+ scs_float sqrt2Inv = 1.0 / sqrt2;
474
+ scs_float *Xs = c->Xs;
475
+ scs_float *Z = c->Z;
476
+ scs_float *e = c->e;
477
+ scs_float *work = c->work;
478
+ blas_int *iwork = c->iwork;
479
+ blas_int lwork = c->lwork;
480
+ blas_int liwork = c->liwork;
481
+
482
+ scs_float eig_tol = CONE_TOL; /* iter < 0 ? CONE_TOL : MAX(CONE_TOL, 1 /
483
+ POWF(iter + 1, CONE_RATE)); */
484
+ scs_float zero = 0.0;
485
+ blas_int info = 0;
486
+ scs_float vupper = 0.0;
487
+ #endif
488
+ if (n == 0) {
489
+ return 0;
490
+ }
491
+ if (n == 1) {
492
+ if (X[0] < 0.0) {
493
+ X[0] = 0.0;
494
+ }
495
+ return 0;
496
+ }
497
+ if (n == 2) {
498
+ return project_2x2_sdc(X);
499
+ }
500
+ #ifdef USE_LAPACK
501
+
502
+ memset(Xs, 0, n * n * sizeof(scs_float));
503
+ /* expand lower triangular matrix to full matrix */
504
+ for (i = 0; i < n; ++i) {
505
+ memcpy(&(Xs[i * (n + 1)]), &(X[i * n - ((i - 1) * i) / 2]),
506
+ (n - i) * sizeof(scs_float));
507
+ }
508
+ /*
509
+ rescale so projection works, and matrix norm preserved
510
+ see http://www.seas.ucla.edu/~vandenbe/publications/mlbook.pdf pg 3
511
+ */
512
+ /* scale diags by sqrt(2) */
513
+ BLAS(scal)(&nb, &sqrt2, Xs, &nb_plus_one); /* not n_squared */
514
+
515
+ /* max-eig upper bounded by frobenius norm */
516
+ vupper = 1.1 * sqrt2 *
517
+ BLAS(nrm2)(&cone_sz, X,
518
+ &one); /* mult by factor to make sure is upper bound */
519
+ vupper = MAX(vupper, 0.01);
520
+ #if EXTRA_VERBOSE > 0
521
+ SCS(print_array)(Xs, n * n, "Xs");
522
+ SCS(print_array)(X, get_sd_cone_size(n), "X");
523
+ #endif
524
+ /* Solve eigenproblem, reuse workspaces */
525
+ BLAS(syevr)
526
+ ("Vectors", "VInterval", "Lower", &nb, Xs, &nb, &zero, &vupper, SCS_NULL,
527
+ SCS_NULL, &eig_tol, &m, e, Z, &nb, SCS_NULL, work, &lwork, iwork, &liwork,
528
+ &info);
529
+ #if EXTRA_VERBOSE > 0
530
+ if (info != 0) {
531
+ scs_printf("WARN: LAPACK syevr error, info = %i\n", info);
532
+ }
533
+ scs_printf("syevr input parameter dump:\n");
534
+ scs_printf("nb = %li\n", (long)nb);
535
+ scs_printf("lwork = %li\n", (long)lwork);
536
+ scs_printf("liwork = %li\n", (long)liwork);
537
+ scs_printf("vupper = %f\n", vupper);
538
+ scs_printf("eig_tol = %e\n", eig_tol);
539
+ SCS(print_array)(e, m, "e");
540
+ SCS(print_array)(Z, m * n, "Z");
541
+ #endif
542
+ if (info < 0) {
543
+ return -1;
544
+ }
545
+
546
+ memset(Xs, 0, n * n * sizeof(scs_float));
547
+ for (i = 0; i < m; ++i) {
548
+ scs_float a = e[i];
549
+ BLAS(syr)("Lower", &nb, &a, &(Z[i * n]), &one, Xs, &nb);
550
+ }
551
+ /* scale diags by 1/sqrt(2) */
552
+ BLAS(scal)(&nb, &sqrt2Inv, Xs, &nb_plus_one); /* not n_squared */
553
+ /* extract just lower triangular matrix */
554
+ for (i = 0; i < n; ++i) {
555
+ memcpy(&(X[i * n - ((i - 1) * i) / 2]), &(Xs[i * (n + 1)]),
556
+ (n - i) * sizeof(scs_float));
557
+ }
558
+
559
+ #if EXTRA_VERBOSE > 0
560
+ SCS(print_array)(Xs, n * n, "Xs");
561
+ SCS(print_array)(X, get_sd_cone_size(n), "X");
562
+ #endif
563
+
564
+ #else
565
+ scs_printf(
566
+ "FAILURE: solving SDP with > 2x2 matrices, but no blas/lapack "
567
+ "libraries were linked!\n");
568
+ scs_printf("SCS will return nonsense!\n");
569
+ SCS(scale_array)(X, NAN, n);
570
+ return -1;
571
+ #endif
572
+ return 0;
573
+ }
574
+
575
+ static scs_float pow_calc_x(scs_float r, scs_float xh, scs_float rh,
576
+ scs_float a) {
577
+ scs_float x = 0.5 * (xh + SQRTF(xh * xh + 4 * a * (rh - r) * r));
578
+ return MAX(x, 1e-12);
579
+ }
580
+
581
+ static scs_float pow_calcdxdr(scs_float x, scs_float xh, scs_float rh,
582
+ scs_float r, scs_float a) {
583
+ return a * (rh - 2 * r) / (2 * x - xh);
584
+ }
585
+
586
+ static scs_float pow_calc_f(scs_float x, scs_float y, scs_float r,
587
+ scs_float a) {
588
+ return POWF(x, a) * POWF(y, (1 - a)) - r;
589
+ }
590
+
591
+ static scs_float pow_calc_fp(scs_float x, scs_float y, scs_float dxdr,
592
+ scs_float dydr, scs_float a) {
593
+ return POWF(x, a) * POWF(y, (1 - a)) * (a * dxdr / x + (1 - a) * dydr / y) -
594
+ 1;
595
+ }
596
+
597
+ static void proj_power_cone(scs_float *v, scs_float a) {
598
+ scs_float xh = v[0], yh = v[1], rh = ABS(v[2]);
599
+ scs_float x = 0.0, y = 0.0, r;
600
+ scs_int i;
601
+ /* v in K_a */
602
+ if (xh >= 0 && yh >= 0 &&
603
+ CONE_THRESH + POWF(xh, a) * POWF(yh, (1 - a)) >= rh) {
604
+ return;
605
+ }
606
+
607
+ /* -v in K_a^* */
608
+ if (xh <= 0 && yh <= 0 &&
609
+ CONE_THRESH + POWF(-xh, a) * POWF(-yh, 1 - a) >=
610
+ rh * POWF(a, a) * POWF(1 - a, 1 - a)) {
611
+ v[0] = v[1] = v[2] = 0;
612
+ return;
613
+ }
614
+
615
+ r = rh / 2;
616
+ for (i = 0; i < POW_CONE_MAX_ITERS; ++i) {
617
+ scs_float f, fp, dxdr, dydr;
618
+ x = pow_calc_x(r, xh, rh, a);
619
+ y = pow_calc_x(r, yh, rh, 1 - a);
620
+
621
+ f = pow_calc_f(x, y, r, a);
622
+ if (ABS(f) < CONE_TOL) {
623
+ break;
624
+ }
625
+
626
+ dxdr = pow_calcdxdr(x, xh, rh, r, a);
627
+ dydr = pow_calcdxdr(y, yh, rh, r, (1 - a));
628
+ fp = pow_calc_fp(x, y, dxdr, dydr, a);
629
+
630
+ r = MAX(r - f / fp, 0);
631
+ r = MIN(r, rh);
632
+ }
633
+ v[0] = x;
634
+ v[1] = y;
635
+ v[2] = (v[2] < 0) ? -(r) : (r);
636
+ }
637
+
638
+ /* outward facing cone projection routine, iter is outer algorithm iteration, if
639
+ iter < 0 then iter is ignored
640
+ warm_start contains guess of projection (can be set to SCS_NULL) */
641
+ scs_int SCS(proj_dual_cone)(scs_float *x, const ScsCone *k, ScsConeWork *c,
642
+ const scs_float *warm_start, scs_int iter) {
643
+ scs_int i;
644
+ scs_int count = (k->f ? k->f : 0);
645
+ SCS(timer) cone_timer;
646
+ #if EXTRA_VERBOSE > 0
647
+ SCS(timer) proj_timer;
648
+ SCS(tic)(&proj_timer);
649
+ #endif
650
+ SCS(tic)(&cone_timer);
651
+
652
+ if (k->l) {
653
+ /* project onto positive orthant */
654
+ for (i = count; i < count + k->l; ++i) {
655
+ if (x[i] < 0.0) {
656
+ x[i] = 0.0;
657
+ }
658
+ /* x[i] = (x[i] < 0.0) ? 0.0 : x[i]; */
659
+ }
660
+ count += k->l;
661
+ #if EXTRA_VERBOSE > 0
662
+ scs_printf("pos orthant proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
663
+ SCS(tic)(&proj_timer);
664
+ #endif
665
+ }
666
+
667
+ if (k->qsize && k->q) {
668
+ /* project onto SOC */
669
+ for (i = 0; i < k->qsize; ++i) {
670
+ if (k->q[i] == 0) {
671
+ continue;
672
+ }
673
+ if (k->q[i] == 1) {
674
+ if (x[count] < 0.0) {
675
+ x[count] = 0.0;
676
+ }
677
+ } else {
678
+ scs_float v1 = x[count];
679
+ scs_float s = SCS(norm)(&(x[count + 1]), k->q[i] - 1);
680
+ scs_float alpha = (s + v1) / 2.0;
681
+
682
+ if (s <= v1) { /* do nothing */
683
+ } else if (s <= -v1) {
684
+ memset(&(x[count]), 0, k->q[i] * sizeof(scs_float));
685
+ } else {
686
+ x[count] = alpha;
687
+ SCS(scale_array)(&(x[count + 1]), alpha / s, k->q[i] - 1);
688
+ }
689
+ }
690
+ count += k->q[i];
691
+ }
692
+ #if EXTRA_VERBOSE > 0
693
+ scs_printf("SOC proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
694
+ SCS(tic)(&proj_timer);
695
+ #endif
696
+ }
697
+
698
+ if (k->ssize && k->s) {
699
+ /* project onto PSD cone */
700
+ for (i = 0; i < k->ssize; ++i) {
701
+ #if EXTRA_VERBOSE > 0
702
+ scs_printf("SD proj size %li\n", (long)k->s[i]);
703
+ #endif
704
+ if (k->s[i] == 0) {
705
+ continue;
706
+ }
707
+ if (proj_semi_definite_cone(&(x[count]), k->s[i], c) < 0) {
708
+ return -1;
709
+ }
710
+ count += get_sd_cone_size(k->s[i]);
711
+ }
712
+ #if EXTRA_VERBOSE > 0
713
+ scs_printf("SD proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
714
+ SCS(tic)(&proj_timer);
715
+ #endif
716
+ }
717
+
718
+ if (k->ep) {
719
+ scs_float r, s, t;
720
+ scs_int idx;
721
+ /*
722
+ * exponential cone is not self dual, if s \in K
723
+ * then y \in K^* and so if K is the primal cone
724
+ * here we project onto K^*, via Moreau
725
+ * \Pi_C^*(y) = y + \Pi_C(-y)
726
+ */
727
+ SCS(scale_array)(&(x[count]), -1, 3 * k->ep); /* x = -x; */
728
+ #ifdef _OPENMP
729
+ #pragma omp parallel for private(r, s, t, idx)
730
+ #endif
731
+ for (i = 0; i < k->ep; ++i) {
732
+ idx = count + 3 * i;
733
+ r = x[idx];
734
+ s = x[idx + 1];
735
+ t = x[idx + 2];
736
+
737
+ proj_exp_cone(&(x[idx]));
738
+
739
+ x[idx] -= r;
740
+ x[idx + 1] -= s;
741
+ x[idx + 2] -= t;
742
+ }
743
+ count += 3 * k->ep;
744
+ #if EXTRA_VERBOSE > 0
745
+ scs_printf("EP proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
746
+ SCS(tic)(&proj_timer);
747
+ #endif
748
+ }
749
+
750
+ if (k->ed) {
751
+ /* exponential cone: */
752
+ #ifdef _OPENMP
753
+ #pragma omp parallel for
754
+ #endif
755
+ for (i = 0; i < k->ed; ++i) {
756
+ proj_exp_cone(&(x[count + 3 * i]));
757
+ }
758
+ count += 3 * k->ed;
759
+ #if EXTRA_VERBOSE > 0
760
+ scs_printf("ED proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
761
+ SCS(tic)(&proj_timer);
762
+ #endif
763
+ }
764
+
765
+ if (k->psize && k->p) {
766
+ scs_float v[3];
767
+ scs_int idx;
768
+ /* don't use openmp for power cone
769
+ ifdef _OPENMP
770
+ pragma omp parallel for private(v, idx)
771
+ endif
772
+ */
773
+ for (i = 0; i < k->psize; ++i) {
774
+ idx = count + 3 * i;
775
+ if (k->p[i] <= 0) {
776
+ /* dual power cone */
777
+ proj_power_cone(&(x[idx]), -k->p[i]);
778
+ } else {
779
+ /* primal power cone, using Moreau */
780
+ v[0] = -x[idx];
781
+ v[1] = -x[idx + 1];
782
+ v[2] = -x[idx + 2];
783
+
784
+ proj_power_cone(v, k->p[i]);
785
+
786
+ x[idx] += v[0];
787
+ x[idx + 1] += v[1];
788
+ x[idx + 2] += v[2];
789
+ }
790
+ }
791
+ count += 3 * k->psize;
792
+ #if EXTRA_VERBOSE > 0
793
+ scs_printf("Power cone proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
794
+ SCS(tic)(&proj_timer);
795
+ #endif
796
+ }
797
+ /* project onto OTHER cones */
798
+ if (c) {
799
+ c->total_cone_time += SCS(tocq)(&cone_timer);
800
+ }
801
+ return 0;
802
+ }