svmlightcli 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +85 -0
- data/README.md +49 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bin/svm_classify +23 -0
- data/bin/svm_learn +23 -0
- data/ext/svmlightcli/Makefile +95 -0
- data/ext/svmlightcli/extconf.rb +33 -0
- data/ext/svmlightcli/kernel.h +40 -0
- data/ext/svmlightcli/svm_classify.c +197 -0
- data/ext/svmlightcli/svm_common.c +985 -0
- data/ext/svmlightcli/svm_common.h +301 -0
- data/ext/svmlightcli/svm_hideo.c +1062 -0
- data/ext/svmlightcli/svm_learn.c +4147 -0
- data/ext/svmlightcli/svm_learn.h +169 -0
- data/ext/svmlightcli/svm_learn_main.c +397 -0
- data/lib/svmlightcli.rb +6 -0
- data/lib/svmlightcli/version.rb +3 -0
- data/svmlightcli.gemspec +28 -0
- metadata +113 -0
@@ -0,0 +1,301 @@
|
|
1
|
+
/************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* svm_common.h */
|
4
|
+
/* */
|
5
|
+
/* Definitions and functions used in both svm_learn and svm_classify. */
|
6
|
+
/* */
|
7
|
+
/* Author: Thorsten Joachims */
|
8
|
+
/* Date: 02.07.02 */
|
9
|
+
/* */
|
10
|
+
/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
|
11
|
+
/* */
|
12
|
+
/* This software is available for non-commercial use only. It must */
|
13
|
+
/* not be modified and distributed without prior permission of the */
|
14
|
+
/* author. The author is not responsible for implications from the */
|
15
|
+
/* use of this software. */
|
16
|
+
/* */
|
17
|
+
/************************************************************************/
|
18
|
+
|
19
|
+
#ifndef SVM_COMMON
|
20
|
+
#define SVM_COMMON
|
21
|
+
|
22
|
+
# include <stdio.h>
|
23
|
+
# include <ctype.h>
|
24
|
+
# include <math.h>
|
25
|
+
# include <string.h>
|
26
|
+
# include <stdlib.h>
|
27
|
+
# include <time.h>
|
28
|
+
# include <float.h>
|
29
|
+
|
30
|
+
# define VERSION "V6.02"
|
31
|
+
# define VERSION_DATE "14.08.08"
|
32
|
+
|
33
|
+
# define CFLOAT float /* the type of float to use for caching */
|
34
|
+
/* kernel evaluations. Using float saves */
|
35
|
+
/* us some memory, but you can use double, too */
|
36
|
+
# define FNUM long /* the type used for storing feature ids */
|
37
|
+
# define FVAL float /* the type used for storing feature values */
|
38
|
+
# define MAXFEATNUM 99999999 /* maximum feature number (must be in
|
39
|
+
valid range of FNUM type and long int!) */
|
40
|
+
|
41
|
+
# define LINEAR 0 /* linear kernel type */
|
42
|
+
# define POLY 1 /* polynoial kernel type */
|
43
|
+
# define RBF 2 /* rbf kernel type */
|
44
|
+
# define SIGMOID 3 /* sigmoid kernel type */
|
45
|
+
|
46
|
+
# define CLASSIFICATION 1 /* train classification model */
|
47
|
+
# define REGRESSION 2 /* train regression model */
|
48
|
+
# define RANKING 3 /* train ranking model */
|
49
|
+
# define OPTIMIZATION 4 /* train on general set of constraints */
|
50
|
+
|
51
|
+
# define MAXSHRINK 50000 /* maximum number of shrinking rounds */
|
52
|
+
|
53
|
+
typedef struct word {
|
54
|
+
FNUM wnum; /* word number */
|
55
|
+
FVAL weight; /* word weight */
|
56
|
+
} WORD;
|
57
|
+
|
58
|
+
typedef struct svector {
|
59
|
+
WORD *words; /* The features/values in the vector by
|
60
|
+
increasing feature-number. Feature
|
61
|
+
numbers that are skipped are
|
62
|
+
interpreted as having value zero. */
|
63
|
+
double twonorm_sq; /* The squared euclidian length of the
|
64
|
+
vector. Used to speed up the RBF kernel. */
|
65
|
+
char *userdefined; /* You can put additional information
|
66
|
+
here. This can be useful, if you are
|
67
|
+
implementing your own kernel that
|
68
|
+
does not work with feature/values
|
69
|
+
representations (for example a
|
70
|
+
string kernel). By default,
|
71
|
+
svm-light will put here the string
|
72
|
+
after the # sign from each line of
|
73
|
+
the input file. */
|
74
|
+
long kernel_id; /* Feature vectors with different
|
75
|
+
kernel_id's are orthogonal (ie. the
|
76
|
+
feature number do not match). This
|
77
|
+
is used for computing component
|
78
|
+
kernels for linear constraints which
|
79
|
+
are a sum of several different
|
80
|
+
weight vectors. (currently not
|
81
|
+
implemented). */
|
82
|
+
struct svector *next; /* Let's you set up a list of SVECTOR's
|
83
|
+
for linear constraints which are a
|
84
|
+
sum of multiple feature
|
85
|
+
vectors. List is terminated by
|
86
|
+
NULL. */
|
87
|
+
double factor; /* Factor by which this feature vector
|
88
|
+
is multiplied in the sum. */
|
89
|
+
} SVECTOR;
|
90
|
+
|
91
|
+
typedef struct doc {
|
92
|
+
long docnum; /* Document ID. This has to be the position of
|
93
|
+
the document in the training set array. */
|
94
|
+
long queryid; /* for learning rankings, constraints are
|
95
|
+
generated for documents with the same
|
96
|
+
queryID. */
|
97
|
+
double costfactor; /* Scales the cost of misclassifying this
|
98
|
+
document by this factor. The effect of this
|
99
|
+
value is, that the upper bound on the alpha
|
100
|
+
for this example is scaled by this factor.
|
101
|
+
The factors are set by the feature
|
102
|
+
'cost:<val>' in the training data. */
|
103
|
+
long slackid; /* Index of the slack variable
|
104
|
+
corresponding to this
|
105
|
+
constraint. All constraints with the
|
106
|
+
same slackid share the same slack
|
107
|
+
variable. This can only be used for
|
108
|
+
svm_learn_optimization. */
|
109
|
+
SVECTOR *fvec; /* Feature vector of the example. The
|
110
|
+
feature vector can actually be a
|
111
|
+
list of feature vectors. For
|
112
|
+
example, the list will have two
|
113
|
+
elements, if this DOC is a
|
114
|
+
preference constraint. The one
|
115
|
+
vector that is supposed to be ranked
|
116
|
+
higher, will have a factor of +1,
|
117
|
+
the lower ranked one should have a
|
118
|
+
factor of -1. */
|
119
|
+
} DOC;
|
120
|
+
|
121
|
+
typedef struct learn_parm {
|
122
|
+
long type; /* selects between regression and
|
123
|
+
classification */
|
124
|
+
double svm_c; /* upper bound C on alphas */
|
125
|
+
double eps; /* regression epsilon (eps=1.0 for
|
126
|
+
classification */
|
127
|
+
double svm_costratio; /* factor to multiply C for positive examples */
|
128
|
+
double transduction_posratio;/* fraction of unlabeled examples to be */
|
129
|
+
/* classified as positives */
|
130
|
+
long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0
|
131
|
+
otherwise w*x=0 */
|
132
|
+
long sharedslack; /* if nonzero, it will use the shared
|
133
|
+
slack variable mode in
|
134
|
+
svm_learn_optimization. It requires
|
135
|
+
that the slackid is set for every
|
136
|
+
training example */
|
137
|
+
long svm_maxqpsize; /* size q of working set */
|
138
|
+
long svm_newvarsinqp; /* new variables to enter the working set
|
139
|
+
in each iteration */
|
140
|
+
long kernel_cache_size; /* size of kernel cache in megabytes */
|
141
|
+
double epsilon_crit; /* tolerable error for distances used
|
142
|
+
in stopping criterion */
|
143
|
+
double epsilon_shrink; /* how much a multiplier should be above
|
144
|
+
zero for shrinking */
|
145
|
+
long svm_iter_to_shrink; /* iterations h after which an example can
|
146
|
+
be removed by shrinking */
|
147
|
+
long maxiter; /* number of iterations after which the
|
148
|
+
optimizer terminates, if there was
|
149
|
+
no progress in maxdiff */
|
150
|
+
long remove_inconsistent; /* exclude examples with alpha at C and
|
151
|
+
retrain */
|
152
|
+
long skip_final_opt_check; /* do not check KT-Conditions at the end of
|
153
|
+
optimization for examples removed by
|
154
|
+
shrinking. WARNING: This might lead to
|
155
|
+
sub-optimal solutions! */
|
156
|
+
long compute_loo; /* if nonzero, computes leave-one-out
|
157
|
+
estimates */
|
158
|
+
double rho; /* parameter in xi/alpha-estimates and for
|
159
|
+
pruning leave-one-out range [1..2] */
|
160
|
+
long xa_depth; /* parameter in xi/alpha-estimates upper
|
161
|
+
bounding the number of SV the current
|
162
|
+
alpha_t is distributed over */
|
163
|
+
char predfile[200]; /* file for predicitions on unlabeled examples
|
164
|
+
in transduction */
|
165
|
+
char alphafile[200]; /* file to store optimal alphas in. use
|
166
|
+
empty string if alphas should not be
|
167
|
+
output */
|
168
|
+
|
169
|
+
/* you probably do not want to touch the following */
|
170
|
+
double epsilon_const; /* tolerable error on eq-constraint */
|
171
|
+
double epsilon_a; /* tolerable error on alphas at bounds */
|
172
|
+
double opt_precision; /* precision of solver, set to e.g. 1e-21
|
173
|
+
if you get convergence problems */
|
174
|
+
|
175
|
+
/* the following are only for internal use */
|
176
|
+
long svm_c_steps; /* do so many steps for finding optimal C */
|
177
|
+
double svm_c_factor; /* increase C by this factor every step */
|
178
|
+
double svm_costratio_unlab;
|
179
|
+
double svm_unlabbound;
|
180
|
+
double *svm_cost; /* individual upper bounds for each var */
|
181
|
+
long totwords; /* number of features */
|
182
|
+
} LEARN_PARM;
|
183
|
+
|
184
|
+
typedef struct kernel_parm {
|
185
|
+
long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
|
186
|
+
long poly_degree;
|
187
|
+
double rbf_gamma;
|
188
|
+
double coef_lin;
|
189
|
+
double coef_const;
|
190
|
+
char custom[50]; /* for user supplied kernel */
|
191
|
+
} KERNEL_PARM;
|
192
|
+
|
193
|
+
typedef struct model {
|
194
|
+
long sv_num;
|
195
|
+
long at_upper_bound;
|
196
|
+
double b;
|
197
|
+
DOC **supvec;
|
198
|
+
double *alpha;
|
199
|
+
long *index; /* index from docnum to position in model */
|
200
|
+
long totwords; /* number of features */
|
201
|
+
long totdoc; /* number of training documents */
|
202
|
+
KERNEL_PARM kernel_parm; /* kernel */
|
203
|
+
|
204
|
+
/* the following values are not written to file */
|
205
|
+
double loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
|
206
|
+
double xa_error,xa_recall,xa_precision; /* xi/alpha estimates */
|
207
|
+
double *lin_weights; /* weights for linear case using
|
208
|
+
folding */
|
209
|
+
double maxdiff; /* precision, up to which this
|
210
|
+
model is accurate */
|
211
|
+
} MODEL;
|
212
|
+
|
213
|
+
typedef struct quadratic_program {
|
214
|
+
long opt_n; /* number of variables */
|
215
|
+
long opt_m; /* number of linear equality constraints */
|
216
|
+
double *opt_ce,*opt_ce0; /* linear equality constraints */
|
217
|
+
double *opt_g; /* hessian of objective */
|
218
|
+
double *opt_g0; /* linear part of objective */
|
219
|
+
double *opt_xinit; /* initial value for variables */
|
220
|
+
double *opt_low,*opt_up; /* box constraints */
|
221
|
+
} QP;
|
222
|
+
|
223
|
+
typedef struct kernel_cache {
|
224
|
+
long *index; /* cache some kernel evalutations */
|
225
|
+
CFLOAT *buffer; /* to improve speed */
|
226
|
+
long *invindex;
|
227
|
+
long *active2totdoc;
|
228
|
+
long *totdoc2active;
|
229
|
+
long *lru;
|
230
|
+
long *occu;
|
231
|
+
long elems;
|
232
|
+
long max_elems;
|
233
|
+
long time;
|
234
|
+
long activenum;
|
235
|
+
long buffsize;
|
236
|
+
} KERNEL_CACHE;
|
237
|
+
|
238
|
+
|
239
|
+
typedef struct timing_profile {
|
240
|
+
long time_kernel;
|
241
|
+
long time_opti;
|
242
|
+
long time_shrink;
|
243
|
+
long time_update;
|
244
|
+
long time_model;
|
245
|
+
long time_check;
|
246
|
+
long time_select;
|
247
|
+
} TIMING;
|
248
|
+
|
249
|
+
typedef struct shrink_state {
|
250
|
+
long *active;
|
251
|
+
long *inactive_since;
|
252
|
+
long deactnum;
|
253
|
+
double **a_history; /* for shrinking with non-linear kernel */
|
254
|
+
long maxhistory;
|
255
|
+
double *last_a; /* for shrinking with linear kernel */
|
256
|
+
double *last_lin; /* for shrinking with linear kernel */
|
257
|
+
} SHRINK_STATE;
|
258
|
+
|
259
|
+
double classify_example(MODEL *, DOC *);
|
260
|
+
double classify_example_linear(MODEL *, DOC *);
|
261
|
+
double kernel(KERNEL_PARM *, DOC *, DOC *);
|
262
|
+
double single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
|
263
|
+
double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
|
264
|
+
SVECTOR *create_svector(WORD *, char *, double);
|
265
|
+
SVECTOR *copy_svector(SVECTOR *);
|
266
|
+
void free_svector(SVECTOR *);
|
267
|
+
double sprod_ss(SVECTOR *, SVECTOR *);
|
268
|
+
SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
|
269
|
+
SVECTOR* add_ss(SVECTOR *, SVECTOR *);
|
270
|
+
SVECTOR* add_list_ss(SVECTOR *);
|
271
|
+
void append_svector_list(SVECTOR *a, SVECTOR *b);
|
272
|
+
SVECTOR* smult_s(SVECTOR *, double);
|
273
|
+
int featvec_eq(SVECTOR *, SVECTOR *);
|
274
|
+
double model_length_s(MODEL *, KERNEL_PARM *);
|
275
|
+
void clear_vector_n(double *, long);
|
276
|
+
void add_vector_ns(double *, SVECTOR *, double);
|
277
|
+
double sprod_ns(double *, SVECTOR *);
|
278
|
+
void add_weight_vector_to_linear_model(MODEL *);
|
279
|
+
DOC *create_example(long, long, long, double, SVECTOR *);
|
280
|
+
void free_example(DOC *, long);
|
281
|
+
MODEL *read_model(char *);
|
282
|
+
MODEL *copy_model(MODEL *);
|
283
|
+
void free_model(MODEL *, int);
|
284
|
+
void read_documents(char *, DOC ***, double **, long *, long *);
|
285
|
+
int parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
|
286
|
+
double *read_alphas(char *,long);
|
287
|
+
void nol_ll(char *, long *, long *, long *);
|
288
|
+
long minl(long, long);
|
289
|
+
long maxl(long, long);
|
290
|
+
long get_runtime(void);
|
291
|
+
int space_or_null(int);
|
292
|
+
void *my_malloc(size_t);
|
293
|
+
void copyright_notice(void);
|
294
|
+
# ifdef _MSC_VER
|
295
|
+
int isnan(double);
|
296
|
+
# endif
|
297
|
+
|
298
|
+
extern long verbosity; /* verbosity level (0-4) */
|
299
|
+
extern long kernel_cache_statistic;
|
300
|
+
|
301
|
+
#endif
|
@@ -0,0 +1,1062 @@
|
|
1
|
+
/***********************************************************************/
|
2
|
+
/* */
|
3
|
+
/* svm_hideo.c */
|
4
|
+
/* */
|
5
|
+
/* The Hildreth and D'Espo solver specialized for SVMs. */
|
6
|
+
/* */
|
7
|
+
/* Author: Thorsten Joachims */
|
8
|
+
/* Date: 02.07.02 */
|
9
|
+
/* */
|
10
|
+
/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
|
11
|
+
/* */
|
12
|
+
/* This software is available for non-commercial use only. It must */
|
13
|
+
/* not be modified and distributed without prior permission of the */
|
14
|
+
/* author. The author is not responsible for implications from the */
|
15
|
+
/* use of this software. */
|
16
|
+
/* */
|
17
|
+
/***********************************************************************/
|
18
|
+
|
19
|
+
# include <math.h>
|
20
|
+
# include "svm_common.h"
|
21
|
+
|
22
|
+
/*
|
23
|
+
solve the quadratic programming problem
|
24
|
+
|
25
|
+
minimize g0 * x + 1/2 x' * G * x
|
26
|
+
subject to ce*x = ce0
|
27
|
+
l <= x <= u
|
28
|
+
|
29
|
+
The linear constraint vector ce can only have -1/+1 as entries
|
30
|
+
*/
|
31
|
+
|
32
|
+
/* Common Block Declarations */
|
33
|
+
|
34
|
+
long verbosity;
|
35
|
+
|
36
|
+
# define PRIMAL_OPTIMAL 1
|
37
|
+
# define DUAL_OPTIMAL 2
|
38
|
+
# define MAXITER_EXCEEDED 3
|
39
|
+
# define NAN_SOLUTION 4
|
40
|
+
# define ONLY_ONE_VARIABLE 5
|
41
|
+
|
42
|
+
# define LARGEROUND 0
|
43
|
+
# define SMALLROUND 1
|
44
|
+
|
45
|
+
/* /////////////////////////////////////////////////////////////// */
|
46
|
+
|
47
|
+
# define DEF_PRECISION 1E-5
|
48
|
+
# define DEF_MAX_ITERATIONS 200
|
49
|
+
# define DEF_LINDEP_SENSITIVITY 1E-8
|
50
|
+
# define EPSILON_HIDEO 1E-20
|
51
|
+
# define EPSILON_EQ 1E-5
|
52
|
+
|
53
|
+
double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
|
54
|
+
double *primal=0,*dual=0;
|
55
|
+
long precision_violations=0;
|
56
|
+
double opt_precision=DEF_PRECISION;
|
57
|
+
long maxiter=DEF_MAX_ITERATIONS;
|
58
|
+
double lindep_sensitivity=DEF_LINDEP_SENSITIVITY;
|
59
|
+
double *buffer;
|
60
|
+
long *nonoptimal;
|
61
|
+
|
62
|
+
long smallroundcount=0;
|
63
|
+
long roundnumber=0;
|
64
|
+
|
65
|
+
/* /////////////////////////////////////////////////////////////// */
|
66
|
+
|
67
|
+
void *my_malloc();
|
68
|
+
|
69
|
+
int optimize_hildreth_despo(long,long,double,double,double,long,long,long,double,double *,
|
70
|
+
double *,double *,double *,double *,double *,
|
71
|
+
double *,double *,double *,long *,double *,double *);
|
72
|
+
int solve_dual(long,long,double,double,long,double *,double *,double *,
|
73
|
+
double *,double *,double *,double *,double *,double *,
|
74
|
+
double *,double *,double *,double *,long);
|
75
|
+
|
76
|
+
void linvert_matrix(double *, long, double *, double, long *);
|
77
|
+
void lprint_matrix(double *, long);
|
78
|
+
void ladd_matrix(double *, long, double);
|
79
|
+
void lcopy_matrix(double *, long, double *);
|
80
|
+
void lswitch_rows_matrix(double *, long, long, long);
|
81
|
+
void lswitchrk_matrix(double *, long, long, long);
|
82
|
+
|
83
|
+
double calculate_qp_objective(long, double *, double *, double *);
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm)
|
88
|
+
QP *qp;
|
89
|
+
double *epsilon_crit;
|
90
|
+
long nx; /* Maximum number of variables in QP */
|
91
|
+
double *threshold;
|
92
|
+
LEARN_PARM *learn_parm;
|
93
|
+
/* start the optimizer and return the optimal values */
|
94
|
+
/* The HIDEO optimizer does not necessarily fully solve the problem. */
|
95
|
+
/* Since it requires a strictly positive definite hessian, the solution */
|
96
|
+
/* is restricted to a linear independent subset in case the matrix is */
|
97
|
+
/* only semi-definite. */
|
98
|
+
{
|
99
|
+
long i,j;
|
100
|
+
int result;
|
101
|
+
double eq,progress;
|
102
|
+
|
103
|
+
roundnumber++;
|
104
|
+
|
105
|
+
if(!primal) { /* allocate memory at first call */
|
106
|
+
primal=(double *)my_malloc(sizeof(double)*nx);
|
107
|
+
dual=(double *)my_malloc(sizeof(double)*((nx+1)*2));
|
108
|
+
nonoptimal=(long *)my_malloc(sizeof(long)*(nx));
|
109
|
+
buffer=(double *)my_malloc(sizeof(double)*((nx+1)*2*(nx+1)*2+
|
110
|
+
nx*nx+2*(nx+1)*2+2*nx+1+2*nx+
|
111
|
+
nx+nx+nx*nx));
|
112
|
+
(*threshold)=0;
|
113
|
+
for(i=0;i<nx;i++) {
|
114
|
+
primal[i]=0;
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
if(verbosity>=4) { /* really verbose */
|
119
|
+
printf("\n\n");
|
120
|
+
eq=qp->opt_ce0[0];
|
121
|
+
for(i=0;i<qp->opt_n;i++) {
|
122
|
+
eq+=qp->opt_xinit[i]*qp->opt_ce[i];
|
123
|
+
printf("%f: ",qp->opt_g0[i]);
|
124
|
+
for(j=0;j<qp->opt_n;j++) {
|
125
|
+
printf("%f ",qp->opt_g[i*qp->opt_n+j]);
|
126
|
+
}
|
127
|
+
printf(": a=%.10f < %f",qp->opt_xinit[i],qp->opt_up[i]);
|
128
|
+
printf(": y=%f\n",qp->opt_ce[i]);
|
129
|
+
}
|
130
|
+
if(qp->opt_m) {
|
131
|
+
printf("EQ: %f*x0",qp->opt_ce[0]);
|
132
|
+
for(i=1;i<qp->opt_n;i++) {
|
133
|
+
printf(" + %f*x%ld",qp->opt_ce[i],i);
|
134
|
+
}
|
135
|
+
printf(" = %f\n\n",-qp->opt_ce0[0]);
|
136
|
+
}
|
137
|
+
}
|
138
|
+
|
139
|
+
result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
|
140
|
+
opt_precision,(*epsilon_crit),
|
141
|
+
learn_parm->epsilon_a,maxiter,
|
142
|
+
/* (long)PRIMAL_OPTIMAL, */
|
143
|
+
(long)0, (long)0,
|
144
|
+
lindep_sensitivity,
|
145
|
+
qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
|
146
|
+
qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
|
147
|
+
dual,nonoptimal,buffer,&progress);
|
148
|
+
if(verbosity>=3) {
|
149
|
+
printf("return(%d)...",result);
|
150
|
+
}
|
151
|
+
|
152
|
+
if(learn_parm->totwords < learn_parm->svm_maxqpsize) {
|
153
|
+
/* larger working sets will be linear dependent anyway */
|
154
|
+
learn_parm->svm_maxqpsize=maxl(learn_parm->totwords,(long)2);
|
155
|
+
}
|
156
|
+
|
157
|
+
if(result == NAN_SOLUTION) {
|
158
|
+
lindep_sensitivity*=2; /* throw out linear dependent examples more */
|
159
|
+
/* generously */
|
160
|
+
if(learn_parm->svm_maxqpsize>2) {
|
161
|
+
learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */
|
162
|
+
}
|
163
|
+
precision_violations++;
|
164
|
+
}
|
165
|
+
|
166
|
+
/* take one round of only two variable to get unstuck */
|
167
|
+
if((result != PRIMAL_OPTIMAL) || (!(roundnumber % 31)) || (progress <= 0)) {
|
168
|
+
|
169
|
+
smallroundcount++;
|
170
|
+
|
171
|
+
result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
|
172
|
+
opt_precision,(*epsilon_crit),
|
173
|
+
learn_parm->epsilon_a,(long)maxiter,
|
174
|
+
(long)PRIMAL_OPTIMAL,(long)SMALLROUND,
|
175
|
+
lindep_sensitivity,
|
176
|
+
qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
|
177
|
+
qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
|
178
|
+
dual,nonoptimal,buffer,&progress);
|
179
|
+
if(verbosity>=3) {
|
180
|
+
printf("return_srd(%d)...",result);
|
181
|
+
}
|
182
|
+
|
183
|
+
if(result != PRIMAL_OPTIMAL) {
|
184
|
+
if(result != ONLY_ONE_VARIABLE)
|
185
|
+
precision_violations++;
|
186
|
+
if(result == MAXITER_EXCEEDED)
|
187
|
+
maxiter+=100;
|
188
|
+
if(result == NAN_SOLUTION) {
|
189
|
+
lindep_sensitivity*=2; /* throw out linear dependent examples more */
|
190
|
+
/* generously */
|
191
|
+
/* results not valid, so return inital values */
|
192
|
+
for(i=0;i<qp->opt_n;i++) {
|
193
|
+
primal[i]=qp->opt_xinit[i];
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
}
|
198
|
+
|
199
|
+
|
200
|
+
if(precision_violations > 50) {
|
201
|
+
precision_violations=0;
|
202
|
+
(*epsilon_crit)*=10.0;
|
203
|
+
if(verbosity>=1) {
|
204
|
+
printf("\nWARNING: Relaxing epsilon on KT-Conditions (%f).\n",
|
205
|
+
(*epsilon_crit));
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
if((qp->opt_m>0) && (result != NAN_SOLUTION) && (!isnan(dual[1]-dual[0])))
|
210
|
+
(*threshold)=dual[1]-dual[0];
|
211
|
+
else
|
212
|
+
(*threshold)=0;
|
213
|
+
|
214
|
+
if(verbosity>=4) { /* really verbose */
|
215
|
+
printf("\n\n");
|
216
|
+
eq=qp->opt_ce0[0];
|
217
|
+
for(i=0;i<qp->opt_n;i++) {
|
218
|
+
eq+=primal[i]*qp->opt_ce[i];
|
219
|
+
printf("%f: ",qp->opt_g0[i]);
|
220
|
+
for(j=0;j<qp->opt_n;j++) {
|
221
|
+
printf("%f ",qp->opt_g[i*qp->opt_n+j]);
|
222
|
+
}
|
223
|
+
printf(": a=%.30f",primal[i]);
|
224
|
+
printf(": nonopti=%ld",nonoptimal[i]);
|
225
|
+
printf(": y=%f\n",qp->opt_ce[i]);
|
226
|
+
}
|
227
|
+
printf("eq-constraint=%.30f\n",eq);
|
228
|
+
printf("b=%f\n",(*threshold));
|
229
|
+
printf(" smallroundcount=%ld ",smallroundcount);
|
230
|
+
}
|
231
|
+
|
232
|
+
return(primal);
|
233
|
+
}
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
int optimize_hildreth_despo(n,m,precision,epsilon_crit,epsilon_a,maxiter,goal,
|
238
|
+
smallround,lindep_sensitivity,g,g0,ce,ce0,low,up,
|
239
|
+
primal,init,dual,lin_dependent,buffer,progress)
|
240
|
+
long n; /* number of variables */
|
241
|
+
long m; /* number of linear equality constraints [0,1] */
|
242
|
+
double precision; /* solve at least to this dual precision */
|
243
|
+
double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */
|
244
|
+
double epsilon_a; /* precision of alphas at bounds */
|
245
|
+
long maxiter; /* stop after this many iterations */
|
246
|
+
long goal; /* keep going until goal fulfilled */
|
247
|
+
long smallround; /* use only two variables of steepest descent */
|
248
|
+
double lindep_sensitivity; /* epsilon for detecting linear dependent ex */
|
249
|
+
double *g; /* hessian of objective */
|
250
|
+
double *g0; /* linear part of objective */
|
251
|
+
double *ce,*ce0; /* linear equality constraints */
|
252
|
+
double *low,*up; /* box constraints */
|
253
|
+
double *primal; /* primal variables */
|
254
|
+
double *init; /* initial values of primal */
|
255
|
+
double *dual; /* dual variables */
|
256
|
+
long *lin_dependent;
|
257
|
+
double *buffer;
|
258
|
+
double *progress; /* delta in the objective function between
|
259
|
+
before and after */
|
260
|
+
{
|
261
|
+
long i,j,k,from,to,n_indep,changed;
|
262
|
+
double sum,bmin=0,bmax=0;
|
263
|
+
double *d,*d0,*ig,*dual_old,*temp,*start;
|
264
|
+
double *g0_new,*g_new,*ce_new,*ce0_new,*low_new,*up_new;
|
265
|
+
double add,t;
|
266
|
+
int result;
|
267
|
+
double obj_before,obj_after;
|
268
|
+
long b1,b2;
|
269
|
+
double g0_b1,g0_b2,ce0_b;
|
270
|
+
|
271
|
+
g0_new=&(buffer[0]); /* claim regions of buffer */
|
272
|
+
d=&(buffer[n]);
|
273
|
+
d0=&(buffer[n+(n+m)*2*(n+m)*2]);
|
274
|
+
ce_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2]);
|
275
|
+
ce0_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n]);
|
276
|
+
ig=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m]);
|
277
|
+
dual_old=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n]);
|
278
|
+
low_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2]);
|
279
|
+
up_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n]);
|
280
|
+
start=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n]);
|
281
|
+
g_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n]);
|
282
|
+
temp=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n+n*n]);
|
283
|
+
|
284
|
+
b1=-1;
|
285
|
+
b2=-1;
|
286
|
+
for(i=0;i<n;i++) { /* get variables with steepest feasible descent */
|
287
|
+
sum=g0[i];
|
288
|
+
for(j=0;j<n;j++)
|
289
|
+
sum+=init[j]*g[i*n+j];
|
290
|
+
sum=sum*ce[i];
|
291
|
+
if(((b1==-1) || (sum<bmin))
|
292
|
+
&& (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]<0.0)))
|
293
|
+
&& (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]>0.0)))
|
294
|
+
) {
|
295
|
+
bmin=sum;
|
296
|
+
b1=i;
|
297
|
+
}
|
298
|
+
if(((b2==-1) || (sum>=bmax))
|
299
|
+
&& (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]>0.0)))
|
300
|
+
&& (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]<0.0)))
|
301
|
+
) {
|
302
|
+
bmax=sum;
|
303
|
+
b2=i;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
/* in case of unbiased hyperplane, the previous projection on */
|
307
|
+
/* equality constraint can lead to b1 or b2 being -1. */
|
308
|
+
if((b1 == -1) || (b2 == -1)) {
|
309
|
+
b1=maxl(b1,b2);
|
310
|
+
b2=maxl(b1,b2);
|
311
|
+
}
|
312
|
+
|
313
|
+
for(i=0;i<n;i++) {
|
314
|
+
start[i]=init[i];
|
315
|
+
}
|
316
|
+
|
317
|
+
/* in case both example vectors are linearly dependent */
|
318
|
+
/* WARNING: Assumes that ce[] in {-1,1} */
|
319
|
+
add=0;
|
320
|
+
changed=0;
|
321
|
+
if((b1 != b2) && (m==1)) {
|
322
|
+
for(i=0;i<n;i++) { /* fix other vectors */
|
323
|
+
if(i==b1)
|
324
|
+
g0_b1=g0[i];
|
325
|
+
if(i==b2)
|
326
|
+
g0_b2=g0[i];
|
327
|
+
}
|
328
|
+
ce0_b=ce0[0];
|
329
|
+
for(i=0;i<n;i++) {
|
330
|
+
if((i!=b1) && (i!=b2)) {
|
331
|
+
for(j=0;j<n;j++) {
|
332
|
+
if(j==b1)
|
333
|
+
g0_b1+=start[i]*g[i*n+j];
|
334
|
+
if(j==b2)
|
335
|
+
g0_b2+=start[i]*g[i*n+j];
|
336
|
+
}
|
337
|
+
ce0_b-=(start[i]*ce[i]);
|
338
|
+
}
|
339
|
+
}
|
340
|
+
if((g[b1*n+b2] == g[b1*n+b1]) && (g[b1*n+b2] == g[b2*n+b2])) {
|
341
|
+
/* printf("euqal\n"); */
|
342
|
+
if(ce[b1] == ce[b2]) {
|
343
|
+
if(g0_b1 <= g0_b2) { /* set b1 to upper bound */
|
344
|
+
/* printf("case +=<\n"); */
|
345
|
+
changed=1;
|
346
|
+
t=up[b1]-init[b1];
|
347
|
+
if((init[b2]-low[b2]) < t) {
|
348
|
+
t=init[b2]-low[b2];
|
349
|
+
}
|
350
|
+
start[b1]=init[b1]+t;
|
351
|
+
start[b2]=init[b2]-t;
|
352
|
+
}
|
353
|
+
else if(g0_b1 > g0_b2) { /* set b2 to upper bound */
|
354
|
+
/* printf("case +=>\n"); */
|
355
|
+
changed=1;
|
356
|
+
t=up[b2]-init[b2];
|
357
|
+
if((init[b1]-low[b1]) < t) {
|
358
|
+
t=init[b1]-low[b1];
|
359
|
+
}
|
360
|
+
start[b1]=init[b1]-t;
|
361
|
+
start[b2]=init[b2]+t;
|
362
|
+
}
|
363
|
+
}
|
364
|
+
else if(((g[b1*n+b1]>0) || (g[b2*n+b2]>0))) { /* (ce[b1] != ce[b2]) */
|
365
|
+
/* printf("case +!\n"); */
|
366
|
+
t=((ce[b2]/ce[b1])*g0[b1]-g0[b2]+ce0[0]*(g[b1*n+b1]*ce[b2]/ce[b1]-g[b1*n+b2]/ce[b1]))/((ce[b2]*ce[b2]/(ce[b1]*ce[b1]))*g[b1*n+b1]+g[b2*n+b2]-2*(g[b1*n+b2]*ce[b2]/ce[b1]))-init[b2];
|
367
|
+
changed=1;
|
368
|
+
if((up[b2]-init[b2]) < t) {
|
369
|
+
t=up[b2]-init[b2];
|
370
|
+
}
|
371
|
+
if((init[b2]-low[b2]) < -t) {
|
372
|
+
t=-(init[b2]-low[b2]);
|
373
|
+
}
|
374
|
+
if((up[b1]-init[b1]) < t) {
|
375
|
+
t=(up[b1]-init[b1]);
|
376
|
+
}
|
377
|
+
if((init[b1]-low[b1]) < -t) {
|
378
|
+
t=-(init[b1]-low[b1]);
|
379
|
+
}
|
380
|
+
start[b1]=init[b1]+t;
|
381
|
+
start[b2]=init[b2]+t;
|
382
|
+
}
|
383
|
+
}
|
384
|
+
if((-g[b1*n+b2] == g[b1*n+b1]) && (-g[b1*n+b2] == g[b2*n+b2])) {
|
385
|
+
/* printf("diffeuqal\n"); */
|
386
|
+
if(ce[b1] != ce[b2]) {
|
387
|
+
if((g0_b1+g0_b2) < 0) { /* set b1 and b2 to upper bound */
|
388
|
+
/* printf("case -!<\n"); */
|
389
|
+
changed=1;
|
390
|
+
t=up[b1]-init[b1];
|
391
|
+
if((up[b2]-init[b2]) < t) {
|
392
|
+
t=up[b2]-init[b2];
|
393
|
+
}
|
394
|
+
start[b1]=init[b1]+t;
|
395
|
+
start[b2]=init[b2]+t;
|
396
|
+
}
|
397
|
+
else if((g0_b1+g0_b2) >= 0) { /* set b1 and b2 to lower bound */
|
398
|
+
/* printf("case -!>\n"); */
|
399
|
+
changed=1;
|
400
|
+
t=init[b1]-low[b1];
|
401
|
+
if((init[b2]-low[b2]) < t) {
|
402
|
+
t=init[b2]-low[b2];
|
403
|
+
}
|
404
|
+
start[b1]=init[b1]-t;
|
405
|
+
start[b2]=init[b2]-t;
|
406
|
+
}
|
407
|
+
}
|
408
|
+
else if(((g[b1*n+b1]>0) || (g[b2*n+b2]>0))) { /* (ce[b1]==ce[b2]) */
|
409
|
+
/* printf("case -=\n"); */
|
410
|
+
t=((ce[b2]/ce[b1])*g0[b1]-g0[b2]+ce0[0]*(g[b1*n+b1]*ce[b2]/ce[b1]-g[b1*n+b2]/ce[b1]))/((ce[b2]*ce[b2]/(ce[b1]*ce[b1]))*g[b1*n+b1]+g[b2*n+b2]-2*(g[b1*n+b2]*ce[b2]/ce[b1]))-init[b2];
|
411
|
+
changed=1;
|
412
|
+
if((up[b2]-init[b2]) < t) {
|
413
|
+
t=up[b2]-init[b2];
|
414
|
+
}
|
415
|
+
if((init[b2]-low[b2]) < -t) {
|
416
|
+
t=-(init[b2]-low[b2]);
|
417
|
+
}
|
418
|
+
if((up[b1]-init[b1]) < -t) {
|
419
|
+
t=-(up[b1]-init[b1]);
|
420
|
+
}
|
421
|
+
if((init[b1]-low[b1]) < t) {
|
422
|
+
t=init[b1]-low[b1];
|
423
|
+
}
|
424
|
+
start[b1]=init[b1]-t;
|
425
|
+
start[b2]=init[b2]+t;
|
426
|
+
}
|
427
|
+
}
|
428
|
+
}
|
429
|
+
/* if we have a biased hyperplane, then adding a constant to the */
|
430
|
+
/* hessian does not change the solution. So that is done for examples */
|
431
|
+
/* with zero diagonal entry, since HIDEO cannot handle them. */
|
432
|
+
if((m>0)
|
433
|
+
&& ((fabs(g[b1*n+b1]) < lindep_sensitivity)
|
434
|
+
|| (fabs(g[b2*n+b2]) < lindep_sensitivity))) {
|
435
|
+
/* printf("Case 0\n"); */
|
436
|
+
add+=0.093274;
|
437
|
+
}
|
438
|
+
/* in case both examples are linear dependent */
|
439
|
+
else if((m>0)
|
440
|
+
&& (g[b1*n+b2] != 0 && g[b2*n+b2] != 0)
|
441
|
+
&& (fabs(g[b1*n+b1]/g[b1*n+b2] - g[b1*n+b2]/g[b2*n+b2])
|
442
|
+
< lindep_sensitivity)) {
|
443
|
+
/* printf("Case lindep\n"); */
|
444
|
+
add+=0.078274;
|
445
|
+
}
|
446
|
+
|
447
|
+
/* special case for zero diagonal entry on unbiased hyperplane */
|
448
|
+
if((m==0) && (b1>=0)) {
|
449
|
+
if(fabs(g[b1*n+b1]) < lindep_sensitivity) {
|
450
|
+
/* printf("Case 0b1\n"); */
|
451
|
+
for(i=0;i<n;i++) { /* fix other vectors */
|
452
|
+
if(i==b1)
|
453
|
+
g0_b1=g0[i];
|
454
|
+
}
|
455
|
+
for(i=0;i<n;i++) {
|
456
|
+
if(i!=b1) {
|
457
|
+
for(j=0;j<n;j++) {
|
458
|
+
if(j==b1)
|
459
|
+
g0_b1+=start[i]*g[i*n+j];
|
460
|
+
}
|
461
|
+
}
|
462
|
+
}
|
463
|
+
if(g0_b1<0)
|
464
|
+
start[b1]=up[b1];
|
465
|
+
if(g0_b1>=0)
|
466
|
+
start[b1]=low[b1];
|
467
|
+
}
|
468
|
+
}
|
469
|
+
if((m==0) && (b2>=0)) {
|
470
|
+
if(fabs(g[b2*n+b2]) < lindep_sensitivity) {
|
471
|
+
/* printf("Case 0b2\n"); */
|
472
|
+
for(i=0;i<n;i++) { /* fix other vectors */
|
473
|
+
if(i==b2)
|
474
|
+
g0_b2=g0[i];
|
475
|
+
}
|
476
|
+
for(i=0;i<n;i++) {
|
477
|
+
if(i!=b2) {
|
478
|
+
for(j=0;j<n;j++) {
|
479
|
+
if(j==b2)
|
480
|
+
g0_b2+=start[i]*g[i*n+j];
|
481
|
+
}
|
482
|
+
}
|
483
|
+
}
|
484
|
+
if(g0_b2<0)
|
485
|
+
start[b2]=up[b2];
|
486
|
+
if(g0_b2>=0)
|
487
|
+
start[b2]=low[b2];
|
488
|
+
}
|
489
|
+
}
|
490
|
+
|
491
|
+
/* printf("b1=%ld,b2=%ld\n",b1,b2); */
|
492
|
+
|
493
|
+
lcopy_matrix(g,n,d);
|
494
|
+
if((m==1) && (add>0.0)) {
|
495
|
+
for(j=0;j<n;j++) {
|
496
|
+
for(k=0;k<n;k++) {
|
497
|
+
d[j*n+k]+=add*ce[j]*ce[k];
|
498
|
+
}
|
499
|
+
}
|
500
|
+
}
|
501
|
+
else {
|
502
|
+
add=0.0;
|
503
|
+
}
|
504
|
+
|
505
|
+
if(n>2) { /* switch, so that variables are better mixed */
|
506
|
+
lswitchrk_matrix(d,n,b1,(long)0);
|
507
|
+
if(b2 == 0)
|
508
|
+
lswitchrk_matrix(d,n,b1,(long)1);
|
509
|
+
else
|
510
|
+
lswitchrk_matrix(d,n,b2,(long)1);
|
511
|
+
}
|
512
|
+
if(smallround == SMALLROUND) {
|
513
|
+
for(i=2;i<n;i++) {
|
514
|
+
lin_dependent[i]=1;
|
515
|
+
}
|
516
|
+
if(m>0) { /* for biased hyperplane, pick two variables */
|
517
|
+
lin_dependent[0]=0;
|
518
|
+
lin_dependent[1]=0;
|
519
|
+
}
|
520
|
+
else { /* for unbiased hyperplane, pick only one variable */
|
521
|
+
lin_dependent[0]=smallroundcount % 2;
|
522
|
+
lin_dependent[1]=(smallroundcount+1) % 2;
|
523
|
+
}
|
524
|
+
}
|
525
|
+
else {
|
526
|
+
for(i=0;i<n;i++) {
|
527
|
+
lin_dependent[i]=0;
|
528
|
+
}
|
529
|
+
}
|
530
|
+
linvert_matrix(d,n,ig,lindep_sensitivity,lin_dependent);
|
531
|
+
if(n>2) { /* now switch back */
|
532
|
+
if(b2 == 0) {
|
533
|
+
lswitchrk_matrix(ig,n,b1,(long)1);
|
534
|
+
i=lin_dependent[1];
|
535
|
+
lin_dependent[1]=lin_dependent[b1];
|
536
|
+
lin_dependent[b1]=i;
|
537
|
+
}
|
538
|
+
else {
|
539
|
+
lswitchrk_matrix(ig,n,b2,(long)1);
|
540
|
+
i=lin_dependent[1];
|
541
|
+
lin_dependent[1]=lin_dependent[b2];
|
542
|
+
lin_dependent[b2]=i;
|
543
|
+
}
|
544
|
+
lswitchrk_matrix(ig,n,b1,(long)0);
|
545
|
+
i=lin_dependent[0];
|
546
|
+
lin_dependent[0]=lin_dependent[b1];
|
547
|
+
lin_dependent[b1]=i;
|
548
|
+
}
|
549
|
+
/* lprint_matrix(d,n); */
|
550
|
+
/* lprint_matrix(ig,n); */
|
551
|
+
|
552
|
+
lcopy_matrix(g,n,g_new); /* restore g_new matrix */
|
553
|
+
if(add>0)
|
554
|
+
for(j=0;j<n;j++) {
|
555
|
+
for(k=0;k<n;k++) {
|
556
|
+
g_new[j*n+k]+=add*ce[j]*ce[k];
|
557
|
+
}
|
558
|
+
}
|
559
|
+
|
560
|
+
for(i=0;i<n;i++) { /* fix linear dependent vectors */
|
561
|
+
g0_new[i]=g0[i]+add*ce0[0]*ce[i];
|
562
|
+
}
|
563
|
+
if(m>0) ce0_new[0]=-ce0[0];
|
564
|
+
for(i=0;i<n;i++) { /* fix linear dependent vectors */
|
565
|
+
if(lin_dependent[i]) {
|
566
|
+
for(j=0;j<n;j++) {
|
567
|
+
if(!lin_dependent[j]) {
|
568
|
+
g0_new[j]+=start[i]*g_new[i*n+j];
|
569
|
+
}
|
570
|
+
}
|
571
|
+
if(m>0) ce0_new[0]-=(start[i]*ce[i]);
|
572
|
+
}
|
573
|
+
}
|
574
|
+
from=0; /* remove linear dependent vectors */
|
575
|
+
to=0;
|
576
|
+
n_indep=0;
|
577
|
+
for(i=0;i<n;i++) {
|
578
|
+
if(!lin_dependent[i]) {
|
579
|
+
g0_new[n_indep]=g0_new[i];
|
580
|
+
ce_new[n_indep]=ce[i];
|
581
|
+
low_new[n_indep]=low[i];
|
582
|
+
up_new[n_indep]=up[i];
|
583
|
+
primal[n_indep]=start[i];
|
584
|
+
n_indep++;
|
585
|
+
}
|
586
|
+
for(j=0;j<n;j++) {
|
587
|
+
if((!lin_dependent[i]) && (!lin_dependent[j])) {
|
588
|
+
ig[to]=ig[from];
|
589
|
+
g_new[to]=g_new[from];
|
590
|
+
to++;
|
591
|
+
}
|
592
|
+
from++;
|
593
|
+
}
|
594
|
+
}
|
595
|
+
|
596
|
+
if(verbosity>=3) {
|
597
|
+
printf("real_qp_size(%ld)...",n_indep);
|
598
|
+
}
|
599
|
+
|
600
|
+
/* cannot optimize with only one variable */
|
601
|
+
if((n_indep<=1) && (m>0) && (!changed)) {
|
602
|
+
for(i=n-1;i>=0;i--) {
|
603
|
+
primal[i]=init[i];
|
604
|
+
}
|
605
|
+
return((int)ONLY_ONE_VARIABLE);
|
606
|
+
}
|
607
|
+
|
608
|
+
if((!changed) || (n_indep>1)) {
|
609
|
+
result=solve_dual(n_indep,m,precision,epsilon_crit,maxiter,g_new,g0_new,
|
610
|
+
ce_new,ce0_new,low_new,up_new,primal,d,d0,ig,
|
611
|
+
dual,dual_old,temp,goal);
|
612
|
+
}
|
613
|
+
else {
|
614
|
+
result=PRIMAL_OPTIMAL;
|
615
|
+
}
|
616
|
+
|
617
|
+
j=n_indep;
|
618
|
+
for(i=n-1;i>=0;i--) {
|
619
|
+
if(!lin_dependent[i]) {
|
620
|
+
j--;
|
621
|
+
primal[i]=primal[j];
|
622
|
+
}
|
623
|
+
else {
|
624
|
+
primal[i]=start[i]; /* leave as is */
|
625
|
+
}
|
626
|
+
temp[i]=primal[i];
|
627
|
+
}
|
628
|
+
|
629
|
+
obj_before=calculate_qp_objective(n,g,g0,init);
|
630
|
+
obj_after=calculate_qp_objective(n,g,g0,primal);
|
631
|
+
(*progress)=obj_before-obj_after;
|
632
|
+
if(verbosity>=3) {
|
633
|
+
printf("before(%.30f)...after(%.30f)...result_sd(%d)...",
|
634
|
+
obj_before,obj_after,result);
|
635
|
+
}
|
636
|
+
|
637
|
+
return((int)result);
|
638
|
+
}
|
639
|
+
|
640
|
+
|
641
|
+
int solve_dual(n,m,precision,epsilon_crit,maxiter,g,g0,ce,ce0,low,up,primal,
|
642
|
+
d,d0,ig,dual,dual_old,temp,goal)
|
643
|
+
/* Solves the dual using the method of Hildreth and D'Espo. */
|
644
|
+
/* Can only handle problems with zero or exactly one */
|
645
|
+
/* equality constraints. */
|
646
|
+
|
647
|
+
long n; /* number of variables */
|
648
|
+
long m; /* number of linear equality constraints */
|
649
|
+
double precision; /* solve at least to this dual precision */
|
650
|
+
double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */
|
651
|
+
long maxiter; /* stop after that many iterations */
|
652
|
+
double *g;
|
653
|
+
double *g0; /* linear part of objective */
|
654
|
+
double *ce,*ce0; /* linear equality constraints */
|
655
|
+
double *low,*up; /* box constraints */
|
656
|
+
double *primal; /* variables (with initial values) */
|
657
|
+
double *d,*d0,*ig,*dual,*dual_old,*temp; /* buffer */
|
658
|
+
long goal;
|
659
|
+
{
|
660
|
+
long i,j,k,iter;
|
661
|
+
double sum,w,maxviol,viol,temp1,temp2,isnantest;
|
662
|
+
double model_b,dist;
|
663
|
+
long retrain,maxfaktor,primal_optimal=0,at_bound,scalemaxiter;
|
664
|
+
double epsilon_a=1E-15,epsilon_hideo;
|
665
|
+
double eq;
|
666
|
+
|
667
|
+
if((m<0) || (m>1))
|
668
|
+
perror("SOLVE DUAL: inappropriate number of eq-constrains!");
|
669
|
+
|
670
|
+
/*
|
671
|
+
printf("\n");
|
672
|
+
for(i=0;i<n;i++) {
|
673
|
+
printf("%f: ",g0[i]);
|
674
|
+
for(j=0;j<n;j++) {
|
675
|
+
printf("%f ",g[i*n+j]);
|
676
|
+
}
|
677
|
+
printf(": a=%.30f",primal[i]);
|
678
|
+
printf(": y=%f\n",ce[i]);
|
679
|
+
}
|
680
|
+
*/
|
681
|
+
|
682
|
+
for(i=0;i<2*(n+m);i++) {
|
683
|
+
dual[i]=0;
|
684
|
+
dual_old[i]=0;
|
685
|
+
}
|
686
|
+
for(i=0;i<n;i++) {
|
687
|
+
for(j=0;j<n;j++) { /* dual hessian for box constraints */
|
688
|
+
d[i*2*(n+m)+j]=ig[i*n+j];
|
689
|
+
d[(i+n)*2*(n+m)+j]=-ig[i*n+j];
|
690
|
+
d[i*2*(n+m)+j+n]=-ig[i*n+j];
|
691
|
+
d[(i+n)*2*(n+m)+j+n]=ig[i*n+j];
|
692
|
+
}
|
693
|
+
if(m>0) {
|
694
|
+
sum=0; /* dual hessian for eq constraints */
|
695
|
+
for(j=0;j<n;j++) {
|
696
|
+
sum+=(ce[j]*ig[i*n+j]);
|
697
|
+
}
|
698
|
+
d[i*2*(n+m)+2*n]=sum;
|
699
|
+
d[i*2*(n+m)+2*n+1]=-sum;
|
700
|
+
d[(n+i)*2*(n+m)+2*n]=-sum;
|
701
|
+
d[(n+i)*2*(n+m)+2*n+1]=sum;
|
702
|
+
d[(n+n)*2*(n+m)+i]=sum;
|
703
|
+
d[(n+n+1)*2*(n+m)+i]=-sum;
|
704
|
+
d[(n+n)*2*(n+m)+(n+i)]=-sum;
|
705
|
+
d[(n+n+1)*2*(n+m)+(n+i)]=sum;
|
706
|
+
|
707
|
+
sum=0;
|
708
|
+
for(j=0;j<n;j++) {
|
709
|
+
for(k=0;k<n;k++) {
|
710
|
+
sum+=(ce[k]*ce[j]*ig[j*n+k]);
|
711
|
+
}
|
712
|
+
}
|
713
|
+
d[(n+n)*2*(n+m)+2*n]=sum;
|
714
|
+
d[(n+n)*2*(n+m)+2*n+1]=-sum;
|
715
|
+
d[(n+n+1)*2*(n+m)+2*n]=-sum;
|
716
|
+
d[(n+n+1)*2*(n+m)+2*n+1]=sum;
|
717
|
+
}
|
718
|
+
}
|
719
|
+
|
720
|
+
for(i=0;i<n;i++) { /* dual linear component for the box constraints */
|
721
|
+
w=0;
|
722
|
+
for(j=0;j<n;j++) {
|
723
|
+
w+=(ig[i*n+j]*g0[j]);
|
724
|
+
}
|
725
|
+
d0[i]=up[i]+w;
|
726
|
+
d0[i+n]=-low[i]-w;
|
727
|
+
}
|
728
|
+
|
729
|
+
if(m>0) {
|
730
|
+
sum=0; /* dual linear component for eq constraints */
|
731
|
+
for(j=0;j<n;j++) {
|
732
|
+
for(k=0;k<n;k++) {
|
733
|
+
sum+=(ce[k]*ig[k*n+j]*g0[j]);
|
734
|
+
}
|
735
|
+
}
|
736
|
+
d0[2*n]=ce0[0]+sum;
|
737
|
+
d0[2*n+1]=-ce0[0]-sum;
|
738
|
+
}
|
739
|
+
|
740
|
+
maxviol=999999;
|
741
|
+
iter=0;
|
742
|
+
retrain=1;
|
743
|
+
maxfaktor=1;
|
744
|
+
scalemaxiter=maxiter/5;
|
745
|
+
while((retrain) && (maxviol > 0) && (iter < (scalemaxiter*maxfaktor))) {
|
746
|
+
iter++;
|
747
|
+
|
748
|
+
while((maxviol > precision) && (iter < (scalemaxiter*maxfaktor))) {
|
749
|
+
iter++;
|
750
|
+
maxviol=0;
|
751
|
+
for(i=0;i<2*(n+m);i++) {
|
752
|
+
sum=d0[i];
|
753
|
+
for(j=0;j<2*(n+m);j++) {
|
754
|
+
sum+=d[i*2*(n+m)+j]*dual_old[j];
|
755
|
+
}
|
756
|
+
sum-=d[i*2*(n+m)+i]*dual_old[i];
|
757
|
+
dual[i]=-sum/d[i*2*(n+m)+i];
|
758
|
+
if(dual[i]<0) dual[i]=0;
|
759
|
+
|
760
|
+
viol=fabs(dual[i]-dual_old[i]);
|
761
|
+
if(viol>maxviol)
|
762
|
+
maxviol=viol;
|
763
|
+
dual_old[i]=dual[i];
|
764
|
+
}
|
765
|
+
/*
|
766
|
+
printf("%d) maxviol=%20f precision=%f\n",iter,maxviol,precision);
|
767
|
+
*/
|
768
|
+
}
|
769
|
+
|
770
|
+
if(m>0) {
|
771
|
+
for(i=0;i<n;i++) {
|
772
|
+
temp[i]=dual[i]-dual[i+n]+ce[i]*(dual[n+n]-dual[n+n+1])+g0[i];
|
773
|
+
}
|
774
|
+
}
|
775
|
+
else {
|
776
|
+
for(i=0;i<n;i++) {
|
777
|
+
temp[i]=dual[i]-dual[i+n]+g0[i];
|
778
|
+
}
|
779
|
+
}
|
780
|
+
for(i=0;i<n;i++) {
|
781
|
+
primal[i]=0; /* calc value of primal variables */
|
782
|
+
for(j=0;j<n;j++) {
|
783
|
+
primal[i]+=ig[i*n+j]*temp[j];
|
784
|
+
}
|
785
|
+
primal[i]*=-1.0;
|
786
|
+
if(primal[i]<=(low[i])) { /* clip conservatively */
|
787
|
+
primal[i]=low[i];
|
788
|
+
}
|
789
|
+
else if(primal[i]>=(up[i])) {
|
790
|
+
primal[i]=up[i];
|
791
|
+
}
|
792
|
+
}
|
793
|
+
|
794
|
+
if(m>0)
|
795
|
+
model_b=dual[n+n+1]-dual[n+n];
|
796
|
+
else
|
797
|
+
model_b=0;
|
798
|
+
|
799
|
+
epsilon_hideo=EPSILON_HIDEO;
|
800
|
+
for(i=0;i<n;i++) { /* check precision of alphas */
|
801
|
+
dist=-model_b*ce[i];
|
802
|
+
dist+=(g0[i]+1.0);
|
803
|
+
for(j=0;j<i;j++) {
|
804
|
+
dist+=(primal[j]*g[j*n+i]);
|
805
|
+
}
|
806
|
+
for(j=i;j<n;j++) {
|
807
|
+
dist+=(primal[j]*g[i*n+j]);
|
808
|
+
}
|
809
|
+
if((primal[i]<(up[i]-epsilon_hideo)) && (dist < (1.0-epsilon_crit))) {
|
810
|
+
epsilon_hideo=(up[i]-primal[i])*2.0;
|
811
|
+
}
|
812
|
+
else if((primal[i]>(low[i]+epsilon_hideo)) &&(dist>(1.0+epsilon_crit))) {
|
813
|
+
epsilon_hideo=(primal[i]-low[i])*2.0;
|
814
|
+
}
|
815
|
+
}
|
816
|
+
/* printf("\nEPSILON_HIDEO=%.30f\n",epsilon_hideo); */
|
817
|
+
|
818
|
+
for(i=0;i<n;i++) { /* clip alphas to bounds */
|
819
|
+
if(primal[i]<=(low[i]+epsilon_hideo)) {
|
820
|
+
primal[i]=low[i];
|
821
|
+
}
|
822
|
+
else if(primal[i]>=(up[i]-epsilon_hideo)) {
|
823
|
+
primal[i]=up[i];
|
824
|
+
}
|
825
|
+
}
|
826
|
+
|
827
|
+
retrain=0;
|
828
|
+
primal_optimal=1;
|
829
|
+
at_bound=0;
|
830
|
+
for(i=0;(i<n);i++) { /* check primal KT-Conditions */
|
831
|
+
dist=-model_b*ce[i];
|
832
|
+
dist+=(g0[i]+1.0);
|
833
|
+
for(j=0;j<i;j++) {
|
834
|
+
dist+=(primal[j]*g[j*n+i]);
|
835
|
+
}
|
836
|
+
for(j=i;j<n;j++) {
|
837
|
+
dist+=(primal[j]*g[i*n+j]);
|
838
|
+
}
|
839
|
+
if((primal[i]<(up[i]-epsilon_a)) && (dist < (1.0-epsilon_crit))) {
|
840
|
+
retrain=1;
|
841
|
+
primal_optimal=0;
|
842
|
+
}
|
843
|
+
else if((primal[i]>(low[i]+epsilon_a)) && (dist > (1.0+epsilon_crit))) {
|
844
|
+
retrain=1;
|
845
|
+
primal_optimal=0;
|
846
|
+
}
|
847
|
+
if((primal[i]<=(low[i]+epsilon_a)) || (primal[i]>=(up[i]-epsilon_a))) {
|
848
|
+
at_bound++;
|
849
|
+
}
|
850
|
+
/* printf("HIDEOtemp: a[%ld]=%.30f, dist=%.6f, b=%f, at_bound=%ld\n",i,primal[i],dist,model_b,at_bound); */
|
851
|
+
}
|
852
|
+
if(m>0) {
|
853
|
+
eq=-ce0[0]; /* check precision of eq-constraint */
|
854
|
+
for(i=0;i<n;i++) {
|
855
|
+
eq+=(ce[i]*primal[i]);
|
856
|
+
}
|
857
|
+
if((EPSILON_EQ < fabs(eq))
|
858
|
+
/*
|
859
|
+
&& !((goal==PRIMAL_OPTIMAL)
|
860
|
+
&& (at_bound==n)) */
|
861
|
+
) {
|
862
|
+
retrain=1;
|
863
|
+
primal_optimal=0;
|
864
|
+
}
|
865
|
+
/* printf("\n eq=%.30f ce0=%f at-bound=%ld\n",eq,ce0[0],at_bound); */
|
866
|
+
}
|
867
|
+
|
868
|
+
if(retrain) {
|
869
|
+
precision/=10;
|
870
|
+
if(((goal == PRIMAL_OPTIMAL) && (maxfaktor < 50000))
|
871
|
+
|| (maxfaktor < 5)) {
|
872
|
+
maxfaktor++;
|
873
|
+
}
|
874
|
+
}
|
875
|
+
}
|
876
|
+
|
877
|
+
if(!primal_optimal) {
|
878
|
+
for(i=0;i<n;i++) {
|
879
|
+
primal[i]=0; /* calc value of primal variables */
|
880
|
+
for(j=0;j<n;j++) {
|
881
|
+
primal[i]+=ig[i*n+j]*temp[j];
|
882
|
+
}
|
883
|
+
primal[i]*=-1.0;
|
884
|
+
if(primal[i]<=(low[i]+epsilon_a)) { /* clip conservatively */
|
885
|
+
primal[i]=low[i];
|
886
|
+
}
|
887
|
+
else if(primal[i]>=(up[i]-epsilon_a)) {
|
888
|
+
primal[i]=up[i];
|
889
|
+
}
|
890
|
+
}
|
891
|
+
}
|
892
|
+
|
893
|
+
isnantest=0;
|
894
|
+
for(i=0;i<n;i++) { /* check for isnan */
|
895
|
+
isnantest+=primal[i];
|
896
|
+
}
|
897
|
+
|
898
|
+
if(m>0) {
|
899
|
+
temp1=dual[n+n+1]; /* copy the dual variables for the eq */
|
900
|
+
temp2=dual[n+n]; /* constraints to a handier location */
|
901
|
+
for(i=n+n+1;i>=2;i--) {
|
902
|
+
dual[i]=dual[i-2];
|
903
|
+
}
|
904
|
+
dual[0]=temp2;
|
905
|
+
dual[1]=temp1;
|
906
|
+
isnantest+=temp1+temp2;
|
907
|
+
}
|
908
|
+
|
909
|
+
if(isnan(isnantest)) {
|
910
|
+
return((int)NAN_SOLUTION);
|
911
|
+
}
|
912
|
+
else if(primal_optimal) {
|
913
|
+
return((int)PRIMAL_OPTIMAL);
|
914
|
+
}
|
915
|
+
else if(maxviol == 0.0) {
|
916
|
+
return((int)DUAL_OPTIMAL);
|
917
|
+
}
|
918
|
+
else {
|
919
|
+
return((int)MAXITER_EXCEEDED);
|
920
|
+
}
|
921
|
+
}
|
922
|
+
|
923
|
+
|
924
|
+
void linvert_matrix(matrix,depth,inverse,lindep_sensitivity,lin_dependent)
|
925
|
+
double *matrix;
|
926
|
+
long depth;
|
927
|
+
double *inverse,lindep_sensitivity;
|
928
|
+
long *lin_dependent; /* indicates the active parts of matrix on
|
929
|
+
input and output*/
|
930
|
+
{
|
931
|
+
long i,j,k;
|
932
|
+
double factor;
|
933
|
+
|
934
|
+
for(i=0;i<depth;i++) {
|
935
|
+
/* lin_dependent[i]=0; */
|
936
|
+
for(j=0;j<depth;j++) {
|
937
|
+
inverse[i*depth+j]=0.0;
|
938
|
+
}
|
939
|
+
inverse[i*depth+i]=1.0;
|
940
|
+
}
|
941
|
+
for(i=0;i<depth;i++) {
|
942
|
+
if(lin_dependent[i] || (fabs(matrix[i*depth+i])<lindep_sensitivity)) {
|
943
|
+
lin_dependent[i]=1;
|
944
|
+
}
|
945
|
+
else {
|
946
|
+
for(j=i+1;j<depth;j++) {
|
947
|
+
factor=matrix[j*depth+i]/matrix[i*depth+i];
|
948
|
+
for(k=i;k<depth;k++) {
|
949
|
+
matrix[j*depth+k]-=(factor*matrix[i*depth+k]);
|
950
|
+
}
|
951
|
+
for(k=0;k<depth;k++) {
|
952
|
+
inverse[j*depth+k]-=(factor*inverse[i*depth+k]);
|
953
|
+
}
|
954
|
+
}
|
955
|
+
}
|
956
|
+
}
|
957
|
+
for(i=depth-1;i>=0;i--) {
|
958
|
+
if(!lin_dependent[i]) {
|
959
|
+
factor=1/matrix[i*depth+i];
|
960
|
+
for(k=0;k<depth;k++) {
|
961
|
+
inverse[i*depth+k]*=factor;
|
962
|
+
}
|
963
|
+
matrix[i*depth+i]=1;
|
964
|
+
for(j=i-1;j>=0;j--) {
|
965
|
+
factor=matrix[j*depth+i];
|
966
|
+
matrix[j*depth+i]=0;
|
967
|
+
for(k=0;k<depth;k++) {
|
968
|
+
inverse[j*depth+k]-=(factor*inverse[i*depth+k]);
|
969
|
+
}
|
970
|
+
}
|
971
|
+
}
|
972
|
+
}
|
973
|
+
}
|
974
|
+
|
975
|
+
void lprint_matrix(matrix,depth)
|
976
|
+
double *matrix;
|
977
|
+
long depth;
|
978
|
+
{
|
979
|
+
long i,j;
|
980
|
+
for(i=0;i<depth;i++) {
|
981
|
+
for(j=0;j<depth;j++) {
|
982
|
+
printf("%5.2f ",(double)(matrix[i*depth+j]));
|
983
|
+
}
|
984
|
+
printf("\n");
|
985
|
+
}
|
986
|
+
printf("\n");
|
987
|
+
}
|
988
|
+
|
989
|
+
void ladd_matrix(matrix,depth,scalar)
|
990
|
+
double *matrix;
|
991
|
+
long depth;
|
992
|
+
double scalar;
|
993
|
+
{
|
994
|
+
long i,j;
|
995
|
+
for(i=0;i<depth;i++) {
|
996
|
+
for(j=0;j<depth;j++) {
|
997
|
+
matrix[i*depth+j]+=scalar;
|
998
|
+
}
|
999
|
+
}
|
1000
|
+
}
|
1001
|
+
|
1002
|
+
void lcopy_matrix(matrix,depth,matrix2)
|
1003
|
+
double *matrix;
|
1004
|
+
long depth;
|
1005
|
+
double *matrix2;
|
1006
|
+
{
|
1007
|
+
long i;
|
1008
|
+
|
1009
|
+
for(i=0;i<(depth)*(depth);i++) {
|
1010
|
+
matrix2[i]=matrix[i];
|
1011
|
+
}
|
1012
|
+
}
|
1013
|
+
|
1014
|
+
void lswitch_rows_matrix(matrix,depth,r1,r2)
|
1015
|
+
double *matrix;
|
1016
|
+
long depth,r1,r2;
|
1017
|
+
{
|
1018
|
+
long i;
|
1019
|
+
double temp;
|
1020
|
+
|
1021
|
+
for(i=0;i<depth;i++) {
|
1022
|
+
temp=matrix[r1*depth+i];
|
1023
|
+
matrix[r1*depth+i]=matrix[r2*depth+i];
|
1024
|
+
matrix[r2*depth+i]=temp;
|
1025
|
+
}
|
1026
|
+
}
|
1027
|
+
|
1028
|
+
void lswitchrk_matrix(matrix,depth,rk1,rk2)
|
1029
|
+
double *matrix;
|
1030
|
+
long depth,rk1,rk2;
|
1031
|
+
{
|
1032
|
+
long i;
|
1033
|
+
double temp;
|
1034
|
+
|
1035
|
+
for(i=0;i<depth;i++) {
|
1036
|
+
temp=matrix[rk1*depth+i];
|
1037
|
+
matrix[rk1*depth+i]=matrix[rk2*depth+i];
|
1038
|
+
matrix[rk2*depth+i]=temp;
|
1039
|
+
}
|
1040
|
+
for(i=0;i<depth;i++) {
|
1041
|
+
temp=matrix[i*depth+rk1];
|
1042
|
+
matrix[i*depth+rk1]=matrix[i*depth+rk2];
|
1043
|
+
matrix[i*depth+rk2]=temp;
|
1044
|
+
}
|
1045
|
+
}
|
1046
|
+
|
1047
|
+
double calculate_qp_objective(opt_n,opt_g,opt_g0,alpha)
|
1048
|
+
long opt_n;
|
1049
|
+
double *opt_g,*opt_g0,*alpha;
|
1050
|
+
{
|
1051
|
+
double obj;
|
1052
|
+
long i,j;
|
1053
|
+
obj=0; /* calculate objective */
|
1054
|
+
for(i=0;i<opt_n;i++) {
|
1055
|
+
obj+=(opt_g0[i]*alpha[i]);
|
1056
|
+
obj+=(0.5*alpha[i]*alpha[i]*opt_g[i*opt_n+i]);
|
1057
|
+
for(j=0;j<i;j++) {
|
1058
|
+
obj+=(alpha[j]*alpha[i]*opt_g[j*opt_n+i]);
|
1059
|
+
}
|
1060
|
+
}
|
1061
|
+
return(obj);
|
1062
|
+
}
|