tomz-libsvm-ruby-swig 0.2.0

data/AUTHORS ADDED
@@ -0,0 +1,3 @@
1
+ Tom Zeng <tom.z.zeng@gmail.com> (Ruby SWIG interface to LIBSVM)
2
+ FeedbackMine <feedbackmine@feedbackmine.com> (gem)
3
+ Chih-Chung Chang and Chih-Jen Lin <cjlin@csie.ntu.edu.tw> (developers of LIBSVM)
data/COPYING ADDED
@@ -0,0 +1,24 @@
1
+ == LICENSE:
2
+
3
+ (The MIT License)
4
+
5
+ Copyright (c) 2009 Tom Zeng
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining
8
+ a copy of this software and associated documentation files (the
9
+ 'Software'), to deal in the Software without restriction, including
10
+ without limitation the rights to use, copy, modify, merge, publish,
11
+ distribute, sublicense, and/or sell copies of the Software, and to
12
+ permit persons to whom the Software is furnished to do so, subject to
13
+ the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be
16
+ included in all copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
19
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ 2009-03-04 Tom Zeng (tom.z.zeng@gmail.com)
2
+ * adopted the gem spec by feedbackmine.com
3
+
data/Manifest.txt ADDED
@@ -0,0 +1,11 @@
1
+ History.txt
2
+ COPYING
3
+ AUTHORS
4
+ Manifest.txt
5
+ README.txt
6
+ Rakefile
7
+ lib/svm.rb
8
+ ext/svmc_wrap.cxx
9
+ ext/svm.cpp
10
+ ext/svm.h
11
+ ext/extconf.rb
data/README.txt ADDED
@@ -0,0 +1,55 @@
1
+ = libsvm-ruby-swig
2
+
3
+ * Ruby interface to LIBSVM (using SWIG)
4
+ * http://www.tomzconsulting.com
5
+
6
+ == DESCRIPTION:
7
+
8
+ This is the Ruby port of the LIBSVM Python SWIG (Simplified Wrapper and
9
+ Interface Generator) interface.
10
+
11
+ A modified version of LIBSVM 2.88 is included; it contains changes merged from:
12
+ git://github.com/npinto/libsvm-2.88_objs-np.git
13
+ git://github.com/alanfalloon/libsvm-2.88_output_model_params.git
14
+ to expose additional data/parameters in the model object. You don't need your
15
+ own copy of SWIG to use this library - all the needed files have already been
16
+ generated with SWIG.
17
+
18
+ Look for the README file in the ruby subdirectory for instructions.
19
+ The binaries included were built under Ubuntu Linux (kernel 2.6.24-23-generic);
20
+ you should run make under the libsvm-2.88 and libsvm-2.88/ruby
21
+ directories to regenerate the executables for your environment.
22
+
23
+ == INSTALL:
24
+
25
+ sudo gem sources -a http://gems.github.com (you only have to do this once)
26
+ sudo gem install tomz-libsvm-ruby-swig
27
+
28
+ == SYNOPSIS:
29
+
30
+ Quick interactive tutorial using irb (adapted from the Python code in Toby
31
+ Segaran's "Programming Collective Intelligence" book):
32
+
33
+ irb(main):001:0> require 'svm'
34
+ => true
35
+ irb(main):002:0> prob = Problem.new([1,-1],[[1,0,1],[-1,0,-1]])
36
+ irb(main):003:0> param = Parameter.new(:kernel_type => LINEAR, :C => 10)
37
+ irb(main):004:0> m = Model.new(prob,param)
38
+ irb(main):005:0> m.predict([1,1,1])
39
+ => 1.0
40
+ irb(main):006:0> m.predict([0,0,1])
41
+ => 1.0
42
+ irb(main):007:0> m.predict([0,0,-1])
43
+ => -1.0
44
+ irb(main):008:0> m.save("test.model")
45
+ irb(main):009:0> m2 = Model.new("test.model")
46
+ irb(main):010:0> m2.predict([0,0,-1])
47
+ => -1.0
48
+
49
+ == AUTHOR:
50
+
51
+ Tom Zeng
52
+ http://www.tomzconsulting.com
53
+ http://www.linkedin.com/in/tomzeng
54
+ tom.z.zeng _at_ gmail _dot_ com
55
+
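The SYNOPSIS above only exercises the LINEAR kernel. As a further illustration, here is a minimal sketch of training with the RBF kernel on a slightly larger toy set; it assumes Parameter.new also accepts the standard LIBSVM :gamma field, mirroring the Python SWIG interface this port is based on.

    require 'svm'

    # Toy training set: labels first, then the corresponding feature vectors.
    labels   = [1, 1, -1, -1]
    examples = [[1, 1, 1], [1, 0, 1], [-1, 0, -1], [-1, -1, -1]]
    prob  = Problem.new(labels, examples)

    # :gamma and :C are standard LIBSVM parameters; the option names are assumed
    # to match the Python interface this wrapper was ported from.
    param = Parameter.new(:kernel_type => RBF, :gamma => 0.5, :C => 10)

    model = Model.new(prob, param)
    puts model.predict([1, 1, 1])   # should report the +1 class on this separable data
    model.save("rbf.model")         # reload later with Model.new("rbf.model")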
data/Rakefile ADDED
@@ -0,0 +1,33 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ task :default => ["make_gem"]
5
+
6
+ EXT = "ext/blah.#{Hoe::DLEXT}"
7
+
8
+ Hoe.new('libsvm-ruby-swig', '0.2.0') do |p|
9
+ p.author = 'Tom Zeng'
10
+ p.email = 'tom.z.zeng@gmail.com'
11
+ p.url = 'http://www.tomzconsulting.com'
12
+ p.summary = 'Ruby wrapper of LIBSVM using SWIG'
13
+ p.description = 'Ruby wrapper of LIBSVM using SWIG'
14
+
15
+ p.spec_extras[:extensions] = "ext/extconf.rb"
16
+ p.clean_globs << EXT << "ext/*.o" << "ext/Makefile"
17
+ end
18
+
19
+ task :make_gem => EXT
20
+
21
+ file EXT => ["ext/extconf.rb", "ext/svmc_wrap.cxx", "ext/svm.cpp", "ext/svm.h"] do
22
+ Dir.chdir "ext" do
23
+ ruby "extconf.rb"
24
+ sh "make"
25
+ end
26
+ end
27
+
28
+ task :copy_files do
29
+ cp "libsvm-2.88/svm.h","ext/"
30
+ cp "libsvm-2.88/svm.cpp","ext/"
31
+ cp "libsvm-2.88/ruby/svmc_wrap.cxx","ext/"
32
+ cp "libsvm-2.88/ruby/svm.rb","lib/"
33
+ end
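A note on how the tasks above fit together: the default task depends on :make_gem, which depends on the compiled extension file EXT, so a plain rake run regenerates the Makefile via ext/extconf.rb, compiles the extension, and then lets hoe package the gem, while :copy_files refreshes the vendored sources from libsvm-2.88/. The sketch below drives the same tasks programmatically and is only a hypothetical convenience wrapper around what the rake command already does.

    # Hypothetical driver, run from the gem's top-level directory (needs rake and hoe).
    require 'rake'
    Rake.application.init            # parse (empty) command-line options
    Rake.application.load_rakefile   # load the Rakefile shown above
    Rake::Task['copy_files'].invoke  # refresh ext/ and lib/ from libsvm-2.88/
    Rake::Task['default'].invoke     # build the extension and package the gem via hoe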
data/ext/extconf.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'mkmf'
2
+ $CFLAGS = "#{ENV['CFLAGS']} -Wall -O3 "
3
+ if CONFIG["MAJOR"].to_i >= 1 && CONFIG["MINOR"].to_i >= 8
4
+ $CFLAGS << " -DHAVE_DEFINE_ALLOC_FUNCTION"
5
+ end
6
+ create_makefile('svmc')
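Taken together, the Rakefile and extconf.rb describe the build flow: mkmf's create_makefile('svmc') generates a Makefile that compiles svmc_wrap.cxx and svm.cpp into a native svmc extension, and lib/svm.rb wraps that extension with the Problem/Parameter/Model classes used in the README. A minimal smoke test is sketched below; it assumes the extension has already been built (ruby extconf.rb && make inside ext/) and that ext/ and lib/ are on the load path.

    # Minimal load check for a locally built extension (paths are assumptions).
    $LOAD_PATH.unshift("ext", "lib")
    require 'svm'              # lib/svm.rb, which pulls in the compiled 'svmc' extension
    puts defined?(Problem)     # => "constant" once the wrapper has loaded correctly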
data/ext/svm.cpp ADDED
@@ -0,0 +1,3095 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <ctype.h>
5
+ #include <float.h>
6
+ #include <string.h>
7
+ #include <stdarg.h>
8
+ #include "svm.h"
9
+ typedef float Qfloat;
10
+ typedef signed char schar;
11
+ #ifndef min
12
+ template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
13
+ #endif
14
+ #ifndef max
15
+ template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
16
+ #endif
17
+ template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
18
+ template <class S, class T> inline void clone(T*& dst, S* src, int n)
19
+ {
20
+ dst = new T[n];
21
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
22
+ }
23
+ inline double powi(double base, int times)
24
+ {
25
+ double tmp = base, ret = 1.0;
26
+
27
+ for(int t=times; t>0; t/=2)
28
+ {
29
+ if(t%2==1) ret*=tmp;
30
+ tmp = tmp * tmp;
31
+ }
32
+ return ret;
33
+ }
34
+ #define INF HUGE_VAL
35
+ #define TAU 1e-12
36
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
37
+ #if 1
38
+ int info_on = 0;
39
+ static void info(const char *fmt,...)
40
+ {
41
+ va_list ap;
42
+ if (info_on==1) {
43
+ va_start(ap,fmt);
44
+ vprintf(fmt,ap);
45
+ va_end(ap);
46
+ }
47
+ }
48
+ static void info_flush()
49
+ {
50
+ if (info_on==1) fflush(stdout);
51
+ }
52
+ #else
53
+ static void info(char *fmt,...) {}
54
+ static void info_flush() {}
55
+ #endif
56
+
57
+ //
58
+ // Kernel Cache
59
+ //
60
+ // l is the total number of data items
61
+ // size is the cache size limit in bytes
62
+ //
63
+ class Cache
64
+ {
65
+ public:
66
+ Cache(int l,long int size);
67
+ ~Cache();
68
+
69
+ // request data [0,len)
70
+ // return some position p where [p,len) need to be filled
71
+ // (p >= len if nothing needs to be filled)
72
+ int get_data(const int index, Qfloat **data, int len);
73
+ void swap_index(int i, int j);
74
+ private:
75
+ int l;
76
+ long int size;
77
+ struct head_t
78
+ {
79
+ head_t *prev, *next; // a circular list
80
+ Qfloat *data;
81
+ int len; // data[0,len) is cached in this entry
82
+ };
83
+
84
+ head_t *head;
85
+ head_t lru_head;
86
+ void lru_delete(head_t *h);
87
+ void lru_insert(head_t *h);
88
+ };
89
+
90
+ Cache::Cache(int l_,long int size_):l(l_),size(size_)
91
+ {
92
+ head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
93
+ size /= sizeof(Qfloat);
94
+ size -= l * sizeof(head_t) / sizeof(Qfloat);
95
+ size = max(size, 2 * (long int) l); // cache must be large enough for two columns
96
+ lru_head.next = lru_head.prev = &lru_head;
97
+ }
98
+
99
+ Cache::~Cache()
100
+ {
101
+ for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
102
+ free(h->data);
103
+ free(head);
104
+ }
105
+
106
+ void Cache::lru_delete(head_t *h)
107
+ {
108
+ // delete from current location
109
+ h->prev->next = h->next;
110
+ h->next->prev = h->prev;
111
+ }
112
+
113
+ void Cache::lru_insert(head_t *h)
114
+ {
115
+ // insert to last position
116
+ h->next = &lru_head;
117
+ h->prev = lru_head.prev;
118
+ h->prev->next = h;
119
+ h->next->prev = h;
120
+ }
121
+
122
+ int Cache::get_data(const int index, Qfloat **data, int len)
123
+ {
124
+ head_t *h = &head[index];
125
+ if(h->len) lru_delete(h);
126
+ int more = len - h->len;
127
+
128
+ if(more > 0)
129
+ {
130
+ // free old space
131
+ while(size < more)
132
+ {
133
+ head_t *old = lru_head.next;
134
+ lru_delete(old);
135
+ free(old->data);
136
+ size += old->len;
137
+ old->data = 0;
138
+ old->len = 0;
139
+ }
140
+
141
+ // allocate new space
142
+ h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
143
+ size -= more;
144
+ swap(h->len,len);
145
+ }
146
+
147
+ lru_insert(h);
148
+ *data = h->data;
149
+ return len;
150
+ }
151
+
152
+ void Cache::swap_index(int i, int j)
153
+ {
154
+ if(i==j) return;
155
+
156
+ if(head[i].len) lru_delete(&head[i]);
157
+ if(head[j].len) lru_delete(&head[j]);
158
+ swap(head[i].data,head[j].data);
159
+ swap(head[i].len,head[j].len);
160
+ if(head[i].len) lru_insert(&head[i]);
161
+ if(head[j].len) lru_insert(&head[j]);
162
+
163
+ if(i>j) swap(i,j);
164
+ for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
165
+ {
166
+ if(h->len > i)
167
+ {
168
+ if(h->len > j)
169
+ swap(h->data[i],h->data[j]);
170
+ else
171
+ {
172
+ // give up
173
+ lru_delete(h);
174
+ free(h->data);
175
+ size += h->len;
176
+ h->data = 0;
177
+ h->len = 0;
178
+ }
179
+ }
180
+ }
181
+ }
182
+
183
+ //
184
+ // Kernel evaluation
185
+ //
186
+ // the static method k_function is for doing single kernel evaluation
187
+ // the constructor of Kernel prepares to calculate the l*l kernel matrix
188
+ // the member function get_Q is for getting one column from the Q Matrix
189
+ //
190
+ class QMatrix {
191
+ public:
192
+ virtual Qfloat *get_Q(int column, int len) const = 0;
193
+ virtual Qfloat *get_QD() const = 0;
194
+ virtual void swap_index(int i, int j) const = 0;
195
+ virtual ~QMatrix() {}
196
+ };
197
+
198
+ class Kernel: public QMatrix {
199
+ public:
200
+ Kernel(int l, svm_node * const * x, const svm_parameter& param);
201
+ virtual ~Kernel();
202
+
203
+ static double k_function(const svm_node *x, const svm_node *y,
204
+ const svm_parameter& param);
205
+ virtual Qfloat *get_Q(int column, int len) const = 0;
206
+ virtual Qfloat *get_QD() const = 0;
207
+ virtual void swap_index(int i, int j) const // not so const...
208
+ {
209
+ swap(x[i],x[j]);
210
+ if(x_square) swap(x_square[i],x_square[j]);
211
+ }
212
+ protected:
213
+
214
+ double (Kernel::*kernel_function)(int i, int j) const;
215
+
216
+ private:
217
+ const svm_node **x;
218
+ double *x_square;
219
+
220
+ // svm_parameter
221
+ const int kernel_type;
222
+ const int degree;
223
+ const double gamma;
224
+ const double coef0;
225
+
226
+ static double dot(const svm_node *px, const svm_node *py);
227
+ double kernel_linear(int i, int j) const
228
+ {
229
+ return dot(x[i],x[j]);
230
+ }
231
+ double kernel_poly(int i, int j) const
232
+ {
233
+ return powi(gamma*dot(x[i],x[j])+coef0,degree);
234
+ }
235
+ double kernel_rbf(int i, int j) const
236
+ {
237
+ return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
238
+ }
239
+ double kernel_sigmoid(int i, int j) const
240
+ {
241
+ return tanh(gamma*dot(x[i],x[j])+coef0);
242
+ }
243
+ double kernel_precomputed(int i, int j) const
244
+ {
245
+ return x[i][(int)(x[j][0].value)].value;
246
+ }
247
+ };
248
+
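For reference, the kernel member functions defined in the class above compute, in LIBSVM's notation:

    K_{linear}(x, y)  = x^\top y
    K_{poly}(x, y)    = (\gamma\, x^\top y + coef0)^{degree}
    K_{rbf}(x, y)     = \exp(-\gamma\, \lVert x - y \rVert^2)
    K_{sigmoid}(x, y) = \tanh(\gamma\, x^\top y + coef0)

kernel_precomputed simply looks up a user-supplied kernel value indexed by x[j][0].value.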
249
+ Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
250
+ :kernel_type(param.kernel_type), degree(param.degree),
251
+ gamma(param.gamma), coef0(param.coef0)
252
+ {
253
+ switch(kernel_type)
254
+ {
255
+ case LINEAR:
256
+ kernel_function = &Kernel::kernel_linear;
257
+ break;
258
+ case POLY:
259
+ kernel_function = &Kernel::kernel_poly;
260
+ break;
261
+ case RBF:
262
+ kernel_function = &Kernel::kernel_rbf;
263
+ break;
264
+ case SIGMOID:
265
+ kernel_function = &Kernel::kernel_sigmoid;
266
+ break;
267
+ case PRECOMPUTED:
268
+ kernel_function = &Kernel::kernel_precomputed;
269
+ break;
270
+ }
271
+
272
+ clone(x,x_,l);
273
+
274
+ if(kernel_type == RBF)
275
+ {
276
+ x_square = new double[l];
277
+ for(int i=0;i<l;i++)
278
+ x_square[i] = dot(x[i],x[i]);
279
+ }
280
+ else
281
+ x_square = 0;
282
+ }
283
+
284
+ Kernel::~Kernel()
285
+ {
286
+ delete[] x;
287
+ delete[] x_square;
288
+ }
289
+
290
+ double Kernel::dot(const svm_node *px, const svm_node *py)
291
+ {
292
+ double sum = 0;
293
+ while(px->index != -1 && py->index != -1)
294
+ {
295
+ if(px->index == py->index)
296
+ {
297
+ sum += px->value * py->value;
298
+ ++px;
299
+ ++py;
300
+ }
301
+ else
302
+ {
303
+ if(px->index > py->index)
304
+ ++py;
305
+ else
306
+ ++px;
307
+ }
308
+ }
309
+ return sum;
310
+ }
311
+
312
+ double Kernel::k_function(const svm_node *x, const svm_node *y,
313
+ const svm_parameter& param)
314
+ {
315
+ switch(param.kernel_type)
316
+ {
317
+ case LINEAR:
318
+ return dot(x,y);
319
+ case POLY:
320
+ return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
321
+ case RBF:
322
+ {
323
+ double sum = 0;
324
+ while(x->index != -1 && y->index !=-1)
325
+ {
326
+ if(x->index == y->index)
327
+ {
328
+ double d = x->value - y->value;
329
+ sum += d*d;
330
+ ++x;
331
+ ++y;
332
+ }
333
+ else
334
+ {
335
+ if(x->index > y->index)
336
+ {
337
+ sum += y->value * y->value;
338
+ ++y;
339
+ }
340
+ else
341
+ {
342
+ sum += x->value * x->value;
343
+ ++x;
344
+ }
345
+ }
346
+ }
347
+
348
+ while(x->index != -1)
349
+ {
350
+ sum += x->value * x->value;
351
+ ++x;
352
+ }
353
+
354
+ while(y->index != -1)
355
+ {
356
+ sum += y->value * y->value;
357
+ ++y;
358
+ }
359
+
360
+ return exp(-param.gamma*sum);
361
+ }
362
+ case SIGMOID:
363
+ return tanh(param.gamma*dot(x,y)+param.coef0);
364
+ case PRECOMPUTED: //x: test (validation), y: SV
365
+ return x[(int)(y->value)].value;
366
+ default:
367
+ return 0; // Unreachable
368
+ }
369
+ }
370
+
371
+ // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
372
+ // Solves:
373
+ //
374
+ // min 0.5(\alpha^T Q \alpha) + p^T \alpha
375
+ //
376
+ // y^T \alpha = \delta
377
+ // y_i = +1 or -1
378
+ // 0 <= alpha_i <= Cp for y_i = 1
379
+ // 0 <= alpha_i <= Cn for y_i = -1
380
+ //
381
+ // Given:
382
+ //
383
+ // Q, p, y, Cp, Cn, and an initial feasible point \alpha
384
+ // l is the size of vectors and matrices
385
+ // eps is the stopping tolerance
386
+ //
387
+ // solution will be put in \alpha, objective value will be put in obj
388
+ //
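Restated as a single display, the quadratic program described in the comment above is

    \min_{\alpha}\ \tfrac{1}{2}\,\alpha^\top Q\,\alpha + p^\top \alpha
    \text{subject to}\ \ y^\top \alpha = \delta,\quad 0 \le \alpha_i \le C_p\ (y_i = +1),\quad 0 \le \alpha_i \le C_n\ (y_i = -1)

with the solution returned in alpha and the objective value and rho reported through SolutionInfo.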
389
+ class Solver {
390
+ public:
391
+ Solver() {};
392
+ virtual ~Solver() {};
393
+
394
+ struct SolutionInfo {
395
+ double obj;
396
+ double rho;
397
+ double upper_bound_p;
398
+ double upper_bound_n;
399
+ double r; // for Solver_NU
400
+ };
401
+
402
+ void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
403
+ double *alpha_, double Cp, double Cn, double eps,
404
+ SolutionInfo* si, int shrinking);
405
+ protected:
406
+ int active_size;
407
+ schar *y;
408
+ double *G; // gradient of objective function
409
+ enum { LOWER_BOUND, UPPER_BOUND, FREE };
410
+ char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
411
+ double *alpha;
412
+ const QMatrix *Q;
413
+ const Qfloat *QD;
414
+ double eps;
415
+ double Cp,Cn;
416
+ double *p;
417
+ int *active_set;
418
+ double *G_bar; // gradient, if we treat free variables as 0
419
+ int l;
420
+ bool unshrink; // XXX
421
+
422
+ double get_C(int i)
423
+ {
424
+ return (y[i] > 0)? Cp : Cn;
425
+ }
426
+ void update_alpha_status(int i)
427
+ {
428
+ if(alpha[i] >= get_C(i))
429
+ alpha_status[i] = UPPER_BOUND;
430
+ else if(alpha[i] <= 0)
431
+ alpha_status[i] = LOWER_BOUND;
432
+ else alpha_status[i] = FREE;
433
+ }
434
+ bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
435
+ bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
436
+ bool is_free(int i) { return alpha_status[i] == FREE; }
437
+ void swap_index(int i, int j);
438
+ void reconstruct_gradient();
439
+ virtual int select_working_set(int &i, int &j);
440
+ virtual double calculate_rho();
441
+ virtual void do_shrinking();
442
+ private:
443
+ bool be_shrunk(int i, double Gmax1, double Gmax2);
444
+ };
445
+
446
+ void Solver::swap_index(int i, int j)
447
+ {
448
+ Q->swap_index(i,j);
449
+ swap(y[i],y[j]);
450
+ swap(G[i],G[j]);
451
+ swap(alpha_status[i],alpha_status[j]);
452
+ swap(alpha[i],alpha[j]);
453
+ swap(p[i],p[j]);
454
+ swap(active_set[i],active_set[j]);
455
+ swap(G_bar[i],G_bar[j]);
456
+ }
457
+
458
+ void Solver::reconstruct_gradient()
459
+ {
460
+ // reconstruct inactive elements of G from G_bar and free variables
461
+
462
+ if(active_size == l) return;
463
+
464
+ int i,j;
465
+ int nr_free = 0;
466
+
467
+ for(j=active_size;j<l;j++)
468
+ G[j] = G_bar[j] + p[j];
469
+
470
+ for(j=0;j<active_size;j++)
471
+ if(is_free(j))
472
+ nr_free++;
473
+
474
+ if(2*nr_free < active_size)
475
+ info("\nWarning: using -h 0 may be faster\n");
476
+
477
+ if (nr_free*l > 2*active_size*(l-active_size))
478
+ {
479
+ for(i=active_size;i<l;i++)
480
+ {
481
+ const Qfloat *Q_i = Q->get_Q(i,active_size);
482
+ for(j=0;j<active_size;j++)
483
+ if(is_free(j))
484
+ G[i] += alpha[j] * Q_i[j];
485
+ }
486
+ }
487
+ else
488
+ {
489
+ for(i=0;i<active_size;i++)
490
+ if(is_free(i))
491
+ {
492
+ const Qfloat *Q_i = Q->get_Q(i,l);
493
+ double alpha_i = alpha[i];
494
+ for(j=active_size;j<l;j++)
495
+ G[j] += alpha_i * Q_i[j];
496
+ }
497
+ }
498
+ }
499
+
500
+ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
501
+ double *alpha_, double Cp, double Cn, double eps,
502
+ SolutionInfo* si, int shrinking)
503
+ {
504
+ this->l = l;
505
+ this->Q = &Q;
506
+ QD=Q.get_QD();
507
+ clone(p, p_,l);
508
+ clone(y, y_,l);
509
+ clone(alpha,alpha_,l);
510
+ this->Cp = Cp;
511
+ this->Cn = Cn;
512
+ this->eps = eps;
513
+ unshrink = false;
514
+
515
+ // initialize alpha_status
516
+ {
517
+ alpha_status = new char[l];
518
+ for(int i=0;i<l;i++)
519
+ update_alpha_status(i);
520
+ }
521
+
522
+ // initialize active set (for shrinking)
523
+ {
524
+ active_set = new int[l];
525
+ for(int i=0;i<l;i++)
526
+ active_set[i] = i;
527
+ active_size = l;
528
+ }
529
+
530
+ // initialize gradient
531
+ {
532
+ G = new double[l];
533
+ G_bar = new double[l];
534
+ int i;
535
+ for(i=0;i<l;i++)
536
+ {
537
+ G[i] = p[i];
538
+ G_bar[i] = 0;
539
+ }
540
+ for(i=0;i<l;i++)
541
+ if(!is_lower_bound(i))
542
+ {
543
+ const Qfloat *Q_i = Q.get_Q(i,l);
544
+ double alpha_i = alpha[i];
545
+ int j;
546
+ for(j=0;j<l;j++)
547
+ G[j] += alpha_i*Q_i[j];
548
+ if(is_upper_bound(i))
549
+ for(j=0;j<l;j++)
550
+ G_bar[j] += get_C(i) * Q_i[j];
551
+ }
552
+ }
553
+
554
+ // optimization step
555
+
556
+ int iter = 0;
557
+ int counter = min(l,1000)+1;
558
+
559
+ while(1)
560
+ {
561
+ // show progress and do shrinking
562
+
563
+ if(--counter == 0)
564
+ {
565
+ counter = min(l,1000);
566
+ if(shrinking) do_shrinking();
567
+ info("."); info_flush();
568
+ }
569
+
570
+ int i,j;
571
+ if(select_working_set(i,j)!=0)
572
+ {
573
+ // reconstruct the whole gradient
574
+ reconstruct_gradient();
575
+ // reset active set size and check
576
+ active_size = l;
577
+ info("*"); info_flush();
578
+ if(select_working_set(i,j)!=0)
579
+ break;
580
+ else
581
+ counter = 1; // do shrinking next iteration
582
+ }
583
+
584
+ ++iter;
585
+
586
+ // update alpha[i] and alpha[j], handle bounds carefully
587
+
588
+ const Qfloat *Q_i = Q.get_Q(i,active_size);
589
+ const Qfloat *Q_j = Q.get_Q(j,active_size);
590
+
591
+ double C_i = get_C(i);
592
+ double C_j = get_C(j);
593
+
594
+ double old_alpha_i = alpha[i];
595
+ double old_alpha_j = alpha[j];
596
+
597
+ if(y[i]!=y[j])
598
+ {
599
+ double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
600
+ if (quad_coef <= 0)
601
+ quad_coef = TAU;
602
+ double delta = (-G[i]-G[j])/quad_coef;
603
+ double diff = alpha[i] - alpha[j];
604
+ alpha[i] += delta;
605
+ alpha[j] += delta;
606
+
607
+ if(diff > 0)
608
+ {
609
+ if(alpha[j] < 0)
610
+ {
611
+ alpha[j] = 0;
612
+ alpha[i] = diff;
613
+ }
614
+ }
615
+ else
616
+ {
617
+ if(alpha[i] < 0)
618
+ {
619
+ alpha[i] = 0;
620
+ alpha[j] = -diff;
621
+ }
622
+ }
623
+ if(diff > C_i - C_j)
624
+ {
625
+ if(alpha[i] > C_i)
626
+ {
627
+ alpha[i] = C_i;
628
+ alpha[j] = C_i - diff;
629
+ }
630
+ }
631
+ else
632
+ {
633
+ if(alpha[j] > C_j)
634
+ {
635
+ alpha[j] = C_j;
636
+ alpha[i] = C_j + diff;
637
+ }
638
+ }
639
+ }
640
+ else
641
+ {
642
+ double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
643
+ if (quad_coef <= 0)
644
+ quad_coef = TAU;
645
+ double delta = (G[i]-G[j])/quad_coef;
646
+ double sum = alpha[i] + alpha[j];
647
+ alpha[i] -= delta;
648
+ alpha[j] += delta;
649
+
650
+ if(sum > C_i)
651
+ {
652
+ if(alpha[i] > C_i)
653
+ {
654
+ alpha[i] = C_i;
655
+ alpha[j] = sum - C_i;
656
+ }
657
+ }
658
+ else
659
+ {
660
+ if(alpha[j] < 0)
661
+ {
662
+ alpha[j] = 0;
663
+ alpha[i] = sum;
664
+ }
665
+ }
666
+ if(sum > C_j)
667
+ {
668
+ if(alpha[j] > C_j)
669
+ {
670
+ alpha[j] = C_j;
671
+ alpha[i] = sum - C_j;
672
+ }
673
+ }
674
+ else
675
+ {
676
+ if(alpha[i] < 0)
677
+ {
678
+ alpha[i] = 0;
679
+ alpha[j] = sum;
680
+ }
681
+ }
682
+ }
683
+
684
+ // update G
685
+
686
+ double delta_alpha_i = alpha[i] - old_alpha_i;
687
+ double delta_alpha_j = alpha[j] - old_alpha_j;
688
+
689
+ for(int k=0;k<active_size;k++)
690
+ {
691
+ G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
692
+ }
693
+
694
+ // update alpha_status and G_bar
695
+
696
+ {
697
+ bool ui = is_upper_bound(i);
698
+ bool uj = is_upper_bound(j);
699
+ update_alpha_status(i);
700
+ update_alpha_status(j);
701
+ int k;
702
+ if(ui != is_upper_bound(i))
703
+ {
704
+ Q_i = Q.get_Q(i,l);
705
+ if(ui)
706
+ for(k=0;k<l;k++)
707
+ G_bar[k] -= C_i * Q_i[k];
708
+ else
709
+ for(k=0;k<l;k++)
710
+ G_bar[k] += C_i * Q_i[k];
711
+ }
712
+
713
+ if(uj != is_upper_bound(j))
714
+ {
715
+ Q_j = Q.get_Q(j,l);
716
+ if(uj)
717
+ for(k=0;k<l;k++)
718
+ G_bar[k] -= C_j * Q_j[k];
719
+ else
720
+ for(k=0;k<l;k++)
721
+ G_bar[k] += C_j * Q_j[k];
722
+ }
723
+ }
724
+ }
725
+
726
+ // calculate rho
727
+
728
+ si->rho = calculate_rho();
729
+
730
+ // calculate objective value
731
+ {
732
+ double v = 0;
733
+ int i;
734
+ for(i=0;i<l;i++)
735
+ v += alpha[i] * (G[i] + p[i]);
736
+
737
+ si->obj = v/2;
738
+ }
739
+
740
+ // put back the solution
741
+ {
742
+ for(int i=0;i<l;i++)
743
+ alpha_[active_set[i]] = alpha[i];
744
+ }
745
+
746
+ // juggle everything back
747
+ /*{
748
+ for(int i=0;i<l;i++)
749
+ while(active_set[i] != i)
750
+ swap_index(i,active_set[i]);
751
+ // or Q.swap_index(i,active_set[i]);
752
+ }*/
753
+
754
+ si->upper_bound_p = Cp;
755
+ si->upper_bound_n = Cn;
756
+
757
+ info("\noptimization finished, #iter = %d\n",iter);
758
+
759
+ delete[] p;
760
+ delete[] y;
761
+ delete[] alpha;
762
+ delete[] alpha_status;
763
+ delete[] active_set;
764
+ delete[] G;
765
+ delete[] G_bar;
766
+ }
767
+
768
+ // return 1 if already optimal, return 0 otherwise
769
+ int Solver::select_working_set(int &out_i, int &out_j)
770
+ {
771
+ // return i,j such that
772
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
773
+ // j: minimizes the decrease of obj value
774
+ // (if quadratic coefficient <= 0, replace it with tau)
775
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
776
+
777
+ double Gmax = -INF;
778
+ double Gmax2 = -INF;
779
+ int Gmax_idx = -1;
780
+ int Gmin_idx = -1;
781
+ double obj_diff_min = INF;
782
+
783
+ for(int t=0;t<active_size;t++)
784
+ if(y[t]==+1)
785
+ {
786
+ if(!is_upper_bound(t))
787
+ if(-G[t] >= Gmax)
788
+ {
789
+ Gmax = -G[t];
790
+ Gmax_idx = t;
791
+ }
792
+ }
793
+ else
794
+ {
795
+ if(!is_lower_bound(t))
796
+ if(G[t] >= Gmax)
797
+ {
798
+ Gmax = G[t];
799
+ Gmax_idx = t;
800
+ }
801
+ }
802
+
803
+ int i = Gmax_idx;
804
+ const Qfloat *Q_i = NULL;
805
+ if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
806
+ Q_i = Q->get_Q(i,active_size);
807
+
808
+ for(int j=0;j<active_size;j++)
809
+ {
810
+ if(y[j]==+1)
811
+ {
812
+ if (!is_lower_bound(j))
813
+ {
814
+ double grad_diff=Gmax+G[j];
815
+ if (G[j] >= Gmax2)
816
+ Gmax2 = G[j];
817
+ if (grad_diff > 0)
818
+ {
819
+ double obj_diff;
820
+ double quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
821
+ if (quad_coef > 0)
822
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
823
+ else
824
+ obj_diff = -(grad_diff*grad_diff)/TAU;
825
+
826
+ if (obj_diff <= obj_diff_min)
827
+ {
828
+ Gmin_idx=j;
829
+ obj_diff_min = obj_diff;
830
+ }
831
+ }
832
+ }
833
+ }
834
+ else
835
+ {
836
+ if (!is_upper_bound(j))
837
+ {
838
+ double grad_diff= Gmax-G[j];
839
+ if (-G[j] >= Gmax2)
840
+ Gmax2 = -G[j];
841
+ if (grad_diff > 0)
842
+ {
843
+ double obj_diff;
844
+ double quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
845
+ if (quad_coef > 0)
846
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
847
+ else
848
+ obj_diff = -(grad_diff*grad_diff)/TAU;
849
+
850
+ if (obj_diff <= obj_diff_min)
851
+ {
852
+ Gmin_idx=j;
853
+ obj_diff_min = obj_diff;
854
+ }
855
+ }
856
+ }
857
+ }
858
+ }
859
+
860
+ if(Gmax+Gmax2 < eps)
861
+ return 1;
862
+
863
+ out_i = Gmax_idx;
864
+ out_j = Gmin_idx;
865
+ return 0;
866
+ }
867
+
868
+ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
869
+ {
870
+ if(is_upper_bound(i))
871
+ {
872
+ if(y[i]==+1)
873
+ return(-G[i] > Gmax1);
874
+ else
875
+ return(-G[i] > Gmax2);
876
+ }
877
+ else if(is_lower_bound(i))
878
+ {
879
+ if(y[i]==+1)
880
+ return(G[i] > Gmax2);
881
+ else
882
+ return(G[i] > Gmax1);
883
+ }
884
+ else
885
+ return(false);
886
+ }
887
+
888
+ void Solver::do_shrinking()
889
+ {
890
+ int i;
891
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
892
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
893
+
894
+ // find maximal violating pair first
895
+ for(i=0;i<active_size;i++)
896
+ {
897
+ if(y[i]==+1)
898
+ {
899
+ if(!is_upper_bound(i))
900
+ {
901
+ if(-G[i] >= Gmax1)
902
+ Gmax1 = -G[i];
903
+ }
904
+ if(!is_lower_bound(i))
905
+ {
906
+ if(G[i] >= Gmax2)
907
+ Gmax2 = G[i];
908
+ }
909
+ }
910
+ else
911
+ {
912
+ if(!is_upper_bound(i))
913
+ {
914
+ if(-G[i] >= Gmax2)
915
+ Gmax2 = -G[i];
916
+ }
917
+ if(!is_lower_bound(i))
918
+ {
919
+ if(G[i] >= Gmax1)
920
+ Gmax1 = G[i];
921
+ }
922
+ }
923
+ }
924
+
925
+ if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
926
+ {
927
+ unshrink = true;
928
+ reconstruct_gradient();
929
+ active_size = l;
930
+ info("*"); info_flush();
931
+ }
932
+
933
+ for(i=0;i<active_size;i++)
934
+ if (be_shrunk(i, Gmax1, Gmax2))
935
+ {
936
+ active_size--;
937
+ while (active_size > i)
938
+ {
939
+ if (!be_shrunk(active_size, Gmax1, Gmax2))
940
+ {
941
+ swap_index(i,active_size);
942
+ break;
943
+ }
944
+ active_size--;
945
+ }
946
+ }
947
+ }
948
+
949
+ double Solver::calculate_rho()
950
+ {
951
+ double r;
952
+ int nr_free = 0;
953
+ double ub = INF, lb = -INF, sum_free = 0;
954
+ for(int i=0;i<active_size;i++)
955
+ {
956
+ double yG = y[i]*G[i];
957
+
958
+ if(is_upper_bound(i))
959
+ {
960
+ if(y[i]==-1)
961
+ ub = min(ub,yG);
962
+ else
963
+ lb = max(lb,yG);
964
+ }
965
+ else if(is_lower_bound(i))
966
+ {
967
+ if(y[i]==+1)
968
+ ub = min(ub,yG);
969
+ else
970
+ lb = max(lb,yG);
971
+ }
972
+ else
973
+ {
974
+ ++nr_free;
975
+ sum_free += yG;
976
+ }
977
+ }
978
+
979
+ if(nr_free>0)
980
+ r = sum_free/nr_free;
981
+ else
982
+ r = (ub+lb)/2;
983
+
984
+ return r;
985
+ }
986
+
987
+ //
988
+ // Solver for nu-svm classification and regression
989
+ //
990
+ // additional constraint: e^T \alpha = constant
991
+ //
992
+ class Solver_NU : public Solver
993
+ {
994
+ public:
995
+ Solver_NU() {}
996
+ void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
997
+ double *alpha, double Cp, double Cn, double eps,
998
+ SolutionInfo* si, int shrinking)
999
+ {
1000
+ this->si = si;
1001
+ Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1002
+ }
1003
+ private:
1004
+ SolutionInfo *si;
1005
+ int select_working_set(int &i, int &j);
1006
+ double calculate_rho();
1007
+ bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1008
+ void do_shrinking();
1009
+ };
1010
+
1011
+ // return 1 if already optimal, return 0 otherwise
1012
+ int Solver_NU::select_working_set(int &out_i, int &out_j)
1013
+ {
1014
+ // return i,j such that y_i = y_j and
1015
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1016
+ // j: minimizes the decrease of obj value
1017
+ // (if quadratic coefficient <= 0, replace it with tau)
1018
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1019
+
1020
+ double Gmaxp = -INF;
1021
+ double Gmaxp2 = -INF;
1022
+ int Gmaxp_idx = -1;
1023
+
1024
+ double Gmaxn = -INF;
1025
+ double Gmaxn2 = -INF;
1026
+ int Gmaxn_idx = -1;
1027
+
1028
+ int Gmin_idx = -1;
1029
+ double obj_diff_min = INF;
1030
+
1031
+ for(int t=0;t<active_size;t++)
1032
+ if(y[t]==+1)
1033
+ {
1034
+ if(!is_upper_bound(t))
1035
+ if(-G[t] >= Gmaxp)
1036
+ {
1037
+ Gmaxp = -G[t];
1038
+ Gmaxp_idx = t;
1039
+ }
1040
+ }
1041
+ else
1042
+ {
1043
+ if(!is_lower_bound(t))
1044
+ if(G[t] >= Gmaxn)
1045
+ {
1046
+ Gmaxn = G[t];
1047
+ Gmaxn_idx = t;
1048
+ }
1049
+ }
1050
+
1051
+ int ip = Gmaxp_idx;
1052
+ int in = Gmaxn_idx;
1053
+ const Qfloat *Q_ip = NULL;
1054
+ const Qfloat *Q_in = NULL;
1055
+ if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1056
+ Q_ip = Q->get_Q(ip,active_size);
1057
+ if(in != -1)
1058
+ Q_in = Q->get_Q(in,active_size);
1059
+
1060
+ for(int j=0;j<active_size;j++)
1061
+ {
1062
+ if(y[j]==+1)
1063
+ {
1064
+ if (!is_lower_bound(j))
1065
+ {
1066
+ double grad_diff=Gmaxp+G[j];
1067
+ if (G[j] >= Gmaxp2)
1068
+ Gmaxp2 = G[j];
1069
+ if (grad_diff > 0)
1070
+ {
1071
+ double obj_diff;
1072
+ double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1073
+ if (quad_coef > 0)
1074
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1075
+ else
1076
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1077
+
1078
+ if (obj_diff <= obj_diff_min)
1079
+ {
1080
+ Gmin_idx=j;
1081
+ obj_diff_min = obj_diff;
1082
+ }
1083
+ }
1084
+ }
1085
+ }
1086
+ else
1087
+ {
1088
+ if (!is_upper_bound(j))
1089
+ {
1090
+ double grad_diff=Gmaxn-G[j];
1091
+ if (-G[j] >= Gmaxn2)
1092
+ Gmaxn2 = -G[j];
1093
+ if (grad_diff > 0)
1094
+ {
1095
+ double obj_diff;
1096
+ double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
1097
+ if (quad_coef > 0)
1098
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1099
+ else
1100
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1101
+
1102
+ if (obj_diff <= obj_diff_min)
1103
+ {
1104
+ Gmin_idx=j;
1105
+ obj_diff_min = obj_diff;
1106
+ }
1107
+ }
1108
+ }
1109
+ }
1110
+ }
1111
+
1112
+ if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1113
+ return 1;
1114
+
1115
+ if (y[Gmin_idx] == +1)
1116
+ out_i = Gmaxp_idx;
1117
+ else
1118
+ out_i = Gmaxn_idx;
1119
+ out_j = Gmin_idx;
1120
+
1121
+ return 0;
1122
+ }
1123
+
1124
+ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1125
+ {
1126
+ if(is_upper_bound(i))
1127
+ {
1128
+ if(y[i]==+1)
1129
+ return(-G[i] > Gmax1);
1130
+ else
1131
+ return(-G[i] > Gmax4);
1132
+ }
1133
+ else if(is_lower_bound(i))
1134
+ {
1135
+ if(y[i]==+1)
1136
+ return(G[i] > Gmax2);
1137
+ else
1138
+ return(G[i] > Gmax3);
1139
+ }
1140
+ else
1141
+ return(false);
1142
+ }
1143
+
1144
+ void Solver_NU::do_shrinking()
1145
+ {
1146
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1147
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1148
+ double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1149
+ double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1150
+
1151
+ // find maximal violating pair first
1152
+ int i;
1153
+ for(i=0;i<active_size;i++)
1154
+ {
1155
+ if(!is_upper_bound(i))
1156
+ {
1157
+ if(y[i]==+1)
1158
+ {
1159
+ if(-G[i] > Gmax1) Gmax1 = -G[i];
1160
+ }
1161
+ else if(-G[i] > Gmax4) Gmax4 = -G[i];
1162
+ }
1163
+ if(!is_lower_bound(i))
1164
+ {
1165
+ if(y[i]==+1)
1166
+ {
1167
+ if(G[i] > Gmax2) Gmax2 = G[i];
1168
+ }
1169
+ else if(G[i] > Gmax3) Gmax3 = G[i];
1170
+ }
1171
+ }
1172
+
1173
+ if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1174
+ {
1175
+ unshrink = true;
1176
+ reconstruct_gradient();
1177
+ active_size = l;
1178
+ }
1179
+
1180
+ for(i=0;i<active_size;i++)
1181
+ if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1182
+ {
1183
+ active_size--;
1184
+ while (active_size > i)
1185
+ {
1186
+ if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1187
+ {
1188
+ swap_index(i,active_size);
1189
+ break;
1190
+ }
1191
+ active_size--;
1192
+ }
1193
+ }
1194
+ }
1195
+
1196
+ double Solver_NU::calculate_rho()
1197
+ {
1198
+ int nr_free1 = 0,nr_free2 = 0;
1199
+ double ub1 = INF, ub2 = INF;
1200
+ double lb1 = -INF, lb2 = -INF;
1201
+ double sum_free1 = 0, sum_free2 = 0;
1202
+
1203
+ for(int i=0;i<active_size;i++)
1204
+ {
1205
+ if(y[i]==+1)
1206
+ {
1207
+ if(is_upper_bound(i))
1208
+ lb1 = max(lb1,G[i]);
1209
+ else if(is_lower_bound(i))
1210
+ ub1 = min(ub1,G[i]);
1211
+ else
1212
+ {
1213
+ ++nr_free1;
1214
+ sum_free1 += G[i];
1215
+ }
1216
+ }
1217
+ else
1218
+ {
1219
+ if(is_upper_bound(i))
1220
+ lb2 = max(lb2,G[i]);
1221
+ else if(is_lower_bound(i))
1222
+ ub2 = min(ub2,G[i]);
1223
+ else
1224
+ {
1225
+ ++nr_free2;
1226
+ sum_free2 += G[i];
1227
+ }
1228
+ }
1229
+ }
1230
+
1231
+ double r1,r2;
1232
+ if(nr_free1 > 0)
1233
+ r1 = sum_free1/nr_free1;
1234
+ else
1235
+ r1 = (ub1+lb1)/2;
1236
+
1237
+ if(nr_free2 > 0)
1238
+ r2 = sum_free2/nr_free2;
1239
+ else
1240
+ r2 = (ub2+lb2)/2;
1241
+
1242
+ si->r = (r1+r2)/2;
1243
+ return (r1-r2)/2;
1244
+ }
1245
+
1246
+ //
1247
+ // Q matrices for various formulations
1248
+ //
1249
+ class SVC_Q: public Kernel
1250
+ {
1251
+ public:
1252
+ SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1253
+ :Kernel(prob.l, prob.x, param)
1254
+ {
1255
+ clone(y,y_,prob.l);
1256
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1257
+ QD = new Qfloat[prob.l];
1258
+ for(int i=0;i<prob.l;i++)
1259
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1260
+ }
1261
+
1262
+ Qfloat *get_Q(int i, int len) const
1263
+ {
1264
+ Qfloat *data;
1265
+ int start, j;
1266
+ if((start = cache->get_data(i,&data,len)) < len)
1267
+ {
1268
+ for(j=start;j<len;j++)
1269
+ data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1270
+ }
1271
+ return data;
1272
+ }
1273
+
1274
+ Qfloat *get_QD() const
1275
+ {
1276
+ return QD;
1277
+ }
1278
+
1279
+ void swap_index(int i, int j) const
1280
+ {
1281
+ cache->swap_index(i,j);
1282
+ Kernel::swap_index(i,j);
1283
+ swap(y[i],y[j]);
1284
+ swap(QD[i],QD[j]);
1285
+ }
1286
+
1287
+ ~SVC_Q()
1288
+ {
1289
+ delete[] y;
1290
+ delete cache;
1291
+ delete[] QD;
1292
+ }
1293
+ private:
1294
+ schar *y;
1295
+ Cache *cache;
1296
+ Qfloat *QD;
1297
+ };
1298
+
1299
+ class ONE_CLASS_Q: public Kernel
1300
+ {
1301
+ public:
1302
+ ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1303
+ :Kernel(prob.l, prob.x, param)
1304
+ {
1305
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1306
+ QD = new Qfloat[prob.l];
1307
+ for(int i=0;i<prob.l;i++)
1308
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1309
+ }
1310
+
1311
+ Qfloat *get_Q(int i, int len) const
1312
+ {
1313
+ Qfloat *data;
1314
+ int start, j;
1315
+ if((start = cache->get_data(i,&data,len)) < len)
1316
+ {
1317
+ for(j=start;j<len;j++)
1318
+ data[j] = (Qfloat)(this->*kernel_function)(i,j);
1319
+ }
1320
+ return data;
1321
+ }
1322
+
1323
+ Qfloat *get_QD() const
1324
+ {
1325
+ return QD;
1326
+ }
1327
+
1328
+ void swap_index(int i, int j) const
1329
+ {
1330
+ cache->swap_index(i,j);
1331
+ Kernel::swap_index(i,j);
1332
+ swap(QD[i],QD[j]);
1333
+ }
1334
+
1335
+ ~ONE_CLASS_Q()
1336
+ {
1337
+ delete cache;
1338
+ delete[] QD;
1339
+ }
1340
+ private:
1341
+ Cache *cache;
1342
+ Qfloat *QD;
1343
+ };
1344
+
1345
+ class SVR_Q: public Kernel
1346
+ {
1347
+ public:
1348
+ SVR_Q(const svm_problem& prob, const svm_parameter& param)
1349
+ :Kernel(prob.l, prob.x, param)
1350
+ {
1351
+ l = prob.l;
1352
+ cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1353
+ QD = new Qfloat[2*l];
1354
+ sign = new schar[2*l];
1355
+ index = new int[2*l];
1356
+ for(int k=0;k<l;k++)
1357
+ {
1358
+ sign[k] = 1;
1359
+ sign[k+l] = -1;
1360
+ index[k] = k;
1361
+ index[k+l] = k;
1362
+ QD[k]= (Qfloat)(this->*kernel_function)(k,k);
1363
+ QD[k+l]=QD[k];
1364
+ }
1365
+ buffer[0] = new Qfloat[2*l];
1366
+ buffer[1] = new Qfloat[2*l];
1367
+ next_buffer = 0;
1368
+ }
1369
+
1370
+ void swap_index(int i, int j) const
1371
+ {
1372
+ swap(sign[i],sign[j]);
1373
+ swap(index[i],index[j]);
1374
+ swap(QD[i],QD[j]);
1375
+ }
1376
+
1377
+ Qfloat *get_Q(int i, int len) const
1378
+ {
1379
+ Qfloat *data;
1380
+ int j, real_i = index[i];
1381
+ if(cache->get_data(real_i,&data,l) < l)
1382
+ {
1383
+ for(j=0;j<l;j++)
1384
+ data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1385
+ }
1386
+
1387
+ // reorder and copy
1388
+ Qfloat *buf = buffer[next_buffer];
1389
+ next_buffer = 1 - next_buffer;
1390
+ schar si = sign[i];
1391
+ for(int j=0;j<len;j++)
1392
+ buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1393
+ return buf;
1394
+ }
1395
+
1396
+ Qfloat *get_QD() const
1397
+ {
1398
+ return QD;
1399
+ }
1400
+
1401
+ ~SVR_Q()
1402
+ {
1403
+ delete cache;
1404
+ delete[] sign;
1405
+ delete[] index;
1406
+ delete[] buffer[0];
1407
+ delete[] buffer[1];
1408
+ delete[] QD;
1409
+ }
1410
+ private:
1411
+ int l;
1412
+ Cache *cache;
1413
+ schar *sign;
1414
+ int *index;
1415
+ mutable int next_buffer;
1416
+ Qfloat *buffer[2];
1417
+ Qfloat *QD;
1418
+ };
1419
+
1420
+ //
1421
+ // construct and solve various formulations
1422
+ //
1423
+ static void solve_c_svc(
1424
+ const svm_problem *prob, const svm_parameter* param,
1425
+ double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1426
+ {
1427
+ int l = prob->l;
1428
+ double *minus_ones = new double[l];
1429
+ schar *y = new schar[l];
1430
+
1431
+ int i;
1432
+
1433
+ for(i=0;i<l;i++)
1434
+ {
1435
+ alpha[i] = 0;
1436
+ minus_ones[i] = -1;
1437
+ if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1438
+ }
1439
+
1440
+ Solver s;
1441
+ s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1442
+ alpha, Cp, Cn, param->eps, si, param->shrinking);
1443
+
1444
+ double sum_alpha=0;
1445
+ for(i=0;i<l;i++)
1446
+ sum_alpha += alpha[i];
1447
+
1448
+ if (Cp==Cn)
1449
+ info("nu = %f\n", sum_alpha/(Cp*prob->l));
1450
+
1451
+ for(i=0;i<l;i++)
1452
+ alpha[i] *= y[i];
1453
+
1454
+ delete[] minus_ones;
1455
+ delete[] y;
1456
+ }
1457
+
1458
+ static void solve_nu_svc(
1459
+ const svm_problem *prob, const svm_parameter *param,
1460
+ double *alpha, Solver::SolutionInfo* si)
1461
+ {
1462
+ int i;
1463
+ int l = prob->l;
1464
+ double nu = param->nu;
1465
+
1466
+ schar *y = new schar[l];
1467
+
1468
+ for(i=0;i<l;i++)
1469
+ if(prob->y[i]>0)
1470
+ y[i] = +1;
1471
+ else
1472
+ y[i] = -1;
1473
+
1474
+ double sum_pos = nu*l/2;
1475
+ double sum_neg = nu*l/2;
1476
+
1477
+ for(i=0;i<l;i++)
1478
+ if(y[i] == +1)
1479
+ {
1480
+ alpha[i] = min(1.0,sum_pos);
1481
+ sum_pos -= alpha[i];
1482
+ }
1483
+ else
1484
+ {
1485
+ alpha[i] = min(1.0,sum_neg);
1486
+ sum_neg -= alpha[i];
1487
+ }
1488
+
1489
+ double *zeros = new double[l];
1490
+
1491
+ for(i=0;i<l;i++)
1492
+ zeros[i] = 0;
1493
+
1494
+ Solver_NU s;
1495
+ s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1496
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1497
+ double r = si->r;
1498
+
1499
+ info("C = %f\n",1/r);
1500
+
1501
+ for(i=0;i<l;i++)
1502
+ alpha[i] *= y[i]/r;
1503
+
1504
+ si->rho /= r;
1505
+ si->obj /= (r*r);
1506
+ si->upper_bound_p = 1/r;
1507
+ si->upper_bound_n = 1/r;
1508
+
1509
+ delete[] y;
1510
+ delete[] zeros;
1511
+ }
1512
+
1513
+ static void solve_one_class(
1514
+ const svm_problem *prob, const svm_parameter *param,
1515
+ double *alpha, Solver::SolutionInfo* si)
1516
+ {
1517
+ int l = prob->l;
1518
+ double *zeros = new double[l];
1519
+ schar *ones = new schar[l];
1520
+ int i;
1521
+
1522
+ int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1523
+
1524
+ for(i=0;i<n;i++)
1525
+ alpha[i] = 1;
1526
+ if(n<prob->l)
1527
+ alpha[n] = param->nu * prob->l - n;
1528
+ for(i=n+1;i<l;i++)
1529
+ alpha[i] = 0;
1530
+
1531
+ for(i=0;i<l;i++)
1532
+ {
1533
+ zeros[i] = 0;
1534
+ ones[i] = 1;
1535
+ }
1536
+
1537
+ Solver s;
1538
+ s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1539
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1540
+
1541
+ delete[] zeros;
1542
+ delete[] ones;
1543
+ }
1544
+
1545
+ static void solve_epsilon_svr(
1546
+ const svm_problem *prob, const svm_parameter *param,
1547
+ double *alpha, Solver::SolutionInfo* si)
1548
+ {
1549
+ int l = prob->l;
1550
+ double *alpha2 = new double[2*l];
1551
+ double *linear_term = new double[2*l];
1552
+ schar *y = new schar[2*l];
1553
+ int i;
1554
+
1555
+ for(i=0;i<l;i++)
1556
+ {
1557
+ alpha2[i] = 0;
1558
+ linear_term[i] = param->p - prob->y[i];
1559
+ y[i] = 1;
1560
+
1561
+ alpha2[i+l] = 0;
1562
+ linear_term[i+l] = param->p + prob->y[i];
1563
+ y[i+l] = -1;
1564
+ }
1565
+
1566
+ Solver s;
1567
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1568
+ alpha2, param->C, param->C, param->eps, si, param->shrinking);
1569
+
1570
+ double sum_alpha = 0;
1571
+ for(i=0;i<l;i++)
1572
+ {
1573
+ alpha[i] = alpha2[i] - alpha2[i+l];
1574
+ sum_alpha += fabs(alpha[i]);
1575
+ }
1576
+ info("nu = %f\n",sum_alpha/(param->C*l));
1577
+
1578
+ delete[] alpha2;
1579
+ delete[] linear_term;
1580
+ delete[] y;
1581
+ }
1582
+
1583
+ static void solve_nu_svr(
1584
+ const svm_problem *prob, const svm_parameter *param,
1585
+ double *alpha, Solver::SolutionInfo* si)
1586
+ {
1587
+ int l = prob->l;
1588
+ double C = param->C;
1589
+ double *alpha2 = new double[2*l];
1590
+ double *linear_term = new double[2*l];
1591
+ schar *y = new schar[2*l];
1592
+ int i;
1593
+
1594
+ double sum = C * param->nu * l / 2;
1595
+ for(i=0;i<l;i++)
1596
+ {
1597
+ alpha2[i] = alpha2[i+l] = min(sum,C);
1598
+ sum -= alpha2[i];
1599
+
1600
+ linear_term[i] = - prob->y[i];
1601
+ y[i] = 1;
1602
+
1603
+ linear_term[i+l] = prob->y[i];
1604
+ y[i+l] = -1;
1605
+ }
1606
+
1607
+ Solver_NU s;
1608
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1609
+ alpha2, C, C, param->eps, si, param->shrinking);
1610
+
1611
+ info("epsilon = %f\n",-si->r);
1612
+
1613
+ for(i=0;i<l;i++)
1614
+ alpha[i] = alpha2[i] - alpha2[i+l];
1615
+
1616
+ delete[] alpha2;
1617
+ delete[] linear_term;
1618
+ delete[] y;
1619
+ }
1620
+
1621
+ //
1622
+ // decision_function
1623
+ //
1624
+ struct decision_function
1625
+ {
1626
+ double *alpha;
1627
+ double rho;
1628
+ double obj;
1629
+ };
1630
+
1631
+ decision_function svm_train_one(
1632
+ const svm_problem *prob, const svm_parameter *param,
1633
+ double Cp, double Cn)
1634
+ {
1635
+ double *alpha = Malloc(double,prob->l);
1636
+ Solver::SolutionInfo si;
1637
+ switch(param->svm_type)
1638
+ {
1639
+ case C_SVC:
1640
+ solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1641
+ break;
1642
+ case NU_SVC:
1643
+ solve_nu_svc(prob,param,alpha,&si);
1644
+ break;
1645
+ case ONE_CLASS:
1646
+ solve_one_class(prob,param,alpha,&si);
1647
+ break;
1648
+ case EPSILON_SVR:
1649
+ solve_epsilon_svr(prob,param,alpha,&si);
1650
+ break;
1651
+ case NU_SVR:
1652
+ solve_nu_svr(prob,param,alpha,&si);
1653
+ break;
1654
+ }
1655
+
1656
+ info("obj = %f, rho = %f\n",si.obj,si.rho);
1657
+
1658
+ // output SVs
1659
+
1660
+ int nSV = 0;
1661
+ int nBSV = 0;
1662
+ for(int i=0;i<prob->l;i++)
1663
+ {
1664
+ if(fabs(alpha[i]) > 0)
1665
+ {
1666
+ ++nSV;
1667
+ if(prob->y[i] > 0)
1668
+ {
1669
+ if(fabs(alpha[i]) >= si.upper_bound_p)
1670
+ ++nBSV;
1671
+ }
1672
+ else
1673
+ {
1674
+ if(fabs(alpha[i]) >= si.upper_bound_n)
1675
+ ++nBSV;
1676
+ }
1677
+ }
1678
+ }
1679
+
1680
+ info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1681
+
1682
+ decision_function f;
1683
+ f.alpha = alpha;
1684
+ f.rho = si.rho;
1685
+ f.obj = si.obj;
1686
+ return f;
1687
+ }
1688
+
1689
+ //
1690
+ // svm_model
1691
+ //
1692
+ struct svm_model
1693
+ {
1694
+ svm_parameter param; // parameter
1695
+ int nr_class; // number of classes, = 2 in regression/one class svm
1696
+ int l; // total #SV
1697
+ svm_node **SV; // SVs (SV[l])
1698
+ double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
1699
+ double *rho; // constants in decision functions (rho[k*(k-1)/2])
1700
+ double *probA; // pairwise probability information
1701
+ double *probB;
1702
+
1703
+ double *obj;
1704
+
1705
+ // for classification only
1706
+
1707
+ int *label; // label of each class (label[k])
1708
+ int *nSV; // number of SVs for each class (nSV[k])
1709
+ // nSV[0] + nSV[1] + ... + nSV[k-1] = l
1710
+ // XXX
1711
+ int free_sv; // 1 if svm_model is created by svm_load_model
1712
+ // 0 if svm_model is created by svm_train
1713
+ };
1714
+
1715
+ // Get the rho element of the model. Only works when there are two
1716
+ // labels (1 classifier).
1717
+ double svm_get_model_rho(struct svm_model *model)
1718
+ {
1719
+ if (model->nr_class > 2)
1720
+ info("warning: rho requested for model with more than 2 labels");
1721
+ return model->rho[0];
1722
+ }
1723
+
1724
+ int svm_get_model_num_coefs(struct svm_model *model)
1725
+ {
1726
+ return model->l;
1727
+ }
1728
+
1729
+ // Get the coefficients of the model. Only works when there are two
1730
+ // labels (1 classifier).
1731
+ void svm_get_model_coefs(struct svm_model *model, double* out_array)
1732
+ {
1733
+ if (model->nr_class > 2)
1734
+ info("warning: coefficients requested for model with more than 2 labels");
1735
+ memcpy(out_array, model->sv_coef[0], sizeof(double) * model->l);
1736
+ }
1737
+
1738
+ // Get the permutation of the indices of the coefficients w.r.t. the input problem.
1739
+ void svm_get_model_perm(struct svm_model *model, int* out_array)
1740
+ {
1741
+ if (model->nr_class > 2)
1742
+ info("warning: permutation requested for model with more than 2 labels");
1743
+ int i;
1744
+ for ( i = 0; i < model->l; ++i)
1745
+ {
1746
+ struct svm_node* n = model->SV[i];
1747
+ if (0 != n->index)
1748
+ {
1749
+ info("warning: missing 0 index");
1750
+ out_array[i] = -1;
1751
+ }
1752
+ else
1753
+ out_array[i] = (int)n->value;
1754
+ }
1755
+ }
1756
+
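The accessors above (svm_get_model_rho, svm_get_model_num_coefs, svm_get_model_coefs, svm_get_model_perm) appear to come from the merged patches mentioned in the README; they expose the pieces of the two-class decision function, which in LIBSVM's notation is

    f(x) = \sum_{i=1}^{l} \mathit{sv\_coef}_i \, K(\mathit{SV}_i,\, x) - \rho

and whose sign gives the predicted class.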
1757
+ // Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
1758
+ void sigmoid_train(
1759
+ int l, const double *dec_values, const double *labels,
1760
+ double& A, double& B)
1761
+ {
1762
+ double prior1=0, prior0 = 0;
1763
+ int i;
1764
+
1765
+ for (i=0;i<l;i++)
1766
+ if (labels[i] > 0) prior1+=1;
1767
+ else prior0+=1;
1768
+
1769
+ int max_iter=100; // Maximal number of iterations
1770
+ double min_step=1e-10; // Minimal step taken in line search
1771
+ double sigma=1e-12; // For numerically strict PD of Hessian
1772
+ double eps=1e-5;
1773
+ double hiTarget=(prior1+1.0)/(prior1+2.0);
1774
+ double loTarget=1/(prior0+2.0);
1775
+ double *t=Malloc(double,l);
1776
+ double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1777
+ double newA,newB,newf,d1,d2;
1778
+ int iter;
1779
+
1780
+ // Initial Point and Initial Fun Value
1781
+ A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1782
+ double fval = 0.0;
1783
+
1784
+ for (i=0;i<l;i++)
1785
+ {
1786
+ if (labels[i]>0) t[i]=hiTarget;
1787
+ else t[i]=loTarget;
1788
+ fApB = dec_values[i]*A+B;
1789
+ if (fApB>=0)
1790
+ fval += t[i]*fApB + log(1+exp(-fApB));
1791
+ else
1792
+ fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1793
+ }
1794
+ for (iter=0;iter<max_iter;iter++)
1795
+ {
1796
+ // Update Gradient and Hessian (use H' = H + sigma I)
1797
+ h11=sigma; // numerically ensures strict PD
1798
+ h22=sigma;
1799
+ h21=0.0;g1=0.0;g2=0.0;
1800
+ for (i=0;i<l;i++)
1801
+ {
1802
+ fApB = dec_values[i]*A+B;
1803
+ if (fApB >= 0)
1804
+ {
1805
+ p=exp(-fApB)/(1.0+exp(-fApB));
1806
+ q=1.0/(1.0+exp(-fApB));
1807
+ }
1808
+ else
1809
+ {
1810
+ p=1.0/(1.0+exp(fApB));
1811
+ q=exp(fApB)/(1.0+exp(fApB));
1812
+ }
1813
+ d2=p*q;
1814
+ h11+=dec_values[i]*dec_values[i]*d2;
1815
+ h22+=d2;
1816
+ h21+=dec_values[i]*d2;
1817
+ d1=t[i]-p;
1818
+ g1+=dec_values[i]*d1;
1819
+ g2+=d1;
1820
+ }
1821
+
1822
+ // Stopping Criteria
1823
+ if (fabs(g1)<eps && fabs(g2)<eps)
1824
+ break;
1825
+
1826
+ // Finding Newton direction: -inv(H') * g
1827
+ det=h11*h22-h21*h21;
1828
+ dA=-(h22*g1 - h21 * g2) / det;
1829
+ dB=-(-h21*g1+ h11 * g2) / det;
1830
+ gd=g1*dA+g2*dB;
1831
+
1832
+
1833
+ stepsize = 1; // Line Search
1834
+ while (stepsize >= min_step)
1835
+ {
1836
+ newA = A + stepsize * dA;
1837
+ newB = B + stepsize * dB;
1838
+
1839
+ // New function value
1840
+ newf = 0.0;
1841
+ for (i=0;i<l;i++)
1842
+ {
1843
+ fApB = dec_values[i]*newA+newB;
1844
+ if (fApB >= 0)
1845
+ newf += t[i]*fApB + log(1+exp(-fApB));
1846
+ else
1847
+ newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1848
+ }
1849
+ // Check sufficient decrease
1850
+ if (newf<fval+0.0001*stepsize*gd)
1851
+ {
1852
+ A=newA;B=newB;fval=newf;
1853
+ break;
1854
+ }
1855
+ else
1856
+ stepsize = stepsize / 2.0;
1857
+ }
1858
+
1859
+ if (stepsize < min_step)
1860
+ {
1861
+ info("Line search fails in two-class probability estimates\n");
1862
+ break;
1863
+ }
1864
+ }
1865
+
1866
+ if (iter>=max_iter)
1867
+ info("Reaching maximal iterations in two-class probability estimates\n");
1868
+ free(t);
1869
+ }
1870
+
1871
+ double sigmoid_predict(double decision_value, double A, double B)
1872
+ {
1873
+ double fApB = decision_value*A+B;
1874
+ if (fApB >= 0)
1875
+ return exp(-fApB)/(1.0+exp(-fApB));
1876
+ else
1877
+ return 1.0/(1+exp(fApB)) ;
1878
+ }
1879
+
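In closed form, sigmoid_train fits the parameters A and B of Platt's posterior estimate, which sigmoid_predict then evaluates in a numerically stable way as

    P(y = +1 \mid f) = \frac{1}{1 + \exp(A f + B)}

where f is the SVM decision value.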
1880
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1881
+ void multiclass_probability(int k, double **r, double *p)
1882
+ {
1883
+ int t,j;
1884
+ int iter = 0, max_iter=max(100,k);
1885
+ double **Q=Malloc(double *,k);
1886
+ double *Qp=Malloc(double,k);
1887
+ double pQp, eps=0.005/k;
1888
+
1889
+ for (t=0;t<k;t++)
1890
+ {
1891
+ p[t]=1.0/k; // Valid if k = 1
1892
+ Q[t]=Malloc(double,k);
1893
+ Q[t][t]=0;
1894
+ for (j=0;j<t;j++)
1895
+ {
1896
+ Q[t][t]+=r[j][t]*r[j][t];
1897
+ Q[t][j]=Q[j][t];
1898
+ }
1899
+ for (j=t+1;j<k;j++)
1900
+ {
1901
+ Q[t][t]+=r[j][t]*r[j][t];
1902
+ Q[t][j]=-r[j][t]*r[t][j];
1903
+ }
1904
+ }
1905
+ for (iter=0;iter<max_iter;iter++)
1906
+ {
1907
+ // stopping condition, recalculate QP,pQP for numerical accuracy
1908
+ pQp=0;
1909
+ for (t=0;t<k;t++)
1910
+ {
1911
+ Qp[t]=0;
1912
+ for (j=0;j<k;j++)
1913
+ Qp[t]+=Q[t][j]*p[j];
1914
+ pQp+=p[t]*Qp[t];
1915
+ }
1916
+ double max_error=0;
1917
+ for (t=0;t<k;t++)
1918
+ {
1919
+ double error=fabs(Qp[t]-pQp);
1920
+ if (error>max_error)
1921
+ max_error=error;
1922
+ }
1923
+ if (max_error<eps) break;
1924
+
1925
+ for (t=0;t<k;t++)
1926
+ {
1927
+ double diff=(-Qp[t]+pQp)/Q[t][t];
1928
+ p[t]+=diff;
1929
+ pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1930
+ for (j=0;j<k;j++)
1931
+ {
1932
+ Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1933
+ p[j]/=(1+diff);
1934
+ }
1935
+ }
1936
+ }
1937
+ if (iter>=max_iter)
1938
+ info("Exceeds max_iter in multiclass_prob\n");
1939
+ for(t=0;t<k;t++) free(Q[t]);
1940
+ free(Q);
1941
+ free(Qp);
1942
+ }
1943
+
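In the notation of Wu, Lin, and Weng, the loop above solves their second formulation,

    \min_{p}\ \tfrac{1}{2}\sum_{t=1}^{k}\sum_{j \ne t}\big(r_{jt}\, p_t - r_{tj}\, p_j\big)^2
    \text{subject to}\ \ \sum_{t=1}^{k} p_t = 1,\quad p_t \ge 0,

where r_{ij} are the pairwise probability estimates; the matrix Q built at the top of the function is exactly the quadratic form of this objective.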
1944
+ // Cross-validation decision values for probability estimates
1945
+ void svm_binary_svc_probability(
1946
+ const svm_problem *prob, const svm_parameter *param,
1947
+ double Cp, double Cn, double& probA, double& probB)
1948
+ {
1949
+ int i;
1950
+ int nr_fold = 5;
1951
+ int *perm = Malloc(int,prob->l);
1952
+ double *dec_values = Malloc(double,prob->l);
1953
+
1954
+ // random shuffle
1955
+ for(i=0;i<prob->l;i++) perm[i]=i;
1956
+ for(i=0;i<prob->l;i++)
1957
+ {
1958
+ int j = i+rand()%(prob->l-i);
1959
+ swap(perm[i],perm[j]);
1960
+ }
1961
+ for(i=0;i<nr_fold;i++)
1962
+ {
1963
+ int begin = i*prob->l/nr_fold;
1964
+ int end = (i+1)*prob->l/nr_fold;
1965
+ int j,k;
1966
+ struct svm_problem subprob;
1967
+
1968
+ subprob.l = prob->l-(end-begin);
1969
+ subprob.x = Malloc(struct svm_node*,subprob.l);
1970
+ subprob.y = Malloc(double,subprob.l);
1971
+
1972
+ k=0;
1973
+ for(j=0;j<begin;j++)
1974
+ {
1975
+ subprob.x[k] = prob->x[perm[j]];
1976
+ subprob.y[k] = prob->y[perm[j]];
1977
+ ++k;
1978
+ }
1979
+ for(j=end;j<prob->l;j++)
1980
+ {
1981
+ subprob.x[k] = prob->x[perm[j]];
1982
+ subprob.y[k] = prob->y[perm[j]];
1983
+ ++k;
1984
+ }
1985
+ int p_count=0,n_count=0;
1986
+ for(j=0;j<k;j++)
1987
+ if(subprob.y[j]>0)
1988
+ p_count++;
1989
+ else
1990
+ n_count++;
1991
+
1992
+ if(p_count==0 && n_count==0)
1993
+ for(j=begin;j<end;j++)
1994
+ dec_values[perm[j]] = 0;
1995
+ else if(p_count > 0 && n_count == 0)
1996
+ for(j=begin;j<end;j++)
1997
+ dec_values[perm[j]] = 1;
1998
+ else if(p_count == 0 && n_count > 0)
1999
+ for(j=begin;j<end;j++)
2000
+ dec_values[perm[j]] = -1;
2001
+ else
2002
+ {
2003
+ svm_parameter subparam = *param;
2004
+ subparam.probability=0;
2005
+ subparam.C=1.0;
2006
+ subparam.nr_weight=2;
2007
+ subparam.weight_label = Malloc(int,2);
2008
+ subparam.weight = Malloc(double,2);
2009
+ subparam.weight_label[0]=+1;
2010
+ subparam.weight_label[1]=-1;
2011
+ subparam.weight[0]=Cp;
2012
+ subparam.weight[1]=Cn;
2013
+ struct svm_model *submodel = svm_train(&subprob,&subparam);
2014
+ for(j=begin;j<end;j++)
2015
+ {
2016
+ svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
2017
+ // ensure +1 -1 order; this is why the CV subroutine is not used here
2018
+ dec_values[perm[j]] *= submodel->label[0];
2019
+ }
2020
+ svm_destroy_model(submodel);
2021
+ svm_destroy_param(&subparam);
2022
+ }
2023
+ free(subprob.x);
2024
+ free(subprob.y);
2025
+ }
2026
+ sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
2027
+ free(dec_values);
2028
+ free(perm);
2029
+ }
2030
+
2031
+ // Return parameter of a Laplace distribution
2032
+ double svm_svr_probability(
2033
+ const svm_problem *prob, const svm_parameter *param)
2034
+ {
2035
+ int i;
2036
+ int nr_fold = 5;
2037
+ double *ymv = Malloc(double,prob->l);
2038
+ double mae = 0;
2039
+
2040
+ svm_parameter newparam = *param;
2041
+ newparam.probability = 0;
2042
+ svm_cross_validation(prob,&newparam,nr_fold,ymv);
2043
+ for(i=0;i<prob->l;i++)
2044
+ {
2045
+ ymv[i]=prob->y[i]-ymv[i];
2046
+ mae += fabs(ymv[i]);
2047
+ }
2048
+ mae /= prob->l;
2049
+ double std=sqrt(2*mae*mae);
2050
+ int count=0;
2051
+ mae=0;
2052
+ for(i=0;i<prob->l;i++)
2053
+ if (fabs(ymv[i]) > 5*std)
2054
+ count=count+1;
2055
+ else
2056
+ mae+=fabs(ymv[i]);
2057
+ mae /= (prob->l-count);
2058
+ info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2059
+ free(ymv);
2060
+ return mae;
2061
+ }
2062
+
2063
+
2064
+ // label: label name, start: beginning index of each class, count: #data of each class, perm: indices to the original data
2065
+ // perm, length l, must be allocated before calling this subroutine
2066
+ void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2067
+ {
2068
+ int l = prob->l;
2069
+ int max_nr_class = 16;
2070
+ int nr_class = 0;
2071
+ int *label = Malloc(int,max_nr_class);
2072
+ int *count = Malloc(int,max_nr_class);
2073
+ int *data_label = Malloc(int,l);
2074
+ int i;
2075
+
2076
+ for(i=0;i<l;i++)
2077
+ {
2078
+ int this_label = (int)prob->y[i];
2079
+ int j;
2080
+ for(j=0;j<nr_class;j++)
2081
+ {
2082
+ if(this_label == label[j])
2083
+ {
2084
+ ++count[j];
2085
+ break;
2086
+ }
2087
+ }
2088
+ data_label[i] = j;
2089
+ if(j == nr_class)
2090
+ {
2091
+ if(nr_class == max_nr_class)
2092
+ {
2093
+ max_nr_class *= 2;
2094
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2095
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2096
+ }
2097
+ label[nr_class] = this_label;
2098
+ count[nr_class] = 1;
2099
+ ++nr_class;
2100
+ }
2101
+ }
2102
+
2103
+ int *start = Malloc(int,nr_class);
2104
+ start[0] = 0;
2105
+ for(i=1;i<nr_class;i++)
2106
+ start[i] = start[i-1]+count[i-1];
2107
+ for(i=0;i<l;i++)
2108
+ {
2109
+ perm[start[data_label[i]]] = i;
2110
+ ++start[data_label[i]];
2111
+ }
2112
+ start[0] = 0;
2113
+ for(i=1;i<nr_class;i++)
2114
+ start[i] = start[i-1]+count[i-1];
2115
+
2116
+ *nr_class_ret = nr_class;
2117
+ *label_ret = label;
2118
+ *start_ret = start;
2119
+ *count_ret = count;
2120
+ free(data_label);
2121
+ }
2122
+
2123
+ //
2124
+ // Interface functions
2125
+ //
2126
+ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2127
+ {
2128
+ svm_model *model = Malloc(svm_model,1);
2129
+ model->param = *param;
2130
+ model->free_sv = 0; // XXX
2131
+
2132
+ if(param->svm_type == ONE_CLASS ||
2133
+ param->svm_type == EPSILON_SVR ||
2134
+ param->svm_type == NU_SVR)
2135
+ {
2136
+ // regression or one-class-svm
2137
+ model->nr_class = 2;
2138
+ model->label = NULL;
2139
+ model->nSV = NULL;
2140
+ model->probA = NULL; model->probB = NULL;
2141
+ model->sv_coef = Malloc(double *,1);
2142
+
2143
+ if(param->probability &&
2144
+ (param->svm_type == EPSILON_SVR ||
2145
+ param->svm_type == NU_SVR))
2146
+ {
2147
+ model->probA = Malloc(double,1);
2148
+ model->probA[0] = svm_svr_probability(prob,param);
2149
+ }
2150
+
2151
+ decision_function f = svm_train_one(prob,param,0,0);
2152
+ model->rho = Malloc(double,1);
2153
+ model->rho[0] = f.rho;
2154
+ model->obj = Malloc(double,1);
2155
+ model->obj[0] = f.obj;
2156
+
2157
+ int nSV = 0;
2158
+ int i;
2159
+ for(i=0;i<prob->l;i++)
2160
+ if(fabs(f.alpha[i]) > 0) ++nSV;
2161
+ model->l = nSV;
2162
+ model->SV = Malloc(svm_node *,nSV);
2163
+ model->sv_coef[0] = Malloc(double,nSV);
2164
+ int j = 0;
2165
+ for(i=0;i<prob->l;i++)
2166
+ if(fabs(f.alpha[i]) > 0)
2167
+ {
2168
+ model->SV[j] = prob->x[i];
2169
+ model->sv_coef[0][j] = f.alpha[i];
2170
+ ++j;
2171
+ }
2172
+
2173
+ free(f.alpha);
2174
+ }
2175
+ else
2176
+ {
2177
+ // classification
2178
+ int l = prob->l;
2179
+ int nr_class;
2180
+ int *label = NULL;
2181
+ int *start = NULL;
2182
+ int *count = NULL;
2183
+ int *perm = Malloc(int,l);
2184
+
2185
+ // group training data of the same class
2186
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2187
+ svm_node **x = Malloc(svm_node *,l);
2188
+ int i;
2189
+ for(i=0;i<l;i++)
2190
+ x[i] = prob->x[perm[i]];
2191
+
2192
+ // calculate weighted C
2193
+
2194
+ double *weighted_C = Malloc(double, nr_class);
2195
+ for(i=0;i<nr_class;i++)
2196
+ weighted_C[i] = param->C;
2197
+ for(i=0;i<param->nr_weight;i++)
2198
+ {
2199
+ int j;
2200
+ for(j=0;j<nr_class;j++)
2201
+ if(param->weight_label[i] == label[j])
2202
+ break;
2203
+ if(j == nr_class)
2204
+ fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
2205
+ else
2206
+ weighted_C[j] *= param->weight[i];
2207
+ }
2208
+
2209
+ // train k*(k-1)/2 models
2210
+
2211
+ bool *nonzero = Malloc(bool,l);
2212
+ for(i=0;i<l;i++)
2213
+ nonzero[i] = false;
2214
+ decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2215
+
2216
+ double *probA=NULL,*probB=NULL;
2217
+ if (param->probability)
2218
+ {
2219
+ probA=Malloc(double,nr_class*(nr_class-1)/2);
2220
+ probB=Malloc(double,nr_class*(nr_class-1)/2);
2221
+ }
2222
+
2223
+ int p = 0;
2224
+ for(i=0;i<nr_class;i++)
2225
+ for(int j=i+1;j<nr_class;j++)
2226
+ {
2227
+ svm_problem sub_prob;
2228
+ int si = start[i], sj = start[j];
2229
+ int ci = count[i], cj = count[j];
2230
+ sub_prob.l = ci+cj;
2231
+ sub_prob.x = Malloc(svm_node *,sub_prob.l);
2232
+ sub_prob.y = Malloc(double,sub_prob.l);
2233
+ int k;
2234
+ for(k=0;k<ci;k++)
2235
+ {
2236
+ sub_prob.x[k] = x[si+k];
2237
+ sub_prob.y[k] = +1;
2238
+ }
2239
+ for(k=0;k<cj;k++)
2240
+ {
2241
+ sub_prob.x[ci+k] = x[sj+k];
2242
+ sub_prob.y[ci+k] = -1;
2243
+ }
2244
+
2245
+ if(param->probability)
2246
+ svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2247
+
2248
+ f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2249
+ for(k=0;k<ci;k++)
2250
+ if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2251
+ nonzero[si+k] = true;
2252
+ for(k=0;k<cj;k++)
2253
+ if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2254
+ nonzero[sj+k] = true;
2255
+ free(sub_prob.x);
2256
+ free(sub_prob.y);
2257
+ ++p;
2258
+ }
2259
+
2260
+ // build output
2261
+
2262
+ model->nr_class = nr_class;
2263
+
2264
+ model->label = Malloc(int,nr_class);
2265
+ for(i=0;i<nr_class;i++)
2266
+ model->label[i] = label[i];
2267
+
2268
+ model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2269
+ model->obj = Malloc(double,nr_class*(nr_class-1)/2);
2270
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2271
+ {
2272
+ model->rho[i] = f[i].rho;
2273
+ model->obj[i] = f[i].obj;
2274
+ }
2275
+
2276
+
2277
+ if(param->probability)
2278
+ {
2279
+ model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2280
+ model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2281
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2282
+ {
2283
+ model->probA[i] = probA[i];
2284
+ model->probB[i] = probB[i];
2285
+ }
2286
+ }
2287
+ else
2288
+ {
2289
+ model->probA=NULL;
2290
+ model->probB=NULL;
2291
+ }
2292
+
2293
+ int total_sv = 0;
2294
+ int *nz_count = Malloc(int,nr_class);
2295
+ model->nSV = Malloc(int,nr_class);
2296
+ for(i=0;i<nr_class;i++)
2297
+ {
2298
+ int nSV = 0;
2299
+ for(int j=0;j<count[i];j++)
2300
+ if(nonzero[start[i]+j])
2301
+ {
2302
+ ++nSV;
2303
+ ++total_sv;
2304
+ }
2305
+ model->nSV[i] = nSV;
2306
+ nz_count[i] = nSV;
2307
+ }
2308
+
2309
+ info("Total nSV = %d\n",total_sv);
2310
+
2311
+ model->l = total_sv;
2312
+ model->SV = Malloc(svm_node *,total_sv);
2313
+ p = 0;
2314
+ for(i=0;i<l;i++)
2315
+ if(nonzero[i]) model->SV[p++] = x[i];
2316
+
2317
+ int *nz_start = Malloc(int,nr_class);
2318
+ nz_start[0] = 0;
2319
+ for(i=1;i<nr_class;i++)
2320
+ nz_start[i] = nz_start[i-1]+nz_count[i-1];
2321
+
2322
+ model->sv_coef = Malloc(double *,nr_class-1);
2323
+ for(i=0;i<nr_class-1;i++)
2324
+ model->sv_coef[i] = Malloc(double,total_sv);
2325
+
2326
+ p = 0;
2327
+ for(i=0;i<nr_class;i++)
2328
+ for(int j=i+1;j<nr_class;j++)
2329
+ {
2330
+ // classifier (i,j): coefficients with
2331
+ // i are in sv_coef[j-1][nz_start[i]...],
2332
+ // j are in sv_coef[i][nz_start[j]...]
2333
+
2334
+ int si = start[i];
2335
+ int sj = start[j];
2336
+ int ci = count[i];
2337
+ int cj = count[j];
2338
+
2339
+ int q = nz_start[i];
2340
+ int k;
2341
+ for(k=0;k<ci;k++)
2342
+ if(nonzero[si+k])
2343
+ model->sv_coef[j-1][q++] = f[p].alpha[k];
2344
+ q = nz_start[j];
2345
+ for(k=0;k<cj;k++)
2346
+ if(nonzero[sj+k])
2347
+ model->sv_coef[i][q++] = f[p].alpha[ci+k];
2348
+ ++p;
2349
+ }
2350
+
2351
+ free(label);
2352
+ free(probA);
2353
+ free(probB);
2354
+ free(count);
2355
+ free(perm);
2356
+ free(start);
2357
+ free(x);
2358
+ free(weighted_C);
2359
+ free(nonzero);
2360
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2361
+ free(f[i].alpha);
2362
+ free(f);
2363
+ free(nz_count);
2364
+ free(nz_start);
2365
+ }
2366
+ return model;
2367
+ }
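
For orientation, the expected C-level calling sequence for svm_train looks roughly like the sketch below. It is a minimal, hypothetical example (toy data, made-up parameter values), not code shipped with the gem; it only uses functions defined in this file and declared in svm.h.

#include <cstdio>
#include "svm.h"

int main()
{
    // two training points in two dimensions with labels +1 and -1 (toy data)
    svm_node x0[] = {{1, 1.0}, {2, 1.0}, {-1, 0.0}};     // index -1 terminates a vector
    svm_node x1[] = {{1, -1.0}, {2, -1.0}, {-1, 0.0}};
    svm_node *x[] = {x0, x1};
    double y[] = {+1, -1};

    svm_problem prob;
    prob.l = 2;
    prob.x = x;
    prob.y = y;

    svm_parameter param;
    param.svm_type = C_SVC;
    param.kernel_type = LINEAR;
    param.degree = 3;
    param.gamma = 0.5;
    param.coef0 = 0;
    param.nu = 0.5;
    param.cache_size = 100;
    param.C = 10;
    param.eps = 1e-3;
    param.p = 0.1;
    param.shrinking = 1;
    param.probability = 0;
    param.nr_weight = 0;
    param.weight_label = NULL;
    param.weight = NULL;

    const char *err = svm_check_parameter(&prob, &param);   // NULL means the parameters are usable
    if (err) { fprintf(stderr, "%s\n", err); return 1; }

    struct svm_model *model = svm_train(&prob, &param);

    svm_node query[] = {{1, 0.8}, {2, 0.7}, {-1, 0.0}};
    printf("predicted label: %g\n", svm_predict(model, query));

    svm_destroy_model(model);      // prob.x / prob.y must outlive the model (free_sv == 0)
    svm_destroy_param(&param);
    return 0;
}
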
2368
+
2369
+ // Stratified cross validation
2370
+ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2371
+ {
2372
+ int i;
2373
+ int *fold_start = Malloc(int,nr_fold+1);
2374
+ int l = prob->l;
2375
+ int *perm = Malloc(int,l);
2376
+ int nr_class;
2377
+
2378
+ // stratified cv may not give a leave-one-out rate:
2379
+ // splitting each class into l folds could leave some folds with zero elements
2380
+ if((param->svm_type == C_SVC ||
2381
+ param->svm_type == NU_SVC) && nr_fold < l)
2382
+ {
2383
+ int *start = NULL;
2384
+ int *label = NULL;
2385
+ int *count = NULL;
2386
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2387
+
2388
+ // randomly shuffle, then group the data by fold using the array perm
2389
+ int *fold_count = Malloc(int,nr_fold);
2390
+ int c;
2391
+ int *index = Malloc(int,l);
2392
+ for(i=0;i<l;i++)
2393
+ index[i]=perm[i];
2394
+ for (c=0; c<nr_class; c++)
2395
+ for(i=0;i<count[c];i++)
2396
+ {
2397
+ int j = i+rand()%(count[c]-i);
2398
+ swap(index[start[c]+j],index[start[c]+i]);
2399
+ }
2400
+ for(i=0;i<nr_fold;i++)
2401
+ {
2402
+ fold_count[i] = 0;
2403
+ for (c=0; c<nr_class;c++)
2404
+ fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2405
+ }
2406
+ fold_start[0]=0;
2407
+ for (i=1;i<=nr_fold;i++)
2408
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2409
+ for (c=0; c<nr_class;c++)
2410
+ for(i=0;i<nr_fold;i++)
2411
+ {
2412
+ int begin = start[c]+i*count[c]/nr_fold;
2413
+ int end = start[c]+(i+1)*count[c]/nr_fold;
2414
+ for(int j=begin;j<end;j++)
2415
+ {
2416
+ perm[fold_start[i]] = index[j];
2417
+ fold_start[i]++;
2418
+ }
2419
+ }
2420
+ fold_start[0]=0;
2421
+ for (i=1;i<=nr_fold;i++)
2422
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2423
+ free(start);
2424
+ free(label);
2425
+ free(count);
2426
+ free(index);
2427
+ free(fold_count);
2428
+ }
2429
+ else
2430
+ {
2431
+ for(i=0;i<l;i++) perm[i]=i;
2432
+ for(i=0;i<l;i++)
2433
+ {
2434
+ int j = i+rand()%(l-i);
2435
+ swap(perm[i],perm[j]);
2436
+ }
2437
+ for(i=0;i<=nr_fold;i++)
2438
+ fold_start[i]=i*l/nr_fold;
2439
+ }
2440
+
2441
+ for(i=0;i<nr_fold;i++)
2442
+ {
2443
+ int begin = fold_start[i];
2444
+ int end = fold_start[i+1];
2445
+ int j,k;
2446
+ struct svm_problem subprob;
2447
+
2448
+ subprob.l = l-(end-begin);
2449
+ subprob.x = Malloc(struct svm_node*,subprob.l);
2450
+ subprob.y = Malloc(double,subprob.l);
2451
+
2452
+ k=0;
2453
+ for(j=0;j<begin;j++)
2454
+ {
2455
+ subprob.x[k] = prob->x[perm[j]];
2456
+ subprob.y[k] = prob->y[perm[j]];
2457
+ ++k;
2458
+ }
2459
+ for(j=end;j<l;j++)
2460
+ {
2461
+ subprob.x[k] = prob->x[perm[j]];
2462
+ subprob.y[k] = prob->y[perm[j]];
2463
+ ++k;
2464
+ }
2465
+ struct svm_model *submodel = svm_train(&subprob,param);
2466
+ if(param->probability &&
2467
+ (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2468
+ {
2469
+ double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2470
+ for(j=begin;j<end;j++)
2471
+ target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2472
+ free(prob_estimates);
2473
+ }
2474
+ else
2475
+ for(j=begin;j<end;j++)
2476
+ target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2477
+ svm_destroy_model(submodel);
2478
+ free(subprob.x);
2479
+ free(subprob.y);
2480
+ }
2481
+ free(fold_start);
2482
+ free(perm);
2483
+ }
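
A matching sketch for the cross validation entry point, again hypothetical (the helper name cv_accuracy is invented here) and assuming prob and param are set up as in the svm_train example above:

#include <cstdio>
#include "svm.h"

// 5-fold cross-validated accuracy for a classification problem (sketch)
double cv_accuracy(const svm_problem *prob, const svm_parameter *param)
{
    double *target = new double[prob->l];
    svm_cross_validation(prob, param, 5, target);   // target[i] receives the CV prediction for point i
    int correct = 0;
    for (int i = 0; i < prob->l; i++)
        if (target[i] == prob->y[i]) ++correct;
    delete [] target;
    return 100.0 * correct / prob->l;
}
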
2484
+
2485
+ double svm_get_obj(const svm_model *model, const int i)
2486
+ {
2487
+ return model->obj[i];
2488
+ }
2489
+
2490
+ int svm_get_svm_type(const svm_model *model)
2491
+ {
2492
+ return model->param.svm_type;
2493
+ }
2494
+
2495
+ int svm_get_nr_class(const svm_model *model)
2496
+ {
2497
+ return model->nr_class;
2498
+ }
2499
+
2500
+ void svm_get_labels(const svm_model *model, int* label)
2501
+ {
2502
+ if (model->label != NULL)
2503
+ for(int i=0;i<model->nr_class;i++)
2504
+ label[i] = model->label[i];
2505
+ }
2506
+
2507
+ double svm_get_svr_probability(const svm_model *model)
2508
+ {
2509
+ if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2510
+ model->probA!=NULL)
2511
+ return model->probA[0];
2512
+ else
2513
+ {
2514
+ info("Model doesn't contain information for SVR probability inference\n");
2515
+ return 0;
2516
+ }
2517
+ }
2518
+
2519
+ void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2520
+ {
2521
+ if(model->param.svm_type == ONE_CLASS ||
2522
+ model->param.svm_type == EPSILON_SVR ||
2523
+ model->param.svm_type == NU_SVR)
2524
+ {
2525
+ double *sv_coef = model->sv_coef[0];
2526
+ double sum = 0;
2527
+ for(int i=0;i<model->l;i++)
2528
+ sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2529
+ sum -= model->rho[0];
2530
+ *dec_values = sum;
2531
+ }
2532
+ else
2533
+ {
2534
+ int i;
2535
+ int nr_class = model->nr_class;
2536
+ int l = model->l;
2537
+
2538
+ double *kvalue = Malloc(double,l);
2539
+ for(i=0;i<l;i++)
2540
+ kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2541
+
2542
+ int *start = Malloc(int,nr_class);
2543
+ start[0] = 0;
2544
+ for(i=1;i<nr_class;i++)
2545
+ start[i] = start[i-1]+model->nSV[i-1];
2546
+
2547
+ int p=0;
2548
+ for(i=0;i<nr_class;i++)
2549
+ for(int j=i+1;j<nr_class;j++)
2550
+ {
2551
+ double sum = 0;
2552
+ int si = start[i];
2553
+ int sj = start[j];
2554
+ int ci = model->nSV[i];
2555
+ int cj = model->nSV[j];
2556
+
2557
+ int k;
2558
+ double *coef1 = model->sv_coef[j-1];
2559
+ double *coef2 = model->sv_coef[i];
2560
+ for(k=0;k<ci;k++)
2561
+ sum += coef1[si+k] * kvalue[si+k];
2562
+ for(k=0;k<cj;k++)
2563
+ sum += coef2[sj+k] * kvalue[sj+k];
2564
+ sum -= model->rho[p];
2565
+ dec_values[p] = sum;
2566
+ p++;
2567
+ }
2568
+
2569
+ free(kvalue);
2570
+ free(start);
2571
+ }
2572
+ }
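
The dec_values array filled in above is laid out pair by pair, in the same order as the nested loop: (0,1), (0,2), ..., (0,k-1), (1,2), ..., (k-2,k-1). A hypothetical helper showing how a caller can walk it for a classification model (print_pairwise_values is not part of the library, and the sketch assumes svm_predict_values is reachable through svm.h, which matches its non-static definition here):

#include <cstdio>
#include "svm.h"

// print the decision value of every pairwise classifier for one input x (sketch)
void print_pairwise_values(const svm_model *model, const svm_node *x)
{
    int k = svm_get_nr_class(model);
    int *labels = new int[k];
    svm_get_labels(model, labels);
    double *dec = new double[k * (k - 1) / 2];
    svm_predict_values(model, x, dec);     // same (i,j) ordering as the loop above
    int p = 0;
    for (int i = 0; i < k; i++)
        for (int j = i + 1; j < k; j++, p++)
            printf("%d vs %d: %g\n", labels[i], labels[j], dec[p]);
    delete [] labels;
    delete [] dec;
}
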
2573
+
2574
+ double svm_predict(const svm_model *model, const svm_node *x)
2575
+ {
2576
+ if(model->param.svm_type == ONE_CLASS ||
2577
+ model->param.svm_type == EPSILON_SVR ||
2578
+ model->param.svm_type == NU_SVR)
2579
+ {
2580
+ double res;
2581
+ svm_predict_values(model, x, &res);
2582
+
2583
+ if(model->param.svm_type == ONE_CLASS)
2584
+ return (res>0)?1:-1;
2585
+ else
2586
+ return res;
2587
+ }
2588
+ else
2589
+ {
2590
+ int i;
2591
+ int nr_class = model->nr_class;
2592
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2593
+ svm_predict_values(model, x, dec_values);
2594
+
2595
+ int *vote = Malloc(int,nr_class);
2596
+ for(i=0;i<nr_class;i++)
2597
+ vote[i] = 0;
2598
+ int pos=0;
2599
+ for(i=0;i<nr_class;i++)
2600
+ for(int j=i+1;j<nr_class;j++)
2601
+ {
2602
+ if(dec_values[pos++] > 0)
2603
+ ++vote[i];
2604
+ else
2605
+ ++vote[j];
2606
+ }
2607
+
2608
+ int vote_max_idx = 0;
2609
+ for(i=1;i<nr_class;i++)
2610
+ if(vote[i] > vote[vote_max_idx])
2611
+ vote_max_idx = i;
2612
+ free(vote);
2613
+ free(dec_values);
2614
+ return model->label[vote_max_idx];
2615
+ }
2616
+ }
2617
+
2618
+ double svm_predict_probability(
2619
+ const svm_model *model, const svm_node *x, double *prob_estimates)
2620
+ {
2621
+ if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2622
+ model->probA!=NULL && model->probB!=NULL)
2623
+ {
2624
+ int i;
2625
+ int nr_class = model->nr_class;
2626
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2627
+ svm_predict_values(model, x, dec_values);
2628
+
2629
+ double min_prob=1e-7;
2630
+ double **pairwise_prob=Malloc(double *,nr_class);
2631
+ for(i=0;i<nr_class;i++)
2632
+ pairwise_prob[i]=Malloc(double,nr_class);
2633
+ int k=0;
2634
+ for(i=0;i<nr_class;i++)
2635
+ for(int j=i+1;j<nr_class;j++)
2636
+ {
2637
+ pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2638
+ pairwise_prob[j][i]=1-pairwise_prob[i][j];
2639
+ k++;
2640
+ }
2641
+ multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2642
+
2643
+ int prob_max_idx = 0;
2644
+ for(i=1;i<nr_class;i++)
2645
+ if(prob_estimates[i] > prob_estimates[prob_max_idx])
2646
+ prob_max_idx = i;
2647
+ for(i=0;i<nr_class;i++)
2648
+ free(pairwise_prob[i]);
2649
+ free(dec_values);
2650
+ free(pairwise_prob);
2651
+ return model->label[prob_max_idx];
2652
+ }
2653
+ else
2654
+ return svm_predict(model, x);
2655
+ }
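
A hypothetical caller for the probability path (print_probabilities is invented for illustration); it first uses svm_check_probability_model, defined at the end of this file, to verify that the model was trained with probability == 1:

#include <cstdio>
#include "svm.h"

// class probability estimates for one input x (sketch)
void print_probabilities(const svm_model *model, const svm_node *x)
{
    if (!svm_check_probability_model(model))
    {
        printf("model contains no probability information\n");
        return;
    }
    int k = svm_get_nr_class(model);
    int *labels = new int[k];
    svm_get_labels(model, labels);
    double *prob = new double[k];
    double predicted = svm_predict_probability(model, x, prob);
    printf("predicted label: %g\n", predicted);
    for (int i = 0; i < k; i++)
        printf("P(label %d) = %g\n", labels[i], prob[i]);
    delete [] labels;
    delete [] prob;
}
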
2656
+
2657
+ const char *svm_type_table[] =
2658
+ {
2659
+ "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2660
+ };
2661
+
2662
+ const char *kernel_type_table[]=
2663
+ {
2664
+ "linear","polynomial","rbf","sigmoid","precomputed",NULL
2665
+ };
2666
+
2667
+ int svm_save_model(const char *model_file_name, const svm_model *model)
2668
+ {
2669
+ FILE *fp = fopen(model_file_name,"w");
2670
+ if(fp==NULL) return -1;
2671
+
2672
+ const svm_parameter& param = model->param;
2673
+
2674
+ fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2675
+ fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2676
+
2677
+ if(param.kernel_type == POLY)
2678
+ fprintf(fp,"degree %d\n", param.degree);
2679
+
2680
+ if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2681
+ fprintf(fp,"gamma %g\n", param.gamma);
2682
+
2683
+ if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2684
+ fprintf(fp,"coef0 %g\n", param.coef0);
2685
+
2686
+ int nr_class = model->nr_class;
2687
+ int l = model->l;
2688
+ fprintf(fp, "nr_class %d\n", nr_class);
2689
+ fprintf(fp, "total_sv %d\n",l);
2690
+
2691
+ {
2692
+ fprintf(fp, "rho");
2693
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2694
+ fprintf(fp," %g",model->rho[i]);
2695
+ fprintf(fp, "\n");
2696
+ }
2697
+
2698
+ if(model->label)
2699
+ {
2700
+ fprintf(fp, "label");
2701
+ for(int i=0;i<nr_class;i++)
2702
+ fprintf(fp," %d",model->label[i]);
2703
+ fprintf(fp, "\n");
2704
+ }
2705
+
2706
+ if(model->probA) // regression has probA only
2707
+ {
2708
+ fprintf(fp, "probA");
2709
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2710
+ fprintf(fp," %g",model->probA[i]);
2711
+ fprintf(fp, "\n");
2712
+ }
2713
+ if(model->probB)
2714
+ {
2715
+ fprintf(fp, "probB");
2716
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2717
+ fprintf(fp," %g",model->probB[i]);
2718
+ fprintf(fp, "\n");
2719
+ }
2720
+
2721
+ if(model->nSV)
2722
+ {
2723
+ fprintf(fp, "nr_sv");
2724
+ for(int i=0;i<nr_class;i++)
2725
+ fprintf(fp," %d",model->nSV[i]);
2726
+ fprintf(fp, "\n");
2727
+ }
2728
+
2729
+ fprintf(fp, "SV\n");
2730
+ const double * const *sv_coef = model->sv_coef;
2731
+ const svm_node * const *SV = model->SV;
2732
+
2733
+ for(int i=0;i<l;i++)
2734
+ {
2735
+ for(int j=0;j<nr_class-1;j++)
2736
+ fprintf(fp, "%.16g ",sv_coef[j][i]);
2737
+
2738
+ const svm_node *p = SV[i];
2739
+
2740
+ if(param.kernel_type == PRECOMPUTED)
2741
+ fprintf(fp,"0:%d ",(int)(p->value));
2742
+ else
2743
+ while(p->index != -1)
2744
+ {
2745
+ fprintf(fp,"%d:%.8g ",p->index,p->value);
2746
+ p++;
2747
+ }
2748
+ fprintf(fp, "\n");
2749
+ }
2750
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2751
+ else return 0;
2752
+ }
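
Reading the fprintf calls above in order, a saved classification model is a small plain text file. Its shape, with placeholder numbers rather than output from a real run, is roughly:

svm_type c_svc
kernel_type rbf
gamma 0.25
nr_class 3
total_sv 57
rho 0.1 -0.2 0.3
label 1 2 3
nr_sv 20 19 18
SV
<nr_class-1 coefficients> <index>:<value> <index>:<value> ...
(one such line per support vector)

degree and coef0 appear only for the kernels that use them, and probA/probB lines appear only when the model carries probability information.
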
2753
+
2754
+ svm_model *svm_load_model(const char *model_file_name)
2755
+ {
2756
+ FILE *fp = fopen(model_file_name,"rb");
2757
+ if(fp==NULL) return NULL;
2758
+
2759
+ // read parameters
2760
+
2761
+ svm_model *model = Malloc(svm_model,1);
2762
+ svm_parameter& param = model->param;
2763
+ model->rho = NULL;
2764
+ model->probA = NULL;
2765
+ model->probB = NULL;
2766
+ model->label = NULL;
2767
+ model->nSV = NULL;
2768
+
2769
+ model->obj = NULL;
2770
+
2771
+ char cmd[81];
2772
+ while(1)
2773
+ {
2774
+ fscanf(fp,"%80s",cmd);
2775
+
2776
+ if(strcmp(cmd,"svm_type")==0)
2777
+ {
2778
+ fscanf(fp,"%80s",cmd);
2779
+ int i;
2780
+ for(i=0;svm_type_table[i];i++)
2781
+ {
2782
+ if(strcmp(svm_type_table[i],cmd)==0)
2783
+ {
2784
+ param.svm_type=i;
2785
+ break;
2786
+ }
2787
+ }
2788
+ if(svm_type_table[i] == NULL)
2789
+ {
2790
+ fprintf(stderr,"unknown svm type.\n");
2791
+ free(model->rho);
2792
+ free(model->obj);
2793
+ free(model->label);
2794
+ free(model->nSV);
2795
+ free(model);
2796
+ return NULL;
2797
+ }
2798
+ }
2799
+ else if(strcmp(cmd,"kernel_type")==0)
2800
+ {
2801
+ fscanf(fp,"%80s",cmd);
2802
+ int i;
2803
+ for(i=0;kernel_type_table[i];i++)
2804
+ {
2805
+ if(strcmp(kernel_type_table[i],cmd)==0)
2806
+ {
2807
+ param.kernel_type=i;
2808
+ break;
2809
+ }
2810
+ }
2811
+ if(kernel_type_table[i] == NULL)
2812
+ {
2813
+ fprintf(stderr,"unknown kernel function.\n");
2814
+ free(model->rho);
2815
+ free(model->obj);
2816
+ free(model->label);
2817
+ free(model->nSV);
2818
+ free(model);
2819
+ return NULL;
2820
+ }
2821
+ }
2822
+ else if(strcmp(cmd,"degree")==0)
2823
+ fscanf(fp,"%d",&param.degree);
2824
+ else if(strcmp(cmd,"gamma")==0)
2825
+ fscanf(fp,"%lf",&param.gamma);
2826
+ else if(strcmp(cmd,"coef0")==0)
2827
+ fscanf(fp,"%lf",&param.coef0);
2828
+ else if(strcmp(cmd,"nr_class")==0)
2829
+ fscanf(fp,"%d",&model->nr_class);
2830
+ else if(strcmp(cmd,"total_sv")==0)
2831
+ fscanf(fp,"%d",&model->l);
2832
+ else if(strcmp(cmd,"rho")==0)
2833
+ {
2834
+ int n = model->nr_class * (model->nr_class-1)/2;
2835
+ model->rho = Malloc(double,n);
2836
+ for(int i=0;i<n;i++)
2837
+ fscanf(fp,"%lf",&model->rho[i]);
2838
+ }
2839
+ else if(strcmp(cmd,"label")==0)
2840
+ {
2841
+ int n = model->nr_class;
2842
+ model->label = Malloc(int,n);
2843
+ for(int i=0;i<n;i++)
2844
+ fscanf(fp,"%d",&model->label[i]);
2845
+ }
2846
+ else if(strcmp(cmd,"probA")==0)
2847
+ {
2848
+ int n = model->nr_class * (model->nr_class-1)/2;
2849
+ model->probA = Malloc(double,n);
2850
+ for(int i=0;i<n;i++)
2851
+ fscanf(fp,"%lf",&model->probA[i]);
2852
+ }
2853
+ else if(strcmp(cmd,"probB")==0)
2854
+ {
2855
+ int n = model->nr_class * (model->nr_class-1)/2;
2856
+ model->probB = Malloc(double,n);
2857
+ for(int i=0;i<n;i++)
2858
+ fscanf(fp,"%lf",&model->probB[i]);
2859
+ }
2860
+ else if(strcmp(cmd,"nr_sv")==0)
2861
+ {
2862
+ int n = model->nr_class;
2863
+ model->nSV = Malloc(int,n);
2864
+ for(int i=0;i<n;i++)
2865
+ fscanf(fp,"%d",&model->nSV[i]);
2866
+ }
2867
+ else if(strcmp(cmd,"SV")==0)
2868
+ {
2869
+ while(1)
2870
+ {
2871
+ int c = getc(fp);
2872
+ if(c==EOF || c=='\n') break;
2873
+ }
2874
+ break;
2875
+ }
2876
+ else
2877
+ {
2878
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2879
+ free(model->rho);
2880
+ free(model->obj);
2881
+ free(model->label);
2882
+ free(model->nSV);
2883
+ free(model);
2884
+ return NULL;
2885
+ }
2886
+ }
2887
+
2888
+ // read sv_coef and SV
2889
+
2890
+ int elements = 0;
2891
+ long pos = ftell(fp);
2892
+
2893
+ while(1)
2894
+ {
2895
+ int c = fgetc(fp);
2896
+ switch(c)
2897
+ {
2898
+ case '\n':
2899
+ // count the '-1' element
2900
+ case ':':
2901
+ ++elements;
2902
+ break;
2903
+ case EOF:
2904
+ goto out;
2905
+ default:
2906
+ ;
2907
+ }
2908
+ }
2909
+ out:
2910
+ fseek(fp,pos,SEEK_SET);
2911
+
2912
+ int m = model->nr_class - 1;
2913
+ int l = model->l;
2914
+ model->sv_coef = Malloc(double *,m);
2915
+ int i;
2916
+ for(i=0;i<m;i++)
2917
+ model->sv_coef[i] = Malloc(double,l);
2918
+ model->SV = Malloc(svm_node*,l);
2919
+ svm_node *x_space=NULL;
2920
+ if(l>0) x_space = Malloc(svm_node,elements);
2921
+
2922
+ int j=0;
2923
+ for(i=0;i<l;i++)
2924
+ {
2925
+ model->SV[i] = &x_space[j];
2926
+ for(int k=0;k<m;k++)
2927
+ fscanf(fp,"%lf",&model->sv_coef[k][i]);
2928
+ while(1)
2929
+ {
2930
+ int c;
2931
+ do {
2932
+ c = getc(fp);
2933
+ if(c=='\n') goto out2;
2934
+ } while(isspace(c));
2935
+ ungetc(c,fp);
2936
+ fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
2937
+ ++j;
2938
+ }
2939
+ out2:
2940
+ x_space[j++].index = -1;
2941
+ }
2942
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2943
+
2944
+ model->free_sv = 1; // XXX: the loaded model owns x_space, so svm_destroy_model will free it
2945
+ return model;
2946
+ }
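
A hypothetical round-trip sketch for the save/load pair (the helper name roundtrip and the file name example.model are made up); note that a loaded model has free_sv == 1, so svm_destroy_model alone releases its SV storage:

#include <cstdio>
#include "svm.h"

// save a trained model, load it back, and compare predictions on one input (sketch)
int roundtrip(const svm_model *model, const svm_node *x)
{
    if (svm_save_model("example.model", model) != 0)
    {
        fprintf(stderr, "can't save model\n");
        return -1;
    }
    svm_model *loaded = svm_load_model("example.model");
    if (loaded == NULL)
    {
        fprintf(stderr, "can't load model\n");
        return -1;
    }
    printf("original: %g  reloaded: %g\n", svm_predict(model, x), svm_predict(loaded, x));
    svm_destroy_model(loaded);
    return 0;
}
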
2947
+
2948
+ void svm_destroy_model(svm_model* model)
2949
+ {
2950
+ if(model->free_sv && model->l > 0)
2951
+ free((void *)(model->SV[0]));
2952
+ for(int i=0;i<model->nr_class-1;i++)
2953
+ free(model->sv_coef[i]);
2954
+ free(model->SV);
2955
+ free(model->sv_coef);
2956
+ free(model->rho);
2957
+ free(model->obj);
2958
+ free(model->label);
2959
+ free(model->probA);
2960
+ free(model->probB);
2961
+ free(model->nSV);
2962
+ free(model);
2963
+ }
2964
+
2965
+ void svm_destroy_param(svm_parameter* param)
2966
+ {
2967
+ free(param->weight_label);
2968
+ free(param->weight);
2969
+ }
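
Ownership note, with a teardown sketch that reuses the names from the svm_train example above (so it is a fragment, not a standalone program): a model returned by svm_train keeps pointers into the caller's svm_node arrays (free_sv == 0), so the training data must stay alive until the model is destroyed, while svm_destroy_param only releases the weight arrays inside svm_parameter.

svm_destroy_model(model);     // releases the SV pointer table, coefficients, rho, obj, label, probA/probB, nSV
svm_destroy_param(&param);    // frees param.weight_label and param.weight (free(NULL) is harmless)
// only now is it safe to release prob.x / prob.y and the svm_node arrays they point to
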
2970
+
2971
+ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
2972
+ {
2973
+ // svm_type
2974
+
2975
+ int svm_type = param->svm_type;
2976
+ if(svm_type != C_SVC &&
2977
+ svm_type != NU_SVC &&
2978
+ svm_type != ONE_CLASS &&
2979
+ svm_type != EPSILON_SVR &&
2980
+ svm_type != NU_SVR)
2981
+ return "unknown svm type";
2982
+
2983
+ // kernel_type, degree
2984
+
2985
+ int kernel_type = param->kernel_type;
2986
+ if(kernel_type != LINEAR &&
2987
+ kernel_type != POLY &&
2988
+ kernel_type != RBF &&
2989
+ kernel_type != SIGMOID &&
2990
+ kernel_type != PRECOMPUTED)
2991
+ return "unknown kernel type";
2992
+
2993
+ if(param->degree < 0)
2994
+ return "degree of polynomial kernel < 0";
2995
+
2996
+ // cache_size,eps,C,nu,p,shrinking
2997
+
2998
+ if(param->cache_size <= 0)
2999
+ return "cache_size <= 0";
3000
+
3001
+ if(param->eps <= 0)
3002
+ return "eps <= 0";
3003
+
3004
+ if(svm_type == C_SVC ||
3005
+ svm_type == EPSILON_SVR ||
3006
+ svm_type == NU_SVR)
3007
+ if(param->C <= 0)
3008
+ return "C <= 0";
3009
+
3010
+ if(svm_type == NU_SVC ||
3011
+ svm_type == ONE_CLASS ||
3012
+ svm_type == NU_SVR)
3013
+ if(param->nu <= 0 || param->nu > 1)
3014
+ return "nu <= 0 or nu > 1";
3015
+
3016
+ if(svm_type == EPSILON_SVR)
3017
+ if(param->p < 0)
3018
+ return "p < 0";
3019
+
3020
+ if(param->shrinking != 0 &&
3021
+ param->shrinking != 1)
3022
+ return "shrinking != 0 and shrinking != 1";
3023
+
3024
+ if(param->probability != 0 &&
3025
+ param->probability != 1)
3026
+ return "probability != 0 and probability != 1";
3027
+
3028
+ if(param->probability == 1 &&
3029
+ svm_type == ONE_CLASS)
3030
+ return "one-class SVM probability output not supported yet";
3031
+
3032
+
3033
+ // check whether nu-svc is feasible
3034
+
3035
+ if(svm_type == NU_SVC)
3036
+ {
3037
+ int l = prob->l;
3038
+ int max_nr_class = 16;
3039
+ int nr_class = 0;
3040
+ int *label = Malloc(int,max_nr_class);
3041
+ int *count = Malloc(int,max_nr_class);
3042
+
3043
+ int i;
3044
+ for(i=0;i<l;i++)
3045
+ {
3046
+ int this_label = (int)prob->y[i];
3047
+ int j;
3048
+ for(j=0;j<nr_class;j++)
3049
+ if(this_label == label[j])
3050
+ {
3051
+ ++count[j];
3052
+ break;
3053
+ }
3054
+ if(j == nr_class)
3055
+ {
3056
+ if(nr_class == max_nr_class)
3057
+ {
3058
+ max_nr_class *= 2;
3059
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
3060
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
3061
+ }
3062
+ label[nr_class] = this_label;
3063
+ count[nr_class] = 1;
3064
+ ++nr_class;
3065
+ }
3066
+ }
3067
+
3068
+ for(i=0;i<nr_class;i++)
3069
+ {
3070
+ int n1 = count[i];
3071
+ for(int j=i+1;j<nr_class;j++)
3072
+ {
3073
+ int n2 = count[j];
3074
+ if(param->nu*(n1+n2)/2 > min(n1,n2))
3075
+ {
3076
+ free(label);
3077
+ free(count);
3078
+ return "specified nu is infeasible";
3079
+ }
3080
+ }
3081
+ }
3082
+ free(label);
3083
+ free(count);
3084
+ }
3085
+
3086
+ return NULL;
3087
+ }
3088
+
3089
+ int svm_check_probability_model(const svm_model *model)
3090
+ {
3091
+ return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3092
+ model->probA!=NULL && model->probB!=NULL) ||
3093
+ ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3094
+ model->probA!=NULL);
3095
+ }