tomz-libsvm-ruby-swig 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/AUTHORS ADDED
@@ -0,0 +1,3 @@
1
+ Tom Zeng <tom.z.zeng@mail.com> (Ruby SWIG interface to LIBSVM)
2
+ FeedbackMine <feedbackmine@feedbackmine.com> (gem)
3
+ Chih-Chung Chang and Chih-Jen Lin <cjlin@csie.ntu.edu.tw> (developers of LIBSVM)
data/COPYING ADDED
@@ -0,0 +1,24 @@
1
+ == LICENSE:
2
+
3
+ (The MIT License)
4
+
5
+ Copyright (c) 2009 Tom Zeng
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining
8
+ a copy of this software and associated documentation files (the
9
+ 'Software'), to deal in the Software without restriction, including
10
+ without limitation the rights to use, copy, modify, merge, publish,
11
+ distribute, sublicense, and/or sell copies of the Software, and to
12
+ permit persons to whom the Software is furnished to do so, subject to
13
+ the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be
16
+ included in all copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
19
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ 2009-03-04 Tom Zeng (tom.z.zeng@gmail.com)
2
+ * adopted the gem spec by feedbackmine.com
3
+
data/Manifest.txt ADDED
@@ -0,0 +1,11 @@
1
+ History.txt
2
+ COPYING
3
+ AUTHORS
4
+ Manifest.txt
5
+ README.txt
6
+ Rakefile
7
+ lib/svm.rb
8
+ ext/svmc_wrap.cxx
9
+ ext/svm.cpp
10
+ ext/svm.h
11
+ ext/extconf.rb
data/README.txt ADDED
@@ -0,0 +1,55 @@
1
+ = libsvm-ruby-swig
2
+
3
+ * Ruby interface to LIBSVM (using SWIG)
4
+ * http://www.tomzconsulting.com
5
+
6
+ == DESCRIPTION:
7
+
8
+ This is the Ruby port of the LIBSVM Python SWIG (Simplified Wrapper and
9
+ Interface Generator) interface.
10
+
11
+ A modified version of LIBSVM 2.88 is included; it contains changes merged from:
12
+ git://github.com/npinto/libsvm-2.88_objs-np.git
13
+ git://github.com/alanfalloon/libsvm-2.88_output_model_params.git
14
+ to expose additional data/parameters in the model object. You don't need your
15
+ own copy of SWIG to use this library - all needed files are generated using
16
+ SWIG already.
17
+
18
+ Look for the README file in the ruby subdirectory for instructions.
19
+ The binaries included were built under Ubuntu Linux 2.6.24-23-generic;
20
+ you should run make under the libsvm-2.88 and libsvm-2.88/ruby
21
+ directories to regenerate the executables for your environment.
22
+
23
+ == INSTALL:
24
+
25
+ sudo gem sources -a http://gems.github.com (you only have to do this once)
26
+ sudo gem install tomz-libsvm-ruby-swig
27
+
28
+ == SYNOPSIS:
29
+
30
+ Quick Interactive Tutorial using irb (adapted from the Python code in Toby
31
+ Segaran's "Programming Collective Intelligence" book):
32
+
33
+ irb(main):001:0> require 'svm'
34
+ => true
35
+ irb(main):002:0> prob = Problem.new([1,-1],[[1,0,1],[-1,0,-1]])
36
+ irb(main):003:0> param = Parameter.new(:kernel_type => LINEAR, :C => 10)
37
+ irb(main):004:0> m = Model.new(prob,param)
38
+ irb(main):005:0> m.predict([1,1,1])
39
+ => 1.0
40
+ irb(main):006:0> m.predict([0,0,1])
41
+ => 1.0
42
+ irb(main):007:0> m.predict([0,0,-1])
43
+ => -1.0
44
+ irb(main):008:0> m.save("test.model")
45
+ irb(main):009:0> m2 = Model.new("test.model")
46
+ irb(main):010:0> m2.predict([0,0,-1])
47
+ => -1.0
48
+
49
+ == AUTHOR:
50
+
51
+ Tom Zeng
52
+ http://www.tomzconsulting.com
53
+ http://www.linkedin.com/in/tomzeng
54
+ tom.z.zeng _at_ gmail _dot_ com
55
+
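For convenience, the irb walkthrough from README.txt above can also be run as a standalone Ruby script. This is only a sketch based on that transcript; it assumes the gem is installed and that Problem, Parameter and Model behave exactly as shown in the SYNOPSIS:

    # Sketch of the README SYNOPSIS as a script (assumes the gem is installed).
    require 'rubygems'   # needed on Ruby 1.8 to locate the gem
    require 'svm'

    prob  = Problem.new([1, -1], [[1, 0, 1], [-1, 0, -1]])
    param = Parameter.new(:kernel_type => LINEAR, :C => 10)
    m     = Model.new(prob, param)

    puts m.predict([1, 1, 1])    # => 1.0 in the transcript above
    puts m.predict([0, 0, -1])   # => -1.0

    m.save("test.model")                              # persist the trained model
    puts Model.new("test.model").predict([0, 0, -1])  # => -1.0 after reloading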
data/Rakefile ADDED
@@ -0,0 +1,33 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ task :default => ["make_gem"]
5
+
6
+ EXT = "ext/blah.#{Hoe::DLEXT}"
7
+
8
+ Hoe.new('libsvm-ruby-swig', '0.2.0') do |p|
9
+ p.author = 'Tom Zeng'
10
+ p.email = 'tom.z.zeng@gmail.com'
11
+ p.url = 'http://www.tomzconsulting.com'
12
+ p.summary = 'Ruby wrapper of LIBSVM using SWIG'
13
+ p.description = 'Ruby wrapper of LIBSVM using SWIG'
14
+
15
+ p.spec_extras[:extensions] = "ext/extconf.rb"
16
+ p.clean_globs << EXT << "ext/*.o" << "ext/Makefile"
17
+ end
18
+
19
+ task :make_gem => EXT
20
+
21
+ file EXT => ["ext/extconf.rb", "ext/svmc_wrap.cxx", "ext/svm.cpp", "ext/svm.h"] do
22
+ Dir.chdir "ext" do
23
+ ruby "extconf.rb"
24
+ sh "make"
25
+ end
26
+ end
27
+
28
+ task :copy_files do
29
+ cp "libsvm-2.88/svm.h","ext/"
30
+ cp "libsvm-2.88/svm.cpp","ext/"
31
+ cp "libsvm-2.88/ruby/svmc_wrap.cxx","ext/"
32
+ cp "libsvm-2.88/ruby/svm.rb","lib/"
33
+ end
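The make_gem chain in the Rakefile above boils down to running extconf.rb and make inside ext/. A minimal sketch of doing the same by hand, without Hoe, using only the standard library and the paths listed in Manifest.txt:

    # Sketch: build the svmc extension manually, mirroring the Rakefile's file task.
    require 'rbconfig'

    Dir.chdir("ext") do
      ruby = File.join(RbConfig::CONFIG["bindir"], RbConfig::CONFIG["ruby_install_name"])
      system(ruby, "extconf.rb") or abort "extconf.rb failed"  # writes ext/Makefile via mkmf
      system("make")             or abort "make failed"        # compiles svm.cpp and svmc_wrap.cxx
    end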
data/ext/extconf.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'mkmf'
2
+ $CFLAGS = "#{ENV['CFLAGS']} -Wall -O3 "
3
+ if CONFIG["MAJOR"].to_i >= 1 && CONFIG["MINOR"].to_i >= 8
4
+ $CFLAGS << " -DHAVE_DEFINE_ALLOC_FUNCTION"
5
+ end
6
+ create_makefile('svmc')
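Once the extension builds, a quick sanity check can be run from the source tree. This is a sketch that assumes the compiled svmc library is left in ext/ and that lib/svm.rb loads it; it reuses the toy problem from the README above:

    # Sketch: smoke-test a local build without installing the gem.
    $LOAD_PATH.unshift("ext", "lib")
    require 'svm'

    prob  = Problem.new([1, -1], [[1, 0, 1], [-1, 0, -1]])
    param = Parameter.new(:kernel_type => LINEAR, :C => 10)
    puts Model.new(prob, param).predict([1, 1, 1])   # expected: 1.0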
data/ext/svm.cpp ADDED
@@ -0,0 +1,3095 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <ctype.h>
5
+ #include <float.h>
6
+ #include <string.h>
7
+ #include <stdarg.h>
8
+ #include "svm.h"
9
+ typedef float Qfloat;
10
+ typedef signed char schar;
11
+ #ifndef min
12
+ template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
13
+ #endif
14
+ #ifndef max
15
+ template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
16
+ #endif
17
+ template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
18
+ template <class S, class T> inline void clone(T*& dst, S* src, int n)
19
+ {
20
+ dst = new T[n];
21
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
22
+ }
23
+ inline double powi(double base, int times)
24
+ {
25
+ double tmp = base, ret = 1.0;
26
+
27
+ for(int t=times; t>0; t/=2)
28
+ {
29
+ if(t%2==1) ret*=tmp;
30
+ tmp = tmp * tmp;
31
+ }
32
+ return ret;
33
+ }
34
+ #define INF HUGE_VAL
35
+ #define TAU 1e-12
36
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
37
+ #if 1
38
+ int info_on = 0;
39
+ static void info(const char *fmt,...)
40
+ {
41
+ va_list ap;
42
+ if (info_on==1) {
43
+ va_start(ap,fmt);
44
+ vprintf(fmt,ap);
45
+ va_end(ap);
46
+ }
47
+ }
48
+ static void info_flush()
49
+ {
50
+ if (info_on==1) fflush(stdout);
51
+ }
52
+ #else
53
+ static void info(char *fmt,...) {}
54
+ static void info_flush() {}
55
+ #endif
56
+
57
+ //
58
+ // Kernel Cache
59
+ //
60
+ // l is the number of total data items
61
+ // size is the cache size limit in bytes
62
+ //
63
+ class Cache
64
+ {
65
+ public:
66
+ Cache(int l,long int size);
67
+ ~Cache();
68
+
69
+ // request data [0,len)
70
+ // return some position p where [p,len) need to be filled
71
+ // (p >= len if nothing needs to be filled)
72
+ int get_data(const int index, Qfloat **data, int len);
73
+ void swap_index(int i, int j);
74
+ private:
75
+ int l;
76
+ long int size;
77
+ struct head_t
78
+ {
79
+ head_t *prev, *next; // a circular list
80
+ Qfloat *data;
81
+ int len; // data[0,len) is cached in this entry
82
+ };
83
+
84
+ head_t *head;
85
+ head_t lru_head;
86
+ void lru_delete(head_t *h);
87
+ void lru_insert(head_t *h);
88
+ };
89
+
90
+ Cache::Cache(int l_,long int size_):l(l_),size(size_)
91
+ {
92
+ head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
93
+ size /= sizeof(Qfloat);
94
+ size -= l * sizeof(head_t) / sizeof(Qfloat);
95
+ size = max(size, 2 * (long int) l); // cache must be large enough for two columns
96
+ lru_head.next = lru_head.prev = &lru_head;
97
+ }
98
+
99
+ Cache::~Cache()
100
+ {
101
+ for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
102
+ free(h->data);
103
+ free(head);
104
+ }
105
+
106
+ void Cache::lru_delete(head_t *h)
107
+ {
108
+ // delete from current location
109
+ h->prev->next = h->next;
110
+ h->next->prev = h->prev;
111
+ }
112
+
113
+ void Cache::lru_insert(head_t *h)
114
+ {
115
+ // insert to last position
116
+ h->next = &lru_head;
117
+ h->prev = lru_head.prev;
118
+ h->prev->next = h;
119
+ h->next->prev = h;
120
+ }
121
+
122
+ int Cache::get_data(const int index, Qfloat **data, int len)
123
+ {
124
+ head_t *h = &head[index];
125
+ if(h->len) lru_delete(h);
126
+ int more = len - h->len;
127
+
128
+ if(more > 0)
129
+ {
130
+ // free old space
131
+ while(size < more)
132
+ {
133
+ head_t *old = lru_head.next;
134
+ lru_delete(old);
135
+ free(old->data);
136
+ size += old->len;
137
+ old->data = 0;
138
+ old->len = 0;
139
+ }
140
+
141
+ // allocate new space
142
+ h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
143
+ size -= more;
144
+ swap(h->len,len);
145
+ }
146
+
147
+ lru_insert(h);
148
+ *data = h->data;
149
+ return len;
150
+ }
151
+
152
+ void Cache::swap_index(int i, int j)
153
+ {
154
+ if(i==j) return;
155
+
156
+ if(head[i].len) lru_delete(&head[i]);
157
+ if(head[j].len) lru_delete(&head[j]);
158
+ swap(head[i].data,head[j].data);
159
+ swap(head[i].len,head[j].len);
160
+ if(head[i].len) lru_insert(&head[i]);
161
+ if(head[j].len) lru_insert(&head[j]);
162
+
163
+ if(i>j) swap(i,j);
164
+ for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
165
+ {
166
+ if(h->len > i)
167
+ {
168
+ if(h->len > j)
169
+ swap(h->data[i],h->data[j]);
170
+ else
171
+ {
172
+ // give up
173
+ lru_delete(h);
174
+ free(h->data);
175
+ size += h->len;
176
+ h->data = 0;
177
+ h->len = 0;
178
+ }
179
+ }
180
+ }
181
+ }
182
+
183
+ //
184
+ // Kernel evaluation
185
+ //
186
+ // the static method k_function is for doing single kernel evaluation
187
+ // the constructor of Kernel prepares to calculate the l*l kernel matrix
188
+ // the member function get_Q is for getting one column from the Q Matrix
189
+ //
190
+ class QMatrix {
191
+ public:
192
+ virtual Qfloat *get_Q(int column, int len) const = 0;
193
+ virtual Qfloat *get_QD() const = 0;
194
+ virtual void swap_index(int i, int j) const = 0;
195
+ virtual ~QMatrix() {}
196
+ };
197
+
198
+ class Kernel: public QMatrix {
199
+ public:
200
+ Kernel(int l, svm_node * const * x, const svm_parameter& param);
201
+ virtual ~Kernel();
202
+
203
+ static double k_function(const svm_node *x, const svm_node *y,
204
+ const svm_parameter& param);
205
+ virtual Qfloat *get_Q(int column, int len) const = 0;
206
+ virtual Qfloat *get_QD() const = 0;
207
+ virtual void swap_index(int i, int j) const // not so const...
208
+ {
209
+ swap(x[i],x[j]);
210
+ if(x_square) swap(x_square[i],x_square[j]);
211
+ }
212
+ protected:
213
+
214
+ double (Kernel::*kernel_function)(int i, int j) const;
215
+
216
+ private:
217
+ const svm_node **x;
218
+ double *x_square;
219
+
220
+ // svm_parameter
221
+ const int kernel_type;
222
+ const int degree;
223
+ const double gamma;
224
+ const double coef0;
225
+
226
+ static double dot(const svm_node *px, const svm_node *py);
227
+ double kernel_linear(int i, int j) const
228
+ {
229
+ return dot(x[i],x[j]);
230
+ }
231
+ double kernel_poly(int i, int j) const
232
+ {
233
+ return powi(gamma*dot(x[i],x[j])+coef0,degree);
234
+ }
235
+ double kernel_rbf(int i, int j) const
236
+ {
237
+ return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
238
+ }
239
+ double kernel_sigmoid(int i, int j) const
240
+ {
241
+ return tanh(gamma*dot(x[i],x[j])+coef0);
242
+ }
243
+ double kernel_precomputed(int i, int j) const
244
+ {
245
+ return x[i][(int)(x[j][0].value)].value;
246
+ }
247
+ };
248
+
249
+ Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
250
+ :kernel_type(param.kernel_type), degree(param.degree),
251
+ gamma(param.gamma), coef0(param.coef0)
252
+ {
253
+ switch(kernel_type)
254
+ {
255
+ case LINEAR:
256
+ kernel_function = &Kernel::kernel_linear;
257
+ break;
258
+ case POLY:
259
+ kernel_function = &Kernel::kernel_poly;
260
+ break;
261
+ case RBF:
262
+ kernel_function = &Kernel::kernel_rbf;
263
+ break;
264
+ case SIGMOID:
265
+ kernel_function = &Kernel::kernel_sigmoid;
266
+ break;
267
+ case PRECOMPUTED:
268
+ kernel_function = &Kernel::kernel_precomputed;
269
+ break;
270
+ }
271
+
272
+ clone(x,x_,l);
273
+
274
+ if(kernel_type == RBF)
275
+ {
276
+ x_square = new double[l];
277
+ for(int i=0;i<l;i++)
278
+ x_square[i] = dot(x[i],x[i]);
279
+ }
280
+ else
281
+ x_square = 0;
282
+ }
283
+
284
+ Kernel::~Kernel()
285
+ {
286
+ delete[] x;
287
+ delete[] x_square;
288
+ }
289
+
290
+ double Kernel::dot(const svm_node *px, const svm_node *py)
291
+ {
292
+ double sum = 0;
293
+ while(px->index != -1 && py->index != -1)
294
+ {
295
+ if(px->index == py->index)
296
+ {
297
+ sum += px->value * py->value;
298
+ ++px;
299
+ ++py;
300
+ }
301
+ else
302
+ {
303
+ if(px->index > py->index)
304
+ ++py;
305
+ else
306
+ ++px;
307
+ }
308
+ }
309
+ return sum;
310
+ }
311
+
312
+ double Kernel::k_function(const svm_node *x, const svm_node *y,
313
+ const svm_parameter& param)
314
+ {
315
+ switch(param.kernel_type)
316
+ {
317
+ case LINEAR:
318
+ return dot(x,y);
319
+ case POLY:
320
+ return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
321
+ case RBF:
322
+ {
323
+ double sum = 0;
324
+ while(x->index != -1 && y->index !=-1)
325
+ {
326
+ if(x->index == y->index)
327
+ {
328
+ double d = x->value - y->value;
329
+ sum += d*d;
330
+ ++x;
331
+ ++y;
332
+ }
333
+ else
334
+ {
335
+ if(x->index > y->index)
336
+ {
337
+ sum += y->value * y->value;
338
+ ++y;
339
+ }
340
+ else
341
+ {
342
+ sum += x->value * x->value;
343
+ ++x;
344
+ }
345
+ }
346
+ }
347
+
348
+ while(x->index != -1)
349
+ {
350
+ sum += x->value * x->value;
351
+ ++x;
352
+ }
353
+
354
+ while(y->index != -1)
355
+ {
356
+ sum += y->value * y->value;
357
+ ++y;
358
+ }
359
+
360
+ return exp(-param.gamma*sum);
361
+ }
362
+ case SIGMOID:
363
+ return tanh(param.gamma*dot(x,y)+param.coef0);
364
+ case PRECOMPUTED: //x: test (validation), y: SV
365
+ return x[(int)(y->value)].value;
366
+ default:
367
+ return 0; // Unreachable
368
+ }
369
+ }
370
+
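The kernel formulas that Kernel::k_function switches over are short enough to restate outside the sparse svm_node representation. The following is an illustrative Ruby sketch only (not part of the gem), using dense arrays for the feature vectors x and y:

    # Illustrative sketch (not part of the gem): the LINEAR, POLY, RBF and
    # SIGMOID kernels from Kernel::k_function, on dense Ruby arrays.
    def dot(x, y)
      x.zip(y).inject(0.0) { |s, (a, b)| s + a * b }
    end

    def kernel(x, y, type, gamma = 1.0, coef0 = 0.0, degree = 3)
      case type
      when :linear  then dot(x, y)
      when :poly    then (gamma * dot(x, y) + coef0) ** degree
      when :rbf     then Math.exp(-gamma * x.zip(y).inject(0.0) { |s, (a, b)| s + (a - b) ** 2 })
      when :sigmoid then Math.tanh(gamma * dot(x, y) + coef0)
      end
    end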
371
+ // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
372
+ // Solves:
373
+ //
374
+ // min 0.5(\alpha^T Q \alpha) + p^T \alpha
375
+ //
376
+ // y^T \alpha = \delta
377
+ // y_i = +1 or -1
378
+ // 0 <= alpha_i <= Cp for y_i = 1
379
+ // 0 <= alpha_i <= Cn for y_i = -1
380
+ //
381
+ // Given:
382
+ //
383
+ // Q, p, y, Cp, Cn, and an initial feasible point \alpha
384
+ // l is the size of vectors and matrices
385
+ // eps is the stopping tolerance
386
+ //
387
+ // solution will be put in \alpha, objective value will be put in obj
388
+ //
389
+ class Solver {
390
+ public:
391
+ Solver() {};
392
+ virtual ~Solver() {};
393
+
394
+ struct SolutionInfo {
395
+ double obj;
396
+ double rho;
397
+ double upper_bound_p;
398
+ double upper_bound_n;
399
+ double r; // for Solver_NU
400
+ };
401
+
402
+ void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
403
+ double *alpha_, double Cp, double Cn, double eps,
404
+ SolutionInfo* si, int shrinking);
405
+ protected:
406
+ int active_size;
407
+ schar *y;
408
+ double *G; // gradient of objective function
409
+ enum { LOWER_BOUND, UPPER_BOUND, FREE };
410
+ char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
411
+ double *alpha;
412
+ const QMatrix *Q;
413
+ const Qfloat *QD;
414
+ double eps;
415
+ double Cp,Cn;
416
+ double *p;
417
+ int *active_set;
418
+ double *G_bar; // gradient, if we treat free variables as 0
419
+ int l;
420
+ bool unshrink; // XXX
421
+
422
+ double get_C(int i)
423
+ {
424
+ return (y[i] > 0)? Cp : Cn;
425
+ }
426
+ void update_alpha_status(int i)
427
+ {
428
+ if(alpha[i] >= get_C(i))
429
+ alpha_status[i] = UPPER_BOUND;
430
+ else if(alpha[i] <= 0)
431
+ alpha_status[i] = LOWER_BOUND;
432
+ else alpha_status[i] = FREE;
433
+ }
434
+ bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
435
+ bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
436
+ bool is_free(int i) { return alpha_status[i] == FREE; }
437
+ void swap_index(int i, int j);
438
+ void reconstruct_gradient();
439
+ virtual int select_working_set(int &i, int &j);
440
+ virtual double calculate_rho();
441
+ virtual void do_shrinking();
442
+ private:
443
+ bool be_shrunk(int i, double Gmax1, double Gmax2);
444
+ };
445
+
446
+ void Solver::swap_index(int i, int j)
447
+ {
448
+ Q->swap_index(i,j);
449
+ swap(y[i],y[j]);
450
+ swap(G[i],G[j]);
451
+ swap(alpha_status[i],alpha_status[j]);
452
+ swap(alpha[i],alpha[j]);
453
+ swap(p[i],p[j]);
454
+ swap(active_set[i],active_set[j]);
455
+ swap(G_bar[i],G_bar[j]);
456
+ }
457
+
458
+ void Solver::reconstruct_gradient()
459
+ {
460
+ // reconstruct inactive elements of G from G_bar and free variables
461
+
462
+ if(active_size == l) return;
463
+
464
+ int i,j;
465
+ int nr_free = 0;
466
+
467
+ for(j=active_size;j<l;j++)
468
+ G[j] = G_bar[j] + p[j];
469
+
470
+ for(j=0;j<active_size;j++)
471
+ if(is_free(j))
472
+ nr_free++;
473
+
474
+ if(2*nr_free < active_size)
475
+ info("\nWarning: using -h 0 may be faster\n");
476
+
477
+ if (nr_free*l > 2*active_size*(l-active_size))
478
+ {
479
+ for(i=active_size;i<l;i++)
480
+ {
481
+ const Qfloat *Q_i = Q->get_Q(i,active_size);
482
+ for(j=0;j<active_size;j++)
483
+ if(is_free(j))
484
+ G[i] += alpha[j] * Q_i[j];
485
+ }
486
+ }
487
+ else
488
+ {
489
+ for(i=0;i<active_size;i++)
490
+ if(is_free(i))
491
+ {
492
+ const Qfloat *Q_i = Q->get_Q(i,l);
493
+ double alpha_i = alpha[i];
494
+ for(j=active_size;j<l;j++)
495
+ G[j] += alpha_i * Q_i[j];
496
+ }
497
+ }
498
+ }
499
+
500
+ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
501
+ double *alpha_, double Cp, double Cn, double eps,
502
+ SolutionInfo* si, int shrinking)
503
+ {
504
+ this->l = l;
505
+ this->Q = &Q;
506
+ QD=Q.get_QD();
507
+ clone(p, p_,l);
508
+ clone(y, y_,l);
509
+ clone(alpha,alpha_,l);
510
+ this->Cp = Cp;
511
+ this->Cn = Cn;
512
+ this->eps = eps;
513
+ unshrink = false;
514
+
515
+ // initialize alpha_status
516
+ {
517
+ alpha_status = new char[l];
518
+ for(int i=0;i<l;i++)
519
+ update_alpha_status(i);
520
+ }
521
+
522
+ // initialize active set (for shrinking)
523
+ {
524
+ active_set = new int[l];
525
+ for(int i=0;i<l;i++)
526
+ active_set[i] = i;
527
+ active_size = l;
528
+ }
529
+
530
+ // initialize gradient
531
+ {
532
+ G = new double[l];
533
+ G_bar = new double[l];
534
+ int i;
535
+ for(i=0;i<l;i++)
536
+ {
537
+ G[i] = p[i];
538
+ G_bar[i] = 0;
539
+ }
540
+ for(i=0;i<l;i++)
541
+ if(!is_lower_bound(i))
542
+ {
543
+ const Qfloat *Q_i = Q.get_Q(i,l);
544
+ double alpha_i = alpha[i];
545
+ int j;
546
+ for(j=0;j<l;j++)
547
+ G[j] += alpha_i*Q_i[j];
548
+ if(is_upper_bound(i))
549
+ for(j=0;j<l;j++)
550
+ G_bar[j] += get_C(i) * Q_i[j];
551
+ }
552
+ }
553
+
554
+ // optimization step
555
+
556
+ int iter = 0;
557
+ int counter = min(l,1000)+1;
558
+
559
+ while(1)
560
+ {
561
+ // show progress and do shrinking
562
+
563
+ if(--counter == 0)
564
+ {
565
+ counter = min(l,1000);
566
+ if(shrinking) do_shrinking();
567
+ info("."); info_flush();
568
+ }
569
+
570
+ int i,j;
571
+ if(select_working_set(i,j)!=0)
572
+ {
573
+ // reconstruct the whole gradient
574
+ reconstruct_gradient();
575
+ // reset active set size and check
576
+ active_size = l;
577
+ info("*"); info_flush();
578
+ if(select_working_set(i,j)!=0)
579
+ break;
580
+ else
581
+ counter = 1; // do shrinking next iteration
582
+ }
583
+
584
+ ++iter;
585
+
586
+ // update alpha[i] and alpha[j], handle bounds carefully
587
+
588
+ const Qfloat *Q_i = Q.get_Q(i,active_size);
589
+ const Qfloat *Q_j = Q.get_Q(j,active_size);
590
+
591
+ double C_i = get_C(i);
592
+ double C_j = get_C(j);
593
+
594
+ double old_alpha_i = alpha[i];
595
+ double old_alpha_j = alpha[j];
596
+
597
+ if(y[i]!=y[j])
598
+ {
599
+ double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
600
+ if (quad_coef <= 0)
601
+ quad_coef = TAU;
602
+ double delta = (-G[i]-G[j])/quad_coef;
603
+ double diff = alpha[i] - alpha[j];
604
+ alpha[i] += delta;
605
+ alpha[j] += delta;
606
+
607
+ if(diff > 0)
608
+ {
609
+ if(alpha[j] < 0)
610
+ {
611
+ alpha[j] = 0;
612
+ alpha[i] = diff;
613
+ }
614
+ }
615
+ else
616
+ {
617
+ if(alpha[i] < 0)
618
+ {
619
+ alpha[i] = 0;
620
+ alpha[j] = -diff;
621
+ }
622
+ }
623
+ if(diff > C_i - C_j)
624
+ {
625
+ if(alpha[i] > C_i)
626
+ {
627
+ alpha[i] = C_i;
628
+ alpha[j] = C_i - diff;
629
+ }
630
+ }
631
+ else
632
+ {
633
+ if(alpha[j] > C_j)
634
+ {
635
+ alpha[j] = C_j;
636
+ alpha[i] = C_j + diff;
637
+ }
638
+ }
639
+ }
640
+ else
641
+ {
642
+ double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
643
+ if (quad_coef <= 0)
644
+ quad_coef = TAU;
645
+ double delta = (G[i]-G[j])/quad_coef;
646
+ double sum = alpha[i] + alpha[j];
647
+ alpha[i] -= delta;
648
+ alpha[j] += delta;
649
+
650
+ if(sum > C_i)
651
+ {
652
+ if(alpha[i] > C_i)
653
+ {
654
+ alpha[i] = C_i;
655
+ alpha[j] = sum - C_i;
656
+ }
657
+ }
658
+ else
659
+ {
660
+ if(alpha[j] < 0)
661
+ {
662
+ alpha[j] = 0;
663
+ alpha[i] = sum;
664
+ }
665
+ }
666
+ if(sum > C_j)
667
+ {
668
+ if(alpha[j] > C_j)
669
+ {
670
+ alpha[j] = C_j;
671
+ alpha[i] = sum - C_j;
672
+ }
673
+ }
674
+ else
675
+ {
676
+ if(alpha[i] < 0)
677
+ {
678
+ alpha[i] = 0;
679
+ alpha[j] = sum;
680
+ }
681
+ }
682
+ }
683
+
684
+ // update G
685
+
686
+ double delta_alpha_i = alpha[i] - old_alpha_i;
687
+ double delta_alpha_j = alpha[j] - old_alpha_j;
688
+
689
+ for(int k=0;k<active_size;k++)
690
+ {
691
+ G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
692
+ }
693
+
694
+ // update alpha_status and G_bar
695
+
696
+ {
697
+ bool ui = is_upper_bound(i);
698
+ bool uj = is_upper_bound(j);
699
+ update_alpha_status(i);
700
+ update_alpha_status(j);
701
+ int k;
702
+ if(ui != is_upper_bound(i))
703
+ {
704
+ Q_i = Q.get_Q(i,l);
705
+ if(ui)
706
+ for(k=0;k<l;k++)
707
+ G_bar[k] -= C_i * Q_i[k];
708
+ else
709
+ for(k=0;k<l;k++)
710
+ G_bar[k] += C_i * Q_i[k];
711
+ }
712
+
713
+ if(uj != is_upper_bound(j))
714
+ {
715
+ Q_j = Q.get_Q(j,l);
716
+ if(uj)
717
+ for(k=0;k<l;k++)
718
+ G_bar[k] -= C_j * Q_j[k];
719
+ else
720
+ for(k=0;k<l;k++)
721
+ G_bar[k] += C_j * Q_j[k];
722
+ }
723
+ }
724
+ }
725
+
726
+ // calculate rho
727
+
728
+ si->rho = calculate_rho();
729
+
730
+ // calculate objective value
731
+ {
732
+ double v = 0;
733
+ int i;
734
+ for(i=0;i<l;i++)
735
+ v += alpha[i] * (G[i] + p[i]);
736
+
737
+ si->obj = v/2;
738
+ }
739
+
740
+ // put back the solution
741
+ {
742
+ for(int i=0;i<l;i++)
743
+ alpha_[active_set[i]] = alpha[i];
744
+ }
745
+
746
+ // juggle everything back
747
+ /*{
748
+ for(int i=0;i<l;i++)
749
+ while(active_set[i] != i)
750
+ swap_index(i,active_set[i]);
751
+ // or Q.swap_index(i,active_set[i]);
752
+ }*/
753
+
754
+ si->upper_bound_p = Cp;
755
+ si->upper_bound_n = Cn;
756
+
757
+ info("\noptimization finished, #iter = %d\n",iter);
758
+
759
+ delete[] p;
760
+ delete[] y;
761
+ delete[] alpha;
762
+ delete[] alpha_status;
763
+ delete[] active_set;
764
+ delete[] G;
765
+ delete[] G_bar;
766
+ }
767
+
768
+ // return 1 if already optimal, return 0 otherwise
769
+ int Solver::select_working_set(int &out_i, int &out_j)
770
+ {
771
+ // return i,j such that
772
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
773
+ // j: minimizes the decrease of obj value
774
+ // (if quadratic coefficient <= 0, replace it with tau)
775
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
776
+
777
+ double Gmax = -INF;
778
+ double Gmax2 = -INF;
779
+ int Gmax_idx = -1;
780
+ int Gmin_idx = -1;
781
+ double obj_diff_min = INF;
782
+
783
+ for(int t=0;t<active_size;t++)
784
+ if(y[t]==+1)
785
+ {
786
+ if(!is_upper_bound(t))
787
+ if(-G[t] >= Gmax)
788
+ {
789
+ Gmax = -G[t];
790
+ Gmax_idx = t;
791
+ }
792
+ }
793
+ else
794
+ {
795
+ if(!is_lower_bound(t))
796
+ if(G[t] >= Gmax)
797
+ {
798
+ Gmax = G[t];
799
+ Gmax_idx = t;
800
+ }
801
+ }
802
+
803
+ int i = Gmax_idx;
804
+ const Qfloat *Q_i = NULL;
805
+ if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
806
+ Q_i = Q->get_Q(i,active_size);
807
+
808
+ for(int j=0;j<active_size;j++)
809
+ {
810
+ if(y[j]==+1)
811
+ {
812
+ if (!is_lower_bound(j))
813
+ {
814
+ double grad_diff=Gmax+G[j];
815
+ if (G[j] >= Gmax2)
816
+ Gmax2 = G[j];
817
+ if (grad_diff > 0)
818
+ {
819
+ double obj_diff;
820
+ double quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
821
+ if (quad_coef > 0)
822
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
823
+ else
824
+ obj_diff = -(grad_diff*grad_diff)/TAU;
825
+
826
+ if (obj_diff <= obj_diff_min)
827
+ {
828
+ Gmin_idx=j;
829
+ obj_diff_min = obj_diff;
830
+ }
831
+ }
832
+ }
833
+ }
834
+ else
835
+ {
836
+ if (!is_upper_bound(j))
837
+ {
838
+ double grad_diff= Gmax-G[j];
839
+ if (-G[j] >= Gmax2)
840
+ Gmax2 = -G[j];
841
+ if (grad_diff > 0)
842
+ {
843
+ double obj_diff;
844
+ double quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
845
+ if (quad_coef > 0)
846
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
847
+ else
848
+ obj_diff = -(grad_diff*grad_diff)/TAU;
849
+
850
+ if (obj_diff <= obj_diff_min)
851
+ {
852
+ Gmin_idx=j;
853
+ obj_diff_min = obj_diff;
854
+ }
855
+ }
856
+ }
857
+ }
858
+ }
859
+
860
+ if(Gmax+Gmax2 < eps)
861
+ return 1;
862
+
863
+ out_i = Gmax_idx;
864
+ out_j = Gmin_idx;
865
+ return 0;
866
+ }
867
+
868
+ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
869
+ {
870
+ if(is_upper_bound(i))
871
+ {
872
+ if(y[i]==+1)
873
+ return(-G[i] > Gmax1);
874
+ else
875
+ return(-G[i] > Gmax2);
876
+ }
877
+ else if(is_lower_bound(i))
878
+ {
879
+ if(y[i]==+1)
880
+ return(G[i] > Gmax2);
881
+ else
882
+ return(G[i] > Gmax1);
883
+ }
884
+ else
885
+ return(false);
886
+ }
887
+
888
+ void Solver::do_shrinking()
889
+ {
890
+ int i;
891
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
892
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
893
+
894
+ // find maximal violating pair first
895
+ for(i=0;i<active_size;i++)
896
+ {
897
+ if(y[i]==+1)
898
+ {
899
+ if(!is_upper_bound(i))
900
+ {
901
+ if(-G[i] >= Gmax1)
902
+ Gmax1 = -G[i];
903
+ }
904
+ if(!is_lower_bound(i))
905
+ {
906
+ if(G[i] >= Gmax2)
907
+ Gmax2 = G[i];
908
+ }
909
+ }
910
+ else
911
+ {
912
+ if(!is_upper_bound(i))
913
+ {
914
+ if(-G[i] >= Gmax2)
915
+ Gmax2 = -G[i];
916
+ }
917
+ if(!is_lower_bound(i))
918
+ {
919
+ if(G[i] >= Gmax1)
920
+ Gmax1 = G[i];
921
+ }
922
+ }
923
+ }
924
+
925
+ if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
926
+ {
927
+ unshrink = true;
928
+ reconstruct_gradient();
929
+ active_size = l;
930
+ info("*"); info_flush();
931
+ }
932
+
933
+ for(i=0;i<active_size;i++)
934
+ if (be_shrunk(i, Gmax1, Gmax2))
935
+ {
936
+ active_size--;
937
+ while (active_size > i)
938
+ {
939
+ if (!be_shrunk(active_size, Gmax1, Gmax2))
940
+ {
941
+ swap_index(i,active_size);
942
+ break;
943
+ }
944
+ active_size--;
945
+ }
946
+ }
947
+ }
948
+
949
+ double Solver::calculate_rho()
950
+ {
951
+ double r;
952
+ int nr_free = 0;
953
+ double ub = INF, lb = -INF, sum_free = 0;
954
+ for(int i=0;i<active_size;i++)
955
+ {
956
+ double yG = y[i]*G[i];
957
+
958
+ if(is_upper_bound(i))
959
+ {
960
+ if(y[i]==-1)
961
+ ub = min(ub,yG);
962
+ else
963
+ lb = max(lb,yG);
964
+ }
965
+ else if(is_lower_bound(i))
966
+ {
967
+ if(y[i]==+1)
968
+ ub = min(ub,yG);
969
+ else
970
+ lb = max(lb,yG);
971
+ }
972
+ else
973
+ {
974
+ ++nr_free;
975
+ sum_free += yG;
976
+ }
977
+ }
978
+
979
+ if(nr_free>0)
980
+ r = sum_free/nr_free;
981
+ else
982
+ r = (ub+lb)/2;
983
+
984
+ return r;
985
+ }
986
+
987
+ //
988
+ // Solver for nu-svm classification and regression
989
+ //
990
+ // additional constraint: e^T \alpha = constant
991
+ //
992
+ class Solver_NU : public Solver
993
+ {
994
+ public:
995
+ Solver_NU() {}
996
+ void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
997
+ double *alpha, double Cp, double Cn, double eps,
998
+ SolutionInfo* si, int shrinking)
999
+ {
1000
+ this->si = si;
1001
+ Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1002
+ }
1003
+ private:
1004
+ SolutionInfo *si;
1005
+ int select_working_set(int &i, int &j);
1006
+ double calculate_rho();
1007
+ bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1008
+ void do_shrinking();
1009
+ };
1010
+
1011
+ // return 1 if already optimal, return 0 otherwise
1012
+ int Solver_NU::select_working_set(int &out_i, int &out_j)
1013
+ {
1014
+ // return i,j such that y_i = y_j and
1015
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1016
+ // j: minimizes the decrease of obj value
1017
+ // (if quadratic coefficient <= 0, replace it with tau)
1018
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1019
+
1020
+ double Gmaxp = -INF;
1021
+ double Gmaxp2 = -INF;
1022
+ int Gmaxp_idx = -1;
1023
+
1024
+ double Gmaxn = -INF;
1025
+ double Gmaxn2 = -INF;
1026
+ int Gmaxn_idx = -1;
1027
+
1028
+ int Gmin_idx = -1;
1029
+ double obj_diff_min = INF;
1030
+
1031
+ for(int t=0;t<active_size;t++)
1032
+ if(y[t]==+1)
1033
+ {
1034
+ if(!is_upper_bound(t))
1035
+ if(-G[t] >= Gmaxp)
1036
+ {
1037
+ Gmaxp = -G[t];
1038
+ Gmaxp_idx = t;
1039
+ }
1040
+ }
1041
+ else
1042
+ {
1043
+ if(!is_lower_bound(t))
1044
+ if(G[t] >= Gmaxn)
1045
+ {
1046
+ Gmaxn = G[t];
1047
+ Gmaxn_idx = t;
1048
+ }
1049
+ }
1050
+
1051
+ int ip = Gmaxp_idx;
1052
+ int in = Gmaxn_idx;
1053
+ const Qfloat *Q_ip = NULL;
1054
+ const Qfloat *Q_in = NULL;
1055
+ if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1056
+ Q_ip = Q->get_Q(ip,active_size);
1057
+ if(in != -1)
1058
+ Q_in = Q->get_Q(in,active_size);
1059
+
1060
+ for(int j=0;j<active_size;j++)
1061
+ {
1062
+ if(y[j]==+1)
1063
+ {
1064
+ if (!is_lower_bound(j))
1065
+ {
1066
+ double grad_diff=Gmaxp+G[j];
1067
+ if (G[j] >= Gmaxp2)
1068
+ Gmaxp2 = G[j];
1069
+ if (grad_diff > 0)
1070
+ {
1071
+ double obj_diff;
1072
+ double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1073
+ if (quad_coef > 0)
1074
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1075
+ else
1076
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1077
+
1078
+ if (obj_diff <= obj_diff_min)
1079
+ {
1080
+ Gmin_idx=j;
1081
+ obj_diff_min = obj_diff;
1082
+ }
1083
+ }
1084
+ }
1085
+ }
1086
+ else
1087
+ {
1088
+ if (!is_upper_bound(j))
1089
+ {
1090
+ double grad_diff=Gmaxn-G[j];
1091
+ if (-G[j] >= Gmaxn2)
1092
+ Gmaxn2 = -G[j];
1093
+ if (grad_diff > 0)
1094
+ {
1095
+ double obj_diff;
1096
+ double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
1097
+ if (quad_coef > 0)
1098
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1099
+ else
1100
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1101
+
1102
+ if (obj_diff <= obj_diff_min)
1103
+ {
1104
+ Gmin_idx=j;
1105
+ obj_diff_min = obj_diff;
1106
+ }
1107
+ }
1108
+ }
1109
+ }
1110
+ }
1111
+
1112
+ if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1113
+ return 1;
1114
+
1115
+ if (y[Gmin_idx] == +1)
1116
+ out_i = Gmaxp_idx;
1117
+ else
1118
+ out_i = Gmaxn_idx;
1119
+ out_j = Gmin_idx;
1120
+
1121
+ return 0;
1122
+ }
1123
+
1124
+ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1125
+ {
1126
+ if(is_upper_bound(i))
1127
+ {
1128
+ if(y[i]==+1)
1129
+ return(-G[i] > Gmax1);
1130
+ else
1131
+ return(-G[i] > Gmax4);
1132
+ }
1133
+ else if(is_lower_bound(i))
1134
+ {
1135
+ if(y[i]==+1)
1136
+ return(G[i] > Gmax2);
1137
+ else
1138
+ return(G[i] > Gmax3);
1139
+ }
1140
+ else
1141
+ return(false);
1142
+ }
1143
+
1144
+ void Solver_NU::do_shrinking()
1145
+ {
1146
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1147
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1148
+ double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1149
+ double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1150
+
1151
+ // find maximal violating pair first
1152
+ int i;
1153
+ for(i=0;i<active_size;i++)
1154
+ {
1155
+ if(!is_upper_bound(i))
1156
+ {
1157
+ if(y[i]==+1)
1158
+ {
1159
+ if(-G[i] > Gmax1) Gmax1 = -G[i];
1160
+ }
1161
+ else if(-G[i] > Gmax4) Gmax4 = -G[i];
1162
+ }
1163
+ if(!is_lower_bound(i))
1164
+ {
1165
+ if(y[i]==+1)
1166
+ {
1167
+ if(G[i] > Gmax2) Gmax2 = G[i];
1168
+ }
1169
+ else if(G[i] > Gmax3) Gmax3 = G[i];
1170
+ }
1171
+ }
1172
+
1173
+ if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1174
+ {
1175
+ unshrink = true;
1176
+ reconstruct_gradient();
1177
+ active_size = l;
1178
+ }
1179
+
1180
+ for(i=0;i<active_size;i++)
1181
+ if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1182
+ {
1183
+ active_size--;
1184
+ while (active_size > i)
1185
+ {
1186
+ if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1187
+ {
1188
+ swap_index(i,active_size);
1189
+ break;
1190
+ }
1191
+ active_size--;
1192
+ }
1193
+ }
1194
+ }
1195
+
1196
+ double Solver_NU::calculate_rho()
1197
+ {
1198
+ int nr_free1 = 0,nr_free2 = 0;
1199
+ double ub1 = INF, ub2 = INF;
1200
+ double lb1 = -INF, lb2 = -INF;
1201
+ double sum_free1 = 0, sum_free2 = 0;
1202
+
1203
+ for(int i=0;i<active_size;i++)
1204
+ {
1205
+ if(y[i]==+1)
1206
+ {
1207
+ if(is_upper_bound(i))
1208
+ lb1 = max(lb1,G[i]);
1209
+ else if(is_lower_bound(i))
1210
+ ub1 = min(ub1,G[i]);
1211
+ else
1212
+ {
1213
+ ++nr_free1;
1214
+ sum_free1 += G[i];
1215
+ }
1216
+ }
1217
+ else
1218
+ {
1219
+ if(is_upper_bound(i))
1220
+ lb2 = max(lb2,G[i]);
1221
+ else if(is_lower_bound(i))
1222
+ ub2 = min(ub2,G[i]);
1223
+ else
1224
+ {
1225
+ ++nr_free2;
1226
+ sum_free2 += G[i];
1227
+ }
1228
+ }
1229
+ }
1230
+
1231
+ double r1,r2;
1232
+ if(nr_free1 > 0)
1233
+ r1 = sum_free1/nr_free1;
1234
+ else
1235
+ r1 = (ub1+lb1)/2;
1236
+
1237
+ if(nr_free2 > 0)
1238
+ r2 = sum_free2/nr_free2;
1239
+ else
1240
+ r2 = (ub2+lb2)/2;
1241
+
1242
+ si->r = (r1+r2)/2;
1243
+ return (r1-r2)/2;
1244
+ }
1245
+
1246
+ //
1247
+ // Q matrices for various formulations
1248
+ //
1249
+ class SVC_Q: public Kernel
1250
+ {
1251
+ public:
1252
+ SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1253
+ :Kernel(prob.l, prob.x, param)
1254
+ {
1255
+ clone(y,y_,prob.l);
1256
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1257
+ QD = new Qfloat[prob.l];
1258
+ for(int i=0;i<prob.l;i++)
1259
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1260
+ }
1261
+
1262
+ Qfloat *get_Q(int i, int len) const
1263
+ {
1264
+ Qfloat *data;
1265
+ int start, j;
1266
+ if((start = cache->get_data(i,&data,len)) < len)
1267
+ {
1268
+ for(j=start;j<len;j++)
1269
+ data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1270
+ }
1271
+ return data;
1272
+ }
1273
+
1274
+ Qfloat *get_QD() const
1275
+ {
1276
+ return QD;
1277
+ }
1278
+
1279
+ void swap_index(int i, int j) const
1280
+ {
1281
+ cache->swap_index(i,j);
1282
+ Kernel::swap_index(i,j);
1283
+ swap(y[i],y[j]);
1284
+ swap(QD[i],QD[j]);
1285
+ }
1286
+
1287
+ ~SVC_Q()
1288
+ {
1289
+ delete[] y;
1290
+ delete cache;
1291
+ delete[] QD;
1292
+ }
1293
+ private:
1294
+ schar *y;
1295
+ Cache *cache;
1296
+ Qfloat *QD;
1297
+ };
1298
+
1299
+ class ONE_CLASS_Q: public Kernel
1300
+ {
1301
+ public:
1302
+ ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1303
+ :Kernel(prob.l, prob.x, param)
1304
+ {
1305
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1306
+ QD = new Qfloat[prob.l];
1307
+ for(int i=0;i<prob.l;i++)
1308
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1309
+ }
1310
+
1311
+ Qfloat *get_Q(int i, int len) const
1312
+ {
1313
+ Qfloat *data;
1314
+ int start, j;
1315
+ if((start = cache->get_data(i,&data,len)) < len)
1316
+ {
1317
+ for(j=start;j<len;j++)
1318
+ data[j] = (Qfloat)(this->*kernel_function)(i,j);
1319
+ }
1320
+ return data;
1321
+ }
1322
+
1323
+ Qfloat *get_QD() const
1324
+ {
1325
+ return QD;
1326
+ }
1327
+
1328
+ void swap_index(int i, int j) const
1329
+ {
1330
+ cache->swap_index(i,j);
1331
+ Kernel::swap_index(i,j);
1332
+ swap(QD[i],QD[j]);
1333
+ }
1334
+
1335
+ ~ONE_CLASS_Q()
1336
+ {
1337
+ delete cache;
1338
+ delete[] QD;
1339
+ }
1340
+ private:
1341
+ Cache *cache;
1342
+ Qfloat *QD;
1343
+ };
1344
+
1345
+ class SVR_Q: public Kernel
1346
+ {
1347
+ public:
1348
+ SVR_Q(const svm_problem& prob, const svm_parameter& param)
1349
+ :Kernel(prob.l, prob.x, param)
1350
+ {
1351
+ l = prob.l;
1352
+ cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1353
+ QD = new Qfloat[2*l];
1354
+ sign = new schar[2*l];
1355
+ index = new int[2*l];
1356
+ for(int k=0;k<l;k++)
1357
+ {
1358
+ sign[k] = 1;
1359
+ sign[k+l] = -1;
1360
+ index[k] = k;
1361
+ index[k+l] = k;
1362
+ QD[k]= (Qfloat)(this->*kernel_function)(k,k);
1363
+ QD[k+l]=QD[k];
1364
+ }
1365
+ buffer[0] = new Qfloat[2*l];
1366
+ buffer[1] = new Qfloat[2*l];
1367
+ next_buffer = 0;
1368
+ }
1369
+
1370
+ void swap_index(int i, int j) const
1371
+ {
1372
+ swap(sign[i],sign[j]);
1373
+ swap(index[i],index[j]);
1374
+ swap(QD[i],QD[j]);
1375
+ }
1376
+
1377
+ Qfloat *get_Q(int i, int len) const
1378
+ {
1379
+ Qfloat *data;
1380
+ int j, real_i = index[i];
1381
+ if(cache->get_data(real_i,&data,l) < l)
1382
+ {
1383
+ for(j=0;j<l;j++)
1384
+ data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1385
+ }
1386
+
1387
+ // reorder and copy
1388
+ Qfloat *buf = buffer[next_buffer];
1389
+ next_buffer = 1 - next_buffer;
1390
+ schar si = sign[i];
1391
+ for(int j=0;j<len;j++)
1392
+ buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1393
+ return buf;
1394
+ }
1395
+
1396
+ Qfloat *get_QD() const
1397
+ {
1398
+ return QD;
1399
+ }
1400
+
1401
+ ~SVR_Q()
1402
+ {
1403
+ delete cache;
1404
+ delete[] sign;
1405
+ delete[] index;
1406
+ delete[] buffer[0];
1407
+ delete[] buffer[1];
1408
+ delete[] QD;
1409
+ }
1410
+ private:
1411
+ int l;
1412
+ Cache *cache;
1413
+ schar *sign;
1414
+ int *index;
1415
+ mutable int next_buffer;
1416
+ Qfloat *buffer[2];
1417
+ Qfloat *QD;
1418
+ };
1419
+
1420
+ //
1421
+ // construct and solve various formulations
1422
+ //
1423
+ static void solve_c_svc(
1424
+ const svm_problem *prob, const svm_parameter* param,
1425
+ double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1426
+ {
1427
+ int l = prob->l;
1428
+ double *minus_ones = new double[l];
1429
+ schar *y = new schar[l];
1430
+
1431
+ int i;
1432
+
1433
+ for(i=0;i<l;i++)
1434
+ {
1435
+ alpha[i] = 0;
1436
+ minus_ones[i] = -1;
1437
+ if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1438
+ }
1439
+
1440
+ Solver s;
1441
+ s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1442
+ alpha, Cp, Cn, param->eps, si, param->shrinking);
1443
+
1444
+ double sum_alpha=0;
1445
+ for(i=0;i<l;i++)
1446
+ sum_alpha += alpha[i];
1447
+
1448
+ if (Cp==Cn)
1449
+ info("nu = %f\n", sum_alpha/(Cp*prob->l));
1450
+
1451
+ for(i=0;i<l;i++)
1452
+ alpha[i] *= y[i];
1453
+
1454
+ delete[] minus_ones;
1455
+ delete[] y;
1456
+ }
1457
+
1458
+ static void solve_nu_svc(
1459
+ const svm_problem *prob, const svm_parameter *param,
1460
+ double *alpha, Solver::SolutionInfo* si)
1461
+ {
1462
+ int i;
1463
+ int l = prob->l;
1464
+ double nu = param->nu;
1465
+
1466
+ schar *y = new schar[l];
1467
+
1468
+ for(i=0;i<l;i++)
1469
+ if(prob->y[i]>0)
1470
+ y[i] = +1;
1471
+ else
1472
+ y[i] = -1;
1473
+
1474
+ double sum_pos = nu*l/2;
1475
+ double sum_neg = nu*l/2;
1476
+
1477
+ for(i=0;i<l;i++)
1478
+ if(y[i] == +1)
1479
+ {
1480
+ alpha[i] = min(1.0,sum_pos);
1481
+ sum_pos -= alpha[i];
1482
+ }
1483
+ else
1484
+ {
1485
+ alpha[i] = min(1.0,sum_neg);
1486
+ sum_neg -= alpha[i];
1487
+ }
1488
+
1489
+ double *zeros = new double[l];
1490
+
1491
+ for(i=0;i<l;i++)
1492
+ zeros[i] = 0;
1493
+
1494
+ Solver_NU s;
1495
+ s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1496
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1497
+ double r = si->r;
1498
+
1499
+ info("C = %f\n",1/r);
1500
+
1501
+ for(i=0;i<l;i++)
1502
+ alpha[i] *= y[i]/r;
1503
+
1504
+ si->rho /= r;
1505
+ si->obj /= (r*r);
1506
+ si->upper_bound_p = 1/r;
1507
+ si->upper_bound_n = 1/r;
1508
+
1509
+ delete[] y;
1510
+ delete[] zeros;
1511
+ }
1512
+
1513
+ static void solve_one_class(
1514
+ const svm_problem *prob, const svm_parameter *param,
1515
+ double *alpha, Solver::SolutionInfo* si)
1516
+ {
1517
+ int l = prob->l;
1518
+ double *zeros = new double[l];
1519
+ schar *ones = new schar[l];
1520
+ int i;
1521
+
1522
+ int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1523
+
1524
+ for(i=0;i<n;i++)
1525
+ alpha[i] = 1;
1526
+ if(n<prob->l)
1527
+ alpha[n] = param->nu * prob->l - n;
1528
+ for(i=n+1;i<l;i++)
1529
+ alpha[i] = 0;
1530
+
1531
+ for(i=0;i<l;i++)
1532
+ {
1533
+ zeros[i] = 0;
1534
+ ones[i] = 1;
1535
+ }
1536
+
1537
+ Solver s;
1538
+ s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1539
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1540
+
1541
+ delete[] zeros;
1542
+ delete[] ones;
1543
+ }
1544
+
1545
+ static void solve_epsilon_svr(
1546
+ const svm_problem *prob, const svm_parameter *param,
1547
+ double *alpha, Solver::SolutionInfo* si)
1548
+ {
1549
+ int l = prob->l;
1550
+ double *alpha2 = new double[2*l];
1551
+ double *linear_term = new double[2*l];
1552
+ schar *y = new schar[2*l];
1553
+ int i;
1554
+
1555
+ for(i=0;i<l;i++)
1556
+ {
1557
+ alpha2[i] = 0;
1558
+ linear_term[i] = param->p - prob->y[i];
1559
+ y[i] = 1;
1560
+
1561
+ alpha2[i+l] = 0;
1562
+ linear_term[i+l] = param->p + prob->y[i];
1563
+ y[i+l] = -1;
1564
+ }
1565
+
1566
+ Solver s;
1567
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1568
+ alpha2, param->C, param->C, param->eps, si, param->shrinking);
1569
+
1570
+ double sum_alpha = 0;
1571
+ for(i=0;i<l;i++)
1572
+ {
1573
+ alpha[i] = alpha2[i] - alpha2[i+l];
1574
+ sum_alpha += fabs(alpha[i]);
1575
+ }
1576
+ info("nu = %f\n",sum_alpha/(param->C*l));
1577
+
1578
+ delete[] alpha2;
1579
+ delete[] linear_term;
1580
+ delete[] y;
1581
+ }
1582
+
1583
+ static void solve_nu_svr(
1584
+ const svm_problem *prob, const svm_parameter *param,
1585
+ double *alpha, Solver::SolutionInfo* si)
1586
+ {
1587
+ int l = prob->l;
1588
+ double C = param->C;
1589
+ double *alpha2 = new double[2*l];
1590
+ double *linear_term = new double[2*l];
1591
+ schar *y = new schar[2*l];
1592
+ int i;
1593
+
1594
+ double sum = C * param->nu * l / 2;
1595
+ for(i=0;i<l;i++)
1596
+ {
1597
+ alpha2[i] = alpha2[i+l] = min(sum,C);
1598
+ sum -= alpha2[i];
1599
+
1600
+ linear_term[i] = - prob->y[i];
1601
+ y[i] = 1;
1602
+
1603
+ linear_term[i+l] = prob->y[i];
1604
+ y[i+l] = -1;
1605
+ }
1606
+
1607
+ Solver_NU s;
1608
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1609
+ alpha2, C, C, param->eps, si, param->shrinking);
1610
+
1611
+ info("epsilon = %f\n",-si->r);
1612
+
1613
+ for(i=0;i<l;i++)
1614
+ alpha[i] = alpha2[i] - alpha2[i+l];
1615
+
1616
+ delete[] alpha2;
1617
+ delete[] linear_term;
1618
+ delete[] y;
1619
+ }
1620
+
1621
+ //
1622
+ // decision_function
1623
+ //
1624
+ struct decision_function
1625
+ {
1626
+ double *alpha;
1627
+ double rho;
1628
+ double obj;
1629
+ };
1630
+
1631
+ decision_function svm_train_one(
1632
+ const svm_problem *prob, const svm_parameter *param,
1633
+ double Cp, double Cn)
1634
+ {
1635
+ double *alpha = Malloc(double,prob->l);
1636
+ Solver::SolutionInfo si;
1637
+ switch(param->svm_type)
1638
+ {
1639
+ case C_SVC:
1640
+ solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1641
+ break;
1642
+ case NU_SVC:
1643
+ solve_nu_svc(prob,param,alpha,&si);
1644
+ break;
1645
+ case ONE_CLASS:
1646
+ solve_one_class(prob,param,alpha,&si);
1647
+ break;
1648
+ case EPSILON_SVR:
1649
+ solve_epsilon_svr(prob,param,alpha,&si);
1650
+ break;
1651
+ case NU_SVR:
1652
+ solve_nu_svr(prob,param,alpha,&si);
1653
+ break;
1654
+ }
1655
+
1656
+ info("obj = %f, rho = %f\n",si.obj,si.rho);
1657
+
1658
+ // output SVs
1659
+
1660
+ int nSV = 0;
1661
+ int nBSV = 0;
1662
+ for(int i=0;i<prob->l;i++)
1663
+ {
1664
+ if(fabs(alpha[i]) > 0)
1665
+ {
1666
+ ++nSV;
1667
+ if(prob->y[i] > 0)
1668
+ {
1669
+ if(fabs(alpha[i]) >= si.upper_bound_p)
1670
+ ++nBSV;
1671
+ }
1672
+ else
1673
+ {
1674
+ if(fabs(alpha[i]) >= si.upper_bound_n)
1675
+ ++nBSV;
1676
+ }
1677
+ }
1678
+ }
1679
+
1680
+ info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1681
+
1682
+ decision_function f;
1683
+ f.alpha = alpha;
1684
+ f.rho = si.rho;
1685
+ f.obj = si.obj;
1686
+ return f;
1687
+ }
1688
+
1689
+ //
1690
+ // svm_model
1691
+ //
1692
+ struct svm_model
1693
+ {
1694
+ svm_parameter param; // parameter
1695
+ int nr_class; // number of classes, = 2 in regression/one class svm
1696
+ int l; // total #SV
1697
+ svm_node **SV; // SVs (SV[l])
1698
+ double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
1699
+ double *rho; // constants in decision functions (rho[k*(k-1)/2])
1700
+ double *probA; // pairwise probability information
1701
+ double *probB;
1702
+
1703
+ double *obj;
1704
+
1705
+ // for classification only
1706
+
1707
+ int *label; // label of each class (label[k])
1708
+ int *nSV; // number of SVs for each class (nSV[k])
1709
+ // nSV[0] + nSV[1] + ... + nSV[k-1] = l
1710
+ // XXX
1711
+ int free_sv; // 1 if svm_model is created by svm_load_model
1712
+ // 0 if svm_model is created by svm_train
1713
+ };
1714
+
1715
+ // Get the rho element of the model. Only works when there are two
1716
+ // labels (1 classifier).
1717
+ double svm_get_model_rho(struct svm_model *model)
1718
+ {
1719
+ if (model->nr_class > 2)
1720
+ info("warning: rho requested for model with more than 2 labels");
1721
+ return model->rho[0];
1722
+ }
1723
+
1724
+ int svm_get_model_num_coefs(struct svm_model *model)
1725
+ {
1726
+ return model->l;
1727
+ }
1728
+
1729
+ // Get the coefficients of the model. Only works when there are two
1730
+ // labels (1 classifier).
1731
+ void svm_get_model_coefs(struct svm_model *model, double* out_array)
1732
+ {
1733
+ if (model->nr_class > 2)
1734
+ info("warning: coefficients requested for model with more than 2 labels");
1735
+ memcpy(out_array, model->sv_coef[0], sizeof(double) * model->l);
1736
+ }
1737
+
1738
+ // Get the permutation of the indices of the coefficients w.r.t. to the input problem.
1739
+ void svm_get_model_perm(struct svm_model *model, int* out_array)
1740
+ {
1741
+ if (model->nr_class > 2)
1742
+ info("warning: permutation requested for model with more than 2 labels");
1743
+ int i;
1744
+ for ( i = 0; i < model->l; ++i)
1745
+ {
1746
+ struct svm_node* n = model->SV[i];
1747
+ if (0 != n->index)
1748
+ {
1749
+ info("warning: missing 0 index");
1750
+ out_array[i] = -1;
1751
+ }
1752
+ else
1753
+ out_array[i] = (int)n->value;
1754
+ }
1755
+ }
1756
+
1757
+ // Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
1758
+ void sigmoid_train(
1759
+ int l, const double *dec_values, const double *labels,
1760
+ double& A, double& B)
1761
+ {
1762
+ double prior1=0, prior0 = 0;
1763
+ int i;
1764
+
1765
+ for (i=0;i<l;i++)
1766
+ if (labels[i] > 0) prior1+=1;
1767
+ else prior0+=1;
1768
+
1769
+ int max_iter=100; // Maximal number of iterations
1770
+ double min_step=1e-10; // Minimal step taken in line search
1771
+ double sigma=1e-12; // For numerically strict PD of Hessian
1772
+ double eps=1e-5;
1773
+ double hiTarget=(prior1+1.0)/(prior1+2.0);
1774
+ double loTarget=1/(prior0+2.0);
1775
+ double *t=Malloc(double,l);
1776
+ double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1777
+ double newA,newB,newf,d1,d2;
1778
+ int iter;
1779
+
1780
+ // Initial Point and Initial Fun Value
1781
+ A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1782
+ double fval = 0.0;
1783
+
1784
+ for (i=0;i<l;i++)
1785
+ {
1786
+ if (labels[i]>0) t[i]=hiTarget;
1787
+ else t[i]=loTarget;
1788
+ fApB = dec_values[i]*A+B;
1789
+ if (fApB>=0)
1790
+ fval += t[i]*fApB + log(1+exp(-fApB));
1791
+ else
1792
+ fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1793
+ }
1794
+ for (iter=0;iter<max_iter;iter++)
1795
+ {
1796
+ // Update Gradient and Hessian (use H' = H + sigma I)
1797
+ h11=sigma; // numerically ensures strict PD
1798
+ h22=sigma;
1799
+ h21=0.0;g1=0.0;g2=0.0;
1800
+ for (i=0;i<l;i++)
1801
+ {
1802
+ fApB = dec_values[i]*A+B;
1803
+ if (fApB >= 0)
1804
+ {
1805
+ p=exp(-fApB)/(1.0+exp(-fApB));
1806
+ q=1.0/(1.0+exp(-fApB));
1807
+ }
1808
+ else
1809
+ {
1810
+ p=1.0/(1.0+exp(fApB));
1811
+ q=exp(fApB)/(1.0+exp(fApB));
1812
+ }
1813
+ d2=p*q;
1814
+ h11+=dec_values[i]*dec_values[i]*d2;
1815
+ h22+=d2;
1816
+ h21+=dec_values[i]*d2;
1817
+ d1=t[i]-p;
1818
+ g1+=dec_values[i]*d1;
1819
+ g2+=d1;
1820
+ }
1821
+
1822
+ // Stopping Criteria
1823
+ if (fabs(g1)<eps && fabs(g2)<eps)
1824
+ break;
1825
+
1826
+ // Finding Newton direction: -inv(H') * g
1827
+ det=h11*h22-h21*h21;
1828
+ dA=-(h22*g1 - h21 * g2) / det;
1829
+ dB=-(-h21*g1+ h11 * g2) / det;
1830
+ gd=g1*dA+g2*dB;
1831
+
1832
+
1833
+ stepsize = 1; // Line Search
1834
+ while (stepsize >= min_step)
1835
+ {
1836
+ newA = A + stepsize * dA;
1837
+ newB = B + stepsize * dB;
1838
+
1839
+ // New function value
1840
+ newf = 0.0;
1841
+ for (i=0;i<l;i++)
1842
+ {
1843
+ fApB = dec_values[i]*newA+newB;
1844
+ if (fApB >= 0)
1845
+ newf += t[i]*fApB + log(1+exp(-fApB));
1846
+ else
1847
+ newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1848
+ }
1849
+ // Check sufficient decrease
1850
+ if (newf<fval+0.0001*stepsize*gd)
1851
+ {
1852
+ A=newA;B=newB;fval=newf;
1853
+ break;
1854
+ }
1855
+ else
1856
+ stepsize = stepsize / 2.0;
1857
+ }
1858
+
1859
+ if (stepsize < min_step)
1860
+ {
1861
+ info("Line search fails in two-class probability estimates\n");
1862
+ break;
1863
+ }
1864
+ }
1865
+
1866
+ if (iter>=max_iter)
1867
+ info("Reaching maximal iterations in two-class probability estimates\n");
1868
+ free(t);
1869
+ }
1870
+
1871
+ double sigmoid_predict(double decision_value, double A, double B)
1872
+ {
1873
+ double fApB = decision_value*A+B;
1874
+ if (fApB >= 0)
1875
+ return exp(-fApB)/(1.0+exp(-fApB));
1876
+ else
1877
+ return 1.0/(1+exp(fApB)) ;
1878
+ }
1879
+
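sigmoid_predict above is the final step of Platt scaling: a logistic function of the decision value, written so that the exponential never overflows. The same mapping in Ruby, as an illustrative sketch only:

    # Illustrative sketch (not part of the gem): sigmoid_predict's numerically
    # stable evaluation of 1 / (1 + exp(A*decision_value + B)).
    def sigmoid_predict(decision_value, a, b)
      f = decision_value * a + b
      if f >= 0
        Math.exp(-f) / (1.0 + Math.exp(-f))
      else
        1.0 / (1.0 + Math.exp(f))
      end
    end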
1880
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1881
+ void multiclass_probability(int k, double **r, double *p)
1882
+ {
1883
+ int t,j;
1884
+ int iter = 0, max_iter=max(100,k);
1885
+ double **Q=Malloc(double *,k);
1886
+ double *Qp=Malloc(double,k);
1887
+ double pQp, eps=0.005/k;
1888
+
1889
+ for (t=0;t<k;t++)
1890
+ {
1891
+ p[t]=1.0/k; // Valid if k = 1
1892
+ Q[t]=Malloc(double,k);
1893
+ Q[t][t]=0;
1894
+ for (j=0;j<t;j++)
1895
+ {
1896
+ Q[t][t]+=r[j][t]*r[j][t];
1897
+ Q[t][j]=Q[j][t];
1898
+ }
1899
+ for (j=t+1;j<k;j++)
1900
+ {
1901
+ Q[t][t]+=r[j][t]*r[j][t];
1902
+ Q[t][j]=-r[j][t]*r[t][j];
1903
+ }
1904
+ }
1905
+ for (iter=0;iter<max_iter;iter++)
1906
+ {
1907
+ // stopping condition, recalculate QP,pQP for numerical accuracy
1908
+ pQp=0;
1909
+ for (t=0;t<k;t++)
1910
+ {
1911
+ Qp[t]=0;
1912
+ for (j=0;j<k;j++)
1913
+ Qp[t]+=Q[t][j]*p[j];
1914
+ pQp+=p[t]*Qp[t];
1915
+ }
1916
+ double max_error=0;
1917
+ for (t=0;t<k;t++)
1918
+ {
1919
+ double error=fabs(Qp[t]-pQp);
1920
+ if (error>max_error)
1921
+ max_error=error;
1922
+ }
1923
+ if (max_error<eps) break;
1924
+
1925
+ for (t=0;t<k;t++)
1926
+ {
1927
+ double diff=(-Qp[t]+pQp)/Q[t][t];
1928
+ p[t]+=diff;
1929
+ pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1930
+ for (j=0;j<k;j++)
1931
+ {
1932
+ Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1933
+ p[j]/=(1+diff);
1934
+ }
1935
+ }
1936
+ }
1937
+ if (iter>=max_iter)
1938
+ info("Exceeds max_iter in multiclass_prob\n");
1939
+ for(t=0;t<k;t++) free(Q[t]);
1940
+ free(Q);
1941
+ free(Qp);
1942
+ }
1943
+
1944
+ // Cross-validation decision values for probability estimates
1945
+ void svm_binary_svc_probability(
1946
+ const svm_problem *prob, const svm_parameter *param,
1947
+ double Cp, double Cn, double& probA, double& probB)
1948
+ {
1949
+ int i;
1950
+ int nr_fold = 5;
1951
+ int *perm = Malloc(int,prob->l);
1952
+ double *dec_values = Malloc(double,prob->l);
1953
+
1954
+ // random shuffle
1955
+ for(i=0;i<prob->l;i++) perm[i]=i;
1956
+ for(i=0;i<prob->l;i++)
1957
+ {
1958
+ int j = i+rand()%(prob->l-i);
1959
+ swap(perm[i],perm[j]);
1960
+ }
1961
+ for(i=0;i<nr_fold;i++)
1962
+ {
1963
+ int begin = i*prob->l/nr_fold;
1964
+ int end = (i+1)*prob->l/nr_fold;
1965
+ int j,k;
1966
+ struct svm_problem subprob;
1967
+
1968
+ subprob.l = prob->l-(end-begin);
1969
+ subprob.x = Malloc(struct svm_node*,subprob.l);
1970
+ subprob.y = Malloc(double,subprob.l);
1971
+
1972
+ k=0;
1973
+ for(j=0;j<begin;j++)
1974
+ {
1975
+ subprob.x[k] = prob->x[perm[j]];
1976
+ subprob.y[k] = prob->y[perm[j]];
1977
+ ++k;
1978
+ }
1979
+ for(j=end;j<prob->l;j++)
1980
+ {
1981
+ subprob.x[k] = prob->x[perm[j]];
1982
+ subprob.y[k] = prob->y[perm[j]];
1983
+ ++k;
1984
+ }
1985
+ int p_count=0,n_count=0;
1986
+ for(j=0;j<k;j++)
1987
+ if(subprob.y[j]>0)
1988
+ p_count++;
1989
+ else
1990
+ n_count++;
1991
+
1992
+ if(p_count==0 && n_count==0)
1993
+ for(j=begin;j<end;j++)
1994
+ dec_values[perm[j]] = 0;
1995
+ else if(p_count > 0 && n_count == 0)
1996
+ for(j=begin;j<end;j++)
1997
+ dec_values[perm[j]] = 1;
1998
+ else if(p_count == 0 && n_count > 0)
1999
+ for(j=begin;j<end;j++)
2000
+ dec_values[perm[j]] = -1;
2001
+ else
2002
+ {
2003
+ svm_parameter subparam = *param;
2004
+ subparam.probability=0;
2005
+ subparam.C=1.0;
2006
+ subparam.nr_weight=2;
2007
+ subparam.weight_label = Malloc(int,2);
2008
+ subparam.weight = Malloc(double,2);
2009
+ subparam.weight_label[0]=+1;
2010
+ subparam.weight_label[1]=-1;
2011
+ subparam.weight[0]=Cp;
2012
+ subparam.weight[1]=Cn;
2013
+ struct svm_model *submodel = svm_train(&subprob,&subparam);
2014
+ for(j=begin;j<end;j++)
2015
+ {
2016
+ svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
2017
+ // ensure +1/-1 order; this is why the CV subroutine is not used here
2018
+ dec_values[perm[j]] *= submodel->label[0];
2019
+ }
2020
+ svm_destroy_model(submodel);
2021
+ svm_destroy_param(&subparam);
2022
+ }
2023
+ free(subprob.x);
2024
+ free(subprob.y);
2025
+ }
2026
+ sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
2027
+ free(dec_values);
2028
+ free(perm);
2029
+ }
2030
+
2031
+ // Return parameter of a Laplace distribution
2032
+ double svm_svr_probability(
2033
+ const svm_problem *prob, const svm_parameter *param)
2034
+ {
2035
+ int i;
2036
+ int nr_fold = 5;
2037
+ double *ymv = Malloc(double,prob->l);
2038
+ double mae = 0;
2039
+
2040
+ svm_parameter newparam = *param;
2041
+ newparam.probability = 0;
2042
+ svm_cross_validation(prob,&newparam,nr_fold,ymv);
2043
+ for(i=0;i<prob->l;i++)
2044
+ {
2045
+ ymv[i]=prob->y[i]-ymv[i];
2046
+ mae += fabs(ymv[i]);
2047
+ }
2048
+ mae /= prob->l;
2049
+ double std=sqrt(2*mae*mae);
2050
+ int count=0;
2051
+ mae=0;
2052
+ for(i=0;i<prob->l;i++)
2053
+ if (fabs(ymv[i]) > 5*std)
2054
+ count=count+1;
2055
+ else
2056
+ mae+=fabs(ymv[i]);
2057
+ mae /= (prob->l-count);
2058
+ info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2059
+ free(ymv);
2060
+ return mae;
2061
+ }
2062
+
2063
+
2064
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2065
+ // perm, length l, must be allocated before calling this subroutine
2066
+ void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2067
+ {
2068
+ int l = prob->l;
2069
+ int max_nr_class = 16;
2070
+ int nr_class = 0;
2071
+ int *label = Malloc(int,max_nr_class);
2072
+ int *count = Malloc(int,max_nr_class);
2073
+ int *data_label = Malloc(int,l);
2074
+ int i;
2075
+
2076
+ for(i=0;i<l;i++)
2077
+ {
2078
+ int this_label = (int)prob->y[i];
2079
+ int j;
2080
+ for(j=0;j<nr_class;j++)
2081
+ {
2082
+ if(this_label == label[j])
2083
+ {
2084
+ ++count[j];
2085
+ break;
2086
+ }
2087
+ }
2088
+ data_label[i] = j;
2089
+ if(j == nr_class)
2090
+ {
2091
+ if(nr_class == max_nr_class)
2092
+ {
2093
+ max_nr_class *= 2;
2094
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2095
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2096
+ }
2097
+ label[nr_class] = this_label;
2098
+ count[nr_class] = 1;
2099
+ ++nr_class;
2100
+ }
2101
+ }
2102
+
2103
+ int *start = Malloc(int,nr_class);
2104
+ start[0] = 0;
2105
+ for(i=1;i<nr_class;i++)
2106
+ start[i] = start[i-1]+count[i-1];
2107
+ for(i=0;i<l;i++)
2108
+ {
2109
+ perm[start[data_label[i]]] = i;
2110
+ ++start[data_label[i]];
2111
+ }
2112
+ start[0] = 0;
2113
+ for(i=1;i<nr_class;i++)
2114
+ start[i] = start[i-1]+count[i-1];
2115
+
2116
+ *nr_class_ret = nr_class;
2117
+ *label_ret = label;
2118
+ *start_ret = start;
2119
+ *count_ret = count;
2120
+ free(data_label);
2121
+ }
2122
+
2123
+ //
2124
+ // Interface functions
2125
+ //
2126
+ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2127
+ {
2128
+ svm_model *model = Malloc(svm_model,1);
2129
+ model->param = *param;
2130
+ model->free_sv = 0; // SV pointers will reference prob->x, which the caller still owns
2131
+
2132
+ if(param->svm_type == ONE_CLASS ||
2133
+ param->svm_type == EPSILON_SVR ||
2134
+ param->svm_type == NU_SVR)
2135
+ {
2136
+ // regression or one-class-svm
2137
+ model->nr_class = 2;
2138
+ model->label = NULL;
2139
+ model->nSV = NULL;
2140
+ model->probA = NULL; model->probB = NULL;
2141
+ model->sv_coef = Malloc(double *,1);
2142
+
2143
+ if(param->probability &&
2144
+ (param->svm_type == EPSILON_SVR ||
2145
+ param->svm_type == NU_SVR))
2146
+ {
2147
+ model->probA = Malloc(double,1);
2148
+ model->probA[0] = svm_svr_probability(prob,param);
2149
+ }
2150
+
2151
+ decision_function f = svm_train_one(prob,param,0,0);
2152
+ model->rho = Malloc(double,1);
2153
+ model->rho[0] = f.rho;
2154
+ model->obj = Malloc(double,1);
2155
+ model->obj[0] = f.obj;
2156
+
2157
+ int nSV = 0;
2158
+ int i;
2159
+ for(i=0;i<prob->l;i++)
2160
+ if(fabs(f.alpha[i]) > 0) ++nSV;
2161
+ model->l = nSV;
2162
+ model->SV = Malloc(svm_node *,nSV);
2163
+ model->sv_coef[0] = Malloc(double,nSV);
2164
+ int j = 0;
2165
+ for(i=0;i<prob->l;i++)
2166
+ if(fabs(f.alpha[i]) > 0)
2167
+ {
2168
+ model->SV[j] = prob->x[i];
2169
+ model->sv_coef[0][j] = f.alpha[i];
2170
+ ++j;
2171
+ }
2172
+
2173
+ free(f.alpha);
2174
+ }
2175
+ else
2176
+ {
2177
+ // classification
2178
+ int l = prob->l;
2179
+ int nr_class;
2180
+ int *label = NULL;
2181
+ int *start = NULL;
2182
+ int *count = NULL;
2183
+ int *perm = Malloc(int,l);
2184
+
2185
+ // group training data of the same class
2186
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2187
+ svm_node **x = Malloc(svm_node *,l);
2188
+ int i;
2189
+ for(i=0;i<l;i++)
2190
+ x[i] = prob->x[perm[i]];
2191
+
2192
+ // calculate weighted C
2193
+
2194
+ double *weighted_C = Malloc(double, nr_class);
2195
+ for(i=0;i<nr_class;i++)
2196
+ weighted_C[i] = param->C;
2197
+ for(i=0;i<param->nr_weight;i++)
2198
+ {
2199
+ int j;
2200
+ for(j=0;j<nr_class;j++)
2201
+ if(param->weight_label[i] == label[j])
2202
+ break;
2203
+ if(j == nr_class)
2204
+ fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
2205
+ else
2206
+ weighted_C[j] *= param->weight[i];
2207
+ }
2208
+
2209
+ // train k*(k-1)/2 models
2210
+
2211
+ bool *nonzero = Malloc(bool,l);
2212
+ for(i=0;i<l;i++)
2213
+ nonzero[i] = false;
2214
+ decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2215
+
2216
+ double *probA=NULL,*probB=NULL;
2217
+ if (param->probability)
2218
+ {
2219
+ probA=Malloc(double,nr_class*(nr_class-1)/2);
2220
+ probB=Malloc(double,nr_class*(nr_class-1)/2);
2221
+ }
2222
+
2223
+ int p = 0;
2224
+ for(i=0;i<nr_class;i++)
2225
+ for(int j=i+1;j<nr_class;j++)
2226
+ {
2227
+ svm_problem sub_prob;
2228
+ int si = start[i], sj = start[j];
2229
+ int ci = count[i], cj = count[j];
2230
+ sub_prob.l = ci+cj;
2231
+ sub_prob.x = Malloc(svm_node *,sub_prob.l);
2232
+ sub_prob.y = Malloc(double,sub_prob.l);
2233
+ int k;
2234
+ for(k=0;k<ci;k++)
2235
+ {
2236
+ sub_prob.x[k] = x[si+k];
2237
+ sub_prob.y[k] = +1;
2238
+ }
2239
+ for(k=0;k<cj;k++)
2240
+ {
2241
+ sub_prob.x[ci+k] = x[sj+k];
2242
+ sub_prob.y[ci+k] = -1;
2243
+ }
2244
+
2245
+ if(param->probability)
2246
+ svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2247
+
2248
+ f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2249
+ for(k=0;k<ci;k++)
2250
+ if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2251
+ nonzero[si+k] = true;
2252
+ for(k=0;k<cj;k++)
2253
+ if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2254
+ nonzero[sj+k] = true;
2255
+ free(sub_prob.x);
2256
+ free(sub_prob.y);
2257
+ ++p;
2258
+ }
2259
+
2260
+ // build output
2261
+
2262
+ model->nr_class = nr_class;
2263
+
2264
+ model->label = Malloc(int,nr_class);
2265
+ for(i=0;i<nr_class;i++)
2266
+ model->label[i] = label[i];
2267
+
2268
+ model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2269
+ model->obj = Malloc(double,nr_class*(nr_class-1)/2);
2270
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2271
+ {
2272
+ model->rho[i] = f[i].rho;
2273
+ model->obj[i] = f[i].obj;
2274
+ }
2275
+
2276
+
2277
+ if(param->probability)
2278
+ {
2279
+ model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2280
+ model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2281
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2282
+ {
2283
+ model->probA[i] = probA[i];
2284
+ model->probB[i] = probB[i];
2285
+ }
2286
+ }
2287
+ else
2288
+ {
2289
+ model->probA=NULL;
2290
+ model->probB=NULL;
2291
+ }
2292
+
2293
+ int total_sv = 0;
2294
+ int *nz_count = Malloc(int,nr_class);
2295
+ model->nSV = Malloc(int,nr_class);
2296
+ for(i=0;i<nr_class;i++)
2297
+ {
2298
+ int nSV = 0;
2299
+ for(int j=0;j<count[i];j++)
2300
+ if(nonzero[start[i]+j])
2301
+ {
2302
+ ++nSV;
2303
+ ++total_sv;
2304
+ }
2305
+ model->nSV[i] = nSV;
2306
+ nz_count[i] = nSV;
2307
+ }
2308
+
2309
+ info("Total nSV = %d\n",total_sv);
2310
+
2311
+ model->l = total_sv;
2312
+ model->SV = Malloc(svm_node *,total_sv);
2313
+ p = 0;
2314
+ for(i=0;i<l;i++)
2315
+ if(nonzero[i]) model->SV[p++] = x[i];
2316
+
2317
+ int *nz_start = Malloc(int,nr_class);
2318
+ nz_start[0] = 0;
2319
+ for(i=1;i<nr_class;i++)
2320
+ nz_start[i] = nz_start[i-1]+nz_count[i-1];
2321
+
2322
+ model->sv_coef = Malloc(double *,nr_class-1);
2323
+ for(i=0;i<nr_class-1;i++)
2324
+ model->sv_coef[i] = Malloc(double,total_sv);
2325
+
2326
+ p = 0;
2327
+ for(i=0;i<nr_class;i++)
2328
+ for(int j=i+1;j<nr_class;j++)
2329
+ {
2330
+ // classifier (i,j): coefficients with
2331
+ // i are in sv_coef[j-1][nz_start[i]...],
2332
+ // j are in sv_coef[i][nz_start[j]...]
2333
+
2334
+ int si = start[i];
2335
+ int sj = start[j];
2336
+ int ci = count[i];
2337
+ int cj = count[j];
2338
+
2339
+ int q = nz_start[i];
2340
+ int k;
2341
+ for(k=0;k<ci;k++)
2342
+ if(nonzero[si+k])
2343
+ model->sv_coef[j-1][q++] = f[p].alpha[k];
2344
+ q = nz_start[j];
2345
+ for(k=0;k<cj;k++)
2346
+ if(nonzero[sj+k])
2347
+ model->sv_coef[i][q++] = f[p].alpha[ci+k];
2348
+ ++p;
2349
+ }
2350
+
2351
+ free(label);
2352
+ free(probA);
2353
+ free(probB);
2354
+ free(count);
2355
+ free(perm);
2356
+ free(start);
2357
+ free(x);
2358
+ free(weighted_C);
2359
+ free(nonzero);
2360
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2361
+ free(f[i].alpha);
2362
+ free(f);
2363
+ free(nz_count);
2364
+ free(nz_start);
2365
+ }
2366
+ return model;
2367
+ }
2368
+
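+ // Illustrative usage sketch (not part of LIBSVM; assumes the caller has
+ // already filled in an svm_problem `prob` and an svm_parameter `param`):
+ //   const char *err = svm_check_parameter(&prob, &param);
+ //   if(err != NULL)
+ //     fprintf(stderr, "%s\n", err);
+ //   else
+ //   {
+ //     svm_model *model = svm_train(&prob, &param);
+ //     double y = svm_predict(model, prob.x[0]);  // predict the first point
+ //     svm_destroy_model(model);                  // destroy before freeing prob.x
+ //   }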
2369
+ // Stratified cross validation
2370
+ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2371
+ {
2372
+ int i;
2373
+ int *fold_start = Malloc(int,nr_fold+1);
2374
+ int l = prob->l;
2375
+ int *perm = Malloc(int,l);
2376
+ int nr_class;
2377
+
2378
+ // stratified cv may not give leave-one-out rate
2379
+ // splitting each class into l folds could leave some folds with zero elements
2380
+ if((param->svm_type == C_SVC ||
2381
+ param->svm_type == NU_SVC) && nr_fold < l)
2382
+ {
2383
+ int *start = NULL;
2384
+ int *label = NULL;
2385
+ int *count = NULL;
2386
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2387
+
2388
+ // randomly shuffle, then group the data by fold using the array perm
2389
+ int *fold_count = Malloc(int,nr_fold);
2390
+ int c;
2391
+ int *index = Malloc(int,l);
2392
+ for(i=0;i<l;i++)
2393
+ index[i]=perm[i];
2394
+ for (c=0; c<nr_class; c++)
2395
+ for(i=0;i<count[c];i++)
2396
+ {
2397
+ int j = i+rand()%(count[c]-i);
2398
+ swap(index[start[c]+j],index[start[c]+i]);
2399
+ }
2400
+ for(i=0;i<nr_fold;i++)
2401
+ {
2402
+ fold_count[i] = 0;
2403
+ for (c=0; c<nr_class;c++)
2404
+ fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2405
+ }
2406
+ fold_start[0]=0;
2407
+ for (i=1;i<=nr_fold;i++)
2408
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2409
+ for (c=0; c<nr_class;c++)
2410
+ for(i=0;i<nr_fold;i++)
2411
+ {
2412
+ int begin = start[c]+i*count[c]/nr_fold;
2413
+ int end = start[c]+(i+1)*count[c]/nr_fold;
2414
+ for(int j=begin;j<end;j++)
2415
+ {
2416
+ perm[fold_start[i]] = index[j];
2417
+ fold_start[i]++;
2418
+ }
2419
+ }
2420
+ fold_start[0]=0;
2421
+ for (i=1;i<=nr_fold;i++)
2422
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2423
+ free(start);
2424
+ free(label);
2425
+ free(count);
2426
+ free(index);
2427
+ free(fold_count);
2428
+ }
2429
+ else
2430
+ {
2431
+ for(i=0;i<l;i++) perm[i]=i;
2432
+ for(i=0;i<l;i++)
2433
+ {
2434
+ int j = i+rand()%(l-i);
2435
+ swap(perm[i],perm[j]);
2436
+ }
2437
+ for(i=0;i<=nr_fold;i++)
2438
+ fold_start[i]=i*l/nr_fold;
2439
+ }
2440
+
2441
+ for(i=0;i<nr_fold;i++)
2442
+ {
2443
+ int begin = fold_start[i];
2444
+ int end = fold_start[i+1];
2445
+ int j,k;
2446
+ struct svm_problem subprob;
2447
+
2448
+ subprob.l = l-(end-begin);
2449
+ subprob.x = Malloc(struct svm_node*,subprob.l);
2450
+ subprob.y = Malloc(double,subprob.l);
2451
+
2452
+ k=0;
2453
+ for(j=0;j<begin;j++)
2454
+ {
2455
+ subprob.x[k] = prob->x[perm[j]];
2456
+ subprob.y[k] = prob->y[perm[j]];
2457
+ ++k;
2458
+ }
2459
+ for(j=end;j<l;j++)
2460
+ {
2461
+ subprob.x[k] = prob->x[perm[j]];
2462
+ subprob.y[k] = prob->y[perm[j]];
2463
+ ++k;
2464
+ }
2465
+ struct svm_model *submodel = svm_train(&subprob,param);
2466
+ if(param->probability &&
2467
+ (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2468
+ {
2469
+ double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2470
+ for(j=begin;j<end;j++)
2471
+ target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2472
+ free(prob_estimates);
2473
+ }
2474
+ else
2475
+ for(j=begin;j<end;j++)
2476
+ target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2477
+ svm_destroy_model(submodel);
2478
+ free(subprob.x);
2479
+ free(subprob.y);
2480
+ }
2481
+ free(fold_start);
2482
+ free(perm);
2483
+ }
2484
+
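+ // Illustrative sketch (not part of LIBSVM): `target` must hold prob->l
+ // doubles; after the call it contains the cross-validated prediction for
+ // every training point, so classification accuracy can be tallied as:
+ //   double *target = Malloc(double, prob.l);
+ //   svm_cross_validation(&prob, &param, 5, target);
+ //   int correct = 0;
+ //   for(int i = 0; i < prob.l; i++)
+ //     if(target[i] == prob.y[i]) ++correct;
+ //   free(target);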
2485
+ double svm_get_obj(const svm_model *model, const int i)
2486
+ {
2487
+ return model->obj[i];
2488
+ }
2489
+
2490
+ int svm_get_svm_type(const svm_model *model)
2491
+ {
2492
+ return model->param.svm_type;
2493
+ }
2494
+
2495
+ int svm_get_nr_class(const svm_model *model)
2496
+ {
2497
+ return model->nr_class;
2498
+ }
2499
+
2500
+ void svm_get_labels(const svm_model *model, int* label)
2501
+ {
2502
+ if (model->label != NULL)
2503
+ for(int i=0;i<model->nr_class;i++)
2504
+ label[i] = model->label[i];
2505
+ }
2506
+
2507
+ double svm_get_svr_probability(const svm_model *model)
2508
+ {
2509
+ if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2510
+ model->probA!=NULL)
2511
+ return model->probA[0];
2512
+ else
2513
+ {
2514
+ info("Model doesn't contain information for SVR probability inference\n");
2515
+ return 0;
2516
+ }
2517
+ }
2518
+
2519
+ void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2520
+ {
2521
+ if(model->param.svm_type == ONE_CLASS ||
2522
+ model->param.svm_type == EPSILON_SVR ||
2523
+ model->param.svm_type == NU_SVR)
2524
+ {
2525
+ double *sv_coef = model->sv_coef[0];
2526
+ double sum = 0;
2527
+ for(int i=0;i<model->l;i++)
2528
+ sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2529
+ sum -= model->rho[0];
2530
+ *dec_values = sum;
2531
+ }
2532
+ else
2533
+ {
2534
+ int i;
2535
+ int nr_class = model->nr_class;
2536
+ int l = model->l;
2537
+
2538
+ double *kvalue = Malloc(double,l);
2539
+ for(i=0;i<l;i++)
2540
+ kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2541
+
2542
+ int *start = Malloc(int,nr_class);
2543
+ start[0] = 0;
2544
+ for(i=1;i<nr_class;i++)
2545
+ start[i] = start[i-1]+model->nSV[i-1];
2546
+
2547
+ int p=0;
2548
+ for(i=0;i<nr_class;i++)
2549
+ for(int j=i+1;j<nr_class;j++)
2550
+ {
2551
+ double sum = 0;
2552
+ int si = start[i];
2553
+ int sj = start[j];
2554
+ int ci = model->nSV[i];
2555
+ int cj = model->nSV[j];
2556
+
2557
+ int k;
2558
+ double *coef1 = model->sv_coef[j-1];
2559
+ double *coef2 = model->sv_coef[i];
2560
+ for(k=0;k<ci;k++)
2561
+ sum += coef1[si+k] * kvalue[si+k];
2562
+ for(k=0;k<cj;k++)
2563
+ sum += coef2[sj+k] * kvalue[sj+k];
2564
+ sum -= model->rho[p];
2565
+ dec_values[p] = sum;
2566
+ p++;
2567
+ }
2568
+
2569
+ free(kvalue);
2570
+ free(start);
2571
+ }
2572
+ }
2573
+
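+ // Note on the caller's contract for svm_predict_values(): for classification
+ // models dec_values must hold nr_class*(nr_class-1)/2 doubles, one per
+ // pairwise classifier (i,j) with i < j, in the same order as model->rho;
+ // for one-class and regression models a single double is written.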
2574
+ double svm_predict(const svm_model *model, const svm_node *x)
2575
+ {
2576
+ if(model->param.svm_type == ONE_CLASS ||
2577
+ model->param.svm_type == EPSILON_SVR ||
2578
+ model->param.svm_type == NU_SVR)
2579
+ {
2580
+ double res;
2581
+ svm_predict_values(model, x, &res);
2582
+
2583
+ if(model->param.svm_type == ONE_CLASS)
2584
+ return (res>0)?1:-1;
2585
+ else
2586
+ return res;
2587
+ }
2588
+ else
2589
+ {
2590
+ int i;
2591
+ int nr_class = model->nr_class;
2592
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2593
+ svm_predict_values(model, x, dec_values);
2594
+
2595
+ int *vote = Malloc(int,nr_class);
2596
+ for(i=0;i<nr_class;i++)
2597
+ vote[i] = 0;
2598
+ int pos=0;
2599
+ for(i=0;i<nr_class;i++)
2600
+ for(int j=i+1;j<nr_class;j++)
2601
+ {
2602
+ if(dec_values[pos++] > 0)
2603
+ ++vote[i];
2604
+ else
2605
+ ++vote[j];
2606
+ }
2607
+
2608
+ int vote_max_idx = 0;
2609
+ for(i=1;i<nr_class;i++)
2610
+ if(vote[i] > vote[vote_max_idx])
2611
+ vote_max_idx = i;
2612
+ free(vote);
2613
+ free(dec_values);
2614
+ return model->label[vote_max_idx];
2615
+ }
2616
+ }
2617
+
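+ // Note: for classification svm_predict() returns the label of the class with
+ // the most pairwise votes; ties are broken in favour of the class that comes
+ // first in model->label, since the scan above keeps the first maximum it sees.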
2618
+ double svm_predict_probability(
2619
+ const svm_model *model, const svm_node *x, double *prob_estimates)
2620
+ {
2621
+ if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2622
+ model->probA!=NULL && model->probB!=NULL)
2623
+ {
2624
+ int i;
2625
+ int nr_class = model->nr_class;
2626
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2627
+ svm_predict_values(model, x, dec_values);
2628
+
2629
+ double min_prob=1e-7;
2630
+ double **pairwise_prob=Malloc(double *,nr_class);
2631
+ for(i=0;i<nr_class;i++)
2632
+ pairwise_prob[i]=Malloc(double,nr_class);
2633
+ int k=0;
2634
+ for(i=0;i<nr_class;i++)
2635
+ for(int j=i+1;j<nr_class;j++)
2636
+ {
2637
+ pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2638
+ pairwise_prob[j][i]=1-pairwise_prob[i][j];
2639
+ k++;
2640
+ }
2641
+ multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2642
+
2643
+ int prob_max_idx = 0;
2644
+ for(i=1;i<nr_class;i++)
2645
+ if(prob_estimates[i] > prob_estimates[prob_max_idx])
2646
+ prob_max_idx = i;
2647
+ for(i=0;i<nr_class;i++)
2648
+ free(pairwise_prob[i]);
2649
+ free(dec_values);
2650
+ free(pairwise_prob);
2651
+ return model->label[prob_max_idx];
2652
+ }
2653
+ else
2654
+ return svm_predict(model, x);
2655
+ }
2656
+
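+ // Note: svm_predict_probability() expects prob_estimates to hold nr_class
+ // doubles, ordered to match model->label (see svm_get_labels()). Probability
+ // estimates are only produced when the model was trained with
+ // param.probability set and therefore carries probA/probB; otherwise the
+ // call silently falls back to svm_predict().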
2657
+ const char *svm_type_table[] =
2658
+ {
2659
+ "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2660
+ };
2661
+
2662
+ const char *kernel_type_table[]=
2663
+ {
2664
+ "linear","polynomial","rbf","sigmoid","precomputed",NULL
2665
+ };
2666
+
2667
+ int svm_save_model(const char *model_file_name, const svm_model *model)
2668
+ {
2669
+ FILE *fp = fopen(model_file_name,"w");
2670
+ if(fp==NULL) return -1;
2671
+
2672
+ const svm_parameter& param = model->param;
2673
+
2674
+ fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2675
+ fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2676
+
2677
+ if(param.kernel_type == POLY)
2678
+ fprintf(fp,"degree %d\n", param.degree);
2679
+
2680
+ if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2681
+ fprintf(fp,"gamma %g\n", param.gamma);
2682
+
2683
+ if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2684
+ fprintf(fp,"coef0 %g\n", param.coef0);
2685
+
2686
+ int nr_class = model->nr_class;
2687
+ int l = model->l;
2688
+ fprintf(fp, "nr_class %d\n", nr_class);
2689
+ fprintf(fp, "total_sv %d\n",l);
2690
+
2691
+ {
2692
+ fprintf(fp, "rho");
2693
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2694
+ fprintf(fp," %g",model->rho[i]);
2695
+ fprintf(fp, "\n");
2696
+ }
2697
+
2698
+ if(model->label)
2699
+ {
2700
+ fprintf(fp, "label");
2701
+ for(int i=0;i<nr_class;i++)
2702
+ fprintf(fp," %d",model->label[i]);
2703
+ fprintf(fp, "\n");
2704
+ }
2705
+
2706
+ if(model->probA) // regression has probA only
2707
+ {
2708
+ fprintf(fp, "probA");
2709
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2710
+ fprintf(fp," %g",model->probA[i]);
2711
+ fprintf(fp, "\n");
2712
+ }
2713
+ if(model->probB)
2714
+ {
2715
+ fprintf(fp, "probB");
2716
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2717
+ fprintf(fp," %g",model->probB[i]);
2718
+ fprintf(fp, "\n");
2719
+ }
2720
+
2721
+ if(model->nSV)
2722
+ {
2723
+ fprintf(fp, "nr_sv");
2724
+ for(int i=0;i<nr_class;i++)
2725
+ fprintf(fp," %d",model->nSV[i]);
2726
+ fprintf(fp, "\n");
2727
+ }
2728
+
2729
+ fprintf(fp, "SV\n");
2730
+ const double * const *sv_coef = model->sv_coef;
2731
+ const svm_node * const *SV = model->SV;
2732
+
2733
+ for(int i=0;i<l;i++)
2734
+ {
2735
+ for(int j=0;j<nr_class-1;j++)
2736
+ fprintf(fp, "%.16g ",sv_coef[j][i]);
2737
+
2738
+ const svm_node *p = SV[i];
2739
+
2740
+ if(param.kernel_type == PRECOMPUTED)
2741
+ fprintf(fp,"0:%d ",(int)(p->value));
2742
+ else
2743
+ while(p->index != -1)
2744
+ {
2745
+ fprintf(fp,"%d:%.8g ",p->index,p->value);
2746
+ p++;
2747
+ }
2748
+ fprintf(fp, "\n");
2749
+ }
2750
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2751
+ else return 0;
2752
+ }
2753
+
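+ // Illustrative round trip (not part of LIBSVM; "model.txt" is a placeholder
+ // path): svm_save_model() returns 0 on success and -1 on I/O failure, and
+ // svm_load_model() returns NULL on failure.
+ //   if(svm_save_model("model.txt", model) != 0)
+ //     fprintf(stderr, "cannot save model\n");
+ //   svm_model *loaded = svm_load_model("model.txt");
+ //   if(loaded == NULL)
+ //     fprintf(stderr, "cannot load model\n");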
2754
+ svm_model *svm_load_model(const char *model_file_name)
2755
+ {
2756
+ FILE *fp = fopen(model_file_name,"rb");
2757
+ if(fp==NULL) return NULL;
2758
+
2759
+ // read parameters
2760
+
2761
+ svm_model *model = Malloc(svm_model,1);
2762
+ svm_parameter& param = model->param;
2763
+ model->rho = NULL;
2764
+ model->probA = NULL;
2765
+ model->probB = NULL;
2766
+ model->label = NULL;
2767
+ model->nSV = NULL;
2768
+
2769
+ model->obj = NULL;
2770
+
2771
+ char cmd[81];
2772
+ while(1)
2773
+ {
2774
+ fscanf(fp,"%80s",cmd);
2775
+
2776
+ if(strcmp(cmd,"svm_type")==0)
2777
+ {
2778
+ fscanf(fp,"%80s",cmd);
2779
+ int i;
2780
+ for(i=0;svm_type_table[i];i++)
2781
+ {
2782
+ if(strcmp(svm_type_table[i],cmd)==0)
2783
+ {
2784
+ param.svm_type=i;
2785
+ break;
2786
+ }
2787
+ }
2788
+ if(svm_type_table[i] == NULL)
2789
+ {
2790
+ fprintf(stderr,"unknown svm type.\n");
2791
+ free(model->rho);
2792
+ free(model->obj);
2793
+ free(model->label);
2794
+ free(model->nSV);
2795
+ free(model);
2796
+ return NULL;
2797
+ }
2798
+ }
2799
+ else if(strcmp(cmd,"kernel_type")==0)
2800
+ {
2801
+ fscanf(fp,"%80s",cmd);
2802
+ int i;
2803
+ for(i=0;kernel_type_table[i];i++)
2804
+ {
2805
+ if(strcmp(kernel_type_table[i],cmd)==0)
2806
+ {
2807
+ param.kernel_type=i;
2808
+ break;
2809
+ }
2810
+ }
2811
+ if(kernel_type_table[i] == NULL)
2812
+ {
2813
+ fprintf(stderr,"unknown kernel function.\n");
2814
+ free(model->rho);
2815
+ free(model->obj);
2816
+ free(model->label);
2817
+ free(model->nSV);
2818
+ free(model);
2819
+ return NULL;
2820
+ }
2821
+ }
2822
+ else if(strcmp(cmd,"degree")==0)
2823
+ fscanf(fp,"%d",&param.degree);
2824
+ else if(strcmp(cmd,"gamma")==0)
2825
+ fscanf(fp,"%lf",&param.gamma);
2826
+ else if(strcmp(cmd,"coef0")==0)
2827
+ fscanf(fp,"%lf",&param.coef0);
2828
+ else if(strcmp(cmd,"nr_class")==0)
2829
+ fscanf(fp,"%d",&model->nr_class);
2830
+ else if(strcmp(cmd,"total_sv")==0)
2831
+ fscanf(fp,"%d",&model->l);
2832
+ else if(strcmp(cmd,"rho")==0)
2833
+ {
2834
+ int n = model->nr_class * (model->nr_class-1)/2;
2835
+ model->rho = Malloc(double,n);
2836
+ for(int i=0;i<n;i++)
2837
+ fscanf(fp,"%lf",&model->rho[i]);
2838
+ }
2839
+ else if(strcmp(cmd,"label")==0)
2840
+ {
2841
+ int n = model->nr_class;
2842
+ model->label = Malloc(int,n);
2843
+ for(int i=0;i<n;i++)
2844
+ fscanf(fp,"%d",&model->label[i]);
2845
+ }
2846
+ else if(strcmp(cmd,"probA")==0)
2847
+ {
2848
+ int n = model->nr_class * (model->nr_class-1)/2;
2849
+ model->probA = Malloc(double,n);
2850
+ for(int i=0;i<n;i++)
2851
+ fscanf(fp,"%lf",&model->probA[i]);
2852
+ }
2853
+ else if(strcmp(cmd,"probB")==0)
2854
+ {
2855
+ int n = model->nr_class * (model->nr_class-1)/2;
2856
+ model->probB = Malloc(double,n);
2857
+ for(int i=0;i<n;i++)
2858
+ fscanf(fp,"%lf",&model->probB[i]);
2859
+ }
2860
+ else if(strcmp(cmd,"nr_sv")==0)
2861
+ {
2862
+ int n = model->nr_class;
2863
+ model->nSV = Malloc(int,n);
2864
+ for(int i=0;i<n;i++)
2865
+ fscanf(fp,"%d",&model->nSV[i]);
2866
+ }
2867
+ else if(strcmp(cmd,"SV")==0)
2868
+ {
2869
+ while(1)
2870
+ {
2871
+ int c = getc(fp);
2872
+ if(c==EOF || c=='\n') break;
2873
+ }
2874
+ break;
2875
+ }
2876
+ else
2877
+ {
2878
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2879
+ free(model->rho);
2880
+ free(model->obj);
2881
+ free(model->label);
2882
+ free(model->nSV);
2883
+ free(model);
2884
+ return NULL;
2885
+ }
2886
+ }
2887
+
2888
+ // read sv_coef and SV
2889
+
2890
+ int elements = 0;
2891
+ long pos = ftell(fp);
2892
+
2893
+ while(1)
2894
+ {
2895
+ int c = fgetc(fp);
2896
+ switch(c)
2897
+ {
2898
+ case '\n':
2899
+ // count the '-1' element
2900
+ case ':':
2901
+ ++elements;
2902
+ break;
2903
+ case EOF:
2904
+ goto out;
2905
+ default:
2906
+ ;
2907
+ }
2908
+ }
2909
+ out:
2910
+ fseek(fp,pos,SEEK_SET);
2911
+
2912
+ int m = model->nr_class - 1;
2913
+ int l = model->l;
2914
+ model->sv_coef = Malloc(double *,m);
2915
+ int i;
2916
+ for(i=0;i<m;i++)
2917
+ model->sv_coef[i] = Malloc(double,l);
2918
+ model->SV = Malloc(svm_node*,l);
2919
+ svm_node *x_space=NULL;
2920
+ if(l>0) x_space = Malloc(svm_node,elements);
2921
+
2922
+ int j=0;
2923
+ for(i=0;i<l;i++)
2924
+ {
2925
+ model->SV[i] = &x_space[j];
2926
+ for(int k=0;k<m;k++)
2927
+ fscanf(fp,"%lf",&model->sv_coef[k][i]);
2928
+ while(1)
2929
+ {
2930
+ int c;
2931
+ do {
2932
+ c = getc(fp);
2933
+ if(c=='\n') goto out2;
2934
+ } while(isspace(c));
2935
+ ungetc(c,fp);
2936
+ fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
2937
+ ++j;
2938
+ }
2939
+ out2:
2940
+ x_space[j++].index = -1;
2941
+ }
2942
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2943
+
2944
+ model->free_sv = 1; // SV nodes live in x_space allocated here; svm_destroy_model will free them
2945
+ return model;
2946
+ }
2947
+
2948
+ void svm_destroy_model(svm_model* model)
2949
+ {
2950
+ if(model->free_sv && model->l > 0)
2951
+ free((void *)(model->SV[0]));
2952
+ for(int i=0;i<model->nr_class-1;i++)
2953
+ free(model->sv_coef[i]);
2954
+ free(model->SV);
2955
+ free(model->sv_coef);
2956
+ free(model->rho);
2957
+ free(model->obj);
2958
+ free(model->label);
2959
+ free(model->probA);
2960
+ free(model->probB);
2961
+ free(model->nSV);
2962
+ free(model);
2963
+ }
2964
+
2965
+ void svm_destroy_param(svm_parameter* param)
2966
+ {
2967
+ free(param->weight_label);
2968
+ free(param->weight);
2969
+ }
2970
+
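+ // Memory-ownership note: a model returned by svm_train() has free_sv == 0,
+ // so its SV entries point into the caller's prob->x and svm_destroy_model()
+ // leaves that storage alone; a model from svm_load_model() has free_sv == 1
+ // and owns its SV storage, which svm_destroy_model() frees. svm_destroy_param()
+ // frees only weight_label and weight, so those must be NULL or heap-allocated.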
2971
+ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
2972
+ {
2973
+ // svm_type
2974
+
2975
+ int svm_type = param->svm_type;
2976
+ if(svm_type != C_SVC &&
2977
+ svm_type != NU_SVC &&
2978
+ svm_type != ONE_CLASS &&
2979
+ svm_type != EPSILON_SVR &&
2980
+ svm_type != NU_SVR)
2981
+ return "unknown svm type";
2982
+
2983
+ // kernel_type, degree
2984
+
2985
+ int kernel_type = param->kernel_type;
2986
+ if(kernel_type != LINEAR &&
2987
+ kernel_type != POLY &&
2988
+ kernel_type != RBF &&
2989
+ kernel_type != SIGMOID &&
2990
+ kernel_type != PRECOMPUTED)
2991
+ return "unknown kernel type";
2992
+
2993
+ if(param->degree < 0)
2994
+ return "degree of polynomial kernel < 0";
2995
+
2996
+ // cache_size,eps,C,nu,p,shrinking
2997
+
2998
+ if(param->cache_size <= 0)
2999
+ return "cache_size <= 0";
3000
+
3001
+ if(param->eps <= 0)
3002
+ return "eps <= 0";
3003
+
3004
+ if(svm_type == C_SVC ||
3005
+ svm_type == EPSILON_SVR ||
3006
+ svm_type == NU_SVR)
3007
+ if(param->C <= 0)
3008
+ return "C <= 0";
3009
+
3010
+ if(svm_type == NU_SVC ||
3011
+ svm_type == ONE_CLASS ||
3012
+ svm_type == NU_SVR)
3013
+ if(param->nu <= 0 || param->nu > 1)
3014
+ return "nu <= 0 or nu > 1";
3015
+
3016
+ if(svm_type == EPSILON_SVR)
3017
+ if(param->p < 0)
3018
+ return "p < 0";
3019
+
3020
+ if(param->shrinking != 0 &&
3021
+ param->shrinking != 1)
3022
+ return "shrinking != 0 and shrinking != 1";
3023
+
3024
+ if(param->probability != 0 &&
3025
+ param->probability != 1)
3026
+ return "probability != 0 and probability != 1";
3027
+
3028
+ if(param->probability == 1 &&
3029
+ svm_type == ONE_CLASS)
3030
+ return "one-class SVM probability output not supported yet";
3031
+
3032
+
3033
+ // check whether nu-svc is feasible
3034
+
3035
+ if(svm_type == NU_SVC)
3036
+ {
3037
+ int l = prob->l;
3038
+ int max_nr_class = 16;
3039
+ int nr_class = 0;
3040
+ int *label = Malloc(int,max_nr_class);
3041
+ int *count = Malloc(int,max_nr_class);
3042
+
3043
+ int i;
3044
+ for(i=0;i<l;i++)
3045
+ {
3046
+ int this_label = (int)prob->y[i];
3047
+ int j;
3048
+ for(j=0;j<nr_class;j++)
3049
+ if(this_label == label[j])
3050
+ {
3051
+ ++count[j];
3052
+ break;
3053
+ }
3054
+ if(j == nr_class)
3055
+ {
3056
+ if(nr_class == max_nr_class)
3057
+ {
3058
+ max_nr_class *= 2;
3059
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
3060
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
3061
+ }
3062
+ label[nr_class] = this_label;
3063
+ count[nr_class] = 1;
3064
+ ++nr_class;
3065
+ }
3066
+ }
3067
+
3068
+ for(i=0;i<nr_class;i++)
3069
+ {
3070
+ int n1 = count[i];
3071
+ for(int j=i+1;j<nr_class;j++)
3072
+ {
3073
+ int n2 = count[j];
3074
+ if(param->nu*(n1+n2)/2 > min(n1,n2))
3075
+ {
3076
+ free(label);
3077
+ free(count);
3078
+ return "specified nu is infeasible";
3079
+ }
3080
+ }
3081
+ }
3082
+ free(label);
3083
+ free(count);
3084
+ }
3085
+
3086
+ return NULL;
3087
+ }
3088
+
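+ // svm_check_parameter() returns NULL when the problem/parameter pair is
+ // usable, or a short static error string otherwise; calling it before
+ // svm_train() avoids training with an infeasible nu or an unknown
+ // svm/kernel type.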
3089
+ int svm_check_probability_model(const svm_model *model)
3090
+ {
3091
+ return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3092
+ model->probA!=NULL && model->probB!=NULL) ||
3093
+ ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3094
+ model->probA!=NULL);
3095
+ }
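+ // svm_check_probability_model() returns nonzero exactly when
+ // svm_predict_probability() (for C/nu-SVC models) or
+ // svm_get_svr_probability() (for epsilon/nu-SVR models) can return
+ // meaningful probability information for this model.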