rsvm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+
3
+ create_makefile('libsvm/libsvm')
@@ -0,0 +1,3213 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <ctype.h>
5
+ #include <float.h>
6
+ #include <string.h>
7
+ #include <stdarg.h>
8
+ #include <limits.h>
9
+ #include <locale.h>
10
+ #include "svm.h"
11
+ int libsvm_version = LIBSVM_VERSION;
12
+ typedef float Qfloat;
13
+ typedef signed char schar;
14
+ #ifndef min
15
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
16
+ #endif
17
+ #ifndef max
18
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
19
+ #endif
20
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
21
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
22
+ {
23
+ dst = new T[n];
24
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
25
+ }
26
+ static inline double powi(double base, int times)
27
+ {
28
+ double tmp = base, ret = 1.0;
29
+
30
+ for(int t=times; t>0; t/=2)
31
+ {
32
+ if(t%2==1) ret*=tmp;
33
+ tmp = tmp * tmp;
34
+ }
35
+ return ret;
36
+ }
37
+ #define INF HUGE_VAL
38
+ #define TAU 1e-12
39
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
40
+
41
+ static void print_string_stdout(const char *s)
42
+ {
43
+ fputs(s,stdout);
44
+ fflush(stdout);
45
+ }
46
+ static void (*svm_print_string) (const char *) = &print_string_stdout;
47
+ #if 1
48
+ static void info(const char *fmt,...)
49
+ {
50
+ char buf[BUFSIZ];
51
+ va_list ap;
52
+ va_start(ap,fmt);
53
+ vsprintf(buf,fmt,ap);
54
+ va_end(ap);
55
+ (*svm_print_string)(buf);
56
+ }
57
+ #else
58
+ static void info(const char *fmt,...) {}
59
+ #endif
60
+
61
+ //
62
+ // Kernel Cache
63
+ //
64
+ // l is the number of total data items
65
+ // size is the cache size limit in bytes
66
+ //
67
+ class Cache
68
+ {
69
+ public:
70
+ Cache(int l,long int size);
71
+ ~Cache();
72
+
73
+ // request data [0,len)
74
+ // return some position p where [p,len) need to be filled
75
+ // (p >= len if nothing needs to be filled)
76
+ int get_data(const int index, Qfloat **data, int len);
77
+ void swap_index(int i, int j);
78
+ private:
79
+ int l;
80
+ long int size;
81
+ struct head_t
82
+ {
83
+ head_t *prev, *next; // a circular list
84
+ Qfloat *data;
85
+ int len; // data[0,len) is cached in this entry
86
+ };
87
+
88
+ head_t *head;
89
+ head_t lru_head;
90
+ void lru_delete(head_t *h);
91
+ void lru_insert(head_t *h);
92
+ };
93
+
94
+ Cache::Cache(int l_,long int size_):l(l_),size(size_)
95
+ {
96
+ head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
97
+ size /= sizeof(Qfloat);
98
+ size -= l * sizeof(head_t) / sizeof(Qfloat);
99
+ size = max(size, 2 * (long int) l); // cache must be large enough for two columns
100
+ lru_head.next = lru_head.prev = &lru_head;
101
+ }
102
+
103
+ Cache::~Cache()
104
+ {
105
+ for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
106
+ free(h->data);
107
+ free(head);
108
+ }
109
+
110
+ void Cache::lru_delete(head_t *h)
111
+ {
112
+ // delete from current location
113
+ h->prev->next = h->next;
114
+ h->next->prev = h->prev;
115
+ }
116
+
117
+ void Cache::lru_insert(head_t *h)
118
+ {
119
+ // insert to last position
120
+ h->next = &lru_head;
121
+ h->prev = lru_head.prev;
122
+ h->prev->next = h;
123
+ h->next->prev = h;
124
+ }
125
+
126
+ int Cache::get_data(const int index, Qfloat **data, int len)
127
+ {
128
+ head_t *h = &head[index];
129
+ if(h->len) lru_delete(h);
130
+ int more = len - h->len;
131
+
132
+ if(more > 0)
133
+ {
134
+ // free old space
135
+ while(size < more)
136
+ {
137
+ head_t *old = lru_head.next;
138
+ lru_delete(old);
139
+ free(old->data);
140
+ size += old->len;
141
+ old->data = 0;
142
+ old->len = 0;
143
+ }
144
+
145
+ // allocate new space
146
+ h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
147
+ size -= more;
148
+ swap(h->len,len);
149
+ }
150
+
151
+ lru_insert(h);
152
+ *data = h->data;
153
+ return len;
154
+ }
155
+
156
+ void Cache::swap_index(int i, int j)
157
+ {
158
+ if(i==j) return;
159
+
160
+ if(head[i].len) lru_delete(&head[i]);
161
+ if(head[j].len) lru_delete(&head[j]);
162
+ swap(head[i].data,head[j].data);
163
+ swap(head[i].len,head[j].len);
164
+ if(head[i].len) lru_insert(&head[i]);
165
+ if(head[j].len) lru_insert(&head[j]);
166
+
167
+ if(i>j) swap(i,j);
168
+ for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
169
+ {
170
+ if(h->len > i)
171
+ {
172
+ if(h->len > j)
173
+ swap(h->data[i],h->data[j]);
174
+ else
175
+ {
176
+ // give up
177
+ lru_delete(h);
178
+ free(h->data);
179
+ size += h->len;
180
+ h->data = 0;
181
+ h->len = 0;
182
+ }
183
+ }
184
+ }
185
+ }
186
+
187
+ //
188
+ // Kernel evaluation
189
+ //
190
+ // the static method k_function is for doing single kernel evaluation
191
+ // the constructor of Kernel prepares to calculate the l*l kernel matrix
192
+ // the member function get_Q is for getting one column from the Q Matrix
193
+ //
194
+ class QMatrix {
195
+ public:
196
+ virtual Qfloat *get_Q(int column, int len) const = 0;
197
+ virtual double *get_QD() const = 0;
198
+ virtual void swap_index(int i, int j) const = 0;
199
+ virtual ~QMatrix() {}
200
+ };
201
+
202
+ class Kernel: public QMatrix {
203
+ public:
204
+ Kernel(int l, svm_node * const * x, const svm_parameter& param);
205
+ virtual ~Kernel();
206
+
207
+ static double k_function(const svm_node *x, const svm_node *y,
208
+ const svm_parameter& param);
209
+ virtual Qfloat *get_Q(int column, int len) const = 0;
210
+ virtual double *get_QD() const = 0;
211
+ virtual void swap_index(int i, int j) const // no so const...
212
+ {
213
+ swap(x[i],x[j]);
214
+ if(x_square) swap(x_square[i],x_square[j]);
215
+ }
216
+ protected:
217
+
218
+ double (Kernel::*kernel_function)(int i, int j) const;
219
+
220
+ private:
221
+ const svm_node **x;
222
+ double *x_square;
223
+
224
+ // svm_parameter
225
+ const int kernel_type;
226
+ const int degree;
227
+ const double gamma;
228
+ const double coef0;
229
+
230
+ static double dot(const svm_node *px, const svm_node *py);
231
+ double kernel_linear(int i, int j) const
232
+ {
233
+ return dot(x[i],x[j]);
234
+ }
235
+ double kernel_poly(int i, int j) const
236
+ {
237
+ return powi(gamma*dot(x[i],x[j])+coef0,degree);
238
+ }
239
+ double kernel_rbf(int i, int j) const
240
+ {
241
+ return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
242
+ }
243
+ double kernel_sigmoid(int i, int j) const
244
+ {
245
+ return tanh(gamma*dot(x[i],x[j])+coef0);
246
+ }
247
+ double kernel_precomputed(int i, int j) const
248
+ {
249
+ return x[i][(int)(x[j][0].value)].value;
250
+ }
251
+ };
252
+
253
+ Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
254
+ :kernel_type(param.kernel_type), degree(param.degree),
255
+ gamma(param.gamma), coef0(param.coef0)
256
+ {
257
+ switch(kernel_type)
258
+ {
259
+ case LINEAR:
260
+ kernel_function = &Kernel::kernel_linear;
261
+ break;
262
+ case POLY:
263
+ kernel_function = &Kernel::kernel_poly;
264
+ break;
265
+ case RBF:
266
+ kernel_function = &Kernel::kernel_rbf;
267
+ break;
268
+ case SIGMOID:
269
+ kernel_function = &Kernel::kernel_sigmoid;
270
+ break;
271
+ case PRECOMPUTED:
272
+ kernel_function = &Kernel::kernel_precomputed;
273
+ break;
274
+ }
275
+
276
+ clone(x,x_,l);
277
+
278
+ if(kernel_type == RBF)
279
+ {
280
+ x_square = new double[l];
281
+ for(int i=0;i<l;i++)
282
+ x_square[i] = dot(x[i],x[i]);
283
+ }
284
+ else
285
+ x_square = 0;
286
+ }
287
+
288
+ Kernel::~Kernel()
289
+ {
290
+ delete[] x;
291
+ delete[] x_square;
292
+ }
293
+
294
+ double Kernel::dot(const svm_node *px, const svm_node *py)
295
+ {
296
+ double sum = 0;
297
+ while(px->index != -1 && py->index != -1)
298
+ {
299
+ if(px->index == py->index)
300
+ {
301
+ sum += px->value * py->value;
302
+ ++px;
303
+ ++py;
304
+ }
305
+ else
306
+ {
307
+ if(px->index > py->index)
308
+ ++py;
309
+ else
310
+ ++px;
311
+ }
312
+ }
313
+ return sum;
314
+ }
315
+
316
+ double Kernel::k_function(const svm_node *x, const svm_node *y,
317
+ const svm_parameter& param)
318
+ {
319
+ switch(param.kernel_type)
320
+ {
321
+ case LINEAR:
322
+ return dot(x,y);
323
+ case POLY:
324
+ return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
325
+ case RBF:
326
+ {
327
+ double sum = 0;
328
+ while(x->index != -1 && y->index !=-1)
329
+ {
330
+ if(x->index == y->index)
331
+ {
332
+ double d = x->value - y->value;
333
+ sum += d*d;
334
+ ++x;
335
+ ++y;
336
+ }
337
+ else
338
+ {
339
+ if(x->index > y->index)
340
+ {
341
+ sum += y->value * y->value;
342
+ ++y;
343
+ }
344
+ else
345
+ {
346
+ sum += x->value * x->value;
347
+ ++x;
348
+ }
349
+ }
350
+ }
351
+
352
+ while(x->index != -1)
353
+ {
354
+ sum += x->value * x->value;
355
+ ++x;
356
+ }
357
+
358
+ while(y->index != -1)
359
+ {
360
+ sum += y->value * y->value;
361
+ ++y;
362
+ }
363
+
364
+ return exp(-param.gamma*sum);
365
+ }
366
+ case SIGMOID:
367
+ return tanh(param.gamma*dot(x,y)+param.coef0);
368
+ case PRECOMPUTED: //x: test (validation), y: SV
369
+ return x[(int)(y->value)].value;
370
+ default:
371
+ return 0; // Unreachable
372
+ }
373
+ }
374
+
375
+ // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
376
+ // Solves:
377
+ //
378
+ // min 0.5(\alpha^T Q \alpha) + p^T \alpha
379
+ //
380
+ // y^T \alpha = \delta
381
+ // y_i = +1 or -1
382
+ // 0 <= alpha_i <= Cp for y_i = 1
383
+ // 0 <= alpha_i <= Cn for y_i = -1
384
+ //
385
+ // Given:
386
+ //
387
+ // Q, p, y, Cp, Cn, and an initial feasible point \alpha
388
+ // l is the size of vectors and matrices
389
+ // eps is the stopping tolerance
390
+ //
391
+ // solution will be put in \alpha, objective value will be put in obj
392
+ //
393
+ class Solver {
394
+ public:
395
+ Solver() {};
396
+ virtual ~Solver() {};
397
+
398
+ struct SolutionInfo {
399
+ double obj;
400
+ double rho;
401
+ double *upper_bound;
402
+ double r; // for Solver_NU
403
+ };
404
+
405
+ void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
406
+ double *alpha_, const double* C_, double eps,
407
+ SolutionInfo* si, int shrinking);
408
+ protected:
409
+ int active_size;
410
+ schar *y;
411
+ double *G; // gradient of objective function
412
+ enum { LOWER_BOUND, UPPER_BOUND, FREE };
413
+ char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
414
+ double *alpha;
415
+ const QMatrix *Q;
416
+ const double *QD;
417
+ double eps;
418
+ double Cp,Cn;
419
+ double *C;
420
+ double *p;
421
+ int *active_set;
422
+ double *G_bar; // gradient, if we treat free variables as 0
423
+ int l;
424
+ bool unshrink; // XXX
425
+
426
+ double get_C(int i)
427
+ {
428
+ return C[i];
429
+ }
430
+ void update_alpha_status(int i)
431
+ {
432
+ if(alpha[i] >= get_C(i))
433
+ alpha_status[i] = UPPER_BOUND;
434
+ else if(alpha[i] <= 0)
435
+ alpha_status[i] = LOWER_BOUND;
436
+ else alpha_status[i] = FREE;
437
+ }
438
+ bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
439
+ bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
440
+ bool is_free(int i) { return alpha_status[i] == FREE; }
441
+ void swap_index(int i, int j);
442
+ void reconstruct_gradient();
443
+ virtual int select_working_set(int &i, int &j);
444
+ virtual double calculate_rho();
445
+ virtual void do_shrinking();
446
+ private:
447
+ bool be_shrunk(int i, double Gmax1, double Gmax2);
448
+ };
449
+
450
+ void Solver::swap_index(int i, int j)
451
+ {
452
+ Q->swap_index(i,j);
453
+ swap(y[i],y[j]);
454
+ swap(G[i],G[j]);
455
+ swap(alpha_status[i],alpha_status[j]);
456
+ swap(alpha[i],alpha[j]);
457
+ swap(p[i],p[j]);
458
+ swap(active_set[i],active_set[j]);
459
+ swap(G_bar[i],G_bar[j]);
460
+ swap(C[i],C[j]);
461
+ }
462
+
463
+ void Solver::reconstruct_gradient()
464
+ {
465
+ // reconstruct inactive elements of G from G_bar and free variables
466
+
467
+ if(active_size == l) return;
468
+
469
+ int i,j;
470
+ int nr_free = 0;
471
+
472
+ for(j=active_size;j<l;j++)
473
+ G[j] = G_bar[j] + p[j];
474
+
475
+ for(j=0;j<active_size;j++)
476
+ if(is_free(j))
477
+ nr_free++;
478
+
479
+ if(2*nr_free < active_size)
480
+ info("\nWARNING: using -h 0 may be faster\n");
481
+
482
+ if (nr_free*l > 2*active_size*(l-active_size))
483
+ {
484
+ for(i=active_size;i<l;i++)
485
+ {
486
+ const Qfloat *Q_i = Q->get_Q(i,active_size);
487
+ for(j=0;j<active_size;j++)
488
+ if(is_free(j))
489
+ G[i] += alpha[j] * Q_i[j];
490
+ }
491
+ }
492
+ else
493
+ {
494
+ for(i=0;i<active_size;i++)
495
+ if(is_free(i))
496
+ {
497
+ const Qfloat *Q_i = Q->get_Q(i,l);
498
+ double alpha_i = alpha[i];
499
+ for(j=active_size;j<l;j++)
500
+ G[j] += alpha_i * Q_i[j];
501
+ }
502
+ }
503
+ }
504
+
505
+ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
506
+ double *alpha_, const double* C_, double eps,
507
+ SolutionInfo* si, int shrinking)
508
+ {
509
+ this->l = l;
510
+ this->Q = &Q;
511
+ QD=Q.get_QD();
512
+ clone(p, p_,l);
513
+ clone(y, y_,l);
514
+ clone(alpha,alpha_,l);
515
+ clone(C,C_,l);
516
+ this->eps = eps;
517
+ unshrink = false;
518
+
519
+ // initialize alpha_status
520
+ {
521
+ alpha_status = new char[l];
522
+ for(int i=0;i<l;i++)
523
+ update_alpha_status(i);
524
+ }
525
+
526
+ // initialize active set (for shrinking)
527
+ {
528
+ active_set = new int[l];
529
+ for(int i=0;i<l;i++)
530
+ active_set[i] = i;
531
+ active_size = l;
532
+ }
533
+
534
+ // initialize gradient
535
+ {
536
+ G = new double[l];
537
+ G_bar = new double[l];
538
+ int i;
539
+ for(i=0;i<l;i++)
540
+ {
541
+ G[i] = p[i];
542
+ G_bar[i] = 0;
543
+ }
544
+ for(i=0;i<l;i++)
545
+ if(!is_lower_bound(i))
546
+ {
547
+ const Qfloat *Q_i = Q.get_Q(i,l);
548
+ double alpha_i = alpha[i];
549
+ int j;
550
+ for(j=0;j<l;j++)
551
+ G[j] += alpha_i*Q_i[j];
552
+ if(is_upper_bound(i))
553
+ for(j=0;j<l;j++)
554
+ G_bar[j] += get_C(i) * Q_i[j];
555
+ }
556
+ }
557
+
558
+ // optimization step
559
+
560
+ int iter = 0;
561
+ int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
562
+ int counter = min(l,1000)+1;
563
+
564
+ while(iter < max_iter)
565
+ {
566
+ // show progress and do shrinking
567
+
568
+ if(--counter == 0)
569
+ {
570
+ counter = min(l,1000);
571
+ if(shrinking) do_shrinking();
572
+ info(".");
573
+ }
574
+
575
+ int i,j;
576
+ if(select_working_set(i,j)!=0)
577
+ {
578
+ // reconstruct the whole gradient
579
+ reconstruct_gradient();
580
+ // reset active set size and check
581
+ active_size = l;
582
+ info("*");
583
+ if(select_working_set(i,j)!=0)
584
+ break;
585
+ else
586
+ counter = 1; // do shrinking next iteration
587
+ }
588
+
589
+ ++iter;
590
+
591
+ // update alpha[i] and alpha[j], handle bounds carefully
592
+
593
+ const Qfloat *Q_i = Q.get_Q(i,active_size);
594
+ const Qfloat *Q_j = Q.get_Q(j,active_size);
595
+
596
+ double C_i = get_C(i);
597
+ double C_j = get_C(j);
598
+
599
+ double old_alpha_i = alpha[i];
600
+ double old_alpha_j = alpha[j];
601
+
602
+ if(y[i]!=y[j])
603
+ {
604
+ double quad_coef = QD[i]+QD[j]+2*Q_i[j];
605
+ if (quad_coef <= 0)
606
+ quad_coef = TAU;
607
+ double delta = (-G[i]-G[j])/quad_coef;
608
+ double diff = alpha[i] - alpha[j];
609
+ alpha[i] += delta;
610
+ alpha[j] += delta;
611
+
612
+ if(diff > 0)
613
+ {
614
+ if(alpha[j] < 0)
615
+ {
616
+ alpha[j] = 0;
617
+ alpha[i] = diff;
618
+ }
619
+ }
620
+ else
621
+ {
622
+ if(alpha[i] < 0)
623
+ {
624
+ alpha[i] = 0;
625
+ alpha[j] = -diff;
626
+ }
627
+ }
628
+ if(diff > C_i - C_j)
629
+ {
630
+ if(alpha[i] > C_i)
631
+ {
632
+ alpha[i] = C_i;
633
+ alpha[j] = C_i - diff;
634
+ }
635
+ }
636
+ else
637
+ {
638
+ if(alpha[j] > C_j)
639
+ {
640
+ alpha[j] = C_j;
641
+ alpha[i] = C_j + diff;
642
+ }
643
+ }
644
+ }
645
+ else
646
+ {
647
+ double quad_coef = QD[i]+QD[j]-2*Q_i[j];
648
+ if (quad_coef <= 0)
649
+ quad_coef = TAU;
650
+ double delta = (G[i]-G[j])/quad_coef;
651
+ double sum = alpha[i] + alpha[j];
652
+ alpha[i] -= delta;
653
+ alpha[j] += delta;
654
+
655
+ if(sum > C_i)
656
+ {
657
+ if(alpha[i] > C_i)
658
+ {
659
+ alpha[i] = C_i;
660
+ alpha[j] = sum - C_i;
661
+ }
662
+ }
663
+ else
664
+ {
665
+ if(alpha[j] < 0)
666
+ {
667
+ alpha[j] = 0;
668
+ alpha[i] = sum;
669
+ }
670
+ }
671
+ if(sum > C_j)
672
+ {
673
+ if(alpha[j] > C_j)
674
+ {
675
+ alpha[j] = C_j;
676
+ alpha[i] = sum - C_j;
677
+ }
678
+ }
679
+ else
680
+ {
681
+ if(alpha[i] < 0)
682
+ {
683
+ alpha[i] = 0;
684
+ alpha[j] = sum;
685
+ }
686
+ }
687
+ }
688
+
689
+ // update G
690
+
691
+ double delta_alpha_i = alpha[i] - old_alpha_i;
692
+ double delta_alpha_j = alpha[j] - old_alpha_j;
693
+
694
+ for(int k=0;k<active_size;k++)
695
+ {
696
+ G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
697
+ }
698
+
699
+ // update alpha_status and G_bar
700
+
701
+ {
702
+ bool ui = is_upper_bound(i);
703
+ bool uj = is_upper_bound(j);
704
+ update_alpha_status(i);
705
+ update_alpha_status(j);
706
+ int k;
707
+ if(ui != is_upper_bound(i))
708
+ {
709
+ Q_i = Q.get_Q(i,l);
710
+ if(ui)
711
+ for(k=0;k<l;k++)
712
+ G_bar[k] -= C_i * Q_i[k];
713
+ else
714
+ for(k=0;k<l;k++)
715
+ G_bar[k] += C_i * Q_i[k];
716
+ }
717
+
718
+ if(uj != is_upper_bound(j))
719
+ {
720
+ Q_j = Q.get_Q(j,l);
721
+ if(uj)
722
+ for(k=0;k<l;k++)
723
+ G_bar[k] -= C_j * Q_j[k];
724
+ else
725
+ for(k=0;k<l;k++)
726
+ G_bar[k] += C_j * Q_j[k];
727
+ }
728
+ }
729
+ }
730
+
731
+ if(iter >= max_iter)
732
+ {
733
+ if(active_size < l)
734
+ {
735
+ // reconstruct the whole gradient to calculate objective value
736
+ reconstruct_gradient();
737
+ active_size = l;
738
+ info("*");
739
+ }
740
+ info("\nWARNING: reaching max number of iterations");
741
+ }
742
+
743
+ // calculate rho
744
+
745
+ si->rho = calculate_rho();
746
+
747
+ // calculate objective value
748
+ {
749
+ double v = 0;
750
+ int i;
751
+ for(i=0;i<l;i++)
752
+ v += alpha[i] * (G[i] + p[i]);
753
+
754
+ si->obj = v/2;
755
+ }
756
+
757
+ // put back the solution
758
+ {
759
+ for(int i=0;i<l;i++)
760
+ alpha_[active_set[i]] = alpha[i];
761
+ }
762
+
763
+ // juggle everything back
764
+ /*{
765
+ for(int i=0;i<l;i++)
766
+ while(active_set[i] != i)
767
+ swap_index(i,active_set[i]);
768
+ // or Q.swap_index(i,active_set[i]);
769
+ }*/
770
+
771
+ for(int i=0;i<l;i++)
772
+ si->upper_bound[i] = C[i];
773
+
774
+ info("\noptimization finished, #iter = %d\n",iter);
775
+
776
+ delete[] p;
777
+ delete[] y;
778
+ delete[] C;
779
+ delete[] alpha;
780
+ delete[] alpha_status;
781
+ delete[] active_set;
782
+ delete[] G;
783
+ delete[] G_bar;
784
+ }
785
+
786
+ // return 1 if already optimal, return 0 otherwise
787
+ int Solver::select_working_set(int &out_i, int &out_j)
788
+ {
789
+ // return i,j such that
790
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
791
+ // j: minimizes the decrease of obj value
792
+ // (if quadratic coefficeint <= 0, replace it with tau)
793
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
794
+
795
+ double Gmax = -INF;
796
+ double Gmax2 = -INF;
797
+ int Gmax_idx = -1;
798
+ int Gmin_idx = -1;
799
+ double obj_diff_min = INF;
800
+
801
+ for(int t=0;t<active_size;t++)
802
+ if(y[t]==+1)
803
+ {
804
+ if(!is_upper_bound(t))
805
+ if(-G[t] >= Gmax)
806
+ {
807
+ Gmax = -G[t];
808
+ Gmax_idx = t;
809
+ }
810
+ }
811
+ else
812
+ {
813
+ if(!is_lower_bound(t))
814
+ if(G[t] >= Gmax)
815
+ {
816
+ Gmax = G[t];
817
+ Gmax_idx = t;
818
+ }
819
+ }
820
+
821
+ int i = Gmax_idx;
822
+ const Qfloat *Q_i = NULL;
823
+ if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
824
+ Q_i = Q->get_Q(i,active_size);
825
+
826
+ for(int j=0;j<active_size;j++)
827
+ {
828
+ if(y[j]==+1)
829
+ {
830
+ if (!is_lower_bound(j))
831
+ {
832
+ double grad_diff=Gmax+G[j];
833
+ if (G[j] >= Gmax2)
834
+ Gmax2 = G[j];
835
+ if (grad_diff > 0)
836
+ {
837
+ double obj_diff;
838
+ double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
839
+ if (quad_coef > 0)
840
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
841
+ else
842
+ obj_diff = -(grad_diff*grad_diff)/TAU;
843
+
844
+ if (obj_diff <= obj_diff_min)
845
+ {
846
+ Gmin_idx=j;
847
+ obj_diff_min = obj_diff;
848
+ }
849
+ }
850
+ }
851
+ }
852
+ else
853
+ {
854
+ if (!is_upper_bound(j))
855
+ {
856
+ double grad_diff= Gmax-G[j];
857
+ if (-G[j] >= Gmax2)
858
+ Gmax2 = -G[j];
859
+ if (grad_diff > 0)
860
+ {
861
+ double obj_diff;
862
+ double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
863
+ if (quad_coef > 0)
864
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
865
+ else
866
+ obj_diff = -(grad_diff*grad_diff)/TAU;
867
+
868
+ if (obj_diff <= obj_diff_min)
869
+ {
870
+ Gmin_idx=j;
871
+ obj_diff_min = obj_diff;
872
+ }
873
+ }
874
+ }
875
+ }
876
+ }
877
+
878
+ if(Gmax+Gmax2 < eps)
879
+ return 1;
880
+
881
+ out_i = Gmax_idx;
882
+ out_j = Gmin_idx;
883
+ return 0;
884
+ }
885
+
886
+ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
887
+ {
888
+ if(is_upper_bound(i))
889
+ {
890
+ if(y[i]==+1)
891
+ return(-G[i] > Gmax1);
892
+ else
893
+ return(-G[i] > Gmax2);
894
+ }
895
+ else if(is_lower_bound(i))
896
+ {
897
+ if(y[i]==+1)
898
+ return(G[i] > Gmax2);
899
+ else
900
+ return(G[i] > Gmax1);
901
+ }
902
+ else
903
+ return(false);
904
+ }
905
+
906
+ void Solver::do_shrinking()
907
+ {
908
+ int i;
909
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
910
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
911
+
912
+ // find maximal violating pair first
913
+ for(i=0;i<active_size;i++)
914
+ {
915
+ if(y[i]==+1)
916
+ {
917
+ if(!is_upper_bound(i))
918
+ {
919
+ if(-G[i] >= Gmax1)
920
+ Gmax1 = -G[i];
921
+ }
922
+ if(!is_lower_bound(i))
923
+ {
924
+ if(G[i] >= Gmax2)
925
+ Gmax2 = G[i];
926
+ }
927
+ }
928
+ else
929
+ {
930
+ if(!is_upper_bound(i))
931
+ {
932
+ if(-G[i] >= Gmax2)
933
+ Gmax2 = -G[i];
934
+ }
935
+ if(!is_lower_bound(i))
936
+ {
937
+ if(G[i] >= Gmax1)
938
+ Gmax1 = G[i];
939
+ }
940
+ }
941
+ }
942
+
943
+ if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
944
+ {
945
+ unshrink = true;
946
+ reconstruct_gradient();
947
+ active_size = l;
948
+ info("*");
949
+ }
950
+
951
+ for(i=0;i<active_size;i++)
952
+ if (be_shrunk(i, Gmax1, Gmax2))
953
+ {
954
+ active_size--;
955
+ while (active_size > i)
956
+ {
957
+ if (!be_shrunk(active_size, Gmax1, Gmax2))
958
+ {
959
+ swap_index(i,active_size);
960
+ break;
961
+ }
962
+ active_size--;
963
+ }
964
+ }
965
+ }
966
+
967
+ double Solver::calculate_rho()
968
+ {
969
+ double r;
970
+ int nr_free = 0;
971
+ double ub = INF, lb = -INF, sum_free = 0;
972
+ for(int i=0;i<active_size;i++)
973
+ {
974
+ double yG = y[i]*G[i];
975
+
976
+ if(is_upper_bound(i))
977
+ {
978
+ if(y[i]==-1)
979
+ ub = min(ub,yG);
980
+ else
981
+ lb = max(lb,yG);
982
+ }
983
+ else if(is_lower_bound(i))
984
+ {
985
+ if(y[i]==+1)
986
+ ub = min(ub,yG);
987
+ else
988
+ lb = max(lb,yG);
989
+ }
990
+ else
991
+ {
992
+ ++nr_free;
993
+ sum_free += yG;
994
+ }
995
+ }
996
+
997
+ if(nr_free>0)
998
+ r = sum_free/nr_free;
999
+ else
1000
+ r = (ub+lb)/2;
1001
+
1002
+ return r;
1003
+ }
1004
+
1005
+ //
1006
+ // Solver for nu-svm classification and regression
1007
+ //
1008
+ // additional constraint: e^T \alpha = constant
1009
+ //
1010
+ class Solver_NU : public Solver
1011
+ {
1012
+ public:
1013
+ Solver_NU() {}
1014
+ void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1015
+ double *alpha, double* C_, double eps,
1016
+ SolutionInfo* si, int shrinking)
1017
+ {
1018
+ this->si = si;
1019
+ Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking);
1020
+ }
1021
+ private:
1022
+ SolutionInfo *si;
1023
+ int select_working_set(int &i, int &j);
1024
+ double calculate_rho();
1025
+ bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1026
+ void do_shrinking();
1027
+ };
1028
+
1029
+ // return 1 if already optimal, return 0 otherwise
1030
+ int Solver_NU::select_working_set(int &out_i, int &out_j)
1031
+ {
1032
+ // return i,j such that y_i = y_j and
1033
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1034
+ // j: minimizes the decrease of obj value
1035
+ // (if quadratic coefficeint <= 0, replace it with tau)
1036
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1037
+
1038
+ double Gmaxp = -INF;
1039
+ double Gmaxp2 = -INF;
1040
+ int Gmaxp_idx = -1;
1041
+
1042
+ double Gmaxn = -INF;
1043
+ double Gmaxn2 = -INF;
1044
+ int Gmaxn_idx = -1;
1045
+
1046
+ int Gmin_idx = -1;
1047
+ double obj_diff_min = INF;
1048
+
1049
+ for(int t=0;t<active_size;t++)
1050
+ if(y[t]==+1)
1051
+ {
1052
+ if(!is_upper_bound(t))
1053
+ if(-G[t] >= Gmaxp)
1054
+ {
1055
+ Gmaxp = -G[t];
1056
+ Gmaxp_idx = t;
1057
+ }
1058
+ }
1059
+ else
1060
+ {
1061
+ if(!is_lower_bound(t))
1062
+ if(G[t] >= Gmaxn)
1063
+ {
1064
+ Gmaxn = G[t];
1065
+ Gmaxn_idx = t;
1066
+ }
1067
+ }
1068
+
1069
+ int ip = Gmaxp_idx;
1070
+ int in = Gmaxn_idx;
1071
+ const Qfloat *Q_ip = NULL;
1072
+ const Qfloat *Q_in = NULL;
1073
+ if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1074
+ Q_ip = Q->get_Q(ip,active_size);
1075
+ if(in != -1)
1076
+ Q_in = Q->get_Q(in,active_size);
1077
+
1078
+ for(int j=0;j<active_size;j++)
1079
+ {
1080
+ if(y[j]==+1)
1081
+ {
1082
+ if (!is_lower_bound(j))
1083
+ {
1084
+ double grad_diff=Gmaxp+G[j];
1085
+ if (G[j] >= Gmaxp2)
1086
+ Gmaxp2 = G[j];
1087
+ if (grad_diff > 0)
1088
+ {
1089
+ double obj_diff;
1090
+ double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1091
+ if (quad_coef > 0)
1092
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1093
+ else
1094
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1095
+
1096
+ if (obj_diff <= obj_diff_min)
1097
+ {
1098
+ Gmin_idx=j;
1099
+ obj_diff_min = obj_diff;
1100
+ }
1101
+ }
1102
+ }
1103
+ }
1104
+ else
1105
+ {
1106
+ if (!is_upper_bound(j))
1107
+ {
1108
+ double grad_diff=Gmaxn-G[j];
1109
+ if (-G[j] >= Gmaxn2)
1110
+ Gmaxn2 = -G[j];
1111
+ if (grad_diff > 0)
1112
+ {
1113
+ double obj_diff;
1114
+ double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1115
+ if (quad_coef > 0)
1116
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1117
+ else
1118
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1119
+
1120
+ if (obj_diff <= obj_diff_min)
1121
+ {
1122
+ Gmin_idx=j;
1123
+ obj_diff_min = obj_diff;
1124
+ }
1125
+ }
1126
+ }
1127
+ }
1128
+ }
1129
+
1130
+ if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1131
+ return 1;
1132
+
1133
+ if (y[Gmin_idx] == +1)
1134
+ out_i = Gmaxp_idx;
1135
+ else
1136
+ out_i = Gmaxn_idx;
1137
+ out_j = Gmin_idx;
1138
+
1139
+ return 0;
1140
+ }
1141
+
1142
+ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1143
+ {
1144
+ if(is_upper_bound(i))
1145
+ {
1146
+ if(y[i]==+1)
1147
+ return(-G[i] > Gmax1);
1148
+ else
1149
+ return(-G[i] > Gmax4);
1150
+ }
1151
+ else if(is_lower_bound(i))
1152
+ {
1153
+ if(y[i]==+1)
1154
+ return(G[i] > Gmax2);
1155
+ else
1156
+ return(G[i] > Gmax3);
1157
+ }
1158
+ else
1159
+ return(false);
1160
+ }
1161
+
1162
+ void Solver_NU::do_shrinking()
1163
+ {
1164
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1165
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1166
+ double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1167
+ double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1168
+
1169
+ // find maximal violating pair first
1170
+ int i;
1171
+ for(i=0;i<active_size;i++)
1172
+ {
1173
+ if(!is_upper_bound(i))
1174
+ {
1175
+ if(y[i]==+1)
1176
+ {
1177
+ if(-G[i] > Gmax1) Gmax1 = -G[i];
1178
+ }
1179
+ else if(-G[i] > Gmax4) Gmax4 = -G[i];
1180
+ }
1181
+ if(!is_lower_bound(i))
1182
+ {
1183
+ if(y[i]==+1)
1184
+ {
1185
+ if(G[i] > Gmax2) Gmax2 = G[i];
1186
+ }
1187
+ else if(G[i] > Gmax3) Gmax3 = G[i];
1188
+ }
1189
+ }
1190
+
1191
+ if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1192
+ {
1193
+ unshrink = true;
1194
+ reconstruct_gradient();
1195
+ active_size = l;
1196
+ }
1197
+
1198
+ for(i=0;i<active_size;i++)
1199
+ if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1200
+ {
1201
+ active_size--;
1202
+ while (active_size > i)
1203
+ {
1204
+ if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1205
+ {
1206
+ swap_index(i,active_size);
1207
+ break;
1208
+ }
1209
+ active_size--;
1210
+ }
1211
+ }
1212
+ }
1213
+
1214
+ double Solver_NU::calculate_rho()
1215
+ {
1216
+ int nr_free1 = 0,nr_free2 = 0;
1217
+ double ub1 = INF, ub2 = INF;
1218
+ double lb1 = -INF, lb2 = -INF;
1219
+ double sum_free1 = 0, sum_free2 = 0;
1220
+
1221
+ for(int i=0;i<active_size;i++)
1222
+ {
1223
+ if(y[i]==+1)
1224
+ {
1225
+ if(is_upper_bound(i))
1226
+ lb1 = max(lb1,G[i]);
1227
+ else if(is_lower_bound(i))
1228
+ ub1 = min(ub1,G[i]);
1229
+ else
1230
+ {
1231
+ ++nr_free1;
1232
+ sum_free1 += G[i];
1233
+ }
1234
+ }
1235
+ else
1236
+ {
1237
+ if(is_upper_bound(i))
1238
+ lb2 = max(lb2,G[i]);
1239
+ else if(is_lower_bound(i))
1240
+ ub2 = min(ub2,G[i]);
1241
+ else
1242
+ {
1243
+ ++nr_free2;
1244
+ sum_free2 += G[i];
1245
+ }
1246
+ }
1247
+ }
1248
+
1249
+ double r1,r2;
1250
+ if(nr_free1 > 0)
1251
+ r1 = sum_free1/nr_free1;
1252
+ else
1253
+ r1 = (ub1+lb1)/2;
1254
+
1255
+ if(nr_free2 > 0)
1256
+ r2 = sum_free2/nr_free2;
1257
+ else
1258
+ r2 = (ub2+lb2)/2;
1259
+
1260
+ si->r = (r1+r2)/2;
1261
+ return (r1-r2)/2;
1262
+ }
1263
+
1264
+ //
1265
+ // Q matrices for various formulations
1266
+ //
1267
+ class SVC_Q: public Kernel
1268
+ {
1269
+ public:
1270
+ SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1271
+ :Kernel(prob.l, prob.x, param)
1272
+ {
1273
+ clone(y,y_,prob.l);
1274
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1275
+ QD = new double[prob.l];
1276
+ for(int i=0;i<prob.l;i++)
1277
+ QD[i] = (this->*kernel_function)(i,i);
1278
+ }
1279
+
1280
+ Qfloat *get_Q(int i, int len) const
1281
+ {
1282
+ Qfloat *data;
1283
+ int start, j;
1284
+ if((start = cache->get_data(i,&data,len)) < len)
1285
+ {
1286
+ for(j=start;j<len;j++)
1287
+ data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1288
+ }
1289
+ return data;
1290
+ }
1291
+
1292
+ double *get_QD() const
1293
+ {
1294
+ return QD;
1295
+ }
1296
+
1297
+ void swap_index(int i, int j) const
1298
+ {
1299
+ cache->swap_index(i,j);
1300
+ Kernel::swap_index(i,j);
1301
+ swap(y[i],y[j]);
1302
+ swap(QD[i],QD[j]);
1303
+ }
1304
+
1305
+ ~SVC_Q()
1306
+ {
1307
+ delete[] y;
1308
+ delete cache;
1309
+ delete[] QD;
1310
+ }
1311
+ private:
1312
+ schar *y;
1313
+ Cache *cache;
1314
+ double *QD;
1315
+ };
1316
+
1317
+ class ONE_CLASS_Q: public Kernel
1318
+ {
1319
+ public:
1320
+ ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1321
+ :Kernel(prob.l, prob.x, param)
1322
+ {
1323
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1324
+ QD = new double[prob.l];
1325
+ for(int i=0;i<prob.l;i++)
1326
+ QD[i] = (this->*kernel_function)(i,i);
1327
+ }
1328
+
1329
+ Qfloat *get_Q(int i, int len) const
1330
+ {
1331
+ Qfloat *data;
1332
+ int start, j;
1333
+ if((start = cache->get_data(i,&data,len)) < len)
1334
+ {
1335
+ for(j=start;j<len;j++)
1336
+ data[j] = (Qfloat)(this->*kernel_function)(i,j);
1337
+ }
1338
+ return data;
1339
+ }
1340
+
1341
+ double *get_QD() const
1342
+ {
1343
+ return QD;
1344
+ }
1345
+
1346
+ void swap_index(int i, int j) const
1347
+ {
1348
+ cache->swap_index(i,j);
1349
+ Kernel::swap_index(i,j);
1350
+ swap(QD[i],QD[j]);
1351
+ }
1352
+
1353
+ ~ONE_CLASS_Q()
1354
+ {
1355
+ delete cache;
1356
+ delete[] QD;
1357
+ }
1358
+ private:
1359
+ Cache *cache;
1360
+ double *QD;
1361
+ };
1362
+
1363
+ class SVR_Q: public Kernel
1364
+ {
1365
+ public:
1366
+ SVR_Q(const svm_problem& prob, const svm_parameter& param)
1367
+ :Kernel(prob.l, prob.x, param)
1368
+ {
1369
+ l = prob.l;
1370
+ cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1371
+ QD = new double[2*l];
1372
+ sign = new schar[2*l];
1373
+ index = new int[2*l];
1374
+ for(int k=0;k<l;k++)
1375
+ {
1376
+ sign[k] = 1;
1377
+ sign[k+l] = -1;
1378
+ index[k] = k;
1379
+ index[k+l] = k;
1380
+ QD[k] = (this->*kernel_function)(k,k);
1381
+ QD[k+l] = QD[k];
1382
+ }
1383
+ buffer[0] = new Qfloat[2*l];
1384
+ buffer[1] = new Qfloat[2*l];
1385
+ next_buffer = 0;
1386
+ }
1387
+
1388
+ void swap_index(int i, int j) const
1389
+ {
1390
+ swap(sign[i],sign[j]);
1391
+ swap(index[i],index[j]);
1392
+ swap(QD[i],QD[j]);
1393
+ }
1394
+
1395
+ Qfloat *get_Q(int i, int len) const
1396
+ {
1397
+ Qfloat *data;
1398
+ int j, real_i = index[i];
1399
+ if(cache->get_data(real_i,&data,l) < l)
1400
+ {
1401
+ for(j=0;j<l;j++)
1402
+ data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1403
+ }
1404
+
1405
+ // reorder and copy
1406
+ Qfloat *buf = buffer[next_buffer];
1407
+ next_buffer = 1 - next_buffer;
1408
+ schar si = sign[i];
1409
+ for(j=0;j<len;j++)
1410
+ buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1411
+ return buf;
1412
+ }
1413
+
1414
+ double *get_QD() const
1415
+ {
1416
+ return QD;
1417
+ }
1418
+
1419
+ ~SVR_Q()
1420
+ {
1421
+ delete cache;
1422
+ delete[] sign;
1423
+ delete[] index;
1424
+ delete[] buffer[0];
1425
+ delete[] buffer[1];
1426
+ delete[] QD;
1427
+ }
1428
+ private:
1429
+ int l;
1430
+ Cache *cache;
1431
+ schar *sign;
1432
+ int *index;
1433
+ mutable int next_buffer;
1434
+ Qfloat *buffer[2];
1435
+ double *QD;
1436
+ };
1437
+
1438
+ //
1439
+ // construct and solve various formulations
1440
+ //
1441
+ static void solve_c_svc(
1442
+ const svm_problem *prob, const svm_parameter* param,
1443
+ double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1444
+ {
1445
+ int l = prob->l;
1446
+ double *minus_ones = new double[l];
1447
+ schar *y = new schar[l];
1448
+ double *C = new double[l];
1449
+
1450
+ int i;
1451
+
1452
+ for(i=0;i<l;i++)
1453
+ {
1454
+ alpha[i] = 0;
1455
+ minus_ones[i] = -1;
1456
+ if(prob->y[i] > 0)
1457
+ {
1458
+ y[i] = +1;
1459
+ C[i] = prob->W[i]*Cp;
1460
+ }
1461
+ else
1462
+ {
1463
+ y[i] = -1;
1464
+ C[i] = prob->W[i]*Cn;
1465
+ }
1466
+ }
1467
+
1468
+ Solver s;
1469
+ s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1470
+ alpha, C, param->eps, si, param->shrinking);
1471
+
1472
+ /*
1473
+ double sum_alpha=0;
1474
+ for(i=0;i<l;i++)
1475
+ sum_alpha += alpha[i];
1476
+ if (Cp==Cn)
1477
+ info("nu = %f\n", sum_alpha/(Cp*prob->l));
1478
+ */
1479
+
1480
+ for(i=0;i<l;i++)
1481
+ alpha[i] *= y[i];
1482
+
1483
+ delete[] C;
1484
+ delete[] minus_ones;
1485
+ delete[] y;
1486
+ }
1487
+
1488
+ static void solve_nu_svc(
1489
+ const svm_problem *prob, const svm_parameter *param,
1490
+ double *alpha, Solver::SolutionInfo* si)
1491
+ {
1492
+ int i;
1493
+ int l = prob->l;
1494
+ double nu = param->nu;
1495
+
1496
+ schar *y = new schar[l];
1497
+ double *C = new double[l];
1498
+
1499
+ for(i=0;i<l;i++)
1500
+ {
1501
+ if(prob->y[i]>0)
1502
+ y[i] = +1;
1503
+ else
1504
+ y[i] = -1;
1505
+ C[i] = prob->W[i];
1506
+ }
1507
+
1508
+ double nu_l = 0;
1509
+ for(i=0;i<l;i++) nu_l += nu*C[i];
1510
+ double sum_pos = nu_l/2;
1511
+ double sum_neg = nu_l/2;
1512
+
1513
+ for(i=0;i<l;i++)
1514
+ if(y[i] == +1)
1515
+ {
1516
+ alpha[i] = min(C[i],sum_pos);
1517
+ sum_pos -= alpha[i];
1518
+ }
1519
+ else
1520
+ {
1521
+ alpha[i] = min(C[i],sum_neg);
1522
+ sum_neg -= alpha[i];
1523
+ }
1524
+
1525
+ double *zeros = new double[l];
1526
+
1527
+ for(i=0;i<l;i++)
1528
+ zeros[i] = 0;
1529
+
1530
+ Solver_NU s;
1531
+ s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1532
+ alpha, C, param->eps, si, param->shrinking);
1533
+ double r = si->r;
1534
+
1535
+ info("C = %f\n",1/r);
1536
+
1537
+ for(i=0;i<l;i++)
1538
+ {
1539
+ alpha[i] *= y[i]/r;
1540
+ si->upper_bound[i] /= r;
1541
+ }
1542
+
1543
+ si->rho /= r;
1544
+ si->obj /= (r*r);
1545
+
1546
+ delete[] C;
1547
+ delete[] y;
1548
+ delete[] zeros;
1549
+ }
1550
+
1551
+ static void solve_one_class(
1552
+ const svm_problem *prob, const svm_parameter *param,
1553
+ double *alpha, Solver::SolutionInfo* si)
1554
+ {
1555
+ int l = prob->l;
1556
+ double *zeros = new double[l];
1557
+ schar *ones = new schar[l];
1558
+ double *C = new double[l];
1559
+ int i;
1560
+
1561
+ double nu_l = 0;
1562
+
1563
+ for(i=0;i<l;i++)
1564
+ {
1565
+ C[i] = prob->W[i];
1566
+ nu_l += C[i] * param->nu;
1567
+ }
1568
+
1569
+ i = 0;
1570
+ while(nu_l > 0)
1571
+ {
1572
+ alpha[i] = min(C[i],nu_l);
1573
+ nu_l -= alpha[i];
1574
+ ++i;
1575
+ }
1576
+ for(;i<l;i++)
1577
+ alpha[i] = 0;
1578
+
1579
+ for(i=0;i<l;i++)
1580
+ {
1581
+ zeros[i] = 0;
1582
+ ones[i] = 1;
1583
+ }
1584
+
1585
+ Solver s;
1586
+ s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1587
+ alpha, C, param->eps, si, param->shrinking);
1588
+
1589
+ delete[] C;
1590
+ delete[] zeros;
1591
+ delete[] ones;
1592
+ }
1593
+
1594
+ static void solve_epsilon_svr(
1595
+ const svm_problem *prob, const svm_parameter *param,
1596
+ double *alpha, Solver::SolutionInfo* si)
1597
+ {
1598
+ int l = prob->l;
1599
+ double *alpha2 = new double[2*l];
1600
+ double *linear_term = new double[2*l];
1601
+ double *C = new double[2*l];
1602
+ schar *y = new schar[2*l];
1603
+ int i;
1604
+
1605
+ for(i=0;i<l;i++)
1606
+ {
1607
+ alpha2[i] = 0;
1608
+ linear_term[i] = param->p - prob->y[i];
1609
+ y[i] = 1;
1610
+ C[i] = prob->W[i]*param->C;
1611
+
1612
+ alpha2[i+l] = 0;
1613
+ linear_term[i+l] = param->p + prob->y[i];
1614
+ y[i+l] = -1;
1615
+ C[i+l] = prob->W[i]*param->C;
1616
+ }
1617
+
1618
+ Solver s;
1619
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1620
+ alpha2, C, param->eps, si, param->shrinking);
1621
+ double sum_alpha = 0;
1622
+ for(i=0;i<l;i++)
1623
+ {
1624
+ alpha[i] = alpha2[i] - alpha2[i+l];
1625
+ sum_alpha += fabs(alpha[i]);
1626
+ }
1627
+ //info("nu = %f\n",sum_alpha/(param->C*l));
1628
+ delete[] alpha2;
1629
+ delete[] linear_term;
1630
+ delete[] C;
1631
+ delete[] y;
1632
+ }
1633
+
1634
+ static void solve_nu_svr(
1635
+ const svm_problem *prob, const svm_parameter *param,
1636
+ double *alpha, Solver::SolutionInfo* si)
1637
+ {
1638
+ int l = prob->l;
1639
+ double *C = new double[2*l];
1640
+ double *alpha2 = new double[2*l];
1641
+ double *linear_term = new double[2*l];
1642
+ schar *y = new schar[2*l];
1643
+ int i;
1644
+
1645
+ double sum = 0;
1646
+ for(i=0;i<l;i++)
1647
+ {
1648
+ C[i] = C[i+l] = prob->W[i]*param->C;
1649
+ sum += C[i] * param->nu;
1650
+ }
1651
+ sum /= 2;
1652
+
1653
+ for(i=0;i<l;i++)
1654
+ {
1655
+ alpha2[i] = alpha2[i+l] = min(sum,C[i]);
1656
+ sum -= alpha2[i];
1657
+
1658
+ linear_term[i] = - prob->y[i];
1659
+ y[i] = 1;
1660
+
1661
+ linear_term[i+l] = prob->y[i];
1662
+ y[i+l] = -1;
1663
+ }
1664
+
1665
+ Solver_NU s;
1666
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1667
+ alpha2, C, param->eps, si, param->shrinking);
1668
+
1669
+ info("epsilon = %f\n",-si->r);
1670
+
1671
+ for(i=0;i<l;i++)
1672
+ alpha[i] = alpha2[i] - alpha2[i+l];
1673
+
1674
+ delete[] alpha2;
1675
+ delete[] linear_term;
1676
+ delete[] C;
1677
+ delete[] y;
1678
+ }
1679
+
1680
+ //
1681
+ // decision_function
1682
+ //
1683
+ struct decision_function
1684
+ {
1685
+ double *alpha;
1686
+ double rho;
1687
+ };
1688
+
1689
+ static decision_function svm_train_one(
1690
+ const svm_problem *prob, const svm_parameter *param,
1691
+ double Cp, double Cn)
1692
+ {
1693
+ double *alpha = Malloc(double,prob->l);
1694
+ Solver::SolutionInfo si;
1695
+ switch(param->svm_type)
1696
+ {
1697
+ case C_SVC:
1698
+ si.upper_bound = Malloc(double,prob->l);
1699
+ solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1700
+ break;
1701
+ case NU_SVC:
1702
+ si.upper_bound = Malloc(double,prob->l);
1703
+ solve_nu_svc(prob,param,alpha,&si);
1704
+ break;
1705
+ case ONE_CLASS:
1706
+ si.upper_bound = Malloc(double,prob->l);
1707
+ solve_one_class(prob,param,alpha,&si);
1708
+ break;
1709
+ case EPSILON_SVR:
1710
+ si.upper_bound = Malloc(double,2*prob->l);
1711
+ solve_epsilon_svr(prob,param,alpha,&si);
1712
+ break;
1713
+ case NU_SVR:
1714
+ si.upper_bound = Malloc(double,2*prob->l);
1715
+ solve_nu_svr(prob,param,alpha,&si);
1716
+ break;
1717
+ }
1718
+
1719
+ info("obj = %f, rho = %f\n",si.obj,si.rho);
1720
+
1721
+ // output SVs
1722
+
1723
+ int nSV = 0;
1724
+ int nBSV = 0;
1725
+ for(int i=0;i<prob->l;i++)
1726
+ {
1727
+ if(fabs(alpha[i]) > 0)
1728
+ {
1729
+ ++nSV;
1730
+ if(prob->y[i] > 0)
1731
+ {
1732
+ if(fabs(alpha[i]) >= si.upper_bound[i])
1733
+ ++nBSV;
1734
+ }
1735
+ else
1736
+ {
1737
+ if(fabs(alpha[i]) >= si.upper_bound[i])
1738
+ ++nBSV;
1739
+ }
1740
+ }
1741
+ }
1742
+
1743
+ free(si.upper_bound);
1744
+
1745
+ info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1746
+
1747
+ decision_function f;
1748
+ f.alpha = alpha;
1749
+ f.rho = si.rho;
1750
+ return f;
1751
+ }
1752
+
1753
+ // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
1754
+ static void sigmoid_train(
1755
+ int l, const double *dec_values, const double *labels,
1756
+ double& A, double& B)
1757
+ {
1758
+ double prior1=0, prior0 = 0;
1759
+ int i;
1760
+
1761
+ for (i=0;i<l;i++)
1762
+ if (labels[i] > 0) prior1+=1;
1763
+ else prior0+=1;
1764
+
1765
+ int max_iter=100; // Maximal number of iterations
1766
+ double min_step=1e-10; // Minimal step taken in line search
1767
+ double sigma=1e-12; // For numerically strict PD of Hessian
1768
+ double eps=1e-5;
1769
+ double hiTarget=(prior1+1.0)/(prior1+2.0);
1770
+ double loTarget=1/(prior0+2.0);
1771
+ double *t=Malloc(double,l);
1772
+ double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1773
+ double newA,newB,newf,d1,d2;
1774
+ int iter;
1775
+
1776
+ // Initial Point and Initial Fun Value
1777
+ A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1778
+ double fval = 0.0;
1779
+
1780
+ for (i=0;i<l;i++)
1781
+ {
1782
+ if (labels[i]>0) t[i]=hiTarget;
1783
+ else t[i]=loTarget;
1784
+ fApB = dec_values[i]*A+B;
1785
+ if (fApB>=0)
1786
+ fval += t[i]*fApB + log(1+exp(-fApB));
1787
+ else
1788
+ fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1789
+ }
1790
+ for (iter=0;iter<max_iter;iter++)
1791
+ {
1792
+ // Update Gradient and Hessian (use H' = H + sigma I)
1793
+ h11=sigma; // numerically ensures strict PD
1794
+ h22=sigma;
1795
+ h21=0.0;g1=0.0;g2=0.0;
1796
+ for (i=0;i<l;i++)
1797
+ {
1798
+ fApB = dec_values[i]*A+B;
1799
+ if (fApB >= 0)
1800
+ {
1801
+ p=exp(-fApB)/(1.0+exp(-fApB));
1802
+ q=1.0/(1.0+exp(-fApB));
1803
+ }
1804
+ else
1805
+ {
1806
+ p=1.0/(1.0+exp(fApB));
1807
+ q=exp(fApB)/(1.0+exp(fApB));
1808
+ }
1809
+ d2=p*q;
1810
+ h11+=dec_values[i]*dec_values[i]*d2;
1811
+ h22+=d2;
1812
+ h21+=dec_values[i]*d2;
1813
+ d1=t[i]-p;
1814
+ g1+=dec_values[i]*d1;
1815
+ g2+=d1;
1816
+ }
1817
+
1818
+ // Stopping Criteria
1819
+ if (fabs(g1)<eps && fabs(g2)<eps)
1820
+ break;
1821
+
1822
+ // Finding Newton direction: -inv(H') * g
1823
+ det=h11*h22-h21*h21;
1824
+ dA=-(h22*g1 - h21 * g2) / det;
1825
+ dB=-(-h21*g1+ h11 * g2) / det;
1826
+ gd=g1*dA+g2*dB;
1827
+
1828
+
1829
+ stepsize = 1; // Line Search
1830
+ while (stepsize >= min_step)
1831
+ {
1832
+ newA = A + stepsize * dA;
1833
+ newB = B + stepsize * dB;
1834
+
1835
+ // New function value
1836
+ newf = 0.0;
1837
+ for (i=0;i<l;i++)
1838
+ {
1839
+ fApB = dec_values[i]*newA+newB;
1840
+ if (fApB >= 0)
1841
+ newf += t[i]*fApB + log(1+exp(-fApB));
1842
+ else
1843
+ newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1844
+ }
1845
+ // Check sufficient decrease
1846
+ if (newf<fval+0.0001*stepsize*gd)
1847
+ {
1848
+ A=newA;B=newB;fval=newf;
1849
+ break;
1850
+ }
1851
+ else
1852
+ stepsize = stepsize / 2.0;
1853
+ }
1854
+
1855
+ if (stepsize < min_step)
1856
+ {
1857
+ info("Line search fails in two-class probability estimates\n");
1858
+ break;
1859
+ }
1860
+ }
1861
+
1862
+ if (iter>=max_iter)
1863
+ info("Reaching maximal iterations in two-class probability estimates\n");
1864
+ free(t);
1865
+ }
1866
+
1867
+ static double sigmoid_predict(double decision_value, double A, double B)
1868
+ {
1869
+ double fApB = decision_value*A+B;
1870
+ // 1-p used later; avoid catastrophic cancellation
1871
+ if (fApB >= 0)
1872
+ return exp(-fApB)/(1.0+exp(-fApB));
1873
+ else
1874
+ return 1.0/(1+exp(fApB)) ;
1875
+ }
1876
+
1877
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1878
+ static void multiclass_probability(int k, double **r, double *p)
1879
+ {
1880
+ int t,j;
1881
+ int iter = 0, max_iter=max(100,k);
1882
+ double **Q=Malloc(double *,k);
1883
+ double *Qp=Malloc(double,k);
1884
+ double pQp, eps=0.005/k;
1885
+
1886
+ for (t=0;t<k;t++)
1887
+ {
1888
+ p[t]=1.0/k; // Valid if k = 1
1889
+ Q[t]=Malloc(double,k);
1890
+ Q[t][t]=0;
1891
+ for (j=0;j<t;j++)
1892
+ {
1893
+ Q[t][t]+=r[j][t]*r[j][t];
1894
+ Q[t][j]=Q[j][t];
1895
+ }
1896
+ for (j=t+1;j<k;j++)
1897
+ {
1898
+ Q[t][t]+=r[j][t]*r[j][t];
1899
+ Q[t][j]=-r[j][t]*r[t][j];
1900
+ }
1901
+ }
1902
+ for (iter=0;iter<max_iter;iter++)
1903
+ {
1904
+ // stopping condition, recalculate QP,pQP for numerical accuracy
1905
+ pQp=0;
1906
+ for (t=0;t<k;t++)
1907
+ {
1908
+ Qp[t]=0;
1909
+ for (j=0;j<k;j++)
1910
+ Qp[t]+=Q[t][j]*p[j];
1911
+ pQp+=p[t]*Qp[t];
1912
+ }
1913
+ double max_error=0;
1914
+ for (t=0;t<k;t++)
1915
+ {
1916
+ double error=fabs(Qp[t]-pQp);
1917
+ if (error>max_error)
1918
+ max_error=error;
1919
+ }
1920
+ if (max_error<eps) break;
1921
+
1922
+ for (t=0;t<k;t++)
1923
+ {
1924
+ double diff=(-Qp[t]+pQp)/Q[t][t];
1925
+ p[t]+=diff;
1926
+ pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1927
+ for (j=0;j<k;j++)
1928
+ {
1929
+ Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1930
+ p[j]/=(1+diff);
1931
+ }
1932
+ }
1933
+ }
1934
+ if (iter>=max_iter)
1935
+ info("Exceeds max_iter in multiclass_prob\n");
1936
+ for(t=0;t<k;t++) free(Q[t]);
1937
+ free(Q);
1938
+ free(Qp);
1939
+ }
1940
+
1941
+ // Cross-validation decision values for probability estimates
1942
+ static void svm_binary_svc_probability(
1943
+ const svm_problem *prob, const svm_parameter *param,
1944
+ double Cp, double Cn, double& probA, double& probB)
1945
+ {
1946
+ int i;
1947
+ int nr_fold = 5;
1948
+ int *perm = Malloc(int,prob->l);
1949
+ double *dec_values = Malloc(double,prob->l);
1950
+
1951
+ // random shuffle
1952
+ for(i=0;i<prob->l;i++) perm[i]=i;
1953
+ for(i=0;i<prob->l;i++)
1954
+ {
1955
+ int j = i+rand()%(prob->l-i);
1956
+ swap(perm[i],perm[j]);
1957
+ }
1958
+ for(i=0;i<nr_fold;i++)
1959
+ {
1960
+ int begin = i*prob->l/nr_fold;
1961
+ int end = (i+1)*prob->l/nr_fold;
1962
+ int j,k;
1963
+ struct svm_problem subprob;
1964
+
1965
+ subprob.l = prob->l-(end-begin);
1966
+ subprob.x = Malloc(struct svm_node*,subprob.l);
1967
+ subprob.y = Malloc(double,subprob.l);
1968
+ subprob.W = Malloc(double,subprob.l);
1969
+
1970
+ k=0;
1971
+ for(j=0;j<begin;j++)
1972
+ {
1973
+ subprob.x[k] = prob->x[perm[j]];
1974
+ subprob.y[k] = prob->y[perm[j]];
1975
+ subprob.W[k] = prob->W[perm[j]];
1976
+ ++k;
1977
+ }
1978
+ for(j=end;j<prob->l;j++)
1979
+ {
1980
+ subprob.x[k] = prob->x[perm[j]];
1981
+ subprob.y[k] = prob->y[perm[j]];
1982
+ subprob.W[k] = prob->W[perm[j]];
1983
+ ++k;
1984
+ }
1985
+ int p_count=0,n_count=0;
1986
+ for(j=0;j<k;j++)
1987
+ if(subprob.y[j]>0)
1988
+ p_count++;
1989
+ else
1990
+ n_count++;
1991
+
1992
+ if(p_count==0 && n_count==0)
1993
+ for(j=begin;j<end;j++)
1994
+ dec_values[perm[j]] = 0;
1995
+ else if(p_count > 0 && n_count == 0)
1996
+ for(j=begin;j<end;j++)
1997
+ dec_values[perm[j]] = 1;
1998
+ else if(p_count == 0 && n_count > 0)
1999
+ for(j=begin;j<end;j++)
2000
+ dec_values[perm[j]] = -1;
2001
+ else
2002
+ {
2003
+ svm_parameter subparam = *param;
2004
+ subparam.probability=0;
2005
+ subparam.C=1.0;
2006
+ subparam.nr_weight=2;
2007
+ subparam.weight_label = Malloc(int,2);
2008
+ subparam.weight = Malloc(double,2);
2009
+ subparam.weight_label[0]=+1;
2010
+ subparam.weight_label[1]=-1;
2011
+ subparam.weight[0]=Cp;
2012
+ subparam.weight[1]=Cn;
2013
+ struct svm_model *submodel = svm_train(&subprob,&subparam);
2014
+ for(j=begin;j<end;j++)
2015
+ {
2016
+ svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
2017
+ // ensure +1 -1 order; reason not using CV subroutine
2018
+ dec_values[perm[j]] *= submodel->label[0];
2019
+ }
2020
+ svm_free_and_destroy_model(&submodel);
2021
+ svm_destroy_param(&subparam);
2022
+ }
2023
+ free(subprob.x);
2024
+ free(subprob.y);
2025
+ free(subprob.W);
2026
+ }
2027
+ sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
2028
+ free(dec_values);
2029
+ free(perm);
2030
+ }
2031
+
2032
+ // Return parameter of a Laplace distribution
2033
+ static double svm_svr_probability(
2034
+ const svm_problem *prob, const svm_parameter *param)
2035
+ {
2036
+ int i;
2037
+ int nr_fold = 5;
2038
+ double *ymv = Malloc(double,prob->l);
2039
+ double mae = 0;
2040
+
2041
+ svm_parameter newparam = *param;
2042
+ newparam.probability = 0;
2043
+ svm_cross_validation(prob,&newparam,nr_fold,ymv);
2044
+ for(i=0;i<prob->l;i++)
2045
+ {
2046
+ ymv[i]=prob->y[i]-ymv[i];
2047
+ mae += fabs(ymv[i]);
2048
+ }
2049
+ mae /= prob->l;
2050
+ double std=sqrt(2*mae*mae);
2051
+ int count=0;
2052
+ mae=0;
2053
+ for(i=0;i<prob->l;i++)
2054
+ if (fabs(ymv[i]) > 5*std)
2055
+ count=count+1;
2056
+ else
2057
+ mae+=fabs(ymv[i]);
2058
+ mae /= (prob->l-count);
2059
+ info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2060
+ free(ymv);
2061
+ return mae;
2062
+ }
2063
+
2064
+
2065
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2066
+ // perm, length l, must be allocated before calling this subroutine
2067
+ static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2068
+ {
2069
+ int l = prob->l;
2070
+ int max_nr_class = 16;
2071
+ int nr_class = 0;
2072
+ int *label = Malloc(int,max_nr_class);
2073
+ int *count = Malloc(int,max_nr_class);
2074
+ int *data_label = Malloc(int,l);
2075
+ int i;
2076
+
2077
+ for(i=0;i<l;i++)
2078
+ {
2079
+ int this_label = (int)prob->y[i];
2080
+ int j;
2081
+ for(j=0;j<nr_class;j++)
2082
+ {
2083
+ if(this_label == label[j])
2084
+ {
2085
+ ++count[j];
2086
+ break;
2087
+ }
2088
+ }
2089
+ data_label[i] = j;
2090
+ if(j == nr_class)
2091
+ {
2092
+ if(nr_class == max_nr_class)
2093
+ {
2094
+ max_nr_class *= 2;
2095
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2096
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2097
+ }
2098
+ label[nr_class] = this_label;
2099
+ count[nr_class] = 1;
2100
+ ++nr_class;
2101
+ }
2102
+ }
2103
+
2104
+ int *start = Malloc(int,nr_class);
2105
+ start[0] = 0;
2106
+ for(i=1;i<nr_class;i++)
2107
+ start[i] = start[i-1]+count[i-1];
2108
+ for(i=0;i<l;i++)
2109
+ {
2110
+ perm[start[data_label[i]]] = i;
2111
+ ++start[data_label[i]];
2112
+ }
2113
+ start[0] = 0;
2114
+ for(i=1;i<nr_class;i++)
2115
+ start[i] = start[i-1]+count[i-1];
2116
+
2117
+ *nr_class_ret = nr_class;
2118
+ *label_ret = label;
2119
+ *start_ret = start;
2120
+ *count_ret = count;
2121
+ free(data_label);
2122
+ }
2123
+
2124
+ //
2125
+ // Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
2126
+ //
2127
+ static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
2128
+ {
2129
+ int i;
2130
+ int l = 0;
2131
+ for(i=0;i<prob->l;i++)
2132
+ if(prob->W[i] > 0) l++;
2133
+ *newprob = *prob;
2134
+ newprob->l = l;
2135
+ newprob->x = Malloc(svm_node*,l);
2136
+ newprob->y = Malloc(double,l);
2137
+ newprob->W = Malloc(double,l);
2138
+
2139
+ int j = 0;
2140
+ for(i=0;i<prob->l;i++)
2141
+ if(prob->W[i] > 0)
2142
+ {
2143
+ newprob->x[j] = prob->x[i];
2144
+ newprob->y[j] = prob->y[i];
2145
+ newprob->W[j] = prob->W[i];
2146
+ j++;
2147
+ }
2148
+ }
2149
+
2150
+ //
2151
+ // Interface functions
2152
+ //
2153
+ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2154
+ {
2155
+ svm_problem newprob;
2156
+ remove_zero_weight(&newprob, prob);
2157
+ prob = &newprob;
2158
+
2159
+ svm_model *model = Malloc(svm_model,1);
2160
+ model->param = *param;
2161
+ model->free_sv = 0; // XXX
2162
+
2163
+ if(param->svm_type == ONE_CLASS ||
2164
+ param->svm_type == EPSILON_SVR ||
2165
+ param->svm_type == NU_SVR)
2166
+ {
2167
+ // regression or one-class-svm
2168
+ model->nr_class = 2;
2169
+ model->label = NULL;
2170
+ model->nSV = NULL;
2171
+ model->probA = NULL; model->probB = NULL;
2172
+ model->sv_coef = Malloc(double *,1);
2173
+
2174
+ if(param->probability &&
2175
+ (param->svm_type == EPSILON_SVR ||
2176
+ param->svm_type == NU_SVR))
2177
+ {
2178
+ model->probA = Malloc(double,1);
2179
+ model->probA[0] = svm_svr_probability(prob,param);
2180
+ }
2181
+
2182
+ decision_function f = svm_train_one(prob,param,0,0);
2183
+ model->rho = Malloc(double,1);
2184
+ model->rho[0] = f.rho;
2185
+
2186
+ int nSV = 0;
2187
+ int i;
2188
+ for(i=0;i<prob->l;i++)
2189
+ if(fabs(f.alpha[i]) > 0) ++nSV;
2190
+ model->l = nSV;
2191
+ model->SV = Malloc(svm_node *,nSV);
2192
+ model->sv_coef[0] = Malloc(double,nSV);
2193
+ int j = 0;
2194
+ for(i=0;i<prob->l;i++)
2195
+ if(fabs(f.alpha[i]) > 0)
2196
+ {
2197
+ model->SV[j] = prob->x[i];
2198
+ model->sv_coef[0][j] = f.alpha[i];
2199
+ ++j;
2200
+ }
2201
+
2202
+ free(f.alpha);
2203
+ }
2204
+ else
2205
+ {
2206
+ // classification
2207
+ int l = prob->l;
2208
+ int nr_class;
2209
+ int *label = NULL;
2210
+ int *start = NULL;
2211
+ int *count = NULL;
2212
+ int *perm = Malloc(int,l);
2213
+
2214
+ // group training data of the same class
2215
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2216
+ if(nr_class == 1)
2217
+ info("WARNING: training data in only one class. See README for details.\n");
2218
+
2219
+ svm_node **x = Malloc(svm_node *,l);
2220
+ double *W;
2221
+ W = Malloc(double,l);
2222
+
2223
+ int i;
2224
+ for(i=0;i<l;i++)
2225
+ {
2226
+ x[i] = prob->x[perm[i]];
2227
+ W[i] = prob->W[perm[i]];
2228
+ }
2229
+
2230
+ // calculate weighted C
2231
+
2232
+ double *weighted_C = Malloc(double, nr_class);
2233
+ for(i=0;i<nr_class;i++)
2234
+ weighted_C[i] = param->C;
2235
+ for(i=0;i<param->nr_weight;i++)
2236
+ {
2237
+ int j;
2238
+ for(j=0;j<nr_class;j++)
2239
+ if(param->weight_label[i] == label[j])
2240
+ break;
2241
+ if(j == nr_class)
2242
+ fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2243
+ else
2244
+ weighted_C[j] *= param->weight[i];
2245
+ }
2246
+
2247
+ // train k*(k-1)/2 models
2248
+
2249
+ bool *nonzero = Malloc(bool,l);
2250
+ for(i=0;i<l;i++)
2251
+ nonzero[i] = false;
2252
+ decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2253
+
2254
+ double *probA=NULL,*probB=NULL;
2255
+ if (param->probability)
2256
+ {
2257
+ probA=Malloc(double,nr_class*(nr_class-1)/2);
2258
+ probB=Malloc(double,nr_class*(nr_class-1)/2);
2259
+ }
2260
+
2261
+ int p = 0;
2262
+ for(i=0;i<nr_class;i++)
2263
+ for(int j=i+1;j<nr_class;j++)
2264
+ {
2265
+ svm_problem sub_prob;
2266
+ int si = start[i], sj = start[j];
2267
+ int ci = count[i], cj = count[j];
2268
+ sub_prob.l = ci+cj;
2269
+ sub_prob.x = Malloc(svm_node *,sub_prob.l);
2270
+ sub_prob.y = Malloc(double,sub_prob.l);
2271
+ sub_prob.W = Malloc(double,sub_prob.l);
2272
+ int k;
2273
+ for(k=0;k<ci;k++)
2274
+ {
2275
+ sub_prob.x[k] = x[si+k];
2276
+ sub_prob.y[k] = +1;
2277
+ sub_prob.W[k] = W[si+k];
2278
+ }
2279
+ for(k=0;k<cj;k++)
2280
+ {
2281
+ sub_prob.x[ci+k] = x[sj+k];
2282
+ sub_prob.y[ci+k] = -1;
2283
+ sub_prob.W[ci+k] = W[sj+k];
2284
+ }
2285
+
2286
+ if(param->probability)
2287
+ svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2288
+
2289
+ f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2290
+ for(k=0;k<ci;k++)
2291
+ if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2292
+ nonzero[si+k] = true;
2293
+ for(k=0;k<cj;k++)
2294
+ if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2295
+ nonzero[sj+k] = true;
2296
+ free(sub_prob.x);
2297
+ free(sub_prob.y);
2298
+ free(sub_prob.W);
2299
+ ++p;
2300
+ }
2301
+
2302
+ // build output
2303
+
2304
+ model->nr_class = nr_class;
2305
+
2306
+ model->label = Malloc(int,nr_class);
2307
+ for(i=0;i<nr_class;i++)
2308
+ model->label[i] = label[i];
2309
+
2310
+ model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2311
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2312
+ model->rho[i] = f[i].rho;
2313
+
2314
+ if(param->probability)
2315
+ {
2316
+ model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2317
+ model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2318
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2319
+ {
2320
+ model->probA[i] = probA[i];
2321
+ model->probB[i] = probB[i];
2322
+ }
2323
+ }
2324
+ else
2325
+ {
2326
+ model->probA=NULL;
2327
+ model->probB=NULL;
2328
+ }
2329
+
2330
+ int total_sv = 0;
2331
+ int *nz_count = Malloc(int,nr_class);
2332
+ model->nSV = Malloc(int,nr_class);
2333
+ for(i=0;i<nr_class;i++)
2334
+ {
2335
+ int nSV = 0;
2336
+ for(int j=0;j<count[i];j++)
2337
+ if(nonzero[start[i]+j])
2338
+ {
2339
+ ++nSV;
2340
+ ++total_sv;
2341
+ }
2342
+ model->nSV[i] = nSV;
2343
+ nz_count[i] = nSV;
2344
+ }
2345
+
2346
+ info("Total nSV = %d\n",total_sv);
2347
+
2348
+ model->l = total_sv;
2349
+ model->SV = Malloc(svm_node *,total_sv);
2350
+ p = 0;
2351
+ for(i=0;i<l;i++)
2352
+ if(nonzero[i]) model->SV[p++] = x[i];
2353
+
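+ // nz_start[i]: index of the first support vector of class i inside model->SV (and each sv_coef row)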
2354
+ int *nz_start = Malloc(int,nr_class);
2355
+ nz_start[0] = 0;
2356
+ for(i=1;i<nr_class;i++)
2357
+ nz_start[i] = nz_start[i-1]+nz_count[i-1];
2358
+
2359
+ model->sv_coef = Malloc(double *,nr_class-1);
2360
+ for(i=0;i<nr_class-1;i++)
2361
+ model->sv_coef[i] = Malloc(double,total_sv);
2362
+
2363
+ p = 0;
2364
+ for(i=0;i<nr_class;i++)
2365
+ for(int j=i+1;j<nr_class;j++)
2366
+ {
2367
+ // classifier (i,j): coefficients with
2368
+ // i are in sv_coef[j-1][nz_start[i]...],
2369
+ // j are in sv_coef[i][nz_start[j]...]
2370
+
2371
+ int si = start[i];
2372
+ int sj = start[j];
2373
+ int ci = count[i];
2374
+ int cj = count[j];
2375
+
2376
+ int q = nz_start[i];
2377
+ int k;
2378
+ for(k=0;k<ci;k++)
2379
+ if(nonzero[si+k])
2380
+ model->sv_coef[j-1][q++] = f[p].alpha[k];
2381
+ q = nz_start[j];
2382
+ for(k=0;k<cj;k++)
2383
+ if(nonzero[sj+k])
2384
+ model->sv_coef[i][q++] = f[p].alpha[ci+k];
2385
+ ++p;
2386
+ }
2387
+
2388
+ free(label);
2389
+ free(probA);
2390
+ free(probB);
2391
+ free(count);
2392
+ free(perm);
2393
+ free(start);
2394
+ free(W);
2395
+ free(x);
2396
+ free(weighted_C);
2397
+ free(nonzero);
2398
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2399
+ free(f[i].alpha);
2400
+ free(f);
2401
+ free(nz_count);
2402
+ free(nz_start);
2403
+ }
2404
+ free(newprob.x);
2405
+ free(newprob.y);
2406
+ free(newprob.W);
2407
+ return model;
2408
+ }
2409
+
2410
+ // Stratified cross validation
2411
+ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2412
+ {
2413
+ int i;
2414
+ int *fold_start = Malloc(int,nr_fold+1);
2415
+ int l = prob->l;
2416
+ int *perm = Malloc(int,l);
2417
+ int nr_class;
2418
+
2419
+ // stratified cross-validation may not reproduce the leave-one-out rate:
2420
+ // splitting each class into l folds can leave some folds with zero elements
2421
+ if((param->svm_type == C_SVC ||
2422
+ param->svm_type == NU_SVC) && nr_fold < l)
2423
+ {
2424
+ int *start = NULL;
2425
+ int *label = NULL;
2426
+ int *count = NULL;
2427
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2428
+
2429
+ // randomly shuffle within each class, then group the data by fold using the array perm
2430
+ int *fold_count = Malloc(int,nr_fold);
2431
+ int c;
2432
+ int *index = Malloc(int,l);
2433
+ for(i=0;i<l;i++)
2434
+ index[i]=perm[i];
2435
+ for (c=0; c<nr_class; c++)
2436
+ for(i=0;i<count[c];i++)
2437
+ {
2438
+ int j = i+rand()%(count[c]-i);
2439
+ swap(index[start[c]+j],index[start[c]+i]);
2440
+ }
2441
+ for(i=0;i<nr_fold;i++)
2442
+ {
2443
+ fold_count[i] = 0;
2444
+ for (c=0; c<nr_class;c++)
2445
+ fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2446
+ }
2447
+ fold_start[0]=0;
2448
+ for (i=1;i<=nr_fold;i++)
2449
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2450
+ for (c=0; c<nr_class;c++)
2451
+ for(i=0;i<nr_fold;i++)
2452
+ {
2453
+ int begin = start[c]+i*count[c]/nr_fold;
2454
+ int end = start[c]+(i+1)*count[c]/nr_fold;
2455
+ for(int j=begin;j<end;j++)
2456
+ {
2457
+ perm[fold_start[i]] = index[j];
2458
+ fold_start[i]++;
2459
+ }
2460
+ }
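+ // fold_start was advanced while filling perm above; rebuild it from fold_count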
2461
+ fold_start[0]=0;
2462
+ for (i=1;i<=nr_fold;i++)
2463
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2464
+ free(start);
2465
+ free(label);
2466
+ free(count);
2467
+ free(index);
2468
+ free(fold_count);
2469
+ }
2470
+ else
2471
+ {
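+ // regression, one-class, or nr_fold >= l: plain random permutation split into (nearly) equal folds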
2472
+ for(i=0;i<l;i++) perm[i]=i;
2473
+ for(i=0;i<l;i++)
2474
+ {
2475
+ int j = i+rand()%(l-i);
2476
+ swap(perm[i],perm[j]);
2477
+ }
2478
+ for(i=0;i<=nr_fold;i++)
2479
+ fold_start[i]=i*l/nr_fold;
2480
+ }
2481
+
2482
+ for(i=0;i<nr_fold;i++)
2483
+ {
2484
+ int begin = fold_start[i];
2485
+ int end = fold_start[i+1];
2486
+ int j,k;
2487
+ struct svm_problem subprob;
2488
+
2489
+ subprob.l = l-(end-begin);
2490
+ subprob.x = Malloc(struct svm_node*,subprob.l);
2491
+ subprob.y = Malloc(double,subprob.l);
2492
+
2493
+ subprob.W = Malloc(double,subprob.l);
2494
+ k=0;
2495
+ for(j=0;j<begin;j++)
2496
+ {
2497
+ subprob.x[k] = prob->x[perm[j]];
2498
+ subprob.y[k] = prob->y[perm[j]];
2499
+ subprob.W[k] = prob->W[perm[j]];
2500
+ ++k;
2501
+ }
2502
+ for(j=end;j<l;j++)
2503
+ {
2504
+ subprob.x[k] = prob->x[perm[j]];
2505
+ subprob.y[k] = prob->y[perm[j]];
2506
+ subprob.W[k] = prob->W[perm[j]];
2507
+ ++k;
2508
+ }
2509
+ struct svm_model *submodel = svm_train(&subprob,param);
2510
+ if(param->probability &&
2511
+ (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2512
+ {
2513
+ double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2514
+ for(j=begin;j<end;j++)
2515
+ target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2516
+ free(prob_estimates);
2517
+ }
2518
+ else
2519
+ for(j=begin;j<end;j++)
2520
+ target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2521
+ svm_free_and_destroy_model(&submodel);
2522
+ free(subprob.x);
2523
+ free(subprob.y);
2524
+ free(subprob.W);
2525
+ }
2526
+ free(fold_start);
2527
+ free(perm);
2528
+ }
2529
+
2530
+
2531
+ int svm_get_svm_type(const svm_model *model)
2532
+ {
2533
+ return model->param.svm_type;
2534
+ }
2535
+
2536
+ int svm_get_nr_class(const svm_model *model)
2537
+ {
2538
+ return model->nr_class;
2539
+ }
2540
+
2541
+ void svm_get_labels(const svm_model *model, int* label)
2542
+ {
2543
+ if (model->label != NULL)
2544
+ for(int i=0;i<model->nr_class;i++)
2545
+ label[i] = model->label[i];
2546
+ }
2547
+
2548
+ double svm_get_svr_probability(const svm_model *model)
2549
+ {
2550
+ if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2551
+ model->probA!=NULL)
2552
+ return model->probA[0];
2553
+ else
2554
+ {
2555
+ fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2556
+ return 0;
2557
+ }
2558
+ }
2559
+
2560
+ double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2561
+ {
2562
+ int i;
2563
+ if(model->param.svm_type == ONE_CLASS ||
2564
+ model->param.svm_type == EPSILON_SVR ||
2565
+ model->param.svm_type == NU_SVR)
2566
+ {
2567
+ double *sv_coef = model->sv_coef[0];
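+ // single decision value: sum_i sv_coef[i] * K(x, SV[i]) - rho[0]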
2568
+ double sum = 0;
2569
+ for(i=0;i<model->l;i++)
2570
+ sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2571
+ sum -= model->rho[0];
2572
+ *dec_values = sum;
2573
+
2574
+ if(model->param.svm_type == ONE_CLASS)
2575
+ return (sum>0)?1:-1;
2576
+ else
2577
+ return sum;
2578
+ }
2579
+ else
2580
+ {
2581
+ int nr_class = model->nr_class;
2582
+ int l = model->l;
2583
+
2584
+ double *kvalue = Malloc(double,l);
2585
+ for(i=0;i<l;i++)
2586
+ kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2587
+
2588
+ int *start = Malloc(int,nr_class);
2589
+ start[0] = 0;
2590
+ for(i=1;i<nr_class;i++)
2591
+ start[i] = start[i-1]+model->nSV[i-1];
2592
+
2593
+ int *vote = Malloc(int,nr_class);
2594
+ for(i=0;i<nr_class;i++)
2595
+ vote[i] = 0;
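+ // one-vs-one voting: each pairwise decision value casts a vote for one of its two classes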
2596
+
2597
+ int p=0;
2598
+ for(i=0;i<nr_class;i++)
2599
+ for(int j=i+1;j<nr_class;j++)
2600
+ {
2601
+ double sum = 0;
2602
+ int si = start[i];
2603
+ int sj = start[j];
2604
+ int ci = model->nSV[i];
2605
+ int cj = model->nSV[j];
2606
+
2607
+ int k;
2608
+ double *coef1 = model->sv_coef[j-1];
2609
+ double *coef2 = model->sv_coef[i];
2610
+ for(k=0;k<ci;k++)
2611
+ sum += coef1[si+k] * kvalue[si+k];
2612
+ for(k=0;k<cj;k++)
2613
+ sum += coef2[sj+k] * kvalue[sj+k];
2614
+ sum -= model->rho[p];
2615
+ dec_values[p] = sum;
2616
+
2617
+ if(dec_values[p] > 0)
2618
+ ++vote[i];
2619
+ else
2620
+ ++vote[j];
2621
+ p++;
2622
+ }
2623
+
2624
+ int vote_max_idx = 0;
2625
+ for(i=1;i<nr_class;i++)
2626
+ if(vote[i] > vote[vote_max_idx])
2627
+ vote_max_idx = i;
2628
+
2629
+ free(kvalue);
2630
+ free(start);
2631
+ free(vote);
2632
+ return model->label[vote_max_idx];
2633
+ }
2634
+ }
2635
+
2636
+ double svm_predict(const svm_model *model, const svm_node *x)
2637
+ {
2638
+ int nr_class = model->nr_class;
2639
+ double *dec_values;
2640
+ if(model->param.svm_type == ONE_CLASS ||
2641
+ model->param.svm_type == EPSILON_SVR ||
2642
+ model->param.svm_type == NU_SVR)
2643
+ dec_values = Malloc(double, 1);
2644
+ else
2645
+ dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2646
+ double pred_result = svm_predict_values(model, x, dec_values);
2647
+ free(dec_values);
2648
+ return pred_result;
2649
+ }
2650
+
2651
+ double svm_predict_probability(
2652
+ const svm_model *model, const svm_node *x, double *prob_estimates)
2653
+ {
2654
+ if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2655
+ model->probA!=NULL && model->probB!=NULL)
2656
+ {
2657
+ int i;
2658
+ int nr_class = model->nr_class;
2659
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2660
+ svm_predict_values(model, x, dec_values);
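+ // map each pairwise decision value to a probability with the fitted sigmoid, then couple the pairs into per-class probabilities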
2661
+
2662
+ double min_prob=1e-7;
2663
+ double **pairwise_prob=Malloc(double *,nr_class);
2664
+ for(i=0;i<nr_class;i++)
2665
+ pairwise_prob[i]=Malloc(double,nr_class);
2666
+ int k=0;
2667
+ for(i=0;i<nr_class;i++)
2668
+ for(int j=i+1;j<nr_class;j++)
2669
+ {
2670
+ pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2671
+ pairwise_prob[j][i]=1-pairwise_prob[i][j];
2672
+ k++;
2673
+ }
2674
+ multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2675
+
2676
+ int prob_max_idx = 0;
2677
+ for(i=1;i<nr_class;i++)
2678
+ if(prob_estimates[i] > prob_estimates[prob_max_idx])
2679
+ prob_max_idx = i;
2680
+ for(i=0;i<nr_class;i++)
2681
+ free(pairwise_prob[i]);
2682
+ free(dec_values);
2683
+ free(pairwise_prob);
2684
+ return model->label[prob_max_idx];
2685
+ }
2686
+ else
2687
+ return svm_predict(model, x);
2688
+ }
2689
+
2690
+ static const char *svm_type_table[] =
2691
+ {
2692
+ "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2693
+ };
2694
+
2695
+ static const char *kernel_type_table[]=
2696
+ {
2697
+ "linear","polynomial","rbf","sigmoid","precomputed",NULL
2698
+ };
2699
+
2700
+ int svm_save_model(const char *model_file_name, const svm_model *model)
2701
+ {
2702
+ FILE *fp = fopen(model_file_name,"w");
2703
+ if(fp==NULL) return -1;
2704
+
2705
+ char *old_locale = strdup(setlocale(LC_ALL, NULL));
2706
+ setlocale(LC_ALL, "C");
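+ // write numbers in the "C" locale so the decimal separator is always '.'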
2707
+
2708
+ const svm_parameter& param = model->param;
2709
+
2710
+ fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2711
+ fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2712
+
2713
+ if(param.kernel_type == POLY)
2714
+ fprintf(fp,"degree %d\n", param.degree);
2715
+
2716
+ if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2717
+ fprintf(fp,"gamma %g\n", param.gamma);
2718
+
2719
+ if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2720
+ fprintf(fp,"coef0 %g\n", param.coef0);
2721
+
2722
+ int nr_class = model->nr_class;
2723
+ int l = model->l;
2724
+ fprintf(fp, "nr_class %d\n", nr_class);
2725
+ fprintf(fp, "total_sv %d\n",l);
2726
+
2727
+ {
2728
+ fprintf(fp, "rho");
2729
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2730
+ fprintf(fp," %g",model->rho[i]);
2731
+ fprintf(fp, "\n");
2732
+ }
2733
+
2734
+ if(model->label)
2735
+ {
2736
+ fprintf(fp, "label");
2737
+ for(int i=0;i<nr_class;i++)
2738
+ fprintf(fp," %d",model->label[i]);
2739
+ fprintf(fp, "\n");
2740
+ }
2741
+
2742
+ if(model->probA) // regression has probA only
2743
+ {
2744
+ fprintf(fp, "probA");
2745
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2746
+ fprintf(fp," %g",model->probA[i]);
2747
+ fprintf(fp, "\n");
2748
+ }
2749
+ if(model->probB)
2750
+ {
2751
+ fprintf(fp, "probB");
2752
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2753
+ fprintf(fp," %g",model->probB[i]);
2754
+ fprintf(fp, "\n");
2755
+ }
2756
+
2757
+ if(model->nSV)
2758
+ {
2759
+ fprintf(fp, "nr_sv");
2760
+ for(int i=0;i<nr_class;i++)
2761
+ fprintf(fp," %d",model->nSV[i]);
2762
+ fprintf(fp, "\n");
2763
+ }
2764
+
2765
+ fprintf(fp, "SV\n");
2766
+ const double * const *sv_coef = model->sv_coef;
2767
+ const svm_node * const *SV = model->SV;
2768
+
2769
+ for(int i=0;i<l;i++)
2770
+ {
2771
+ for(int j=0;j<nr_class-1;j++)
2772
+ fprintf(fp, "%.16g ",sv_coef[j][i]);
2773
+
2774
+ const svm_node *p = SV[i];
2775
+
2776
+ if(param.kernel_type == PRECOMPUTED)
2777
+ fprintf(fp,"0:%d ",(int)(p->value));
2778
+ else
2779
+ while(p->index != -1)
2780
+ {
2781
+ fprintf(fp,"%d:%.8g ",p->index,p->value);
2782
+ p++;
2783
+ }
2784
+ fprintf(fp, "\n");
2785
+ }
2786
+
2787
+ setlocale(LC_ALL, old_locale);
2788
+ free(old_locale);
2789
+
2790
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2791
+ else return 0;
2792
+ }
2793
+
2794
+ static char *line = NULL;
2795
+ static int max_line_len;
2796
+
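+ // read one line, doubling the buffer until a terminating newline (or EOF) is found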
2797
+ static char* readline(FILE *input)
2798
+ {
2799
+ int len;
2800
+
2801
+ if(fgets(line,max_line_len,input) == NULL)
2802
+ return NULL;
2803
+
2804
+ while(strrchr(line,'\n') == NULL)
2805
+ {
2806
+ max_line_len *= 2;
2807
+ line = (char *) realloc(line,max_line_len);
2808
+ len = (int) strlen(line);
2809
+ if(fgets(line+len,max_line_len-len,input) == NULL)
2810
+ break;
2811
+ }
2812
+ return line;
2813
+ }
2814
+
2815
+ svm_model *svm_load_model(const char *model_file_name)
2816
+ {
2817
+ FILE *fp = fopen(model_file_name,"rb");
2818
+ if(fp==NULL) return NULL;
2819
+
2820
+ char *old_locale = strdup(setlocale(LC_ALL, NULL));
2821
+ setlocale(LC_ALL, "C");
2822
+
2823
+ // read parameters
2824
+
2825
+ svm_model *model = Malloc(svm_model,1);
2826
+ svm_parameter& param = model->param;
2827
+ model->rho = NULL;
2828
+ model->probA = NULL;
2829
+ model->probB = NULL;
2830
+ model->label = NULL;
2831
+ model->nSV = NULL;
2832
+
2833
+ char cmd[81];
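+ // parse header lines until the "SV" keyword is reached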
2834
+ while(1)
2835
+ {
2836
+ fscanf(fp,"%80s",cmd);
2837
+
2838
+ if(strcmp(cmd,"svm_type")==0)
2839
+ {
2840
+ fscanf(fp,"%80s",cmd);
2841
+ int i;
2842
+ for(i=0;svm_type_table[i];i++)
2843
+ {
2844
+ if(strcmp(svm_type_table[i],cmd)==0)
2845
+ {
2846
+ param.svm_type=i;
2847
+ break;
2848
+ }
2849
+ }
2850
+ if(svm_type_table[i] == NULL)
2851
+ {
2852
+ fprintf(stderr,"unknown svm type.\n");
2853
+
2854
+ setlocale(LC_ALL, old_locale);
2855
+ free(old_locale);
2856
+ free(model->rho);
2857
+ free(model->label);
2858
+ free(model->nSV);
2859
+ free(model);
2860
+ return NULL;
2861
+ }
2862
+ }
2863
+ else if(strcmp(cmd,"kernel_type")==0)
2864
+ {
2865
+ fscanf(fp,"%80s",cmd);
2866
+ int i;
2867
+ for(i=0;kernel_type_table[i];i++)
2868
+ {
2869
+ if(strcmp(kernel_type_table[i],cmd)==0)
2870
+ {
2871
+ param.kernel_type=i;
2872
+ break;
2873
+ }
2874
+ }
2875
+ if(kernel_type_table[i] == NULL)
2876
+ {
2877
+ fprintf(stderr,"unknown kernel function.\n");
2878
+
2879
+ setlocale(LC_ALL, old_locale);
2880
+ free(old_locale);
2881
+ free(model->rho);
2882
+ free(model->label);
2883
+ free(model->nSV);
2884
+ free(model);
2885
+ return NULL;
2886
+ }
2887
+ }
2888
+ else if(strcmp(cmd,"degree")==0)
2889
+ fscanf(fp,"%d",&param.degree);
2890
+ else if(strcmp(cmd,"gamma")==0)
2891
+ fscanf(fp,"%lf",&param.gamma);
2892
+ else if(strcmp(cmd,"coef0")==0)
2893
+ fscanf(fp,"%lf",&param.coef0);
2894
+ else if(strcmp(cmd,"nr_class")==0)
2895
+ fscanf(fp,"%d",&model->nr_class);
2896
+ else if(strcmp(cmd,"total_sv")==0)
2897
+ fscanf(fp,"%d",&model->l);
2898
+ else if(strcmp(cmd,"rho")==0)
2899
+ {
2900
+ int n = model->nr_class * (model->nr_class-1)/2;
2901
+ model->rho = Malloc(double,n);
2902
+ for(int i=0;i<n;i++)
2903
+ fscanf(fp,"%lf",&model->rho[i]);
2904
+ }
2905
+ else if(strcmp(cmd,"label")==0)
2906
+ {
2907
+ int n = model->nr_class;
2908
+ model->label = Malloc(int,n);
2909
+ for(int i=0;i<n;i++)
2910
+ fscanf(fp,"%d",&model->label[i]);
2911
+ }
2912
+ else if(strcmp(cmd,"probA")==0)
2913
+ {
2914
+ int n = model->nr_class * (model->nr_class-1)/2;
2915
+ model->probA = Malloc(double,n);
2916
+ for(int i=0;i<n;i++)
2917
+ fscanf(fp,"%lf",&model->probA[i]);
2918
+ }
2919
+ else if(strcmp(cmd,"probB")==0)
2920
+ {
2921
+ int n = model->nr_class * (model->nr_class-1)/2;
2922
+ model->probB = Malloc(double,n);
2923
+ for(int i=0;i<n;i++)
2924
+ fscanf(fp,"%lf",&model->probB[i]);
2925
+ }
2926
+ else if(strcmp(cmd,"nr_sv")==0)
2927
+ {
2928
+ int n = model->nr_class;
2929
+ model->nSV = Malloc(int,n);
2930
+ for(int i=0;i<n;i++)
2931
+ fscanf(fp,"%d",&model->nSV[i]);
2932
+ }
2933
+ else if(strcmp(cmd,"SV")==0)
2934
+ {
2935
+ while(1)
2936
+ {
2937
+ int c = getc(fp);
2938
+ if(c==EOF || c=='\n') break;
2939
+ }
2940
+ break;
2941
+ }
2942
+ else
2943
+ {
2944
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2945
+
2946
+ setlocale(LC_ALL, old_locale);
2947
+ free(old_locale);
2948
+ free(model->rho);
2949
+ free(model->label);
2950
+ free(model->nSV);
2951
+ free(model);
2952
+ return NULL;
2953
+ }
2954
+ }
2955
+
2956
+ // read sv_coef and SV
2957
+
2958
+ int elements = 0;
2959
+ long pos = ftell(fp);
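+ // first pass over the SV lines: count the svm_node elements so x_space can be allocated in one block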
2960
+
2961
+ max_line_len = 1024;
2962
+ line = Malloc(char,max_line_len);
2963
+ char *p,*endptr,*idx,*val;
2964
+
2965
+ while(readline(fp)!=NULL)
2966
+ {
2967
+ p = strtok(line,":");
2968
+ while(1)
2969
+ {
2970
+ p = strtok(NULL,":");
2971
+ if(p == NULL)
2972
+ break;
2973
+ ++elements;
2974
+ }
2975
+ }
2976
+ elements += model->l;
2977
+
2978
+ fseek(fp,pos,SEEK_SET);
2979
+
2980
+ int m = model->nr_class - 1;
2981
+ int l = model->l;
2982
+ model->sv_coef = Malloc(double *,m);
2983
+ int i;
2984
+ for(i=0;i<m;i++)
2985
+ model->sv_coef[i] = Malloc(double,l);
2986
+ model->SV = Malloc(svm_node*,l);
2987
+ svm_node *x_space = NULL;
2988
+ if(l>0) x_space = Malloc(svm_node,elements);
2989
+
2990
+ int j=0;
2991
+ for(i=0;i<l;i++)
2992
+ {
2993
+ readline(fp);
2994
+ model->SV[i] = &x_space[j];
2995
+
2996
+ p = strtok(line, " \t");
2997
+ model->sv_coef[0][i] = strtod(p,&endptr);
2998
+ for(int k=1;k<m;k++)
2999
+ {
3000
+ p = strtok(NULL, " \t");
3001
+ model->sv_coef[k][i] = strtod(p,&endptr);
3002
+ }
3003
+
3004
+ while(1)
3005
+ {
3006
+ idx = strtok(NULL, ":");
3007
+ val = strtok(NULL, " \t");
3008
+
3009
+ if(val == NULL)
3010
+ break;
3011
+ x_space[j].index = (int) strtol(idx,&endptr,10);
3012
+ x_space[j].value = strtod(val,&endptr);
3013
+
3014
+ ++j;
3015
+ }
3016
+ x_space[j++].index = -1;
3017
+ }
3018
+ free(line);
3019
+
3020
+ setlocale(LC_ALL, old_locale);
3021
+ free(old_locale);
3022
+
3023
+ if (ferror(fp) != 0 || fclose(fp) != 0)
3024
+ return NULL;
3025
+
3026
+ model->free_sv = 1; // SV nodes point into the x_space allocated above, so the model owns (and must free) them
3027
+ return model;
3028
+ }
3029
+
3030
+ void svm_free_model_content(svm_model* model_ptr)
3031
+ {
3032
+ if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3033
+ free((void *)(model_ptr->SV[0]));
3034
+ if(model_ptr->sv_coef)
3035
+ {
3036
+ for(int i=0;i<model_ptr->nr_class-1;i++)
3037
+ free(model_ptr->sv_coef[i]);
3038
+ }
3039
+
3040
+ free(model_ptr->SV);
3041
+ model_ptr->SV = NULL;
3042
+
3043
+ free(model_ptr->sv_coef);
3044
+ model_ptr->sv_coef = NULL;
3045
+
3046
+ free(model_ptr->rho);
3047
+ model_ptr->rho = NULL;
3048
+
3049
+ free(model_ptr->label);
3050
+ model_ptr->label= NULL;
3051
+
3052
+ free(model_ptr->probA);
3053
+ model_ptr->probA = NULL;
3054
+
3055
+ free(model_ptr->probB);
3056
+ model_ptr->probB= NULL;
3057
+
3058
+ free(model_ptr->nSV);
3059
+ model_ptr->nSV = NULL;
3060
+ }
3061
+
3062
+ void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
3063
+ {
3064
+ if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3065
+ {
3066
+ svm_free_model_content(*model_ptr_ptr);
3067
+ free(*model_ptr_ptr);
3068
+ *model_ptr_ptr = NULL;
3069
+ }
3070
+ }
3071
+
3072
+ void svm_destroy_param(svm_parameter* param)
3073
+ {
3074
+ free(param->weight_label);
3075
+ free(param->weight);
3076
+ }
3077
+
3078
+ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
3079
+ {
3080
+ // svm_type
3081
+
3082
+ int svm_type = param->svm_type;
3083
+ if(svm_type != C_SVC &&
3084
+ svm_type != NU_SVC &&
3085
+ svm_type != ONE_CLASS &&
3086
+ svm_type != EPSILON_SVR &&
3087
+ svm_type != NU_SVR)
3088
+ return "unknown svm type";
3089
+
3090
+ // kernel_type, degree
3091
+
3092
+ int kernel_type = param->kernel_type;
3093
+ if(kernel_type != LINEAR &&
3094
+ kernel_type != POLY &&
3095
+ kernel_type != RBF &&
3096
+ kernel_type != SIGMOID &&
3097
+ kernel_type != PRECOMPUTED)
3098
+ return "unknown kernel type";
3099
+
3100
+ if(param->gamma < 0)
3101
+ return "gamma < 0";
3102
+
3103
+ if(param->degree < 0)
3104
+ return "degree of polynomial kernel < 0";
3105
+
3106
+ // cache_size,eps,C,nu,p,shrinking
3107
+
3108
+ if(param->cache_size <= 0)
3109
+ return "cache_size <= 0";
3110
+
3111
+ if(param->eps <= 0)
3112
+ return "eps <= 0";
3113
+
3114
+ if(svm_type == C_SVC ||
3115
+ svm_type == EPSILON_SVR ||
3116
+ svm_type == NU_SVR)
3117
+ if(param->C <= 0)
3118
+ return "C <= 0";
3119
+
3120
+ if(svm_type == NU_SVC ||
3121
+ svm_type == ONE_CLASS ||
3122
+ svm_type == NU_SVR)
3123
+ if(param->nu <= 0 || param->nu > 1)
3124
+ return "nu <= 0 or nu > 1";
3125
+
3126
+ if(svm_type == EPSILON_SVR)
3127
+ if(param->p < 0)
3128
+ return "p < 0";
3129
+
3130
+ if(param->shrinking != 0 &&
3131
+ param->shrinking != 1)
3132
+ return "shrinking != 0 and shrinking != 1";
3133
+
3134
+ if(param->probability != 0 &&
3135
+ param->probability != 1)
3136
+ return "probability != 0 and probability != 1";
3137
+
3138
+ if(param->probability == 1 &&
3139
+ svm_type == ONE_CLASS)
3140
+ return "one-class SVM probability output not supported yet";
3141
+
3142
+
3143
+ // check whether nu-svc is feasible
3144
+
3145
+ if(svm_type == NU_SVC)
3146
+ {
3147
+ int l = prob->l;
3148
+ int max_nr_class = 16;
3149
+ int nr_class = 0;
3150
+ int *label = Malloc(int,max_nr_class);
3151
+ double *count = Malloc(double,max_nr_class);
3152
+
3153
+ int i;
3154
+ for(i=0;i<l;i++)
3155
+ {
3156
+ int this_label = (int)prob->y[i];
3157
+ int j;
3158
+ for(j=0;j<nr_class;j++)
3159
+ if(this_label == label[j])
3160
+ {
3161
+ count[j] += prob->W[i];
3162
+ break;
3163
+ }
3164
+ if(j == nr_class)
3165
+ {
3166
+ if(nr_class == max_nr_class)
3167
+ {
3168
+ max_nr_class *= 2;
3169
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
3170
+ count = (double *)realloc(count,max_nr_class*sizeof(double));
3171
+ }
3172
+ label[nr_class] = this_label;
3173
+ count[nr_class] = prob->W[i];
3174
+ ++nr_class;
3175
+ }
3176
+ }
3177
+
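+ // nu is feasible only when nu*(n1+n2)/2 <= min(n1,n2) for every pair of classes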
3178
+ for(i=0;i<nr_class;i++)
3179
+ {
3180
+ double n1 = count[i];
3181
+ for(int j=i+1;j<nr_class;j++)
3182
+ {
3183
+ double n2 = count[j];
3184
+ if(param->nu*(n1+n2)/2 > min(n1,n2))
3185
+ {
3186
+ free(label);
3187
+ free(count);
3188
+ return "specified nu is infeasible";
3189
+ }
3190
+ }
3191
+ }
3192
+ free(label);
3193
+ free(count);
3194
+ }
3195
+
3196
+ return NULL;
3197
+ }
3198
+
3199
+ int svm_check_probability_model(const svm_model *model)
3200
+ {
3201
+ return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3202
+ model->probA!=NULL && model->probB!=NULL) ||
3203
+ ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3204
+ model->probA!=NULL);
3205
+ }
3206
+
3207
+ void svm_set_print_string_function(void (*print_func)(const char *))
3208
+ {
3209
+ if(print_func == NULL)
3210
+ svm_print_string = &print_string_stdout;
3211
+ else
3212
+ svm_print_string = print_func;
3213
+ }