hexgnu-libsvm-ruby-swig 0.1.0

data/ext/svm.cpp ADDED
@@ -0,0 +1,3072 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <ctype.h>
5
+ #include <float.h>
6
+ #include <string.h>
7
+ #include <stdarg.h>
8
+ #include "svm.h"
9
+ int libsvm_version = LIBSVM_VERSION;
10
+ typedef float Qfloat;
11
+ typedef signed char schar;
12
+ #ifndef min
13
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
14
+ #endif
15
+ #ifndef max
16
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
17
+ #endif
18
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
19
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
20
+ {
21
+ dst = new T[n];
22
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
23
+ }
24
+ static inline double powi(double base, int times)
25
+ {
26
+ double tmp = base, ret = 1.0;
27
+
28
+ for(int t=times; t>0; t/=2)
29
+ {
30
+ if(t%2==1) ret*=tmp;
31
+ tmp = tmp * tmp;
32
+ }
33
+ return ret;
34
+ }
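// Editorial note (illustrative, not part of svm.cpp): powi() is exponentiation by
// squaring. For powi(2.0, 10) the loop squares tmp four times (4, 16, 256, 65536)
// and multiplies it into ret only when the remaining exponent is odd (at t=5 and
// t=1), giving ret = 4*256 = 1024 = 2^10 with 6 multiplications instead of 9.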
35
+ #define INF HUGE_VAL
36
+ #define TAU 1e-12
37
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
38
+
39
+ static void print_string_stdout(const char *s)
40
+ {
41
+ fputs(s,stdout);
42
+ fflush(stdout);
43
+ }
44
+ void (*svm_print_string) (const char *) = &print_string_stdout;
45
+ #if 1
46
+ int info_on = 0;
47
+ static void info(const char *fmt,...)
48
+ {
49
+ char buf[BUFSIZ];
50
+ va_list ap;
51
+ if (info_on==1) {
52
+ va_start(ap,fmt);
53
+ vsprintf(buf,fmt,ap);
54
+ va_end(ap);
55
+ (*svm_print_string)(buf);
56
+ }
57
+ }
58
+ #else
59
+ static void info(const char *fmt,...) {}
60
+ #endif
61
+
62
+ //
63
+ // Kernel Cache
64
+ //
65
+ // l is the number of total data items
66
+ // size is the cache size limit in bytes
67
+ //
68
+ class Cache
69
+ {
70
+ public:
71
+ Cache(int l,long int size);
72
+ ~Cache();
73
+
74
+ // request data [0,len)
75
+ // return some position p where [p,len) need to be filled
76
+ // (p >= len if nothing needs to be filled)
77
+ int get_data(const int index, Qfloat **data, int len);
78
+ void swap_index(int i, int j);
79
+ private:
80
+ int l;
81
+ long int size;
82
+ struct head_t
83
+ {
84
+ head_t *prev, *next; // a circular list
85
+ Qfloat *data;
86
+ int len; // data[0,len) is cached in this entry
87
+ };
88
+
89
+ head_t *head;
90
+ head_t lru_head;
91
+ void lru_delete(head_t *h);
92
+ void lru_insert(head_t *h);
93
+ };
94
+
95
+ Cache::Cache(int l_,long int size_):l(l_),size(size_)
96
+ {
97
+ head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
98
+ size /= sizeof(Qfloat);
99
+ size -= l * sizeof(head_t) / sizeof(Qfloat);
100
+ size = max(size, 2 * (long int) l); // cache must be large enough for two columns
101
+ lru_head.next = lru_head.prev = &lru_head;
102
+ }
103
+
104
+ Cache::~Cache()
105
+ {
106
+ for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
107
+ free(h->data);
108
+ free(head);
109
+ }
110
+
111
+ void Cache::lru_delete(head_t *h)
112
+ {
113
+ // delete from current location
114
+ h->prev->next = h->next;
115
+ h->next->prev = h->prev;
116
+ }
117
+
118
+ void Cache::lru_insert(head_t *h)
119
+ {
120
+ // insert to last position
121
+ h->next = &lru_head;
122
+ h->prev = lru_head.prev;
123
+ h->prev->next = h;
124
+ h->next->prev = h;
125
+ }
126
+
127
+ int Cache::get_data(const int index, Qfloat **data, int len)
128
+ {
129
+ head_t *h = &head[index];
130
+ if(h->len) lru_delete(h);
131
+ int more = len - h->len;
132
+
133
+ if(more > 0)
134
+ {
135
+ // free old space
136
+ while(size < more)
137
+ {
138
+ head_t *old = lru_head.next;
139
+ lru_delete(old);
140
+ free(old->data);
141
+ size += old->len;
142
+ old->data = 0;
143
+ old->len = 0;
144
+ }
145
+
146
+ // allocate new space
147
+ h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
148
+ size -= more;
149
+ swap(h->len,len);
150
+ }
151
+
152
+ lru_insert(h);
153
+ *data = h->data;
154
+ return len;
155
+ }
156
+
157
+ void Cache::swap_index(int i, int j)
158
+ {
159
+ if(i==j) return;
160
+
161
+ if(head[i].len) lru_delete(&head[i]);
162
+ if(head[j].len) lru_delete(&head[j]);
163
+ swap(head[i].data,head[j].data);
164
+ swap(head[i].len,head[j].len);
165
+ if(head[i].len) lru_insert(&head[i]);
166
+ if(head[j].len) lru_insert(&head[j]);
167
+
168
+ if(i>j) swap(i,j);
169
+ for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
170
+ {
171
+ if(h->len > i)
172
+ {
173
+ if(h->len > j)
174
+ swap(h->data[i],h->data[j]);
175
+ else
176
+ {
177
+ // give up
178
+ lru_delete(h);
179
+ free(h->data);
180
+ size += h->len;
181
+ h->data = 0;
182
+ h->len = 0;
183
+ }
184
+ }
185
+ }
186
+ }
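// Editorial sketch (illustrative, not part of svm.cpp): the intended calling pattern
// for Cache::get_data is "request a column, then fill only the uncached tail", as the
// Q-matrix classes below do. compute_entry is a hypothetical stand-in for a kernel
// evaluation.
static void fill_column_example(Cache *cache, int i, int len,
				Qfloat (*compute_entry)(int,int))
{
	Qfloat *data;
	int start = cache->get_data(i,&data,len);	// [0,start) is already cached
	for(int j=start;j<len;j++)			// fill only [start,len)
		data[j] = compute_entry(i,j);
}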
187
+
188
+ //
189
+ // Kernel evaluation
190
+ //
191
+ // the static method k_function is for doing single kernel evaluation
192
+ // the constructor of Kernel prepares to calculate the l*l kernel matrix
193
+ // the member function get_Q is for getting one column from the Q Matrix
194
+ //
195
+ class QMatrix {
196
+ public:
197
+ virtual Qfloat *get_Q(int column, int len) const = 0;
198
+ virtual Qfloat *get_QD() const = 0;
199
+ virtual void swap_index(int i, int j) const = 0;
200
+ virtual ~QMatrix() {}
201
+ };
202
+
203
+ class Kernel: public QMatrix {
204
+ public:
205
+ Kernel(int l, svm_node * const * x, const svm_parameter& param);
206
+ virtual ~Kernel();
207
+
208
+ static double k_function(const svm_node *x, const svm_node *y,
209
+ const svm_parameter& param);
210
+ virtual Qfloat *get_Q(int column, int len) const = 0;
211
+ virtual Qfloat *get_QD() const = 0;
212
+ virtual void swap_index(int i, int j) const // not so const...
213
+ {
214
+ swap(x[i],x[j]);
215
+ if(x_square) swap(x_square[i],x_square[j]);
216
+ }
217
+ protected:
218
+
219
+ double (Kernel::*kernel_function)(int i, int j) const;
220
+
221
+ private:
222
+ const svm_node **x;
223
+ double *x_square;
224
+
225
+ // svm_parameter
226
+ const int kernel_type;
227
+ const int degree;
228
+ const double gamma;
229
+ const double coef0;
230
+
231
+ static double dot(const svm_node *px, const svm_node *py);
232
+ double kernel_linear(int i, int j) const
233
+ {
234
+ return dot(x[i],x[j]);
235
+ }
236
+ double kernel_poly(int i, int j) const
237
+ {
238
+ return powi(gamma*dot(x[i],x[j])+coef0,degree);
239
+ }
240
+ double kernel_rbf(int i, int j) const
241
+ {
242
+ return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
243
+ }
244
+ double kernel_sigmoid(int i, int j) const
245
+ {
246
+ return tanh(gamma*dot(x[i],x[j])+coef0);
247
+ }
248
+ double kernel_precomputed(int i, int j) const
249
+ {
250
+ return x[i][(int)(x[j][0].value)].value;
251
+ }
252
+ };
253
+
254
+ Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
255
+ :kernel_type(param.kernel_type), degree(param.degree),
256
+ gamma(param.gamma), coef0(param.coef0)
257
+ {
258
+ switch(kernel_type)
259
+ {
260
+ case LINEAR:
261
+ kernel_function = &Kernel::kernel_linear;
262
+ break;
263
+ case POLY:
264
+ kernel_function = &Kernel::kernel_poly;
265
+ break;
266
+ case RBF:
267
+ kernel_function = &Kernel::kernel_rbf;
268
+ break;
269
+ case SIGMOID:
270
+ kernel_function = &Kernel::kernel_sigmoid;
271
+ break;
272
+ case PRECOMPUTED:
273
+ kernel_function = &Kernel::kernel_precomputed;
274
+ break;
275
+ }
276
+
277
+ clone(x,x_,l);
278
+
279
+ if(kernel_type == RBF)
280
+ {
281
+ x_square = new double[l];
282
+ for(int i=0;i<l;i++)
283
+ x_square[i] = dot(x[i],x[i]);
284
+ }
285
+ else
286
+ x_square = 0;
287
+ }
288
+
289
+ Kernel::~Kernel()
290
+ {
291
+ delete[] x;
292
+ delete[] x_square;
293
+ }
294
+
295
+ double Kernel::dot(const svm_node *px, const svm_node *py)
296
+ {
297
+ double sum = 0;
298
+ while(px->index != -1 && py->index != -1)
299
+ {
300
+ if(px->index == py->index)
301
+ {
302
+ sum += px->value * py->value;
303
+ ++px;
304
+ ++py;
305
+ }
306
+ else
307
+ {
308
+ if(px->index > py->index)
309
+ ++py;
310
+ else
311
+ ++px;
312
+ }
313
+ }
314
+ return sum;
315
+ }
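// Editorial note (illustrative, not part of svm.cpp): dot() merges two index-sorted
// sparse vectors the way a merge step of mergesort does, so its cost is
// O(nnz(px) + nnz(py)); indices present in only one of the vectors contribute
// nothing to the sum.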
316
+
317
+ double Kernel::k_function(const svm_node *x, const svm_node *y,
318
+ const svm_parameter& param)
319
+ {
320
+ switch(param.kernel_type)
321
+ {
322
+ case LINEAR:
323
+ return dot(x,y);
324
+ case POLY:
325
+ return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
326
+ case RBF:
327
+ {
328
+ double sum = 0;
329
+ while(x->index != -1 && y->index !=-1)
330
+ {
331
+ if(x->index == y->index)
332
+ {
333
+ double d = x->value - y->value;
334
+ sum += d*d;
335
+ ++x;
336
+ ++y;
337
+ }
338
+ else
339
+ {
340
+ if(x->index > y->index)
341
+ {
342
+ sum += y->value * y->value;
343
+ ++y;
344
+ }
345
+ else
346
+ {
347
+ sum += x->value * x->value;
348
+ ++x;
349
+ }
350
+ }
351
+ }
352
+
353
+ while(x->index != -1)
354
+ {
355
+ sum += x->value * x->value;
356
+ ++x;
357
+ }
358
+
359
+ while(y->index != -1)
360
+ {
361
+ sum += y->value * y->value;
362
+ ++y;
363
+ }
364
+
365
+ return exp(-param.gamma*sum);
366
+ }
367
+ case SIGMOID:
368
+ return tanh(param.gamma*dot(x,y)+param.coef0);
369
+ case PRECOMPUTED: //x: test (validation), y: SV
370
+ return x[(int)(y->value)].value;
371
+ default:
372
+ return 0; // Unreachable
373
+ }
374
+ }
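// Editorial sketch (illustrative, not part of svm.cpp): k_function evaluates a single
// kernel value on sparse vectors terminated by index = -1.  With gamma = 0.5 the RBF
// value below is exp(-0.5 * ((1-3)^2 + 2^2)) = exp(-4).
static double rbf_example()
{
	svm_node x[] = { {1,1.0}, {2,2.0}, {-1,0.0} };
	svm_node y[] = { {1,3.0}, {-1,0.0} };
	svm_parameter param;
	param.kernel_type = RBF;
	param.gamma = 0.5;
	return Kernel::k_function(x,y,param);
}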
375
+
376
+ // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
377
+ // Solves:
378
+ //
379
+ // min 0.5(\alpha^T Q \alpha) + p^T \alpha
380
+ //
381
+ // y^T \alpha = \delta
382
+ // y_i = +1 or -1
383
+ // 0 <= alpha_i <= Cp for y_i = 1
384
+ // 0 <= alpha_i <= Cn for y_i = -1
385
+ //
386
+ // Given:
387
+ //
388
+ // Q, p, y, Cp, Cn, and an initial feasible point \alpha
389
+ // l is the size of vectors and matrices
390
+ // eps is the stopping tolerance
391
+ //
392
+ // solution will be put in \alpha, objective value will be put in obj
393
+ //
394
+ class Solver {
395
+ public:
396
+ Solver() {};
397
+ virtual ~Solver() {};
398
+
399
+ struct SolutionInfo {
400
+ double obj;
401
+ double rho;
402
+ double upper_bound_p;
403
+ double upper_bound_n;
404
+ double r; // for Solver_NU
405
+ };
406
+
407
+ void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
408
+ double *alpha_, double Cp, double Cn, double eps,
409
+ SolutionInfo* si, int shrinking);
410
+ protected:
411
+ int active_size;
412
+ schar *y;
413
+ double *G; // gradient of objective function
414
+ enum { LOWER_BOUND, UPPER_BOUND, FREE };
415
+ char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
416
+ double *alpha;
417
+ const QMatrix *Q;
418
+ const Qfloat *QD;
419
+ double eps;
420
+ double Cp,Cn;
421
+ double *p;
422
+ int *active_set;
423
+ double *G_bar; // gradient, if we treat free variables as 0
424
+ int l;
425
+ bool unshrink; // XXX
426
+
427
+ double get_C(int i)
428
+ {
429
+ return (y[i] > 0)? Cp : Cn;
430
+ }
431
+ void update_alpha_status(int i)
432
+ {
433
+ if(alpha[i] >= get_C(i))
434
+ alpha_status[i] = UPPER_BOUND;
435
+ else if(alpha[i] <= 0)
436
+ alpha_status[i] = LOWER_BOUND;
437
+ else alpha_status[i] = FREE;
438
+ }
439
+ bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
440
+ bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
441
+ bool is_free(int i) { return alpha_status[i] == FREE; }
442
+ void swap_index(int i, int j);
443
+ void reconstruct_gradient();
444
+ virtual int select_working_set(int &i, int &j);
445
+ virtual double calculate_rho();
446
+ virtual void do_shrinking();
447
+ private:
448
+ bool be_shrunk(int i, double Gmax1, double Gmax2);
449
+ };
450
+
451
+ void Solver::swap_index(int i, int j)
452
+ {
453
+ Q->swap_index(i,j);
454
+ swap(y[i],y[j]);
455
+ swap(G[i],G[j]);
456
+ swap(alpha_status[i],alpha_status[j]);
457
+ swap(alpha[i],alpha[j]);
458
+ swap(p[i],p[j]);
459
+ swap(active_set[i],active_set[j]);
460
+ swap(G_bar[i],G_bar[j]);
461
+ }
462
+
463
+ void Solver::reconstruct_gradient()
464
+ {
465
+ // reconstruct inactive elements of G from G_bar and free variables
466
+
467
+ if(active_size == l) return;
468
+
469
+ int i,j;
470
+ int nr_free = 0;
471
+
472
+ for(j=active_size;j<l;j++)
473
+ G[j] = G_bar[j] + p[j];
474
+
475
+ for(j=0;j<active_size;j++)
476
+ if(is_free(j))
477
+ nr_free++;
478
+
479
+ if(2*nr_free < active_size)
480
+ info("\nWarning: using -h 0 may be faster\n");
481
+
482
+ if (nr_free*l > 2*active_size*(l-active_size))
483
+ {
484
+ for(i=active_size;i<l;i++)
485
+ {
486
+ const Qfloat *Q_i = Q->get_Q(i,active_size);
487
+ for(j=0;j<active_size;j++)
488
+ if(is_free(j))
489
+ G[i] += alpha[j] * Q_i[j];
490
+ }
491
+ }
492
+ else
493
+ {
494
+ for(i=0;i<active_size;i++)
495
+ if(is_free(i))
496
+ {
497
+ const Qfloat *Q_i = Q->get_Q(i,l);
498
+ double alpha_i = alpha[i];
499
+ for(j=active_size;j<l;j++)
500
+ G[j] += alpha_i * Q_i[j];
501
+ }
502
+ }
503
+ }
504
+
505
+ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
506
+ double *alpha_, double Cp, double Cn, double eps,
507
+ SolutionInfo* si, int shrinking)
508
+ {
509
+ this->l = l;
510
+ this->Q = &Q;
511
+ QD=Q.get_QD();
512
+ clone(p, p_,l);
513
+ clone(y, y_,l);
514
+ clone(alpha,alpha_,l);
515
+ this->Cp = Cp;
516
+ this->Cn = Cn;
517
+ this->eps = eps;
518
+ unshrink = false;
519
+
520
+ // initialize alpha_status
521
+ {
522
+ alpha_status = new char[l];
523
+ for(int i=0;i<l;i++)
524
+ update_alpha_status(i);
525
+ }
526
+
527
+ // initialize active set (for shrinking)
528
+ {
529
+ active_set = new int[l];
530
+ for(int i=0;i<l;i++)
531
+ active_set[i] = i;
532
+ active_size = l;
533
+ }
534
+
535
+ // initialize gradient
536
+ {
537
+ G = new double[l];
538
+ G_bar = new double[l];
539
+ int i;
540
+ for(i=0;i<l;i++)
541
+ {
542
+ G[i] = p[i];
543
+ G_bar[i] = 0;
544
+ }
545
+ for(i=0;i<l;i++)
546
+ if(!is_lower_bound(i))
547
+ {
548
+ const Qfloat *Q_i = Q.get_Q(i,l);
549
+ double alpha_i = alpha[i];
550
+ int j;
551
+ for(j=0;j<l;j++)
552
+ G[j] += alpha_i*Q_i[j];
553
+ if(is_upper_bound(i))
554
+ for(j=0;j<l;j++)
555
+ G_bar[j] += get_C(i) * Q_i[j];
556
+ }
557
+ }
558
+
559
+ // optimization step
560
+
561
+ int iter = 0;
562
+ int counter = min(l,1000)+1;
563
+
564
+ while(1)
565
+ {
566
+ // show progress and do shrinking
567
+
568
+ if(--counter == 0)
569
+ {
570
+ counter = min(l,1000);
571
+ if(shrinking) do_shrinking();
572
+ info(".");
573
+ }
574
+
575
+ int i,j;
576
+ if(select_working_set(i,j)!=0)
577
+ {
578
+ // reconstruct the whole gradient
579
+ reconstruct_gradient();
580
+ // reset active set size and check
581
+ active_size = l;
582
+ info("*");
583
+ if(select_working_set(i,j)!=0)
584
+ break;
585
+ else
586
+ counter = 1; // do shrinking next iteration
587
+ }
588
+
589
+ ++iter;
590
+
591
+ // update alpha[i] and alpha[j], handle bounds carefully
592
+
593
+ const Qfloat *Q_i = Q.get_Q(i,active_size);
594
+ const Qfloat *Q_j = Q.get_Q(j,active_size);
595
+
596
+ double C_i = get_C(i);
597
+ double C_j = get_C(j);
598
+
599
+ double old_alpha_i = alpha[i];
600
+ double old_alpha_j = alpha[j];
601
+
602
+ if(y[i]!=y[j])
603
+ {
604
+ double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
605
+ if (quad_coef <= 0)
606
+ quad_coef = TAU;
607
+ double delta = (-G[i]-G[j])/quad_coef;
608
+ double diff = alpha[i] - alpha[j];
609
+ alpha[i] += delta;
610
+ alpha[j] += delta;
611
+
612
+ if(diff > 0)
613
+ {
614
+ if(alpha[j] < 0)
615
+ {
616
+ alpha[j] = 0;
617
+ alpha[i] = diff;
618
+ }
619
+ }
620
+ else
621
+ {
622
+ if(alpha[i] < 0)
623
+ {
624
+ alpha[i] = 0;
625
+ alpha[j] = -diff;
626
+ }
627
+ }
628
+ if(diff > C_i - C_j)
629
+ {
630
+ if(alpha[i] > C_i)
631
+ {
632
+ alpha[i] = C_i;
633
+ alpha[j] = C_i - diff;
634
+ }
635
+ }
636
+ else
637
+ {
638
+ if(alpha[j] > C_j)
639
+ {
640
+ alpha[j] = C_j;
641
+ alpha[i] = C_j + diff;
642
+ }
643
+ }
644
+ }
645
+ else
646
+ {
647
+ double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
648
+ if (quad_coef <= 0)
649
+ quad_coef = TAU;
650
+ double delta = (G[i]-G[j])/quad_coef;
651
+ double sum = alpha[i] + alpha[j];
652
+ alpha[i] -= delta;
653
+ alpha[j] += delta;
654
+
655
+ if(sum > C_i)
656
+ {
657
+ if(alpha[i] > C_i)
658
+ {
659
+ alpha[i] = C_i;
660
+ alpha[j] = sum - C_i;
661
+ }
662
+ }
663
+ else
664
+ {
665
+ if(alpha[j] < 0)
666
+ {
667
+ alpha[j] = 0;
668
+ alpha[i] = sum;
669
+ }
670
+ }
671
+ if(sum > C_j)
672
+ {
673
+ if(alpha[j] > C_j)
674
+ {
675
+ alpha[j] = C_j;
676
+ alpha[i] = sum - C_j;
677
+ }
678
+ }
679
+ else
680
+ {
681
+ if(alpha[i] < 0)
682
+ {
683
+ alpha[i] = 0;
684
+ alpha[j] = sum;
685
+ }
686
+ }
687
+ }
688
+
689
+ // update G
690
+
691
+ double delta_alpha_i = alpha[i] - old_alpha_i;
692
+ double delta_alpha_j = alpha[j] - old_alpha_j;
693
+
694
+ for(int k=0;k<active_size;k++)
695
+ {
696
+ G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
697
+ }
698
+
699
+ // update alpha_status and G_bar
700
+
701
+ {
702
+ bool ui = is_upper_bound(i);
703
+ bool uj = is_upper_bound(j);
704
+ update_alpha_status(i);
705
+ update_alpha_status(j);
706
+ int k;
707
+ if(ui != is_upper_bound(i))
708
+ {
709
+ Q_i = Q.get_Q(i,l);
710
+ if(ui)
711
+ for(k=0;k<l;k++)
712
+ G_bar[k] -= C_i * Q_i[k];
713
+ else
714
+ for(k=0;k<l;k++)
715
+ G_bar[k] += C_i * Q_i[k];
716
+ }
717
+
718
+ if(uj != is_upper_bound(j))
719
+ {
720
+ Q_j = Q.get_Q(j,l);
721
+ if(uj)
722
+ for(k=0;k<l;k++)
723
+ G_bar[k] -= C_j * Q_j[k];
724
+ else
725
+ for(k=0;k<l;k++)
726
+ G_bar[k] += C_j * Q_j[k];
727
+ }
728
+ }
729
+ }
730
+
731
+ // calculate rho
732
+
733
+ si->rho = calculate_rho();
734
+
735
+ // calculate objective value
736
+ {
737
+ double v = 0;
738
+ int i;
739
+ for(i=0;i<l;i++)
740
+ v += alpha[i] * (G[i] + p[i]);
741
+
742
+ si->obj = v/2;
743
+ }
744
+
745
+ // put back the solution
746
+ {
747
+ for(int i=0;i<l;i++)
748
+ alpha_[active_set[i]] = alpha[i];
749
+ }
750
+
751
+ // juggle everything back
752
+ /*{
753
+ for(int i=0;i<l;i++)
754
+ while(active_set[i] != i)
755
+ swap_index(i,active_set[i]);
756
+ // or Q.swap_index(i,active_set[i]);
757
+ }*/
758
+
759
+ si->upper_bound_p = Cp;
760
+ si->upper_bound_n = Cn;
761
+
762
+ info("\noptimization finished, #iter = %d\n",iter);
763
+
764
+ delete[] p;
765
+ delete[] y;
766
+ delete[] alpha;
767
+ delete[] alpha_status;
768
+ delete[] active_set;
769
+ delete[] G;
770
+ delete[] G_bar;
771
+ }
772
+
773
+ // return 1 if already optimal, return 0 otherwise
774
+ int Solver::select_working_set(int &out_i, int &out_j)
775
+ {
776
+ // return i,j such that
777
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
778
+ // j: minimizes the decrease of obj value
779
+ // (if quadratic coefficient <= 0, replace it with tau)
780
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
781
+
782
+ double Gmax = -INF;
783
+ double Gmax2 = -INF;
784
+ int Gmax_idx = -1;
785
+ int Gmin_idx = -1;
786
+ double obj_diff_min = INF;
787
+
788
+ for(int t=0;t<active_size;t++)
789
+ if(y[t]==+1)
790
+ {
791
+ if(!is_upper_bound(t))
792
+ if(-G[t] >= Gmax)
793
+ {
794
+ Gmax = -G[t];
795
+ Gmax_idx = t;
796
+ }
797
+ }
798
+ else
799
+ {
800
+ if(!is_lower_bound(t))
801
+ if(G[t] >= Gmax)
802
+ {
803
+ Gmax = G[t];
804
+ Gmax_idx = t;
805
+ }
806
+ }
807
+
808
+ int i = Gmax_idx;
809
+ const Qfloat *Q_i = NULL;
810
+ if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
811
+ Q_i = Q->get_Q(i,active_size);
812
+
813
+ for(int j=0;j<active_size;j++)
814
+ {
815
+ if(y[j]==+1)
816
+ {
817
+ if (!is_lower_bound(j))
818
+ {
819
+ double grad_diff=Gmax+G[j];
820
+ if (G[j] >= Gmax2)
821
+ Gmax2 = G[j];
822
+ if (grad_diff > 0)
823
+ {
824
+ double obj_diff;
825
+ double quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
826
+ if (quad_coef > 0)
827
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
828
+ else
829
+ obj_diff = -(grad_diff*grad_diff)/TAU;
830
+
831
+ if (obj_diff <= obj_diff_min)
832
+ {
833
+ Gmin_idx=j;
834
+ obj_diff_min = obj_diff;
835
+ }
836
+ }
837
+ }
838
+ }
839
+ else
840
+ {
841
+ if (!is_upper_bound(j))
842
+ {
843
+ double grad_diff= Gmax-G[j];
844
+ if (-G[j] >= Gmax2)
845
+ Gmax2 = -G[j];
846
+ if (grad_diff > 0)
847
+ {
848
+ double obj_diff;
849
+ double quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
850
+ if (quad_coef > 0)
851
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
852
+ else
853
+ obj_diff = -(grad_diff*grad_diff)/TAU;
854
+
855
+ if (obj_diff <= obj_diff_min)
856
+ {
857
+ Gmin_idx=j;
858
+ obj_diff_min = obj_diff;
859
+ }
860
+ }
861
+ }
862
+ }
863
+ }
864
+
865
+ if(Gmax+Gmax2 < eps)
866
+ return 1;
867
+
868
+ out_i = Gmax_idx;
869
+ out_j = Gmin_idx;
870
+ return 0;
871
+ }
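// Editorial note (illustrative, not part of svm.cpp): this is the second-order
// working-set selection of the Fan et al. reference above.  i maximizes the
// first-order violation -y_i grad(f)_i over I_up; j is the index in I_low for which
// the second-order estimate of the objective change,
//     -(Gmax + y_j grad(f)_j)^2 / (Q_ii + Q_jj - 2 y_i y_j Q_ij),
// is most negative (largest predicted decrease), with the denominator replaced by
// TAU when it is not positive.  Optimality is declared when the maximal violation
// Gmax + Gmax2 falls below eps.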
872
+
873
+ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
874
+ {
875
+ if(is_upper_bound(i))
876
+ {
877
+ if(y[i]==+1)
878
+ return(-G[i] > Gmax1);
879
+ else
880
+ return(-G[i] > Gmax2);
881
+ }
882
+ else if(is_lower_bound(i))
883
+ {
884
+ if(y[i]==+1)
885
+ return(G[i] > Gmax2);
886
+ else
887
+ return(G[i] > Gmax1);
888
+ }
889
+ else
890
+ return(false);
891
+ }
892
+
893
+ void Solver::do_shrinking()
894
+ {
895
+ int i;
896
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
897
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
898
+
899
+ // find maximal violating pair first
900
+ for(i=0;i<active_size;i++)
901
+ {
902
+ if(y[i]==+1)
903
+ {
904
+ if(!is_upper_bound(i))
905
+ {
906
+ if(-G[i] >= Gmax1)
907
+ Gmax1 = -G[i];
908
+ }
909
+ if(!is_lower_bound(i))
910
+ {
911
+ if(G[i] >= Gmax2)
912
+ Gmax2 = G[i];
913
+ }
914
+ }
915
+ else
916
+ {
917
+ if(!is_upper_bound(i))
918
+ {
919
+ if(-G[i] >= Gmax2)
920
+ Gmax2 = -G[i];
921
+ }
922
+ if(!is_lower_bound(i))
923
+ {
924
+ if(G[i] >= Gmax1)
925
+ Gmax1 = G[i];
926
+ }
927
+ }
928
+ }
929
+
930
+ if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
931
+ {
932
+ unshrink = true;
933
+ reconstruct_gradient();
934
+ active_size = l;
935
+ info("*");
936
+ }
937
+
938
+ for(i=0;i<active_size;i++)
939
+ if (be_shrunk(i, Gmax1, Gmax2))
940
+ {
941
+ active_size--;
942
+ while (active_size > i)
943
+ {
944
+ if (!be_shrunk(active_size, Gmax1, Gmax2))
945
+ {
946
+ swap_index(i,active_size);
947
+ break;
948
+ }
949
+ active_size--;
950
+ }
951
+ }
952
+ }
953
+
954
+ double Solver::calculate_rho()
955
+ {
956
+ double r;
957
+ int nr_free = 0;
958
+ double ub = INF, lb = -INF, sum_free = 0;
959
+ for(int i=0;i<active_size;i++)
960
+ {
961
+ double yG = y[i]*G[i];
962
+
963
+ if(is_upper_bound(i))
964
+ {
965
+ if(y[i]==-1)
966
+ ub = min(ub,yG);
967
+ else
968
+ lb = max(lb,yG);
969
+ }
970
+ else if(is_lower_bound(i))
971
+ {
972
+ if(y[i]==+1)
973
+ ub = min(ub,yG);
974
+ else
975
+ lb = max(lb,yG);
976
+ }
977
+ else
978
+ {
979
+ ++nr_free;
980
+ sum_free += yG;
981
+ }
982
+ }
983
+
984
+ if(nr_free>0)
985
+ r = sum_free/nr_free;
986
+ else
987
+ r = (ub+lb)/2;
988
+
989
+ return r;
990
+ }
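// Editorial note (illustrative, not part of svm.cpp): at a free point
// (0 < alpha_i < C) the KKT conditions give y_i * grad(f)_i = rho exactly, so rho is
// taken as the average of y_i*G[i] over the free set; when no variable is free, any
// value in the interval [lb, ub] determined by the bounded points is feasible and
// the midpoint is used.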
991
+
992
+ //
993
+ // Solver for nu-svm classification and regression
994
+ //
995
+ // additional constraint: e^T \alpha = constant
996
+ //
997
+ class Solver_NU : public Solver
998
+ {
999
+ public:
1000
+ Solver_NU() {}
1001
+ void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1002
+ double *alpha, double Cp, double Cn, double eps,
1003
+ SolutionInfo* si, int shrinking)
1004
+ {
1005
+ this->si = si;
1006
+ Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1007
+ }
1008
+ private:
1009
+ SolutionInfo *si;
1010
+ int select_working_set(int &i, int &j);
1011
+ double calculate_rho();
1012
+ bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1013
+ void do_shrinking();
1014
+ };
1015
+
1016
+ // return 1 if already optimal, return 0 otherwise
1017
+ int Solver_NU::select_working_set(int &out_i, int &out_j)
1018
+ {
1019
+ // return i,j such that y_i = y_j and
1020
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1021
+ // j: minimizes the decrease of obj value
1022
+ // (if quadratic coefficient <= 0, replace it with tau)
1023
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1024
+
1025
+ double Gmaxp = -INF;
1026
+ double Gmaxp2 = -INF;
1027
+ int Gmaxp_idx = -1;
1028
+
1029
+ double Gmaxn = -INF;
1030
+ double Gmaxn2 = -INF;
1031
+ int Gmaxn_idx = -1;
1032
+
1033
+ int Gmin_idx = -1;
1034
+ double obj_diff_min = INF;
1035
+
1036
+ for(int t=0;t<active_size;t++)
1037
+ if(y[t]==+1)
1038
+ {
1039
+ if(!is_upper_bound(t))
1040
+ if(-G[t] >= Gmaxp)
1041
+ {
1042
+ Gmaxp = -G[t];
1043
+ Gmaxp_idx = t;
1044
+ }
1045
+ }
1046
+ else
1047
+ {
1048
+ if(!is_lower_bound(t))
1049
+ if(G[t] >= Gmaxn)
1050
+ {
1051
+ Gmaxn = G[t];
1052
+ Gmaxn_idx = t;
1053
+ }
1054
+ }
1055
+
1056
+ int ip = Gmaxp_idx;
1057
+ int in = Gmaxn_idx;
1058
+ const Qfloat *Q_ip = NULL;
1059
+ const Qfloat *Q_in = NULL;
1060
+ if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1061
+ Q_ip = Q->get_Q(ip,active_size);
1062
+ if(in != -1)
1063
+ Q_in = Q->get_Q(in,active_size);
1064
+
1065
+ for(int j=0;j<active_size;j++)
1066
+ {
1067
+ if(y[j]==+1)
1068
+ {
1069
+ if (!is_lower_bound(j))
1070
+ {
1071
+ double grad_diff=Gmaxp+G[j];
1072
+ if (G[j] >= Gmaxp2)
1073
+ Gmaxp2 = G[j];
1074
+ if (grad_diff > 0)
1075
+ {
1076
+ double obj_diff;
1077
+ double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1078
+ if (quad_coef > 0)
1079
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1080
+ else
1081
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1082
+
1083
+ if (obj_diff <= obj_diff_min)
1084
+ {
1085
+ Gmin_idx=j;
1086
+ obj_diff_min = obj_diff;
1087
+ }
1088
+ }
1089
+ }
1090
+ }
1091
+ else
1092
+ {
1093
+ if (!is_upper_bound(j))
1094
+ {
1095
+ double grad_diff=Gmaxn-G[j];
1096
+ if (-G[j] >= Gmaxn2)
1097
+ Gmaxn2 = -G[j];
1098
+ if (grad_diff > 0)
1099
+ {
1100
+ double obj_diff;
1101
+ double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
1102
+ if (quad_coef > 0)
1103
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1104
+ else
1105
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1106
+
1107
+ if (obj_diff <= obj_diff_min)
1108
+ {
1109
+ Gmin_idx=j;
1110
+ obj_diff_min = obj_diff;
1111
+ }
1112
+ }
1113
+ }
1114
+ }
1115
+ }
1116
+
1117
+ if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1118
+ return 1;
1119
+
1120
+ if (y[Gmin_idx] == +1)
1121
+ out_i = Gmaxp_idx;
1122
+ else
1123
+ out_i = Gmaxn_idx;
1124
+ out_j = Gmin_idx;
1125
+
1126
+ return 0;
1127
+ }
1128
+
1129
+ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1130
+ {
1131
+ if(is_upper_bound(i))
1132
+ {
1133
+ if(y[i]==+1)
1134
+ return(-G[i] > Gmax1);
1135
+ else
1136
+ return(-G[i] > Gmax4);
1137
+ }
1138
+ else if(is_lower_bound(i))
1139
+ {
1140
+ if(y[i]==+1)
1141
+ return(G[i] > Gmax2);
1142
+ else
1143
+ return(G[i] > Gmax3);
1144
+ }
1145
+ else
1146
+ return(false);
1147
+ }
1148
+
1149
+ void Solver_NU::do_shrinking()
1150
+ {
1151
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1152
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1153
+ double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1154
+ double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1155
+
1156
+ // find maximal violating pair first
1157
+ int i;
1158
+ for(i=0;i<active_size;i++)
1159
+ {
1160
+ if(!is_upper_bound(i))
1161
+ {
1162
+ if(y[i]==+1)
1163
+ {
1164
+ if(-G[i] > Gmax1) Gmax1 = -G[i];
1165
+ }
1166
+ else if(-G[i] > Gmax4) Gmax4 = -G[i];
1167
+ }
1168
+ if(!is_lower_bound(i))
1169
+ {
1170
+ if(y[i]==+1)
1171
+ {
1172
+ if(G[i] > Gmax2) Gmax2 = G[i];
1173
+ }
1174
+ else if(G[i] > Gmax3) Gmax3 = G[i];
1175
+ }
1176
+ }
1177
+
1178
+ if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1179
+ {
1180
+ unshrink = true;
1181
+ reconstruct_gradient();
1182
+ active_size = l;
1183
+ }
1184
+
1185
+ for(i=0;i<active_size;i++)
1186
+ if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1187
+ {
1188
+ active_size--;
1189
+ while (active_size > i)
1190
+ {
1191
+ if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1192
+ {
1193
+ swap_index(i,active_size);
1194
+ break;
1195
+ }
1196
+ active_size--;
1197
+ }
1198
+ }
1199
+ }
1200
+
1201
+ double Solver_NU::calculate_rho()
1202
+ {
1203
+ int nr_free1 = 0,nr_free2 = 0;
1204
+ double ub1 = INF, ub2 = INF;
1205
+ double lb1 = -INF, lb2 = -INF;
1206
+ double sum_free1 = 0, sum_free2 = 0;
1207
+
1208
+ for(int i=0;i<active_size;i++)
1209
+ {
1210
+ if(y[i]==+1)
1211
+ {
1212
+ if(is_upper_bound(i))
1213
+ lb1 = max(lb1,G[i]);
1214
+ else if(is_lower_bound(i))
1215
+ ub1 = min(ub1,G[i]);
1216
+ else
1217
+ {
1218
+ ++nr_free1;
1219
+ sum_free1 += G[i];
1220
+ }
1221
+ }
1222
+ else
1223
+ {
1224
+ if(is_upper_bound(i))
1225
+ lb2 = max(lb2,G[i]);
1226
+ else if(is_lower_bound(i))
1227
+ ub2 = min(ub2,G[i]);
1228
+ else
1229
+ {
1230
+ ++nr_free2;
1231
+ sum_free2 += G[i];
1232
+ }
1233
+ }
1234
+ }
1235
+
1236
+ double r1,r2;
1237
+ if(nr_free1 > 0)
1238
+ r1 = sum_free1/nr_free1;
1239
+ else
1240
+ r1 = (ub1+lb1)/2;
1241
+
1242
+ if(nr_free2 > 0)
1243
+ r2 = sum_free2/nr_free2;
1244
+ else
1245
+ r2 = (ub2+lb2)/2;
1246
+
1247
+ si->r = (r1+r2)/2;
1248
+ return (r1-r2)/2;
1249
+ }
1250
+
1251
+ //
1252
+ // Q matrices for various formulations
1253
+ //
1254
+ class SVC_Q: public Kernel
1255
+ {
1256
+ public:
1257
+ SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1258
+ :Kernel(prob.l, prob.x, param)
1259
+ {
1260
+ clone(y,y_,prob.l);
1261
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1262
+ QD = new Qfloat[prob.l];
1263
+ for(int i=0;i<prob.l;i++)
1264
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1265
+ }
1266
+
1267
+ Qfloat *get_Q(int i, int len) const
1268
+ {
1269
+ Qfloat *data;
1270
+ int start, j;
1271
+ if((start = cache->get_data(i,&data,len)) < len)
1272
+ {
1273
+ for(j=start;j<len;j++)
1274
+ data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1275
+ }
1276
+ return data;
1277
+ }
1278
+
1279
+ Qfloat *get_QD() const
1280
+ {
1281
+ return QD;
1282
+ }
1283
+
1284
+ void swap_index(int i, int j) const
1285
+ {
1286
+ cache->swap_index(i,j);
1287
+ Kernel::swap_index(i,j);
1288
+ swap(y[i],y[j]);
1289
+ swap(QD[i],QD[j]);
1290
+ }
1291
+
1292
+ ~SVC_Q()
1293
+ {
1294
+ delete[] y;
1295
+ delete cache;
1296
+ delete[] QD;
1297
+ }
1298
+ private:
1299
+ schar *y;
1300
+ Cache *cache;
1301
+ Qfloat *QD;
1302
+ };
1303
+
1304
+ class ONE_CLASS_Q: public Kernel
1305
+ {
1306
+ public:
1307
+ ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1308
+ :Kernel(prob.l, prob.x, param)
1309
+ {
1310
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1311
+ QD = new Qfloat[prob.l];
1312
+ for(int i=0;i<prob.l;i++)
1313
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1314
+ }
1315
+
1316
+ Qfloat *get_Q(int i, int len) const
1317
+ {
1318
+ Qfloat *data;
1319
+ int start, j;
1320
+ if((start = cache->get_data(i,&data,len)) < len)
1321
+ {
1322
+ for(j=start;j<len;j++)
1323
+ data[j] = (Qfloat)(this->*kernel_function)(i,j);
1324
+ }
1325
+ return data;
1326
+ }
1327
+
1328
+ Qfloat *get_QD() const
1329
+ {
1330
+ return QD;
1331
+ }
1332
+
1333
+ void swap_index(int i, int j) const
1334
+ {
1335
+ cache->swap_index(i,j);
1336
+ Kernel::swap_index(i,j);
1337
+ swap(QD[i],QD[j]);
1338
+ }
1339
+
1340
+ ~ONE_CLASS_Q()
1341
+ {
1342
+ delete cache;
1343
+ delete[] QD;
1344
+ }
1345
+ private:
1346
+ Cache *cache;
1347
+ Qfloat *QD;
1348
+ };
1349
+
1350
+ class SVR_Q: public Kernel
1351
+ {
1352
+ public:
1353
+ SVR_Q(const svm_problem& prob, const svm_parameter& param)
1354
+ :Kernel(prob.l, prob.x, param)
1355
+ {
1356
+ l = prob.l;
1357
+ cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1358
+ QD = new Qfloat[2*l];
1359
+ sign = new schar[2*l];
1360
+ index = new int[2*l];
1361
+ for(int k=0;k<l;k++)
1362
+ {
1363
+ sign[k] = 1;
1364
+ sign[k+l] = -1;
1365
+ index[k] = k;
1366
+ index[k+l] = k;
1367
+ QD[k]= (Qfloat)(this->*kernel_function)(k,k);
1368
+ QD[k+l]=QD[k];
1369
+ }
1370
+ buffer[0] = new Qfloat[2*l];
1371
+ buffer[1] = new Qfloat[2*l];
1372
+ next_buffer = 0;
1373
+ }
1374
+
1375
+ void swap_index(int i, int j) const
1376
+ {
1377
+ swap(sign[i],sign[j]);
1378
+ swap(index[i],index[j]);
1379
+ swap(QD[i],QD[j]);
1380
+ }
1381
+
1382
+ Qfloat *get_Q(int i, int len) const
1383
+ {
1384
+ Qfloat *data;
1385
+ int j, real_i = index[i];
1386
+ if(cache->get_data(real_i,&data,l) < l)
1387
+ {
1388
+ for(j=0;j<l;j++)
1389
+ data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1390
+ }
1391
+
1392
+ // reorder and copy
1393
+ Qfloat *buf = buffer[next_buffer];
1394
+ next_buffer = 1 - next_buffer;
1395
+ schar si = sign[i];
1396
+ for(j=0;j<len;j++)
1397
+ buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1398
+ return buf;
1399
+ }
1400
+
1401
+ Qfloat *get_QD() const
1402
+ {
1403
+ return QD;
1404
+ }
1405
+
1406
+ ~SVR_Q()
1407
+ {
1408
+ delete cache;
1409
+ delete[] sign;
1410
+ delete[] index;
1411
+ delete[] buffer[0];
1412
+ delete[] buffer[1];
1413
+ delete[] QD;
1414
+ }
1415
+ private:
1416
+ int l;
1417
+ Cache *cache;
1418
+ schar *sign;
1419
+ int *index;
1420
+ mutable int next_buffer;
1421
+ Qfloat *buffer[2];
1422
+ Qfloat *QD;
1423
+ };
1424
+
1425
+ //
1426
+ // construct and solve various formulations
1427
+ //
1428
+ static void solve_c_svc(
1429
+ const svm_problem *prob, const svm_parameter* param,
1430
+ double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1431
+ {
1432
+ int l = prob->l;
1433
+ double *minus_ones = new double[l];
1434
+ schar *y = new schar[l];
1435
+
1436
+ int i;
1437
+
1438
+ for(i=0;i<l;i++)
1439
+ {
1440
+ alpha[i] = 0;
1441
+ minus_ones[i] = -1;
1442
+ if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1443
+ }
1444
+
1445
+ Solver s;
1446
+ s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1447
+ alpha, Cp, Cn, param->eps, si, param->shrinking);
1448
+
1449
+ double sum_alpha=0;
1450
+ for(i=0;i<l;i++)
1451
+ sum_alpha += alpha[i];
1452
+
1453
+ if (Cp==Cn)
1454
+ info("nu = %f\n", sum_alpha/(Cp*prob->l));
1455
+
1456
+ for(i=0;i<l;i++)
1457
+ alpha[i] *= y[i];
1458
+
1459
+ delete[] minus_ones;
1460
+ delete[] y;
1461
+ }
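// Editorial note (illustrative, not part of svm.cpp): with p = -e (minus_ones),
// delta = 0 and per-class box bounds Cp/Cn, the generic Solver above reduces to the
// standard C-SVC dual
//     min 0.5 a^T Q a - e^T a,   0 <= a_i <= C_{y_i},   y^T a = 0,
// and the final "alpha[i] *= y[i]" converts the dual variables a_i into the signed
// coefficients y_i a_i that svm_train later stores in sv_coef.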
1462
+
1463
+ static void solve_nu_svc(
1464
+ const svm_problem *prob, const svm_parameter *param,
1465
+ double *alpha, Solver::SolutionInfo* si)
1466
+ {
1467
+ int i;
1468
+ int l = prob->l;
1469
+ double nu = param->nu;
1470
+
1471
+ schar *y = new schar[l];
1472
+
1473
+ for(i=0;i<l;i++)
1474
+ if(prob->y[i]>0)
1475
+ y[i] = +1;
1476
+ else
1477
+ y[i] = -1;
1478
+
1479
+ double sum_pos = nu*l/2;
1480
+ double sum_neg = nu*l/2;
1481
+
1482
+ for(i=0;i<l;i++)
1483
+ if(y[i] == +1)
1484
+ {
1485
+ alpha[i] = min(1.0,sum_pos);
1486
+ sum_pos -= alpha[i];
1487
+ }
1488
+ else
1489
+ {
1490
+ alpha[i] = min(1.0,sum_neg);
1491
+ sum_neg -= alpha[i];
1492
+ }
1493
+
1494
+ double *zeros = new double[l];
1495
+
1496
+ for(i=0;i<l;i++)
1497
+ zeros[i] = 0;
1498
+
1499
+ Solver_NU s;
1500
+ s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1501
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1502
+ double r = si->r;
1503
+
1504
+ info("C = %f\n",1/r);
1505
+
1506
+ for(i=0;i<l;i++)
1507
+ alpha[i] *= y[i]/r;
1508
+
1509
+ si->rho /= r;
1510
+ si->obj /= (r*r);
1511
+ si->upper_bound_p = 1/r;
1512
+ si->upper_bound_n = 1/r;
1513
+
1514
+ delete[] y;
1515
+ delete[] zeros;
1516
+ }
1517
+
1518
+ static void solve_one_class(
1519
+ const svm_problem *prob, const svm_parameter *param,
1520
+ double *alpha, Solver::SolutionInfo* si)
1521
+ {
1522
+ int l = prob->l;
1523
+ double *zeros = new double[l];
1524
+ schar *ones = new schar[l];
1525
+ int i;
1526
+
1527
+ int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1528
+
1529
+ for(i=0;i<n;i++)
1530
+ alpha[i] = 1;
1531
+ if(n<prob->l)
1532
+ alpha[n] = param->nu * prob->l - n;
1533
+ for(i=n+1;i<l;i++)
1534
+ alpha[i] = 0;
1535
+
1536
+ for(i=0;i<l;i++)
1537
+ {
1538
+ zeros[i] = 0;
1539
+ ones[i] = 1;
1540
+ }
1541
+
1542
+ Solver s;
1543
+ s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1544
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1545
+
1546
+ delete[] zeros;
1547
+ delete[] ones;
1548
+ }
1549
+
1550
+ static void solve_epsilon_svr(
1551
+ const svm_problem *prob, const svm_parameter *param,
1552
+ double *alpha, Solver::SolutionInfo* si)
1553
+ {
1554
+ int l = prob->l;
1555
+ double *alpha2 = new double[2*l];
1556
+ double *linear_term = new double[2*l];
1557
+ schar *y = new schar[2*l];
1558
+ int i;
1559
+
1560
+ for(i=0;i<l;i++)
1561
+ {
1562
+ alpha2[i] = 0;
1563
+ linear_term[i] = param->p - prob->y[i];
1564
+ y[i] = 1;
1565
+
1566
+ alpha2[i+l] = 0;
1567
+ linear_term[i+l] = param->p + prob->y[i];
1568
+ y[i+l] = -1;
1569
+ }
1570
+
1571
+ Solver s;
1572
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1573
+ alpha2, param->C, param->C, param->eps, si, param->shrinking);
1574
+
1575
+ double sum_alpha = 0;
1576
+ for(i=0;i<l;i++)
1577
+ {
1578
+ alpha[i] = alpha2[i] - alpha2[i+l];
1579
+ sum_alpha += fabs(alpha[i]);
1580
+ }
1581
+ info("nu = %f\n",sum_alpha/(param->C*l));
1582
+
1583
+ delete[] alpha2;
1584
+ delete[] linear_term;
1585
+ delete[] y;
1586
+ }
1587
+
1588
+ static void solve_nu_svr(
1589
+ const svm_problem *prob, const svm_parameter *param,
1590
+ double *alpha, Solver::SolutionInfo* si)
1591
+ {
1592
+ int l = prob->l;
1593
+ double C = param->C;
1594
+ double *alpha2 = new double[2*l];
1595
+ double *linear_term = new double[2*l];
1596
+ schar *y = new schar[2*l];
1597
+ int i;
1598
+
1599
+ double sum = C * param->nu * l / 2;
1600
+ for(i=0;i<l;i++)
1601
+ {
1602
+ alpha2[i] = alpha2[i+l] = min(sum,C);
1603
+ sum -= alpha2[i];
1604
+
1605
+ linear_term[i] = - prob->y[i];
1606
+ y[i] = 1;
1607
+
1608
+ linear_term[i+l] = prob->y[i];
1609
+ y[i+l] = -1;
1610
+ }
1611
+
1612
+ Solver_NU s;
1613
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1614
+ alpha2, C, C, param->eps, si, param->shrinking);
1615
+
1616
+ info("epsilon = %f\n",-si->r);
1617
+
1618
+ for(i=0;i<l;i++)
1619
+ alpha[i] = alpha2[i] - alpha2[i+l];
1620
+
1621
+ delete[] alpha2;
1622
+ delete[] linear_term;
1623
+ delete[] y;
1624
+ }
1625
+
1626
+ //
1627
+ // decision_function
1628
+ //
1629
+ struct decision_function
1630
+ {
1631
+ double *alpha;
1632
+ double rho;
1633
+ };
1634
+
1635
+ static decision_function svm_train_one(
1636
+ const svm_problem *prob, const svm_parameter *param,
1637
+ double Cp, double Cn)
1638
+ {
1639
+ double *alpha = Malloc(double,prob->l);
1640
+ Solver::SolutionInfo si;
1641
+ switch(param->svm_type)
1642
+ {
1643
+ case C_SVC:
1644
+ solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1645
+ break;
1646
+ case NU_SVC:
1647
+ solve_nu_svc(prob,param,alpha,&si);
1648
+ break;
1649
+ case ONE_CLASS:
1650
+ solve_one_class(prob,param,alpha,&si);
1651
+ break;
1652
+ case EPSILON_SVR:
1653
+ solve_epsilon_svr(prob,param,alpha,&si);
1654
+ break;
1655
+ case NU_SVR:
1656
+ solve_nu_svr(prob,param,alpha,&si);
1657
+ break;
1658
+ }
1659
+
1660
+ info("obj = %f, rho = %f\n",si.obj,si.rho);
1661
+
1662
+ // output SVs
1663
+
1664
+ int nSV = 0;
1665
+ int nBSV = 0;
1666
+ for(int i=0;i<prob->l;i++)
1667
+ {
1668
+ if(fabs(alpha[i]) > 0)
1669
+ {
1670
+ ++nSV;
1671
+ if(prob->y[i] > 0)
1672
+ {
1673
+ if(fabs(alpha[i]) >= si.upper_bound_p)
1674
+ ++nBSV;
1675
+ }
1676
+ else
1677
+ {
1678
+ if(fabs(alpha[i]) >= si.upper_bound_n)
1679
+ ++nBSV;
1680
+ }
1681
+ }
1682
+ }
1683
+
1684
+ info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1685
+
1686
+ decision_function f;
1687
+ f.alpha = alpha;
1688
+ f.rho = si.rho;
1689
+ return f;
1690
+ }
1691
+
1692
+ //
1693
+ // svm_model
1694
+ //
1695
+ struct svm_model
1696
+ {
1697
+ svm_parameter param; // parameter
1698
+ int nr_class; // number of classes, = 2 in regression/one class svm
1699
+ int l; // total #SV
1700
+ svm_node **SV; // SVs (SV[l])
1701
+ double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
1702
+ double *rho; // constants in decision functions (rho[k*(k-1)/2])
1703
+ double *probA; // pairwise probability information
1704
+ double *probB;
1705
+
1706
+ // for classification only
1707
+
1708
+ int *label; // label of each class (label[k])
1709
+ int *nSV; // number of SVs for each class (nSV[k])
1710
+ // nSV[0] + nSV[1] + ... + nSV[k-1] = l
1711
+ // XXX
1712
+ int free_sv; // 1 if svm_model is created by svm_load_model
1713
+ // 0 if svm_model is created by svm_train
1714
+ };
1715
+
1716
+ // Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
1717
+ static void sigmoid_train(
1718
+ int l, const double *dec_values, const double *labels,
1719
+ double& A, double& B)
1720
+ {
1721
+ double prior1=0, prior0 = 0;
1722
+ int i;
1723
+
1724
+ for (i=0;i<l;i++)
1725
+ if (labels[i] > 0) prior1+=1;
1726
+ else prior0+=1;
1727
+
1728
+ int max_iter=100; // Maximal number of iterations
1729
+ double min_step=1e-10; // Minimal step taken in line search
1730
+ double sigma=1e-12; // For numerically strict PD of Hessian
1731
+ double eps=1e-5;
1732
+ double hiTarget=(prior1+1.0)/(prior1+2.0);
1733
+ double loTarget=1/(prior0+2.0);
1734
+ double *t=Malloc(double,l);
1735
+ double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1736
+ double newA,newB,newf,d1,d2;
1737
+ int iter;
1738
+
1739
+ // Initial Point and Initial Fun Value
1740
+ A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1741
+ double fval = 0.0;
1742
+
1743
+ for (i=0;i<l;i++)
1744
+ {
1745
+ if (labels[i]>0) t[i]=hiTarget;
1746
+ else t[i]=loTarget;
1747
+ fApB = dec_values[i]*A+B;
1748
+ if (fApB>=0)
1749
+ fval += t[i]*fApB + log(1+exp(-fApB));
1750
+ else
1751
+ fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1752
+ }
1753
+ for (iter=0;iter<max_iter;iter++)
1754
+ {
1755
+ // Update Gradient and Hessian (use H' = H + sigma I)
1756
+ h11=sigma; // numerically ensures strict PD
1757
+ h22=sigma;
1758
+ h21=0.0;g1=0.0;g2=0.0;
1759
+ for (i=0;i<l;i++)
1760
+ {
1761
+ fApB = dec_values[i]*A+B;
1762
+ if (fApB >= 0)
1763
+ {
1764
+ p=exp(-fApB)/(1.0+exp(-fApB));
1765
+ q=1.0/(1.0+exp(-fApB));
1766
+ }
1767
+ else
1768
+ {
1769
+ p=1.0/(1.0+exp(fApB));
1770
+ q=exp(fApB)/(1.0+exp(fApB));
1771
+ }
1772
+ d2=p*q;
1773
+ h11+=dec_values[i]*dec_values[i]*d2;
1774
+ h22+=d2;
1775
+ h21+=dec_values[i]*d2;
1776
+ d1=t[i]-p;
1777
+ g1+=dec_values[i]*d1;
1778
+ g2+=d1;
1779
+ }
1780
+
1781
+ // Stopping Criteria
1782
+ if (fabs(g1)<eps && fabs(g2)<eps)
1783
+ break;
1784
+
1785
+ // Finding Newton direction: -inv(H') * g
1786
+ det=h11*h22-h21*h21;
1787
+ dA=-(h22*g1 - h21 * g2) / det;
1788
+ dB=-(-h21*g1+ h11 * g2) / det;
1789
+ gd=g1*dA+g2*dB;
1790
+
1791
+
1792
+ stepsize = 1; // Line Search
1793
+ while (stepsize >= min_step)
1794
+ {
1795
+ newA = A + stepsize * dA;
1796
+ newB = B + stepsize * dB;
1797
+
1798
+ // New function value
1799
+ newf = 0.0;
1800
+ for (i=0;i<l;i++)
1801
+ {
1802
+ fApB = dec_values[i]*newA+newB;
1803
+ if (fApB >= 0)
1804
+ newf += t[i]*fApB + log(1+exp(-fApB));
1805
+ else
1806
+ newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1807
+ }
1808
+ // Check sufficient decrease
1809
+ if (newf<fval+0.0001*stepsize*gd)
1810
+ {
1811
+ A=newA;B=newB;fval=newf;
1812
+ break;
1813
+ }
1814
+ else
1815
+ stepsize = stepsize / 2.0;
1816
+ }
1817
+
1818
+ if (stepsize < min_step)
1819
+ {
1820
+ info("Line search fails in two-class probability estimates\n");
1821
+ break;
1822
+ }
1823
+ }
1824
+
1825
+ if (iter>=max_iter)
1826
+ info("Reaching maximal iterations in two-class probability estimates\n");
1827
+ free(t);
1828
+ }
1829
+
1830
+ static double sigmoid_predict(double decision_value, double A, double B)
1831
+ {
1832
+ double fApB = decision_value*A+B;
1833
+ if (fApB >= 0)
1834
+ return exp(-fApB)/(1.0+exp(-fApB));
1835
+ else
1836
+ return 1.0/(1+exp(fApB)) ;
1837
+ }
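// Editorial sketch (illustrative, not part of svm.cpp): Platt scaling end to end.
// dec and lab would normally be cross-validated decision values and the true labels;
// the numbers below are made-up placeholders.
static void platt_example()
{
	double dec[] = { -2.0, -1.0, 0.5, 2.5 };
	double lab[] = { -1, -1, +1, +1 };
	double A, B;
	sigmoid_train(4, dec, lab, A, B);	// fit P(y=1|f) = 1/(1+exp(A*f+B))
	double p = sigmoid_predict(1.0, A, B);	// probability for a new decision value
	(void)p;
}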
1838
+
1839
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1840
+ static void multiclass_probability(int k, double **r, double *p)
1841
+ {
1842
+ int t,j;
1843
+ int iter = 0, max_iter=max(100,k);
1844
+ double **Q=Malloc(double *,k);
1845
+ double *Qp=Malloc(double,k);
1846
+ double pQp, eps=0.005/k;
1847
+
1848
+ for (t=0;t<k;t++)
1849
+ {
1850
+ p[t]=1.0/k; // Valid if k = 1
1851
+ Q[t]=Malloc(double,k);
1852
+ Q[t][t]=0;
1853
+ for (j=0;j<t;j++)
1854
+ {
1855
+ Q[t][t]+=r[j][t]*r[j][t];
1856
+ Q[t][j]=Q[j][t];
1857
+ }
1858
+ for (j=t+1;j<k;j++)
1859
+ {
1860
+ Q[t][t]+=r[j][t]*r[j][t];
1861
+ Q[t][j]=-r[j][t]*r[t][j];
1862
+ }
1863
+ }
1864
+ for (iter=0;iter<max_iter;iter++)
1865
+ {
1866
+ // stopping condition, recalculate QP,pQP for numerical accuracy
1867
+ pQp=0;
1868
+ for (t=0;t<k;t++)
1869
+ {
1870
+ Qp[t]=0;
1871
+ for (j=0;j<k;j++)
1872
+ Qp[t]+=Q[t][j]*p[j];
1873
+ pQp+=p[t]*Qp[t];
1874
+ }
1875
+ double max_error=0;
1876
+ for (t=0;t<k;t++)
1877
+ {
1878
+ double error=fabs(Qp[t]-pQp);
1879
+ if (error>max_error)
1880
+ max_error=error;
1881
+ }
1882
+ if (max_error<eps) break;
1883
+
1884
+ for (t=0;t<k;t++)
1885
+ {
1886
+ double diff=(-Qp[t]+pQp)/Q[t][t];
1887
+ p[t]+=diff;
1888
+ pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1889
+ for (j=0;j<k;j++)
1890
+ {
1891
+ Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1892
+ p[j]/=(1+diff);
1893
+ }
1894
+ }
1895
+ }
1896
+ if (iter>=max_iter)
1897
+ info("Exceeds max_iter in multiclass_prob\n");
1898
+ for(t=0;t<k;t++) free(Q[t]);
1899
+ free(Q);
1900
+ free(Qp);
1901
+ }
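// Editorial sketch (illustrative, not part of svm.cpp): coupling pairwise estimates
// r[i][j] ~ P(y=i | y=i or j, x) into class probabilities p[i] for k = 3 classes.
// The r values are made-up placeholders and must satisfy r[j][i] = 1 - r[i][j].
static void coupling_example()
{
	int k = 3;
	double r0[] = {0.0, 0.7, 0.8}, r1[] = {0.3, 0.0, 0.6}, r2[] = {0.2, 0.4, 0.0};
	double *r[] = { r0, r1, r2 };
	double p[3];
	multiclass_probability(k, r, p);	// p now holds the coupled class probabilities
}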
1902
+
1903
+ // Cross-validation decision values for probability estimates
1904
+ static void svm_binary_svc_probability(
1905
+ const svm_problem *prob, const svm_parameter *param,
1906
+ double Cp, double Cn, double& probA, double& probB)
1907
+ {
1908
+ int i;
1909
+ int nr_fold = 5;
1910
+ int *perm = Malloc(int,prob->l);
1911
+ double *dec_values = Malloc(double,prob->l);
1912
+
1913
+ // random shuffle
1914
+ for(i=0;i<prob->l;i++) perm[i]=i;
1915
+ for(i=0;i<prob->l;i++)
1916
+ {
1917
+ int j = i+rand()%(prob->l-i);
1918
+ swap(perm[i],perm[j]);
1919
+ }
1920
+ for(i=0;i<nr_fold;i++)
1921
+ {
1922
+ int begin = i*prob->l/nr_fold;
1923
+ int end = (i+1)*prob->l/nr_fold;
1924
+ int j,k;
1925
+ struct svm_problem subprob;
1926
+
1927
+ subprob.l = prob->l-(end-begin);
1928
+ subprob.x = Malloc(struct svm_node*,subprob.l);
1929
+ subprob.y = Malloc(double,subprob.l);
1930
+
1931
+ k=0;
1932
+ for(j=0;j<begin;j++)
1933
+ {
1934
+ subprob.x[k] = prob->x[perm[j]];
1935
+ subprob.y[k] = prob->y[perm[j]];
1936
+ ++k;
1937
+ }
1938
+ for(j=end;j<prob->l;j++)
1939
+ {
1940
+ subprob.x[k] = prob->x[perm[j]];
1941
+ subprob.y[k] = prob->y[perm[j]];
1942
+ ++k;
1943
+ }
1944
+ int p_count=0,n_count=0;
1945
+ for(j=0;j<k;j++)
1946
+ if(subprob.y[j]>0)
1947
+ p_count++;
1948
+ else
1949
+ n_count++;
1950
+
1951
+ if(p_count==0 && n_count==0)
1952
+ for(j=begin;j<end;j++)
1953
+ dec_values[perm[j]] = 0;
1954
+ else if(p_count > 0 && n_count == 0)
1955
+ for(j=begin;j<end;j++)
1956
+ dec_values[perm[j]] = 1;
1957
+ else if(p_count == 0 && n_count > 0)
1958
+ for(j=begin;j<end;j++)
1959
+ dec_values[perm[j]] = -1;
1960
+ else
1961
+ {
1962
+ svm_parameter subparam = *param;
1963
+ subparam.probability=0;
1964
+ subparam.C=1.0;
1965
+ subparam.nr_weight=2;
1966
+ subparam.weight_label = Malloc(int,2);
1967
+ subparam.weight = Malloc(double,2);
1968
+ subparam.weight_label[0]=+1;
1969
+ subparam.weight_label[1]=-1;
1970
+ subparam.weight[0]=Cp;
1971
+ subparam.weight[1]=Cn;
1972
+ struct svm_model *submodel = svm_train(&subprob,&subparam);
1973
+ for(j=begin;j<end;j++)
1974
+ {
1975
+ svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
1976
+ // ensure +1/-1 order; this is why the CV subroutine is not reused here
1977
+ dec_values[perm[j]] *= submodel->label[0];
1978
+ }
1979
+ svm_destroy_model(submodel);
1980
+ svm_destroy_param(&subparam);
1981
+ }
1982
+ free(subprob.x);
1983
+ free(subprob.y);
1984
+ }
1985
+ sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
1986
+ free(dec_values);
1987
+ free(perm);
1988
+ }
1989
+
1990
+ // Return parameter of a Laplace distribution
1991
+ static double svm_svr_probability(
1992
+ const svm_problem *prob, const svm_parameter *param)
1993
+ {
1994
+ int i;
1995
+ int nr_fold = 5;
1996
+ double *ymv = Malloc(double,prob->l);
1997
+ double mae = 0;
1998
+
1999
+ svm_parameter newparam = *param;
2000
+ newparam.probability = 0;
2001
+ svm_cross_validation(prob,&newparam,nr_fold,ymv);
2002
+ for(i=0;i<prob->l;i++)
2003
+ {
2004
+ ymv[i]=prob->y[i]-ymv[i];
2005
+ mae += fabs(ymv[i]);
2006
+ }
2007
+ mae /= prob->l;
2008
+ double std=sqrt(2*mae*mae);
2009
+ int count=0;
2010
+ mae=0;
2011
+ for(i=0;i<prob->l;i++)
2012
+ if (fabs(ymv[i]) > 5*std)
2013
+ count=count+1;
2014
+ else
2015
+ mae+=fabs(ymv[i]);
2016
+ mae /= (prob->l-count);
2017
+ info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2018
+ free(ymv);
2019
+ return mae;
2020
+ }
2021
+
2022
+
2023
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2024
+ // perm, length l, must be allocated before calling this subroutine
2025
+ static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2026
+ {
2027
+ int l = prob->l;
2028
+ int max_nr_class = 16;
2029
+ int nr_class = 0;
2030
+ int *label = Malloc(int,max_nr_class);
2031
+ int *count = Malloc(int,max_nr_class);
2032
+ int *data_label = Malloc(int,l);
2033
+ int i;
2034
+
2035
+ for(i=0;i<l;i++)
2036
+ {
2037
+ int this_label = (int)prob->y[i];
2038
+ int j;
2039
+ for(j=0;j<nr_class;j++)
2040
+ {
2041
+ if(this_label == label[j])
2042
+ {
2043
+ ++count[j];
2044
+ break;
2045
+ }
2046
+ }
2047
+ data_label[i] = j;
2048
+ if(j == nr_class)
2049
+ {
2050
+ if(nr_class == max_nr_class)
2051
+ {
2052
+ max_nr_class *= 2;
2053
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2054
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2055
+ }
2056
+ label[nr_class] = this_label;
2057
+ count[nr_class] = 1;
2058
+ ++nr_class;
2059
+ }
2060
+ }
2061
+
2062
+ int *start = Malloc(int,nr_class);
2063
+ start[0] = 0;
2064
+ for(i=1;i<nr_class;i++)
2065
+ start[i] = start[i-1]+count[i-1];
2066
+ for(i=0;i<l;i++)
2067
+ {
2068
+ perm[start[data_label[i]]] = i;
2069
+ ++start[data_label[i]];
2070
+ }
2071
+ start[0] = 0;
2072
+ for(i=1;i<nr_class;i++)
2073
+ start[i] = start[i-1]+count[i-1];
2074
+
2075
+ *nr_class_ret = nr_class;
2076
+ *label_ret = label;
2077
+ *start_ret = start;
2078
+ *count_ret = count;
2079
+ free(data_label);
2080
+ }
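// Editorial note (illustrative, not part of svm.cpp): for labels y = {3, 1, 3, 1, 1}
// svm_group_classes yields nr_class = 2, label = {3, 1} (order of first appearance),
// count = {2, 3}, start = {0, 2} and perm = {0, 2, 1, 3, 4}, i.e. perm lists the
// indices of all label-3 points first, then all label-1 points.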
2081
+
2082
+ //
2083
+ // Interface functions
2084
+ //
2085
+ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2086
+ {
2087
+ svm_model *model = Malloc(svm_model,1);
2088
+ model->param = *param;
2089
+ model->free_sv = 0; // XXX
2090
+
2091
+ if(param->svm_type == ONE_CLASS ||
2092
+ param->svm_type == EPSILON_SVR ||
2093
+ param->svm_type == NU_SVR)
2094
+ {
2095
+ // regression or one-class-svm
2096
+ model->nr_class = 2;
2097
+ model->label = NULL;
2098
+ model->nSV = NULL;
2099
+ model->probA = NULL; model->probB = NULL;
2100
+ model->sv_coef = Malloc(double *,1);
2101
+
2102
+ if(param->probability &&
2103
+ (param->svm_type == EPSILON_SVR ||
2104
+ param->svm_type == NU_SVR))
2105
+ {
2106
+ model->probA = Malloc(double,1);
2107
+ model->probA[0] = svm_svr_probability(prob,param);
2108
+ }
2109
+
2110
+ decision_function f = svm_train_one(prob,param,0,0);
2111
+ model->rho = Malloc(double,1);
2112
+ model->rho[0] = f.rho;
2113
+
2114
+ int nSV = 0;
2115
+ int i;
2116
+ for(i=0;i<prob->l;i++)
2117
+ if(fabs(f.alpha[i]) > 0) ++nSV;
2118
+ model->l = nSV;
2119
+ model->SV = Malloc(svm_node *,nSV);
2120
+ model->sv_coef[0] = Malloc(double,nSV);
2121
+ int j = 0;
2122
+ for(i=0;i<prob->l;i++)
2123
+ if(fabs(f.alpha[i]) > 0)
2124
+ {
2125
+ model->SV[j] = prob->x[i];
2126
+ model->sv_coef[0][j] = f.alpha[i];
2127
+ ++j;
2128
+ }
2129
+
2130
+ free(f.alpha);
2131
+ }
2132
+ else
2133
+ {
2134
+ // classification
2135
+ int l = prob->l;
2136
+ int nr_class;
2137
+ int *label = NULL;
2138
+ int *start = NULL;
2139
+ int *count = NULL;
2140
+ int *perm = Malloc(int,l);
2141
+
2142
+ // group training data of the same class
2143
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2144
+ svm_node **x = Malloc(svm_node *,l);
2145
+ int i;
2146
+ for(i=0;i<l;i++)
2147
+ x[i] = prob->x[perm[i]];
2148
+
2149
+ // calculate weighted C
2150
+
2151
+ double *weighted_C = Malloc(double, nr_class);
2152
+ for(i=0;i<nr_class;i++)
2153
+ weighted_C[i] = param->C;
2154
+ for(i=0;i<param->nr_weight;i++)
2155
+ {
2156
+ int j;
2157
+ for(j=0;j<nr_class;j++)
2158
+ if(param->weight_label[i] == label[j])
2159
+ break;
2160
+ if(j == nr_class)
2161
+ fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
2162
+ else
2163
+ weighted_C[j] *= param->weight[i];
2164
+ }
2165
+
2166
+ // train k*(k-1)/2 models
2167
+
2168
+ bool *nonzero = Malloc(bool,l);
2169
+ for(i=0;i<l;i++)
2170
+ nonzero[i] = false;
2171
+ decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2172
+
2173
+ double *probA=NULL,*probB=NULL;
2174
+ if (param->probability)
2175
+ {
2176
+ probA=Malloc(double,nr_class*(nr_class-1)/2);
2177
+ probB=Malloc(double,nr_class*(nr_class-1)/2);
2178
+ }
2179
+
2180
+ int p = 0;
2181
+ for(i=0;i<nr_class;i++)
2182
+ for(int j=i+1;j<nr_class;j++)
2183
+ {
2184
+ svm_problem sub_prob;
2185
+ int si = start[i], sj = start[j];
2186
+ int ci = count[i], cj = count[j];
2187
+ sub_prob.l = ci+cj;
2188
+ sub_prob.x = Malloc(svm_node *,sub_prob.l);
2189
+ sub_prob.y = Malloc(double,sub_prob.l);
2190
+ int k;
2191
+ for(k=0;k<ci;k++)
2192
+ {
2193
+ sub_prob.x[k] = x[si+k];
2194
+ sub_prob.y[k] = +1;
2195
+ }
2196
+ for(k=0;k<cj;k++)
2197
+ {
2198
+ sub_prob.x[ci+k] = x[sj+k];
2199
+ sub_prob.y[ci+k] = -1;
2200
+ }
2201
+
2202
+ if(param->probability)
2203
+ svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2204
+
2205
+ f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2206
+ for(k=0;k<ci;k++)
2207
+ if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2208
+ nonzero[si+k] = true;
2209
+ for(k=0;k<cj;k++)
2210
+ if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2211
+ nonzero[sj+k] = true;
2212
+ free(sub_prob.x);
2213
+ free(sub_prob.y);
2214
+ ++p;
2215
+ }
2216
+
2217
+ // build output
2218
+
2219
+ model->nr_class = nr_class;
2220
+
2221
+ model->label = Malloc(int,nr_class);
2222
+ for(i=0;i<nr_class;i++)
2223
+ model->label[i] = label[i];
2224
+
2225
+ model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2226
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2227
+ model->rho[i] = f[i].rho;
2228
+
2229
+ if(param->probability)
2230
+ {
2231
+ model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2232
+ model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2233
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2234
+ {
2235
+ model->probA[i] = probA[i];
2236
+ model->probB[i] = probB[i];
2237
+ }
2238
+ }
2239
+ else
2240
+ {
2241
+ model->probA=NULL;
2242
+ model->probB=NULL;
2243
+ }
2244
+
2245
+ int total_sv = 0;
2246
+ int *nz_count = Malloc(int,nr_class);
2247
+ model->nSV = Malloc(int,nr_class);
2248
+ for(i=0;i<nr_class;i++)
2249
+ {
2250
+ int nSV = 0;
2251
+ for(int j=0;j<count[i];j++)
2252
+ if(nonzero[start[i]+j])
2253
+ {
2254
+ ++nSV;
2255
+ ++total_sv;
2256
+ }
2257
+ model->nSV[i] = nSV;
2258
+ nz_count[i] = nSV;
2259
+ }
2260
+
2261
+ info("Total nSV = %d\n",total_sv);
2262
+
2263
+ model->l = total_sv;
2264
+ model->SV = Malloc(svm_node *,total_sv);
2265
+ p = 0;
2266
+ for(i=0;i<l;i++)
2267
+ if(nonzero[i]) model->SV[p++] = x[i];
2268
+
2269
+ int *nz_start = Malloc(int,nr_class);
2270
+ nz_start[0] = 0;
2271
+ for(i=1;i<nr_class;i++)
2272
+ nz_start[i] = nz_start[i-1]+nz_count[i-1];
2273
+
2274
+ model->sv_coef = Malloc(double *,nr_class-1);
2275
+ for(i=0;i<nr_class-1;i++)
2276
+ model->sv_coef[i] = Malloc(double,total_sv);
2277
+
2278
+ p = 0;
2279
+ for(i=0;i<nr_class;i++)
2280
+ for(int j=i+1;j<nr_class;j++)
2281
+ {
2282
+ // classifier (i,j): coefficients with
2283
+ // i are in sv_coef[j-1][nz_start[i]...],
2284
+ // j are in sv_coef[i][nz_start[j]...]
2285
+
2286
+ int si = start[i];
2287
+ int sj = start[j];
2288
+ int ci = count[i];
2289
+ int cj = count[j];
2290
+
2291
+ int q = nz_start[i];
2292
+ int k;
2293
+ for(k=0;k<ci;k++)
2294
+ if(nonzero[si+k])
2295
+ model->sv_coef[j-1][q++] = f[p].alpha[k];
2296
+ q = nz_start[j];
2297
+ for(k=0;k<cj;k++)
2298
+ if(nonzero[sj+k])
2299
+ model->sv_coef[i][q++] = f[p].alpha[ci+k];
2300
+ ++p;
2301
+ }
2302
+
2303
+ free(label);
2304
+ free(probA);
2305
+ free(probB);
2306
+ free(count);
2307
+ free(perm);
2308
+ free(start);
2309
+ free(x);
2310
+ free(weighted_C);
2311
+ free(nonzero);
2312
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2313
+ free(f[i].alpha);
2314
+ free(f);
2315
+ free(nz_count);
2316
+ free(nz_start);
2317
+ }
2318
+ return model;
2319
+ }
2320
+
2321
+ // Stratified cross validation
2322
+ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2323
+ {
2324
+ int i;
2325
+ int *fold_start = Malloc(int,nr_fold+1);
2326
+ int l = prob->l;
2327
+ int *perm = Malloc(int,l);
2328
+ int nr_class;
2329
+
2330
+ // stratified cv may not give leave-one-out rate
2331
+ // assigning each class to l folds may leave some folds with zero elements of that class
2332
+ if((param->svm_type == C_SVC ||
2333
+ param->svm_type == NU_SVC) && nr_fold < l)
2334
+ {
2335
+ int *start = NULL;
2336
+ int *label = NULL;
2337
+ int *count = NULL;
2338
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2339
+
2340
+ // randomly shuffle the data within each class, then group it by fold using the perm array
2341
+ int *fold_count = Malloc(int,nr_fold);
2342
+ int c;
2343
+ int *index = Malloc(int,l);
2344
+ for(i=0;i<l;i++)
2345
+ index[i]=perm[i];
2346
+ for (c=0; c<nr_class; c++)
2347
+ for(i=0;i<count[c];i++)
2348
+ {
2349
+ int j = i+rand()%(count[c]-i);
2350
+ swap(index[start[c]+j],index[start[c]+i]);
2351
+ }
2352
+ for(i=0;i<nr_fold;i++)
2353
+ {
2354
+ fold_count[i] = 0;
2355
+ for (c=0; c<nr_class;c++)
2356
+ fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2357
+ }
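+ // For example, a class with count[c] = 10 split into nr_fold = 3 folds contributes
+ // 3, 3 and 4 elements to folds 0, 1 and 2 (integer division), so every element is
+ // used exactly once and per-class fold sizes differ by at most one.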
2358
+ fold_start[0]=0;
2359
+ for (i=1;i<=nr_fold;i++)
2360
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2361
+ for (c=0; c<nr_class;c++)
2362
+ for(i=0;i<nr_fold;i++)
2363
+ {
2364
+ int begin = start[c]+i*count[c]/nr_fold;
2365
+ int end = start[c]+(i+1)*count[c]/nr_fold;
2366
+ for(int j=begin;j<end;j++)
2367
+ {
2368
+ perm[fold_start[i]] = index[j];
2369
+ fold_start[i]++;
2370
+ }
2371
+ }
2372
+ fold_start[0]=0;
2373
+ for (i=1;i<=nr_fold;i++)
2374
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2375
+ free(start);
2376
+ free(label);
2377
+ free(count);
2378
+ free(index);
2379
+ free(fold_count);
2380
+ }
2381
+ else
2382
+ {
2383
+ for(i=0;i<l;i++) perm[i]=i;
2384
+ for(i=0;i<l;i++)
2385
+ {
2386
+ int j = i+rand()%(l-i);
2387
+ swap(perm[i],perm[j]);
2388
+ }
2389
+ for(i=0;i<=nr_fold;i++)
2390
+ fold_start[i]=i*l/nr_fold;
2391
+ }
2392
+
2393
+ for(i=0;i<nr_fold;i++)
2394
+ {
2395
+ int begin = fold_start[i];
2396
+ int end = fold_start[i+1];
2397
+ int j,k;
2398
+ struct svm_problem subprob;
2399
+
2400
+ subprob.l = l-(end-begin);
2401
+ subprob.x = Malloc(struct svm_node*,subprob.l);
2402
+ subprob.y = Malloc(double,subprob.l);
2403
+
2404
+ k=0;
2405
+ for(j=0;j<begin;j++)
2406
+ {
2407
+ subprob.x[k] = prob->x[perm[j]];
2408
+ subprob.y[k] = prob->y[perm[j]];
2409
+ ++k;
2410
+ }
2411
+ for(j=end;j<l;j++)
2412
+ {
2413
+ subprob.x[k] = prob->x[perm[j]];
2414
+ subprob.y[k] = prob->y[perm[j]];
2415
+ ++k;
2416
+ }
2417
+ struct svm_model *submodel = svm_train(&subprob,param);
2418
+ if(param->probability &&
2419
+ (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2420
+ {
2421
+ double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2422
+ for(j=begin;j<end;j++)
2423
+ target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2424
+ free(prob_estimates);
2425
+ }
2426
+ else
2427
+ for(j=begin;j<end;j++)
2428
+ target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2429
+ svm_destroy_model(submodel);
2430
+ free(subprob.x);
2431
+ free(subprob.y);
2432
+ }
2433
+ free(fold_start);
2434
+ free(perm);
2435
+ }
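+ // Illustrative use (target is assumed to be an array of prob.l doubles): after the
+ // call, target[i] holds the prediction for prob.x[i] made by a model trained on the
+ // other folds, so cross-validation accuracy can be estimated as
+ //   int correct = 0;
+ //   for(int i = 0; i < prob.l; i++)
+ //       if(target[i] == prob.y[i]) ++correct;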
2436
+
2437
+
2438
+ int svm_get_svm_type(const svm_model *model)
2439
+ {
2440
+ return model->param.svm_type;
2441
+ }
2442
+
2443
+ int svm_get_nr_class(const svm_model *model)
2444
+ {
2445
+ return model->nr_class;
2446
+ }
2447
+
2448
+ void svm_get_labels(const svm_model *model, int* label)
2449
+ {
2450
+ if (model->label != NULL)
2451
+ for(int i=0;i<model->nr_class;i++)
2452
+ label[i] = model->label[i];
2453
+ }
2454
+
2455
+ double svm_get_svr_probability(const svm_model *model)
2456
+ {
2457
+ if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2458
+ model->probA!=NULL)
2459
+ return model->probA[0];
2460
+ else
2461
+ {
2462
+ fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2463
+ return 0;
2464
+ }
2465
+ }
2466
+
2467
+ void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2468
+ {
2469
+ if(model->param.svm_type == ONE_CLASS ||
2470
+ model->param.svm_type == EPSILON_SVR ||
2471
+ model->param.svm_type == NU_SVR)
2472
+ {
2473
+ double *sv_coef = model->sv_coef[0];
2474
+ double sum = 0;
2475
+ for(int i=0;i<model->l;i++)
2476
+ sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2477
+ sum -= model->rho[0];
2478
+ *dec_values = sum;
2479
+ }
2480
+ else
2481
+ {
2482
+ int i;
2483
+ int nr_class = model->nr_class;
2484
+ int l = model->l;
2485
+
2486
+ double *kvalue = Malloc(double,l);
2487
+ for(i=0;i<l;i++)
2488
+ kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2489
+
2490
+ int *start = Malloc(int,nr_class);
2491
+ start[0] = 0;
2492
+ for(i=1;i<nr_class;i++)
2493
+ start[i] = start[i-1]+model->nSV[i-1];
2494
+
2495
+ int p=0;
2496
+ for(i=0;i<nr_class;i++)
2497
+ for(int j=i+1;j<nr_class;j++)
2498
+ {
2499
+ double sum = 0;
2500
+ int si = start[i];
2501
+ int sj = start[j];
2502
+ int ci = model->nSV[i];
2503
+ int cj = model->nSV[j];
2504
+
2505
+ int k;
2506
+ double *coef1 = model->sv_coef[j-1];
2507
+ double *coef2 = model->sv_coef[i];
2508
+ for(k=0;k<ci;k++)
2509
+ sum += coef1[si+k] * kvalue[si+k];
2510
+ for(k=0;k<cj;k++)
2511
+ sum += coef2[sj+k] * kvalue[sj+k];
2512
+ sum -= model->rho[p];
2513
+ dec_values[p] = sum;
2514
+ p++;
2515
+ }
2516
+
2517
+ free(kvalue);
2518
+ free(start);
2519
+ }
2520
+ }
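+ // dec_values sizing, as used above: a single value for ONE_CLASS / EPSILON_SVR /
+ // NU_SVR, and nr_class*(nr_class-1)/2 values for classification, ordered by the
+ // pair loop: (0,1), (0,2), ..., (0,nr_class-1), (1,2), ...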
2521
+
2522
+ double svm_predict(const svm_model *model, const svm_node *x)
2523
+ {
2524
+ if(model->param.svm_type == ONE_CLASS ||
2525
+ model->param.svm_type == EPSILON_SVR ||
2526
+ model->param.svm_type == NU_SVR)
2527
+ {
2528
+ double res;
2529
+ svm_predict_values(model, x, &res);
2530
+
2531
+ if(model->param.svm_type == ONE_CLASS)
2532
+ return (res>0)?1:-1;
2533
+ else
2534
+ return res;
2535
+ }
2536
+ else
2537
+ {
2538
+ int i;
2539
+ int nr_class = model->nr_class;
2540
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2541
+ svm_predict_values(model, x, dec_values);
2542
+
2543
+ int *vote = Malloc(int,nr_class);
2544
+ for(i=0;i<nr_class;i++)
2545
+ vote[i] = 0;
2546
+ int pos=0;
2547
+ for(i=0;i<nr_class;i++)
2548
+ for(int j=i+1;j<nr_class;j++)
2549
+ {
2550
+ if(dec_values[pos++] > 0)
2551
+ ++vote[i];
2552
+ else
2553
+ ++vote[j];
2554
+ }
2555
+
2556
+ int vote_max_idx = 0;
2557
+ for(i=1;i<nr_class;i++)
2558
+ if(vote[i] > vote[vote_max_idx])
2559
+ vote_max_idx = i;
2560
+ free(vote);
2561
+ free(dec_values);
2562
+ return model->label[vote_max_idx];
2563
+ }
2564
+ }
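+ // A small prediction sketch (model is assumed to come from svm_train or
+ // svm_load_model); the input is a sparse svm_node array terminated by index = -1,
+ // the same format the save/load code below uses:
+ //   svm_node x[3];
+ //   x[0].index = 1;  x[0].value = 0.5;
+ //   x[1].index = 4;  x[1].value = -1.2;
+ //   x[2].index = -1;                     // terminator
+ //   double label = svm_predict(model, x);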
2565
+
2566
+ double svm_predict_probability(
2567
+ const svm_model *model, const svm_node *x, double *prob_estimates)
2568
+ {
2569
+ if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2570
+ model->probA!=NULL && model->probB!=NULL)
2571
+ {
2572
+ int i;
2573
+ int nr_class = model->nr_class;
2574
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2575
+ svm_predict_values(model, x, dec_values);
2576
+
2577
+ double min_prob=1e-7;
2578
+ double **pairwise_prob=Malloc(double *,nr_class);
2579
+ for(i=0;i<nr_class;i++)
2580
+ pairwise_prob[i]=Malloc(double,nr_class);
2581
+ int k=0;
2582
+ for(i=0;i<nr_class;i++)
2583
+ for(int j=i+1;j<nr_class;j++)
2584
+ {
2585
+ pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2586
+ pairwise_prob[j][i]=1-pairwise_prob[i][j];
2587
+ k++;
2588
+ }
2589
+ multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2590
+
2591
+ int prob_max_idx = 0;
2592
+ for(i=1;i<nr_class;i++)
2593
+ if(prob_estimates[i] > prob_estimates[prob_max_idx])
2594
+ prob_max_idx = i;
2595
+ for(i=0;i<nr_class;i++)
2596
+ free(pairwise_prob[i]);
2597
+ free(dec_values);
2598
+ free(pairwise_prob);
2599
+ return model->label[prob_max_idx];
2600
+ }
2601
+ else
2602
+ return svm_predict(model, x);
2603
+ }
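+ // Note: prob_estimates must provide room for nr_class doubles; entry i corresponds to
+ // model->label[i], and the returned value is the label with the largest estimate.
+ // Models trained without probability information fall back to plain svm_predict.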
2604
+
2605
+ static const char *svm_type_table[] =
2606
+ {
2607
+ "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2608
+ };
2609
+
2610
+ static const char *kernel_type_table[]=
2611
+ {
2612
+ "linear","polynomial","rbf","sigmoid","precomputed",NULL
2613
+ };
2614
+
2615
+ int svm_save_model(const char *model_file_name, const svm_model *model)
2616
+ {
2617
+ FILE *fp = fopen(model_file_name,"w");
2618
+ if(fp==NULL) return -1;
2619
+
2620
+ const svm_parameter& param = model->param;
2621
+
2622
+ fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2623
+ fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2624
+
2625
+ if(param.kernel_type == POLY)
2626
+ fprintf(fp,"degree %d\n", param.degree);
2627
+
2628
+ if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2629
+ fprintf(fp,"gamma %g\n", param.gamma);
2630
+
2631
+ if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2632
+ fprintf(fp,"coef0 %g\n", param.coef0);
2633
+
2634
+ int nr_class = model->nr_class;
2635
+ int l = model->l;
2636
+ fprintf(fp, "nr_class %d\n", nr_class);
2637
+ fprintf(fp, "total_sv %d\n",l);
2638
+
2639
+ {
2640
+ fprintf(fp, "rho");
2641
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2642
+ fprintf(fp," %g",model->rho[i]);
2643
+ fprintf(fp, "\n");
2644
+ }
2645
+
2646
+ if(model->label)
2647
+ {
2648
+ fprintf(fp, "label");
2649
+ for(int i=0;i<nr_class;i++)
2650
+ fprintf(fp," %d",model->label[i]);
2651
+ fprintf(fp, "\n");
2652
+ }
2653
+
2654
+ if(model->probA) // regression models store probA only (no probB)
2655
+ {
2656
+ fprintf(fp, "probA");
2657
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2658
+ fprintf(fp," %g",model->probA[i]);
2659
+ fprintf(fp, "\n");
2660
+ }
2661
+ if(model->probB)
2662
+ {
2663
+ fprintf(fp, "probB");
2664
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2665
+ fprintf(fp," %g",model->probB[i]);
2666
+ fprintf(fp, "\n");
2667
+ }
2668
+
2669
+ if(model->nSV)
2670
+ {
2671
+ fprintf(fp, "nr_sv");
2672
+ for(int i=0;i<nr_class;i++)
2673
+ fprintf(fp," %d",model->nSV[i]);
2674
+ fprintf(fp, "\n");
2675
+ }
2676
+
2677
+ fprintf(fp, "SV\n");
2678
+ const double * const *sv_coef = model->sv_coef;
2679
+ const svm_node * const *SV = model->SV;
2680
+
2681
+ for(int i=0;i<l;i++)
2682
+ {
2683
+ for(int j=0;j<nr_class-1;j++)
2684
+ fprintf(fp, "%.16g ",sv_coef[j][i]);
2685
+
2686
+ const svm_node *p = SV[i];
2687
+
2688
+ if(param.kernel_type == PRECOMPUTED)
2689
+ fprintf(fp,"0:%d ",(int)(p->value));
2690
+ else
2691
+ while(p->index != -1)
2692
+ {
2693
+ fprintf(fp,"%d:%.8g ",p->index,p->value);
2694
+ p++;
2695
+ }
2696
+ fprintf(fp, "\n");
2697
+ }
2698
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2699
+ else return 0;
2700
+ }
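+ // The saved file is plain text; for a c_svc / rbf model it looks roughly like the
+ // sketch below (the numbers are made up for illustration, only the keywords are fixed):
+ //   svm_type c_svc
+ //   kernel_type rbf
+ //   gamma 0.25
+ //   nr_class 2
+ //   total_sv 57
+ //   rho -0.431
+ //   label 1 -1
+ //   nr_sv 30 27
+ //   SV
+ //   <sv_coef ...> <index>:<value> <index>:<value> ...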
2701
+
2702
+ static char *line = NULL;
2703
+ static int max_line_len;
2704
+
2705
+ static char* readline(FILE *input)
2706
+ {
2707
+ int len;
2708
+
2709
+ if(fgets(line,max_line_len,input) == NULL)
2710
+ return NULL;
2711
+
2712
+ while(strrchr(line,'\n') == NULL)
2713
+ {
2714
+ max_line_len *= 2;
2715
+ line = (char *) realloc(line,max_line_len);
2716
+ len = (int) strlen(line);
2717
+ if(fgets(line+len,max_line_len-len,input) == NULL)
2718
+ break;
2719
+ }
2720
+ return line;
2721
+ }
2722
+
2723
+ svm_model *svm_load_model(const char *model_file_name)
2724
+ {
2725
+ FILE *fp = fopen(model_file_name,"rb");
2726
+ if(fp==NULL) return NULL;
2727
+
2728
+ // read parameters
2729
+
2730
+ svm_model *model = Malloc(svm_model,1);
2731
+ svm_parameter& param = model->param;
2732
+ model->rho = NULL;
2733
+ model->probA = NULL;
2734
+ model->probB = NULL;
2735
+ model->label = NULL;
2736
+ model->nSV = NULL;
2737
+
2738
+ char cmd[81];
2739
+ while(1)
2740
+ {
2741
+ fscanf(fp,"%80s",cmd);
2742
+
2743
+ if(strcmp(cmd,"svm_type")==0)
2744
+ {
2745
+ fscanf(fp,"%80s",cmd);
2746
+ int i;
2747
+ for(i=0;svm_type_table[i];i++)
2748
+ {
2749
+ if(strcmp(svm_type_table[i],cmd)==0)
2750
+ {
2751
+ param.svm_type=i;
2752
+ break;
2753
+ }
2754
+ }
2755
+ if(svm_type_table[i] == NULL)
2756
+ {
2757
+ fprintf(stderr,"unknown svm type.\n");
2758
+ free(model->rho);
2759
+ free(model->label);
2760
+ free(model->nSV);
2761
+ free(model);
2762
+ return NULL;
2763
+ }
2764
+ }
2765
+ else if(strcmp(cmd,"kernel_type")==0)
2766
+ {
2767
+ fscanf(fp,"%80s",cmd);
2768
+ int i;
2769
+ for(i=0;kernel_type_table[i];i++)
2770
+ {
2771
+ if(strcmp(kernel_type_table[i],cmd)==0)
2772
+ {
2773
+ param.kernel_type=i;
2774
+ break;
2775
+ }
2776
+ }
2777
+ if(kernel_type_table[i] == NULL)
2778
+ {
2779
+ fprintf(stderr,"unknown kernel function.\n");
2780
+ free(model->rho);
2781
+ free(model->label);
2782
+ free(model->nSV);
2783
+ free(model);
2784
+ return NULL;
2785
+ }
2786
+ }
2787
+ else if(strcmp(cmd,"degree")==0)
2788
+ fscanf(fp,"%d",&param.degree);
2789
+ else if(strcmp(cmd,"gamma")==0)
2790
+ fscanf(fp,"%lf",&param.gamma);
2791
+ else if(strcmp(cmd,"coef0")==0)
2792
+ fscanf(fp,"%lf",&param.coef0);
2793
+ else if(strcmp(cmd,"nr_class")==0)
2794
+ fscanf(fp,"%d",&model->nr_class);
2795
+ else if(strcmp(cmd,"total_sv")==0)
2796
+ fscanf(fp,"%d",&model->l);
2797
+ else if(strcmp(cmd,"rho")==0)
2798
+ {
2799
+ int n = model->nr_class * (model->nr_class-1)/2;
2800
+ model->rho = Malloc(double,n);
2801
+ for(int i=0;i<n;i++)
2802
+ fscanf(fp,"%lf",&model->rho[i]);
2803
+ }
2804
+ else if(strcmp(cmd,"label")==0)
2805
+ {
2806
+ int n = model->nr_class;
2807
+ model->label = Malloc(int,n);
2808
+ for(int i=0;i<n;i++)
2809
+ fscanf(fp,"%d",&model->label[i]);
2810
+ }
2811
+ else if(strcmp(cmd,"probA")==0)
2812
+ {
2813
+ int n = model->nr_class * (model->nr_class-1)/2;
2814
+ model->probA = Malloc(double,n);
2815
+ for(int i=0;i<n;i++)
2816
+ fscanf(fp,"%lf",&model->probA[i]);
2817
+ }
2818
+ else if(strcmp(cmd,"probB")==0)
2819
+ {
2820
+ int n = model->nr_class * (model->nr_class-1)/2;
2821
+ model->probB = Malloc(double,n);
2822
+ for(int i=0;i<n;i++)
2823
+ fscanf(fp,"%lf",&model->probB[i]);
2824
+ }
2825
+ else if(strcmp(cmd,"nr_sv")==0)
2826
+ {
2827
+ int n = model->nr_class;
2828
+ model->nSV = Malloc(int,n);
2829
+ for(int i=0;i<n;i++)
2830
+ fscanf(fp,"%d",&model->nSV[i]);
2831
+ }
2832
+ else if(strcmp(cmd,"SV")==0)
2833
+ {
2834
+ while(1)
2835
+ {
2836
+ int c = getc(fp);
2837
+ if(c==EOF || c=='\n') break;
2838
+ }
2839
+ break;
2840
+ }
2841
+ else
2842
+ {
2843
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2844
+ free(model->rho);
2845
+ free(model->label);
2846
+ free(model->nSV);
2847
+ free(model);
2848
+ return NULL;
2849
+ }
2850
+ }
2851
+
2852
+ // read sv_coef and SV
2853
+
2854
+ int elements = 0;
2855
+ long pos = ftell(fp);
2856
+
2857
+ max_line_len = 1024;
2858
+ line = Malloc(char,max_line_len);
2859
+ char *p,*endptr,*idx,*val;
2860
+
2861
+ while(readline(fp)!=NULL)
2862
+ {
2863
+ p = strtok(line,":");
2864
+ while(1)
2865
+ {
2866
+ p = strtok(NULL,":");
2867
+ if(p == NULL)
2868
+ break;
2869
+ ++elements;
2870
+ }
2871
+ }
2872
+ elements += model->l;
2873
+
2874
+ fseek(fp,pos,SEEK_SET);
2875
+
2876
+ int m = model->nr_class - 1;
2877
+ int l = model->l;
2878
+ model->sv_coef = Malloc(double *,m);
2879
+ int i;
2880
+ for(i=0;i<m;i++)
2881
+ model->sv_coef[i] = Malloc(double,l);
2882
+ model->SV = Malloc(svm_node*,l);
2883
+ svm_node *x_space = NULL;
2884
+ if(l>0) x_space = Malloc(svm_node,elements);
2885
+
2886
+ int j=0;
2887
+ for(i=0;i<l;i++)
2888
+ {
2889
+ readline(fp);
2890
+ model->SV[i] = &x_space[j];
2891
+
2892
+ p = strtok(line, " \t");
2893
+ model->sv_coef[0][i] = strtod(p,&endptr);
2894
+ for(int k=1;k<m;k++)
2895
+ {
2896
+ p = strtok(NULL, " \t");
2897
+ model->sv_coef[k][i] = strtod(p,&endptr);
2898
+ }
2899
+
2900
+ while(1)
2901
+ {
2902
+ idx = strtok(NULL, ":");
2903
+ val = strtok(NULL, " \t");
2904
+
2905
+ if(val == NULL)
2906
+ break;
2907
+ x_space[j].index = (int) strtol(idx,&endptr,10);
2908
+ x_space[j].value = strtod(val,&endptr);
2909
+
2910
+ ++j;
2911
+ }
2912
+ x_space[j++].index = -1;
2913
+ }
2914
+ free(line);
2915
+
2916
+ if (ferror(fp) != 0 || fclose(fp) != 0)
2917
+ return NULL;
2918
+
2919
+ model->free_sv = 1; // SV nodes (x_space) are owned by the model, so svm_destroy_model frees them
2920
+ return model;
2921
+ }
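+ // Illustrative round trip ("model.txt" is a placeholder path):
+ //   if(svm_save_model("model.txt", model) == 0)
+ //   {
+ //       svm_model *loaded = svm_load_model("model.txt");
+ //       if(loaded != NULL)
+ //       {
+ //           double y = svm_predict(loaded, x);
+ //           svm_destroy_model(loaded);
+ //       }
+ //   }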
2922
+
2923
+ void svm_destroy_model(svm_model* model)
2924
+ {
2925
+ if(model->free_sv && model->l > 0)
2926
+ free((void *)(model->SV[0]));
2927
+ for(int i=0;i<model->nr_class-1;i++)
2928
+ free(model->sv_coef[i]);
2929
+ free(model->SV);
2930
+ free(model->sv_coef);
2931
+ free(model->rho);
2932
+ free(model->label);
2933
+ free(model->probA);
2934
+ free(model->probB);
2935
+ free(model->nSV);
2936
+ free(model);
2937
+ }
2938
+
2939
+ void svm_destroy_param(svm_parameter* param)
2940
+ {
2941
+ free(param->weight_label);
2942
+ free(param->weight);
2943
+ }
2944
+
2945
+ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
2946
+ {
2947
+ // svm_type
2948
+
2949
+ int svm_type = param->svm_type;
2950
+ if(svm_type != C_SVC &&
2951
+ svm_type != NU_SVC &&
2952
+ svm_type != ONE_CLASS &&
2953
+ svm_type != EPSILON_SVR &&
2954
+ svm_type != NU_SVR)
2955
+ return "unknown svm type";
2956
+
2957
+ // kernel_type, degree
2958
+
2959
+ int kernel_type = param->kernel_type;
2960
+ if(kernel_type != LINEAR &&
2961
+ kernel_type != POLY &&
2962
+ kernel_type != RBF &&
2963
+ kernel_type != SIGMOID &&
2964
+ kernel_type != PRECOMPUTED)
2965
+ return "unknown kernel type";
2966
+
2967
+ if(param->gamma < 0)
2968
+ return "gamma < 0";
2969
+
2970
+ if(param->degree < 0)
2971
+ return "degree of polynomial kernel < 0";
2972
+
2973
+ // cache_size,eps,C,nu,p,shrinking
2974
+
2975
+ if(param->cache_size <= 0)
2976
+ return "cache_size <= 0";
2977
+
2978
+ if(param->eps <= 0)
2979
+ return "eps <= 0";
2980
+
2981
+ if(svm_type == C_SVC ||
2982
+ svm_type == EPSILON_SVR ||
2983
+ svm_type == NU_SVR)
2984
+ if(param->C <= 0)
2985
+ return "C <= 0";
2986
+
2987
+ if(svm_type == NU_SVC ||
2988
+ svm_type == ONE_CLASS ||
2989
+ svm_type == NU_SVR)
2990
+ if(param->nu <= 0 || param->nu > 1)
2991
+ return "nu <= 0 or nu > 1";
2992
+
2993
+ if(svm_type == EPSILON_SVR)
2994
+ if(param->p < 0)
2995
+ return "p < 0";
2996
+
2997
+ if(param->shrinking != 0 &&
2998
+ param->shrinking != 1)
2999
+ return "shrinking != 0 and shrinking != 1";
3000
+
3001
+ if(param->probability != 0 &&
3002
+ param->probability != 1)
3003
+ return "probability != 0 and probability != 1";
3004
+
3005
+ if(param->probability == 1 &&
3006
+ svm_type == ONE_CLASS)
3007
+ return "one-class SVM probability output not supported yet";
3008
+
3009
+
3010
+ // check whether nu-svc is feasible
3011
+
3012
+ if(svm_type == NU_SVC)
3013
+ {
3014
+ int l = prob->l;
3015
+ int max_nr_class = 16;
3016
+ int nr_class = 0;
3017
+ int *label = Malloc(int,max_nr_class);
3018
+ int *count = Malloc(int,max_nr_class);
3019
+
3020
+ int i;
3021
+ for(i=0;i<l;i++)
3022
+ {
3023
+ int this_label = (int)prob->y[i];
3024
+ int j;
3025
+ for(j=0;j<nr_class;j++)
3026
+ if(this_label == label[j])
3027
+ {
3028
+ ++count[j];
3029
+ break;
3030
+ }
3031
+ if(j == nr_class)
3032
+ {
3033
+ if(nr_class == max_nr_class)
3034
+ {
3035
+ max_nr_class *= 2;
3036
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
3037
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
3038
+ }
3039
+ label[nr_class] = this_label;
3040
+ count[nr_class] = 1;
3041
+ ++nr_class;
3042
+ }
3043
+ }
3044
+
3045
+ for(i=0;i<nr_class;i++)
3046
+ {
3047
+ int n1 = count[i];
3048
+ for(int j=i+1;j<nr_class;j++)
3049
+ {
3050
+ int n2 = count[j];
3051
+ if(param->nu*(n1+n2)/2 > min(n1,n2))
3052
+ {
3053
+ free(label);
3054
+ free(count);
3055
+ return "specified nu is infeasible";
3056
+ }
3057
+ }
3058
+ }
3059
+ free(label);
3060
+ free(count);
3061
+ }
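+ // The check above amounts to requiring nu <= 2*min(n1,n2)/(n1+n2) for every class
+ // pair; for example, with n1 = 90 and n2 = 10, any nu larger than 0.2 is rejected.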
3062
+
3063
+ return NULL;
3064
+ }
3065
+
3066
+ int svm_check_probability_model(const svm_model *model)
3067
+ {
3068
+ return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3069
+ model->probA!=NULL && model->probB!=NULL) ||
3070
+ ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3071
+ model->probA!=NULL);
3072
+ }