plastictrophy-libsvm-ruby-swig 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/svm.cpp ADDED
@@ -0,0 +1,3069 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <ctype.h>
5
+ #include <float.h>
6
+ #include <string.h>
7
+ #include <stdarg.h>
8
+ #include "svm.h"
9
+ int libsvm_version = LIBSVM_VERSION;
10
+ typedef float Qfloat;
11
+ typedef signed char schar;
12
+ #ifndef min
13
+ template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
14
+ #endif
15
+ #ifndef max
16
+ template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
17
+ #endif
18
+ template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
19
+ template <class S, class T> inline void clone(T*& dst, S* src, int n)
20
+ {
21
+ dst = new T[n];
22
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
23
+ }
24
+ inline double powi(double base, int times)
25
+ {
26
+ double tmp = base, ret = 1.0;
27
+
28
+ for(int t=times; t>0; t/=2)
29
+ {
30
+ if(t%2==1) ret*=tmp;
31
+ tmp = tmp * tmp;
32
+ }
33
+ return ret;
34
+ }
35
+ #define INF HUGE_VAL
36
+ #define TAU 1e-12
37
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
38
+
39
+ static void print_string_stdout(const char *s)
40
+ {
41
+ fputs(s,stdout);
42
+ fflush(stdout);
43
+ }
44
+ void (*svm_print_string) (const char *) = &print_string_stdout;
45
+ #if 1
46
+ int info_on = 0;
47
+ static void info(const char *fmt,...)
48
+ {
49
+ char buf[BUFSIZ];
50
+ va_list ap;
51
+ if (info_on==1) {
52
+ va_start(ap,fmt);
53
+ vsprintf(buf,fmt,ap);
54
+ va_end(ap);
55
+ (*svm_print_string)(buf);
56
+ }
57
+ }
58
+ #else
59
+ static void info(const char *fmt,...) {}
60
+ #endif
61
+
62
+ //
63
+ // Kernel Cache
64
+ //
65
+ // l is the total number of data items
66
+ // size is the cache size limit in bytes
67
+ //
68
+ class Cache
69
+ {
70
+ public:
71
+ Cache(int l,long int size);
72
+ ~Cache();
73
+
74
+ // request data [0,len)
75
+ // return some position p where [p,len) need to be filled
76
+ // (p >= len if nothing needs to be filled)
77
+ int get_data(const int index, Qfloat **data, int len);
78
+ void swap_index(int i, int j);
79
+ private:
80
+ int l;
81
+ long int size;
82
+ struct head_t
83
+ {
84
+ head_t *prev, *next; // a circular list
85
+ Qfloat *data;
86
+ int len; // data[0,len) is cached in this entry
87
+ };
88
+
89
+ head_t *head;
90
+ head_t lru_head;
91
+ void lru_delete(head_t *h);
92
+ void lru_insert(head_t *h);
93
+ };
94
+
95
+ Cache::Cache(int l_,long int size_):l(l_),size(size_)
96
+ {
97
+ head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
98
+ size /= sizeof(Qfloat);
99
+ size -= l * sizeof(head_t) / sizeof(Qfloat);
100
+ size = max(size, 2 * (long int) l); // cache must be large enough for two columns
101
+ lru_head.next = lru_head.prev = &lru_head;
102
+ }
103
+
104
+ Cache::~Cache()
105
+ {
106
+ for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
107
+ free(h->data);
108
+ free(head);
109
+ }
110
+
111
+ void Cache::lru_delete(head_t *h)
112
+ {
113
+ // delete from current location
114
+ h->prev->next = h->next;
115
+ h->next->prev = h->prev;
116
+ }
117
+
118
+ void Cache::lru_insert(head_t *h)
119
+ {
120
+ // insert to last position
121
+ h->next = &lru_head;
122
+ h->prev = lru_head.prev;
123
+ h->prev->next = h;
124
+ h->next->prev = h;
125
+ }
126
+
127
+ int Cache::get_data(const int index, Qfloat **data, int len)
128
+ {
129
+ head_t *h = &head[index];
130
+ if(h->len) lru_delete(h);
131
+ int more = len - h->len;
132
+
133
+ if(more > 0)
134
+ {
135
+ // free old space
136
+ while(size < more)
137
+ {
138
+ head_t *old = lru_head.next;
139
+ lru_delete(old);
140
+ free(old->data);
141
+ size += old->len;
142
+ old->data = 0;
143
+ old->len = 0;
144
+ }
145
+
146
+ // allocate new space
147
+ h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
148
+ size -= more;
149
+ swap(h->len,len);
150
+ }
151
+
152
+ lru_insert(h);
153
+ *data = h->data;
154
+ return len;
155
+ }
156
+
157
+ void Cache::swap_index(int i, int j)
158
+ {
159
+ if(i==j) return;
160
+
161
+ if(head[i].len) lru_delete(&head[i]);
162
+ if(head[j].len) lru_delete(&head[j]);
163
+ swap(head[i].data,head[j].data);
164
+ swap(head[i].len,head[j].len);
165
+ if(head[i].len) lru_insert(&head[i]);
166
+ if(head[j].len) lru_insert(&head[j]);
167
+
168
+ if(i>j) swap(i,j);
169
+ for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
170
+ {
171
+ if(h->len > i)
172
+ {
173
+ if(h->len > j)
174
+ swap(h->data[i],h->data[j]);
175
+ else
176
+ {
177
+ // give up
178
+ lru_delete(h);
179
+ free(h->data);
180
+ size += h->len;
181
+ h->data = 0;
182
+ h->len = 0;
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+ //
189
+ // Kernel evaluation
190
+ //
191
+ // the static method k_function is for doing single kernel evaluation
192
+ // the constructor of Kernel prepares to calculate the l*l kernel matrix
193
+ // the member function get_Q is for getting one column from the Q Matrix
194
+ //
195
+ class QMatrix {
196
+ public:
197
+ virtual Qfloat *get_Q(int column, int len) const = 0;
198
+ virtual Qfloat *get_QD() const = 0;
199
+ virtual void swap_index(int i, int j) const = 0;
200
+ virtual ~QMatrix() {}
201
+ };
202
+
203
+ class Kernel: public QMatrix {
204
+ public:
205
+ Kernel(int l, svm_node * const * x, const svm_parameter& param);
206
+ virtual ~Kernel();
207
+
208
+ static double k_function(const svm_node *x, const svm_node *y,
209
+ const svm_parameter& param);
210
+ virtual Qfloat *get_Q(int column, int len) const = 0;
211
+ virtual Qfloat *get_QD() const = 0;
212
+ virtual void swap_index(int i, int j) const // not so const...
213
+ {
214
+ swap(x[i],x[j]);
215
+ if(x_square) swap(x_square[i],x_square[j]);
216
+ }
217
+ protected:
218
+
219
+ double (Kernel::*kernel_function)(int i, int j) const;
220
+
221
+ private:
222
+ const svm_node **x;
223
+ double *x_square;
224
+
225
+ // svm_parameter
226
+ const int kernel_type;
227
+ const int degree;
228
+ const double gamma;
229
+ const double coef0;
230
+
231
+ static double dot(const svm_node *px, const svm_node *py);
232
+ double kernel_linear(int i, int j) const
233
+ {
234
+ return dot(x[i],x[j]);
235
+ }
236
+ double kernel_poly(int i, int j) const
237
+ {
238
+ return powi(gamma*dot(x[i],x[j])+coef0,degree);
239
+ }
240
+ double kernel_rbf(int i, int j) const
241
+ {
242
+ return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
243
+ }
244
+ double kernel_sigmoid(int i, int j) const
245
+ {
246
+ return tanh(gamma*dot(x[i],x[j])+coef0);
247
+ }
248
+ double kernel_precomputed(int i, int j) const
249
+ {
250
+ return x[i][(int)(x[j][0].value)].value;
251
+ }
252
+ };
253
+
254
+ Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
255
+ :kernel_type(param.kernel_type), degree(param.degree),
256
+ gamma(param.gamma), coef0(param.coef0)
257
+ {
258
+ switch(kernel_type)
259
+ {
260
+ case LINEAR:
261
+ kernel_function = &Kernel::kernel_linear;
262
+ break;
263
+ case POLY:
264
+ kernel_function = &Kernel::kernel_poly;
265
+ break;
266
+ case RBF:
267
+ kernel_function = &Kernel::kernel_rbf;
268
+ break;
269
+ case SIGMOID:
270
+ kernel_function = &Kernel::kernel_sigmoid;
271
+ break;
272
+ case PRECOMPUTED:
273
+ kernel_function = &Kernel::kernel_precomputed;
274
+ break;
275
+ }
276
+
277
+ clone(x,x_,l);
278
+
279
+ if(kernel_type == RBF)
280
+ {
281
+ x_square = new double[l];
282
+ for(int i=0;i<l;i++)
283
+ x_square[i] = dot(x[i],x[i]);
284
+ }
285
+ else
286
+ x_square = 0;
287
+ }
288
+
289
+ Kernel::~Kernel()
290
+ {
291
+ delete[] x;
292
+ delete[] x_square;
293
+ }
294
+
295
+ double Kernel::dot(const svm_node *px, const svm_node *py)
296
+ {
297
+ double sum = 0;
298
+ while(px->index != -1 && py->index != -1)
299
+ {
300
+ if(px->index == py->index)
301
+ {
302
+ sum += px->value * py->value;
303
+ ++px;
304
+ ++py;
305
+ }
306
+ else
307
+ {
308
+ if(px->index > py->index)
309
+ ++py;
310
+ else
311
+ ++px;
312
+ }
313
+ }
314
+ return sum;
315
+ }
316
+
317
+ double Kernel::k_function(const svm_node *x, const svm_node *y,
318
+ const svm_parameter& param)
319
+ {
320
+ switch(param.kernel_type)
321
+ {
322
+ case LINEAR:
323
+ return dot(x,y);
324
+ case POLY:
325
+ return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
326
+ case RBF:
327
+ {
328
+ double sum = 0;
329
+ while(x->index != -1 && y->index !=-1)
330
+ {
331
+ if(x->index == y->index)
332
+ {
333
+ double d = x->value - y->value;
334
+ sum += d*d;
335
+ ++x;
336
+ ++y;
337
+ }
338
+ else
339
+ {
340
+ if(x->index > y->index)
341
+ {
342
+ sum += y->value * y->value;
343
+ ++y;
344
+ }
345
+ else
346
+ {
347
+ sum += x->value * x->value;
348
+ ++x;
349
+ }
350
+ }
351
+ }
352
+
353
+ while(x->index != -1)
354
+ {
355
+ sum += x->value * x->value;
356
+ ++x;
357
+ }
358
+
359
+ while(y->index != -1)
360
+ {
361
+ sum += y->value * y->value;
362
+ ++y;
363
+ }
364
+
365
+ return exp(-param.gamma*sum);
366
+ }
367
+ case SIGMOID:
368
+ return tanh(param.gamma*dot(x,y)+param.coef0);
369
+ case PRECOMPUTED: //x: test (validation), y: SV
370
+ return x[(int)(y->value)].value;
371
+ default:
372
+ return 0; // Unreachable
373
+ }
374
+ }
375
+
376
+ // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
377
+ // Solves:
378
+ //
379
+ // min 0.5(\alpha^T Q \alpha) + p^T \alpha
380
+ //
381
+ // y^T \alpha = \delta
382
+ // y_i = +1 or -1
383
+ // 0 <= alpha_i <= Cp for y_i = 1
384
+ // 0 <= alpha_i <= Cn for y_i = -1
385
+ //
386
+ // Given:
387
+ //
388
+ // Q, p, y, Cp, Cn, and an initial feasible point \alpha
389
+ // l is the size of vectors and matrices
390
+ // eps is the stopping tolerance
391
+ //
392
+ // solution will be put in \alpha, objective value will be put in obj
393
+ //
394
+ class Solver {
395
+ public:
396
+ Solver() {};
397
+ virtual ~Solver() {};
398
+
399
+ struct SolutionInfo {
400
+ double obj;
401
+ double rho;
402
+ double upper_bound_p;
403
+ double upper_bound_n;
404
+ double r; // for Solver_NU
405
+ };
406
+
407
+ void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
408
+ double *alpha_, double Cp, double Cn, double eps,
409
+ SolutionInfo* si, int shrinking);
410
+ protected:
411
+ int active_size;
412
+ schar *y;
413
+ double *G; // gradient of objective function
414
+ enum { LOWER_BOUND, UPPER_BOUND, FREE };
415
+ char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
416
+ double *alpha;
417
+ const QMatrix *Q;
418
+ const Qfloat *QD;
419
+ double eps;
420
+ double Cp,Cn;
421
+ double *p;
422
+ int *active_set;
423
+ double *G_bar; // gradient, if we treat free variables as 0
424
+ int l;
425
+ bool unshrink; // XXX
426
+
427
+ double get_C(int i)
428
+ {
429
+ return (y[i] > 0)? Cp : Cn;
430
+ }
431
+ void update_alpha_status(int i)
432
+ {
433
+ if(alpha[i] >= get_C(i))
434
+ alpha_status[i] = UPPER_BOUND;
435
+ else if(alpha[i] <= 0)
436
+ alpha_status[i] = LOWER_BOUND;
437
+ else alpha_status[i] = FREE;
438
+ }
439
+ bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
440
+ bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
441
+ bool is_free(int i) { return alpha_status[i] == FREE; }
442
+ void swap_index(int i, int j);
443
+ void reconstruct_gradient();
444
+ virtual int select_working_set(int &i, int &j);
445
+ virtual double calculate_rho();
446
+ virtual void do_shrinking();
447
+ private:
448
+ bool be_shrunk(int i, double Gmax1, double Gmax2);
449
+ };
450
+
451
+ void Solver::swap_index(int i, int j)
452
+ {
453
+ Q->swap_index(i,j);
454
+ swap(y[i],y[j]);
455
+ swap(G[i],G[j]);
456
+ swap(alpha_status[i],alpha_status[j]);
457
+ swap(alpha[i],alpha[j]);
458
+ swap(p[i],p[j]);
459
+ swap(active_set[i],active_set[j]);
460
+ swap(G_bar[i],G_bar[j]);
461
+ }
462
+
463
+ void Solver::reconstruct_gradient()
464
+ {
465
+ // reconstruct inactive elements of G from G_bar and free variables
466
+
467
+ if(active_size == l) return;
468
+
469
+ int i,j;
470
+ int nr_free = 0;
471
+
472
+ for(j=active_size;j<l;j++)
473
+ G[j] = G_bar[j] + p[j];
474
+
475
+ for(j=0;j<active_size;j++)
476
+ if(is_free(j))
477
+ nr_free++;
478
+
479
+ if(2*nr_free < active_size)
480
+ info("\nWarning: using -h 0 may be faster\n");
481
+
482
+ if (nr_free*l > 2*active_size*(l-active_size))
483
+ {
484
+ for(i=active_size;i<l;i++)
485
+ {
486
+ const Qfloat *Q_i = Q->get_Q(i,active_size);
487
+ for(j=0;j<active_size;j++)
488
+ if(is_free(j))
489
+ G[i] += alpha[j] * Q_i[j];
490
+ }
491
+ }
492
+ else
493
+ {
494
+ for(i=0;i<active_size;i++)
495
+ if(is_free(i))
496
+ {
497
+ const Qfloat *Q_i = Q->get_Q(i,l);
498
+ double alpha_i = alpha[i];
499
+ for(j=active_size;j<l;j++)
500
+ G[j] += alpha_i * Q_i[j];
501
+ }
502
+ }
503
+ }
504
+
505
+ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
506
+ double *alpha_, double Cp, double Cn, double eps,
507
+ SolutionInfo* si, int shrinking)
508
+ {
509
+ this->l = l;
510
+ this->Q = &Q;
511
+ QD=Q.get_QD();
512
+ clone(p, p_,l);
513
+ clone(y, y_,l);
514
+ clone(alpha,alpha_,l);
515
+ this->Cp = Cp;
516
+ this->Cn = Cn;
517
+ this->eps = eps;
518
+ unshrink = false;
519
+
520
+ // initialize alpha_status
521
+ {
522
+ alpha_status = new char[l];
523
+ for(int i=0;i<l;i++)
524
+ update_alpha_status(i);
525
+ }
526
+
527
+ // initialize active set (for shrinking)
528
+ {
529
+ active_set = new int[l];
530
+ for(int i=0;i<l;i++)
531
+ active_set[i] = i;
532
+ active_size = l;
533
+ }
534
+
535
+ // initialize gradient
536
+ {
537
+ G = new double[l];
538
+ G_bar = new double[l];
539
+ int i;
540
+ for(i=0;i<l;i++)
541
+ {
542
+ G[i] = p[i];
543
+ G_bar[i] = 0;
544
+ }
545
+ for(i=0;i<l;i++)
546
+ if(!is_lower_bound(i))
547
+ {
548
+ const Qfloat *Q_i = Q.get_Q(i,l);
549
+ double alpha_i = alpha[i];
550
+ int j;
551
+ for(j=0;j<l;j++)
552
+ G[j] += alpha_i*Q_i[j];
553
+ if(is_upper_bound(i))
554
+ for(j=0;j<l;j++)
555
+ G_bar[j] += get_C(i) * Q_i[j];
556
+ }
557
+ }
558
+
559
+ // optimization step
560
+
561
+ int iter = 0;
562
+ int counter = min(l,1000)+1;
563
+
564
+ while(1)
565
+ {
566
+ // show progress and do shrinking
567
+
568
+ if(--counter == 0)
569
+ {
570
+ counter = min(l,1000);
571
+ if(shrinking) do_shrinking();
572
+ info(".");
573
+ }
574
+
575
+ int i,j;
576
+ if(select_working_set(i,j)!=0)
577
+ {
578
+ // reconstruct the whole gradient
579
+ reconstruct_gradient();
580
+ // reset active set size and check
581
+ active_size = l;
582
+ info("*");
583
+ if(select_working_set(i,j)!=0)
584
+ break;
585
+ else
586
+ counter = 1; // do shrinking next iteration
587
+ }
588
+
589
+ ++iter;
590
+
591
+ // update alpha[i] and alpha[j], handle bounds carefully
592
+
593
+ const Qfloat *Q_i = Q.get_Q(i,active_size);
594
+ const Qfloat *Q_j = Q.get_Q(j,active_size);
595
+
596
+ double C_i = get_C(i);
597
+ double C_j = get_C(j);
598
+
599
+ double old_alpha_i = alpha[i];
600
+ double old_alpha_j = alpha[j];
601
+
602
+ if(y[i]!=y[j])
603
+ {
604
+ double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
605
+ if (quad_coef <= 0)
606
+ quad_coef = TAU;
607
+ double delta = (-G[i]-G[j])/quad_coef;
608
+ double diff = alpha[i] - alpha[j];
609
+ alpha[i] += delta;
610
+ alpha[j] += delta;
611
+
612
+ if(diff > 0)
613
+ {
614
+ if(alpha[j] < 0)
615
+ {
616
+ alpha[j] = 0;
617
+ alpha[i] = diff;
618
+ }
619
+ }
620
+ else
621
+ {
622
+ if(alpha[i] < 0)
623
+ {
624
+ alpha[i] = 0;
625
+ alpha[j] = -diff;
626
+ }
627
+ }
628
+ if(diff > C_i - C_j)
629
+ {
630
+ if(alpha[i] > C_i)
631
+ {
632
+ alpha[i] = C_i;
633
+ alpha[j] = C_i - diff;
634
+ }
635
+ }
636
+ else
637
+ {
638
+ if(alpha[j] > C_j)
639
+ {
640
+ alpha[j] = C_j;
641
+ alpha[i] = C_j + diff;
642
+ }
643
+ }
644
+ }
645
+ else
646
+ {
647
+ double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
648
+ if (quad_coef <= 0)
649
+ quad_coef = TAU;
650
+ double delta = (G[i]-G[j])/quad_coef;
651
+ double sum = alpha[i] + alpha[j];
652
+ alpha[i] -= delta;
653
+ alpha[j] += delta;
654
+
655
+ if(sum > C_i)
656
+ {
657
+ if(alpha[i] > C_i)
658
+ {
659
+ alpha[i] = C_i;
660
+ alpha[j] = sum - C_i;
661
+ }
662
+ }
663
+ else
664
+ {
665
+ if(alpha[j] < 0)
666
+ {
667
+ alpha[j] = 0;
668
+ alpha[i] = sum;
669
+ }
670
+ }
671
+ if(sum > C_j)
672
+ {
673
+ if(alpha[j] > C_j)
674
+ {
675
+ alpha[j] = C_j;
676
+ alpha[i] = sum - C_j;
677
+ }
678
+ }
679
+ else
680
+ {
681
+ if(alpha[i] < 0)
682
+ {
683
+ alpha[i] = 0;
684
+ alpha[j] = sum;
685
+ }
686
+ }
687
+ }
688
+
689
+ // update G
690
+
691
+ double delta_alpha_i = alpha[i] - old_alpha_i;
692
+ double delta_alpha_j = alpha[j] - old_alpha_j;
693
+
694
+ for(int k=0;k<active_size;k++)
695
+ {
696
+ G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
697
+ }
698
+
699
+ // update alpha_status and G_bar
700
+
701
+ {
702
+ bool ui = is_upper_bound(i);
703
+ bool uj = is_upper_bound(j);
704
+ update_alpha_status(i);
705
+ update_alpha_status(j);
706
+ int k;
707
+ if(ui != is_upper_bound(i))
708
+ {
709
+ Q_i = Q.get_Q(i,l);
710
+ if(ui)
711
+ for(k=0;k<l;k++)
712
+ G_bar[k] -= C_i * Q_i[k];
713
+ else
714
+ for(k=0;k<l;k++)
715
+ G_bar[k] += C_i * Q_i[k];
716
+ }
717
+
718
+ if(uj != is_upper_bound(j))
719
+ {
720
+ Q_j = Q.get_Q(j,l);
721
+ if(uj)
722
+ for(k=0;k<l;k++)
723
+ G_bar[k] -= C_j * Q_j[k];
724
+ else
725
+ for(k=0;k<l;k++)
726
+ G_bar[k] += C_j * Q_j[k];
727
+ }
728
+ }
729
+ }
730
+
731
+ // calculate rho
732
+
733
+ si->rho = calculate_rho();
734
+
735
+ // calculate objective value
736
+ {
737
+ double v = 0;
738
+ int i;
739
+ for(i=0;i<l;i++)
740
+ v += alpha[i] * (G[i] + p[i]);
741
+
742
+ si->obj = v/2;
743
+ }
744
+
745
+ // put back the solution
746
+ {
747
+ for(int i=0;i<l;i++)
748
+ alpha_[active_set[i]] = alpha[i];
749
+ }
750
+
751
+ // juggle everything back
752
+ /*{
753
+ for(int i=0;i<l;i++)
754
+ while(active_set[i] != i)
755
+ swap_index(i,active_set[i]);
756
+ // or Q.swap_index(i,active_set[i]);
757
+ }*/
758
+
759
+ si->upper_bound_p = Cp;
760
+ si->upper_bound_n = Cn;
761
+
762
+ info("\noptimization finished, #iter = %d\n",iter);
763
+
764
+ delete[] p;
765
+ delete[] y;
766
+ delete[] alpha;
767
+ delete[] alpha_status;
768
+ delete[] active_set;
769
+ delete[] G;
770
+ delete[] G_bar;
771
+ }
772
+
773
+ // return 1 if already optimal, return 0 otherwise
774
+ int Solver::select_working_set(int &out_i, int &out_j)
775
+ {
776
+ // return i,j such that
777
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
778
+ // j: minimizes the decrease of obj value
779
+ // (if quadratic coefficient <= 0, replace it with tau)
780
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
781
+
782
+ double Gmax = -INF;
783
+ double Gmax2 = -INF;
784
+ int Gmax_idx = -1;
785
+ int Gmin_idx = -1;
786
+ double obj_diff_min = INF;
787
+
788
+ for(int t=0;t<active_size;t++)
789
+ if(y[t]==+1)
790
+ {
791
+ if(!is_upper_bound(t))
792
+ if(-G[t] >= Gmax)
793
+ {
794
+ Gmax = -G[t];
795
+ Gmax_idx = t;
796
+ }
797
+ }
798
+ else
799
+ {
800
+ if(!is_lower_bound(t))
801
+ if(G[t] >= Gmax)
802
+ {
803
+ Gmax = G[t];
804
+ Gmax_idx = t;
805
+ }
806
+ }
807
+
808
+ int i = Gmax_idx;
809
+ const Qfloat *Q_i = NULL;
810
+ if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
811
+ Q_i = Q->get_Q(i,active_size);
812
+
813
+ for(int j=0;j<active_size;j++)
814
+ {
815
+ if(y[j]==+1)
816
+ {
817
+ if (!is_lower_bound(j))
818
+ {
819
+ double grad_diff=Gmax+G[j];
820
+ if (G[j] >= Gmax2)
821
+ Gmax2 = G[j];
822
+ if (grad_diff > 0)
823
+ {
824
+ double obj_diff;
825
+ double quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
826
+ if (quad_coef > 0)
827
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
828
+ else
829
+ obj_diff = -(grad_diff*grad_diff)/TAU;
830
+
831
+ if (obj_diff <= obj_diff_min)
832
+ {
833
+ Gmin_idx=j;
834
+ obj_diff_min = obj_diff;
835
+ }
836
+ }
837
+ }
838
+ }
839
+ else
840
+ {
841
+ if (!is_upper_bound(j))
842
+ {
843
+ double grad_diff= Gmax-G[j];
844
+ if (-G[j] >= Gmax2)
845
+ Gmax2 = -G[j];
846
+ if (grad_diff > 0)
847
+ {
848
+ double obj_diff;
849
+ double quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
850
+ if (quad_coef > 0)
851
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
852
+ else
853
+ obj_diff = -(grad_diff*grad_diff)/TAU;
854
+
855
+ if (obj_diff <= obj_diff_min)
856
+ {
857
+ Gmin_idx=j;
858
+ obj_diff_min = obj_diff;
859
+ }
860
+ }
861
+ }
862
+ }
863
+ }
864
+
865
+ if(Gmax+Gmax2 < eps)
866
+ return 1;
867
+
868
+ out_i = Gmax_idx;
869
+ out_j = Gmin_idx;
870
+ return 0;
871
+ }
872
+
873
+ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
874
+ {
875
+ if(is_upper_bound(i))
876
+ {
877
+ if(y[i]==+1)
878
+ return(-G[i] > Gmax1);
879
+ else
880
+ return(-G[i] > Gmax2);
881
+ }
882
+ else if(is_lower_bound(i))
883
+ {
884
+ if(y[i]==+1)
885
+ return(G[i] > Gmax2);
886
+ else
887
+ return(G[i] > Gmax1);
888
+ }
889
+ else
890
+ return(false);
891
+ }
892
+
893
+ void Solver::do_shrinking()
894
+ {
895
+ int i;
896
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
897
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
898
+
899
+ // find maximal violating pair first
900
+ for(i=0;i<active_size;i++)
901
+ {
902
+ if(y[i]==+1)
903
+ {
904
+ if(!is_upper_bound(i))
905
+ {
906
+ if(-G[i] >= Gmax1)
907
+ Gmax1 = -G[i];
908
+ }
909
+ if(!is_lower_bound(i))
910
+ {
911
+ if(G[i] >= Gmax2)
912
+ Gmax2 = G[i];
913
+ }
914
+ }
915
+ else
916
+ {
917
+ if(!is_upper_bound(i))
918
+ {
919
+ if(-G[i] >= Gmax2)
920
+ Gmax2 = -G[i];
921
+ }
922
+ if(!is_lower_bound(i))
923
+ {
924
+ if(G[i] >= Gmax1)
925
+ Gmax1 = G[i];
926
+ }
927
+ }
928
+ }
929
+
930
+ if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
931
+ {
932
+ unshrink = true;
933
+ reconstruct_gradient();
934
+ active_size = l;
935
+ info("*");
936
+ }
937
+
938
+ for(i=0;i<active_size;i++)
939
+ if (be_shrunk(i, Gmax1, Gmax2))
940
+ {
941
+ active_size--;
942
+ while (active_size > i)
943
+ {
944
+ if (!be_shrunk(active_size, Gmax1, Gmax2))
945
+ {
946
+ swap_index(i,active_size);
947
+ break;
948
+ }
949
+ active_size--;
950
+ }
951
+ }
952
+ }
953
+
954
+ double Solver::calculate_rho()
955
+ {
956
+ double r;
957
+ int nr_free = 0;
958
+ double ub = INF, lb = -INF, sum_free = 0;
959
+ for(int i=0;i<active_size;i++)
960
+ {
961
+ double yG = y[i]*G[i];
962
+
963
+ if(is_upper_bound(i))
964
+ {
965
+ if(y[i]==-1)
966
+ ub = min(ub,yG);
967
+ else
968
+ lb = max(lb,yG);
969
+ }
970
+ else if(is_lower_bound(i))
971
+ {
972
+ if(y[i]==+1)
973
+ ub = min(ub,yG);
974
+ else
975
+ lb = max(lb,yG);
976
+ }
977
+ else
978
+ {
979
+ ++nr_free;
980
+ sum_free += yG;
981
+ }
982
+ }
983
+
984
+ if(nr_free>0)
985
+ r = sum_free/nr_free;
986
+ else
987
+ r = (ub+lb)/2;
988
+
989
+ return r;
990
+ }
991
+
992
+ //
993
+ // Solver for nu-svm classification and regression
994
+ //
995
+ // additional constraint: e^T \alpha = constant
996
+ //
997
+ class Solver_NU : public Solver
998
+ {
999
+ public:
1000
+ Solver_NU() {}
1001
+ void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1002
+ double *alpha, double Cp, double Cn, double eps,
1003
+ SolutionInfo* si, int shrinking)
1004
+ {
1005
+ this->si = si;
1006
+ Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1007
+ }
1008
+ private:
1009
+ SolutionInfo *si;
1010
+ int select_working_set(int &i, int &j);
1011
+ double calculate_rho();
1012
+ bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1013
+ void do_shrinking();
1014
+ };
1015
+
1016
+ // return 1 if already optimal, return 0 otherwise
1017
+ int Solver_NU::select_working_set(int &out_i, int &out_j)
1018
+ {
1019
+ // return i,j such that y_i = y_j and
1020
+ // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1021
+ // j: minimizes the decrease of obj value
1022
+ // (if quadratic coefficient <= 0, replace it with tau)
1023
+ // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1024
+
1025
+ double Gmaxp = -INF;
1026
+ double Gmaxp2 = -INF;
1027
+ int Gmaxp_idx = -1;
1028
+
1029
+ double Gmaxn = -INF;
1030
+ double Gmaxn2 = -INF;
1031
+ int Gmaxn_idx = -1;
1032
+
1033
+ int Gmin_idx = -1;
1034
+ double obj_diff_min = INF;
1035
+
1036
+ for(int t=0;t<active_size;t++)
1037
+ if(y[t]==+1)
1038
+ {
1039
+ if(!is_upper_bound(t))
1040
+ if(-G[t] >= Gmaxp)
1041
+ {
1042
+ Gmaxp = -G[t];
1043
+ Gmaxp_idx = t;
1044
+ }
1045
+ }
1046
+ else
1047
+ {
1048
+ if(!is_lower_bound(t))
1049
+ if(G[t] >= Gmaxn)
1050
+ {
1051
+ Gmaxn = G[t];
1052
+ Gmaxn_idx = t;
1053
+ }
1054
+ }
1055
+
1056
+ int ip = Gmaxp_idx;
1057
+ int in = Gmaxn_idx;
1058
+ const Qfloat *Q_ip = NULL;
1059
+ const Qfloat *Q_in = NULL;
1060
+ if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1061
+ Q_ip = Q->get_Q(ip,active_size);
1062
+ if(in != -1)
1063
+ Q_in = Q->get_Q(in,active_size);
1064
+
1065
+ for(int j=0;j<active_size;j++)
1066
+ {
1067
+ if(y[j]==+1)
1068
+ {
1069
+ if (!is_lower_bound(j))
1070
+ {
1071
+ double grad_diff=Gmaxp+G[j];
1072
+ if (G[j] >= Gmaxp2)
1073
+ Gmaxp2 = G[j];
1074
+ if (grad_diff > 0)
1075
+ {
1076
+ double obj_diff;
1077
+ double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1078
+ if (quad_coef > 0)
1079
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1080
+ else
1081
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1082
+
1083
+ if (obj_diff <= obj_diff_min)
1084
+ {
1085
+ Gmin_idx=j;
1086
+ obj_diff_min = obj_diff;
1087
+ }
1088
+ }
1089
+ }
1090
+ }
1091
+ else
1092
+ {
1093
+ if (!is_upper_bound(j))
1094
+ {
1095
+ double grad_diff=Gmaxn-G[j];
1096
+ if (-G[j] >= Gmaxn2)
1097
+ Gmaxn2 = -G[j];
1098
+ if (grad_diff > 0)
1099
+ {
1100
+ double obj_diff;
1101
+ double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
1102
+ if (quad_coef > 0)
1103
+ obj_diff = -(grad_diff*grad_diff)/quad_coef;
1104
+ else
1105
+ obj_diff = -(grad_diff*grad_diff)/TAU;
1106
+
1107
+ if (obj_diff <= obj_diff_min)
1108
+ {
1109
+ Gmin_idx=j;
1110
+ obj_diff_min = obj_diff;
1111
+ }
1112
+ }
1113
+ }
1114
+ }
1115
+ }
1116
+
1117
+ if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1118
+ return 1;
1119
+
1120
+ if (y[Gmin_idx] == +1)
1121
+ out_i = Gmaxp_idx;
1122
+ else
1123
+ out_i = Gmaxn_idx;
1124
+ out_j = Gmin_idx;
1125
+
1126
+ return 0;
1127
+ }
1128
+
1129
+ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1130
+ {
1131
+ if(is_upper_bound(i))
1132
+ {
1133
+ if(y[i]==+1)
1134
+ return(-G[i] > Gmax1);
1135
+ else
1136
+ return(-G[i] > Gmax4);
1137
+ }
1138
+ else if(is_lower_bound(i))
1139
+ {
1140
+ if(y[i]==+1)
1141
+ return(G[i] > Gmax2);
1142
+ else
1143
+ return(G[i] > Gmax3);
1144
+ }
1145
+ else
1146
+ return(false);
1147
+ }
1148
+
1149
+ void Solver_NU::do_shrinking()
1150
+ {
1151
+ double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1152
+ double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1153
+ double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1154
+ double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1155
+
1156
+ // find maximal violating pair first
1157
+ int i;
1158
+ for(i=0;i<active_size;i++)
1159
+ {
1160
+ if(!is_upper_bound(i))
1161
+ {
1162
+ if(y[i]==+1)
1163
+ {
1164
+ if(-G[i] > Gmax1) Gmax1 = -G[i];
1165
+ }
1166
+ else if(-G[i] > Gmax4) Gmax4 = -G[i];
1167
+ }
1168
+ if(!is_lower_bound(i))
1169
+ {
1170
+ if(y[i]==+1)
1171
+ {
1172
+ if(G[i] > Gmax2) Gmax2 = G[i];
1173
+ }
1174
+ else if(G[i] > Gmax3) Gmax3 = G[i];
1175
+ }
1176
+ }
1177
+
1178
+ if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1179
+ {
1180
+ unshrink = true;
1181
+ reconstruct_gradient();
1182
+ active_size = l;
1183
+ }
1184
+
1185
+ for(i=0;i<active_size;i++)
1186
+ if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1187
+ {
1188
+ active_size--;
1189
+ while (active_size > i)
1190
+ {
1191
+ if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1192
+ {
1193
+ swap_index(i,active_size);
1194
+ break;
1195
+ }
1196
+ active_size--;
1197
+ }
1198
+ }
1199
+ }
1200
+
1201
+ double Solver_NU::calculate_rho()
1202
+ {
1203
+ int nr_free1 = 0,nr_free2 = 0;
1204
+ double ub1 = INF, ub2 = INF;
1205
+ double lb1 = -INF, lb2 = -INF;
1206
+ double sum_free1 = 0, sum_free2 = 0;
1207
+
1208
+ for(int i=0;i<active_size;i++)
1209
+ {
1210
+ if(y[i]==+1)
1211
+ {
1212
+ if(is_upper_bound(i))
1213
+ lb1 = max(lb1,G[i]);
1214
+ else if(is_lower_bound(i))
1215
+ ub1 = min(ub1,G[i]);
1216
+ else
1217
+ {
1218
+ ++nr_free1;
1219
+ sum_free1 += G[i];
1220
+ }
1221
+ }
1222
+ else
1223
+ {
1224
+ if(is_upper_bound(i))
1225
+ lb2 = max(lb2,G[i]);
1226
+ else if(is_lower_bound(i))
1227
+ ub2 = min(ub2,G[i]);
1228
+ else
1229
+ {
1230
+ ++nr_free2;
1231
+ sum_free2 += G[i];
1232
+ }
1233
+ }
1234
+ }
1235
+
1236
+ double r1,r2;
1237
+ if(nr_free1 > 0)
1238
+ r1 = sum_free1/nr_free1;
1239
+ else
1240
+ r1 = (ub1+lb1)/2;
1241
+
1242
+ if(nr_free2 > 0)
1243
+ r2 = sum_free2/nr_free2;
1244
+ else
1245
+ r2 = (ub2+lb2)/2;
1246
+
1247
+ si->r = (r1+r2)/2;
1248
+ return (r1-r2)/2;
1249
+ }
1250
+
1251
+ //
1252
+ // Q matrices for various formulations
1253
+ //
1254
+ class SVC_Q: public Kernel
1255
+ {
1256
+ public:
1257
+ SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1258
+ :Kernel(prob.l, prob.x, param)
1259
+ {
1260
+ clone(y,y_,prob.l);
1261
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1262
+ QD = new Qfloat[prob.l];
1263
+ for(int i=0;i<prob.l;i++)
1264
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1265
+ }
1266
+
1267
+ Qfloat *get_Q(int i, int len) const
1268
+ {
1269
+ Qfloat *data;
1270
+ int start, j;
1271
+ if((start = cache->get_data(i,&data,len)) < len)
1272
+ {
1273
+ for(j=start;j<len;j++)
1274
+ data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1275
+ }
1276
+ return data;
1277
+ }
1278
+
1279
+ Qfloat *get_QD() const
1280
+ {
1281
+ return QD;
1282
+ }
1283
+
1284
+ void swap_index(int i, int j) const
1285
+ {
1286
+ cache->swap_index(i,j);
1287
+ Kernel::swap_index(i,j);
1288
+ swap(y[i],y[j]);
1289
+ swap(QD[i],QD[j]);
1290
+ }
1291
+
1292
+ ~SVC_Q()
1293
+ {
1294
+ delete[] y;
1295
+ delete cache;
1296
+ delete[] QD;
1297
+ }
1298
+ private:
1299
+ schar *y;
1300
+ Cache *cache;
1301
+ Qfloat *QD;
1302
+ };
1303
+
1304
+ class ONE_CLASS_Q: public Kernel
1305
+ {
1306
+ public:
1307
+ ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1308
+ :Kernel(prob.l, prob.x, param)
1309
+ {
1310
+ cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1311
+ QD = new Qfloat[prob.l];
1312
+ for(int i=0;i<prob.l;i++)
1313
+ QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1314
+ }
1315
+
1316
+ Qfloat *get_Q(int i, int len) const
1317
+ {
1318
+ Qfloat *data;
1319
+ int start, j;
1320
+ if((start = cache->get_data(i,&data,len)) < len)
1321
+ {
1322
+ for(j=start;j<len;j++)
1323
+ data[j] = (Qfloat)(this->*kernel_function)(i,j);
1324
+ }
1325
+ return data;
1326
+ }
1327
+
1328
+ Qfloat *get_QD() const
1329
+ {
1330
+ return QD;
1331
+ }
1332
+
1333
+ void swap_index(int i, int j) const
1334
+ {
1335
+ cache->swap_index(i,j);
1336
+ Kernel::swap_index(i,j);
1337
+ swap(QD[i],QD[j]);
1338
+ }
1339
+
1340
+ ~ONE_CLASS_Q()
1341
+ {
1342
+ delete cache;
1343
+ delete[] QD;
1344
+ }
1345
+ private:
1346
+ Cache *cache;
1347
+ Qfloat *QD;
1348
+ };
1349
+
1350
+ class SVR_Q: public Kernel
1351
+ {
1352
+ public:
1353
+ SVR_Q(const svm_problem& prob, const svm_parameter& param)
1354
+ :Kernel(prob.l, prob.x, param)
1355
+ {
1356
+ l = prob.l;
1357
+ cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1358
+ QD = new Qfloat[2*l];
1359
+ sign = new schar[2*l];
1360
+ index = new int[2*l];
1361
+ for(int k=0;k<l;k++)
1362
+ {
1363
+ sign[k] = 1;
1364
+ sign[k+l] = -1;
1365
+ index[k] = k;
1366
+ index[k+l] = k;
1367
+ QD[k]= (Qfloat)(this->*kernel_function)(k,k);
1368
+ QD[k+l]=QD[k];
1369
+ }
1370
+ buffer[0] = new Qfloat[2*l];
1371
+ buffer[1] = new Qfloat[2*l];
1372
+ next_buffer = 0;
1373
+ }
1374
+
1375
+ void swap_index(int i, int j) const
1376
+ {
1377
+ swap(sign[i],sign[j]);
1378
+ swap(index[i],index[j]);
1379
+ swap(QD[i],QD[j]);
1380
+ }
1381
+
1382
+ Qfloat *get_Q(int i, int len) const
1383
+ {
1384
+ Qfloat *data;
1385
+ int j, real_i = index[i];
1386
+ if(cache->get_data(real_i,&data,l) < l)
1387
+ {
1388
+ for(j=0;j<l;j++)
1389
+ data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1390
+ }
1391
+
1392
+ // reorder and copy
1393
+ Qfloat *buf = buffer[next_buffer];
1394
+ next_buffer = 1 - next_buffer;
1395
+ schar si = sign[i];
1396
+ for(j=0;j<len;j++)
1397
+ buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1398
+ return buf;
1399
+ }
1400
+
1401
+ Qfloat *get_QD() const
1402
+ {
1403
+ return QD;
1404
+ }
1405
+
1406
+ ~SVR_Q()
1407
+ {
1408
+ delete cache;
1409
+ delete[] sign;
1410
+ delete[] index;
1411
+ delete[] buffer[0];
1412
+ delete[] buffer[1];
1413
+ delete[] QD;
1414
+ }
1415
+ private:
1416
+ int l;
1417
+ Cache *cache;
1418
+ schar *sign;
1419
+ int *index;
1420
+ mutable int next_buffer;
1421
+ Qfloat *buffer[2];
1422
+ Qfloat *QD;
1423
+ };
1424
+
1425
+ //
1426
+ // construct and solve various formulations
1427
+ //
1428
+ static void solve_c_svc(
1429
+ const svm_problem *prob, const svm_parameter* param,
1430
+ double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1431
+ {
1432
+ int l = prob->l;
1433
+ double *minus_ones = new double[l];
1434
+ schar *y = new schar[l];
1435
+
1436
+ int i;
1437
+
1438
+ for(i=0;i<l;i++)
1439
+ {
1440
+ alpha[i] = 0;
1441
+ minus_ones[i] = -1;
1442
+ if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1443
+ }
1444
+
1445
+ Solver s;
1446
+ s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1447
+ alpha, Cp, Cn, param->eps, si, param->shrinking);
1448
+
1449
+ double sum_alpha=0;
1450
+ for(i=0;i<l;i++)
1451
+ sum_alpha += alpha[i];
1452
+
1453
+ if (Cp==Cn)
1454
+ info("nu = %f\n", sum_alpha/(Cp*prob->l));
1455
+
1456
+ for(i=0;i<l;i++)
1457
+ alpha[i] *= y[i];
1458
+
1459
+ delete[] minus_ones;
1460
+ delete[] y;
1461
+ }
1462
+
1463
+ static void solve_nu_svc(
1464
+ const svm_problem *prob, const svm_parameter *param,
1465
+ double *alpha, Solver::SolutionInfo* si)
1466
+ {
1467
+ int i;
1468
+ int l = prob->l;
1469
+ double nu = param->nu;
1470
+
1471
+ schar *y = new schar[l];
1472
+
1473
+ for(i=0;i<l;i++)
1474
+ if(prob->y[i]>0)
1475
+ y[i] = +1;
1476
+ else
1477
+ y[i] = -1;
1478
+
1479
+ double sum_pos = nu*l/2;
1480
+ double sum_neg = nu*l/2;
1481
+
1482
+ for(i=0;i<l;i++)
1483
+ if(y[i] == +1)
1484
+ {
1485
+ alpha[i] = min(1.0,sum_pos);
1486
+ sum_pos -= alpha[i];
1487
+ }
1488
+ else
1489
+ {
1490
+ alpha[i] = min(1.0,sum_neg);
1491
+ sum_neg -= alpha[i];
1492
+ }
1493
+
1494
+ double *zeros = new double[l];
1495
+
1496
+ for(i=0;i<l;i++)
1497
+ zeros[i] = 0;
1498
+
1499
+ Solver_NU s;
1500
+ s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1501
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1502
+ double r = si->r;
1503
+
1504
+ info("C = %f\n",1/r);
1505
+
1506
+ for(i=0;i<l;i++)
1507
+ alpha[i] *= y[i]/r;
1508
+
1509
+ si->rho /= r;
1510
+ si->obj /= (r*r);
1511
+ si->upper_bound_p = 1/r;
1512
+ si->upper_bound_n = 1/r;
1513
+
1514
+ delete[] y;
1515
+ delete[] zeros;
1516
+ }
1517
+
1518
+ static void solve_one_class(
1519
+ const svm_problem *prob, const svm_parameter *param,
1520
+ double *alpha, Solver::SolutionInfo* si)
1521
+ {
1522
+ int l = prob->l;
1523
+ double *zeros = new double[l];
1524
+ schar *ones = new schar[l];
1525
+ int i;
1526
+
1527
+ int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1528
+
1529
+ for(i=0;i<n;i++)
1530
+ alpha[i] = 1;
1531
+ if(n<prob->l)
1532
+ alpha[n] = param->nu * prob->l - n;
1533
+ for(i=n+1;i<l;i++)
1534
+ alpha[i] = 0;
1535
+
1536
+ for(i=0;i<l;i++)
1537
+ {
1538
+ zeros[i] = 0;
1539
+ ones[i] = 1;
1540
+ }
1541
+
1542
+ Solver s;
1543
+ s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1544
+ alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1545
+
1546
+ delete[] zeros;
1547
+ delete[] ones;
1548
+ }
1549
+
1550
+ static void solve_epsilon_svr(
1551
+ const svm_problem *prob, const svm_parameter *param,
1552
+ double *alpha, Solver::SolutionInfo* si)
1553
+ {
1554
+ int l = prob->l;
1555
+ double *alpha2 = new double[2*l];
1556
+ double *linear_term = new double[2*l];
1557
+ schar *y = new schar[2*l];
1558
+ int i;
1559
+
1560
+ for(i=0;i<l;i++)
1561
+ {
1562
+ alpha2[i] = 0;
1563
+ linear_term[i] = param->p - prob->y[i];
1564
+ y[i] = 1;
1565
+
1566
+ alpha2[i+l] = 0;
1567
+ linear_term[i+l] = param->p + prob->y[i];
1568
+ y[i+l] = -1;
1569
+ }
1570
+
1571
+ Solver s;
1572
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1573
+ alpha2, param->C, param->C, param->eps, si, param->shrinking);
1574
+
1575
+ double sum_alpha = 0;
1576
+ for(i=0;i<l;i++)
1577
+ {
1578
+ alpha[i] = alpha2[i] - alpha2[i+l];
1579
+ sum_alpha += fabs(alpha[i]);
1580
+ }
1581
+ info("nu = %f\n",sum_alpha/(param->C*l));
1582
+
1583
+ delete[] alpha2;
1584
+ delete[] linear_term;
1585
+ delete[] y;
1586
+ }
1587
+
1588
+ static void solve_nu_svr(
1589
+ const svm_problem *prob, const svm_parameter *param,
1590
+ double *alpha, Solver::SolutionInfo* si)
1591
+ {
1592
+ int l = prob->l;
1593
+ double C = param->C;
1594
+ double *alpha2 = new double[2*l];
1595
+ double *linear_term = new double[2*l];
1596
+ schar *y = new schar[2*l];
1597
+ int i;
1598
+
1599
+ double sum = C * param->nu * l / 2;
1600
+ for(i=0;i<l;i++)
1601
+ {
1602
+ alpha2[i] = alpha2[i+l] = min(sum,C);
1603
+ sum -= alpha2[i];
1604
+
1605
+ linear_term[i] = - prob->y[i];
1606
+ y[i] = 1;
1607
+
1608
+ linear_term[i+l] = prob->y[i];
1609
+ y[i+l] = -1;
1610
+ }
1611
+
1612
+ Solver_NU s;
1613
+ s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1614
+ alpha2, C, C, param->eps, si, param->shrinking);
1615
+
1616
+ info("epsilon = %f\n",-si->r);
1617
+
1618
+ for(i=0;i<l;i++)
1619
+ alpha[i] = alpha2[i] - alpha2[i+l];
1620
+
1621
+ delete[] alpha2;
1622
+ delete[] linear_term;
1623
+ delete[] y;
1624
+ }
1625
+
1626
+ //
1627
+ // decision_function
1628
+ //
1629
+ struct decision_function
1630
+ {
1631
+ double *alpha;
1632
+ double rho;
1633
+ };
1634
+
1635
+ decision_function svm_train_one(
1636
+ const svm_problem *prob, const svm_parameter *param,
1637
+ double Cp, double Cn)
1638
+ {
1639
+ double *alpha = Malloc(double,prob->l);
1640
+ Solver::SolutionInfo si;
1641
+ switch(param->svm_type)
1642
+ {
1643
+ case C_SVC:
1644
+ solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1645
+ break;
1646
+ case NU_SVC:
1647
+ solve_nu_svc(prob,param,alpha,&si);
1648
+ break;
1649
+ case ONE_CLASS:
1650
+ solve_one_class(prob,param,alpha,&si);
1651
+ break;
1652
+ case EPSILON_SVR:
1653
+ solve_epsilon_svr(prob,param,alpha,&si);
1654
+ break;
1655
+ case NU_SVR:
1656
+ solve_nu_svr(prob,param,alpha,&si);
1657
+ break;
1658
+ }
1659
+
1660
+ info("obj = %f, rho = %f\n",si.obj,si.rho);
1661
+
1662
+ // output SVs
1663
+
1664
+ int nSV = 0;
1665
+ int nBSV = 0;
1666
+ for(int i=0;i<prob->l;i++)
1667
+ {
1668
+ if(fabs(alpha[i]) > 0)
1669
+ {
1670
+ ++nSV;
1671
+ if(prob->y[i] > 0)
1672
+ {
1673
+ if(fabs(alpha[i]) >= si.upper_bound_p)
1674
+ ++nBSV;
1675
+ }
1676
+ else
1677
+ {
1678
+ if(fabs(alpha[i]) >= si.upper_bound_n)
1679
+ ++nBSV;
1680
+ }
1681
+ }
1682
+ }
1683
+
1684
+ info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1685
+
1686
+ decision_function f;
1687
+ f.alpha = alpha;
1688
+ f.rho = si.rho;
1689
+ return f;
1690
+ }
1691
+
1692
+ //
1693
+ // svm_model
1694
+ //
1695
+ struct svm_model
1696
+ {
1697
+ svm_parameter param; // parameter
1698
+ int nr_class; // number of classes, = 2 in regression/one class svm
1699
+ int l; // total #SV
1700
+ svm_node **SV; // SVs (SV[l])
1701
+ double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
1702
+ double *rho; // constants in decision functions (rho[k*(k-1)/2])
1703
+ double *probA; // pairwise probability information
1704
+ double *probB;
1705
+
1706
+ // for classification only
1707
+
1708
+ int *label; // label of each class (label[k])
1709
+ int *nSV; // number of SVs for each class (nSV[k])
1710
+ // nSV[0] + nSV[1] + ... + nSV[k-1] = l
1711
+ // XXX
1712
+ int free_sv; // 1 if svm_model is created by svm_load_model
1713
+ // 0 if svm_model is created by svm_train
1714
+ };
1715
+
1716
+ // Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
1717
+ void sigmoid_train(
1718
+ int l, const double *dec_values, const double *labels,
1719
+ double& A, double& B)
1720
+ {
1721
+ double prior1=0, prior0 = 0;
1722
+ int i;
1723
+
1724
+ for (i=0;i<l;i++)
1725
+ if (labels[i] > 0) prior1+=1;
1726
+ else prior0+=1;
1727
+
1728
+ int max_iter=100; // Maximal number of iterations
1729
+ double min_step=1e-10; // Minimal step taken in line search
1730
+ double sigma=1e-12; // For numerically strict PD of Hessian
1731
+ double eps=1e-5;
1732
+ double hiTarget=(prior1+1.0)/(prior1+2.0);
1733
+ double loTarget=1/(prior0+2.0);
1734
+ double *t=Malloc(double,l);
1735
+ double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1736
+ double newA,newB,newf,d1,d2;
1737
+ int iter;
1738
+
1739
+ // Initial Point and Initial Fun Value
1740
+ A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1741
+ double fval = 0.0;
1742
+
1743
+ for (i=0;i<l;i++)
1744
+ {
1745
+ if (labels[i]>0) t[i]=hiTarget;
1746
+ else t[i]=loTarget;
1747
+ fApB = dec_values[i]*A+B;
1748
+ if (fApB>=0)
1749
+ fval += t[i]*fApB + log(1+exp(-fApB));
1750
+ else
1751
+ fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1752
+ }
1753
+ for (iter=0;iter<max_iter;iter++)
1754
+ {
1755
+ // Update Gradient and Hessian (use H' = H + sigma I)
1756
+ h11=sigma; // numerically ensures strict PD
1757
+ h22=sigma;
1758
+ h21=0.0;g1=0.0;g2=0.0;
1759
+ for (i=0;i<l;i++)
1760
+ {
1761
+ fApB = dec_values[i]*A+B;
1762
+ if (fApB >= 0)
1763
+ {
1764
+ p=exp(-fApB)/(1.0+exp(-fApB));
1765
+ q=1.0/(1.0+exp(-fApB));
1766
+ }
1767
+ else
1768
+ {
1769
+ p=1.0/(1.0+exp(fApB));
1770
+ q=exp(fApB)/(1.0+exp(fApB));
1771
+ }
1772
+ d2=p*q;
1773
+ h11+=dec_values[i]*dec_values[i]*d2;
1774
+ h22+=d2;
1775
+ h21+=dec_values[i]*d2;
1776
+ d1=t[i]-p;
1777
+ g1+=dec_values[i]*d1;
1778
+ g2+=d1;
1779
+ }
1780
+
1781
+ // Stopping Criteria
1782
+ if (fabs(g1)<eps && fabs(g2)<eps)
1783
+ break;
1784
+
1785
+ // Finding Newton direction: -inv(H') * g
1786
+ det=h11*h22-h21*h21;
1787
+ dA=-(h22*g1 - h21 * g2) / det;
1788
+ dB=-(-h21*g1+ h11 * g2) / det;
1789
+ gd=g1*dA+g2*dB;
1790
+
1791
+
1792
+ stepsize = 1; // Line Search
1793
+ while (stepsize >= min_step)
1794
+ {
1795
+ newA = A + stepsize * dA;
1796
+ newB = B + stepsize * dB;
1797
+
1798
+ // New function value
1799
+ newf = 0.0;
1800
+ for (i=0;i<l;i++)
1801
+ {
1802
+ fApB = dec_values[i]*newA+newB;
1803
+ if (fApB >= 0)
1804
+ newf += t[i]*fApB + log(1+exp(-fApB));
1805
+ else
1806
+ newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1807
+ }
1808
+ // Check sufficient decrease
1809
+ if (newf<fval+0.0001*stepsize*gd)
1810
+ {
1811
+ A=newA;B=newB;fval=newf;
1812
+ break;
1813
+ }
1814
+ else
1815
+ stepsize = stepsize / 2.0;
1816
+ }
1817
+
1818
+ if (stepsize < min_step)
1819
+ {
1820
+ info("Line search fails in two-class probability estimates\n");
1821
+ break;
1822
+ }
1823
+ }
1824
+
1825
+ if (iter>=max_iter)
1826
+ info("Reaching maximal iterations in two-class probability estimates\n");
1827
+ free(t);
1828
+ }
1829
+
1830
+ double sigmoid_predict(double decision_value, double A, double B)
1831
+ {
1832
+ double fApB = decision_value*A+B;
1833
+ if (fApB >= 0)
1834
+ return exp(-fApB)/(1.0+exp(-fApB));
1835
+ else
1836
+ return 1.0/(1+exp(fApB)) ;
1837
+ }
1838
+
1839
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1840
+ void multiclass_probability(int k, double **r, double *p)
1841
+ {
1842
+ int t,j;
1843
+ int iter = 0, max_iter=max(100,k);
1844
+ double **Q=Malloc(double *,k);
1845
+ double *Qp=Malloc(double,k);
1846
+ double pQp, eps=0.005/k;
1847
+
1848
+ for (t=0;t<k;t++)
1849
+ {
1850
+ p[t]=1.0/k; // Valid if k = 1
1851
+ Q[t]=Malloc(double,k);
1852
+ Q[t][t]=0;
1853
+ for (j=0;j<t;j++)
1854
+ {
1855
+ Q[t][t]+=r[j][t]*r[j][t];
1856
+ Q[t][j]=Q[j][t];
1857
+ }
1858
+ for (j=t+1;j<k;j++)
1859
+ {
1860
+ Q[t][t]+=r[j][t]*r[j][t];
1861
+ Q[t][j]=-r[j][t]*r[t][j];
1862
+ }
1863
+ }
1864
+ for (iter=0;iter<max_iter;iter++)
1865
+ {
1866
+ // stopping condition, recalculate QP,pQP for numerical accuracy
1867
+ pQp=0;
1868
+ for (t=0;t<k;t++)
1869
+ {
1870
+ Qp[t]=0;
1871
+ for (j=0;j<k;j++)
1872
+ Qp[t]+=Q[t][j]*p[j];
1873
+ pQp+=p[t]*Qp[t];
1874
+ }
1875
+ double max_error=0;
1876
+ for (t=0;t<k;t++)
1877
+ {
1878
+ double error=fabs(Qp[t]-pQp);
1879
+ if (error>max_error)
1880
+ max_error=error;
1881
+ }
1882
+ if (max_error<eps) break;
1883
+
1884
+ for (t=0;t<k;t++)
1885
+ {
1886
+ double diff=(-Qp[t]+pQp)/Q[t][t];
1887
+ p[t]+=diff;
1888
+ pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1889
+ for (j=0;j<k;j++)
1890
+ {
1891
+ Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1892
+ p[j]/=(1+diff);
1893
+ }
1894
+ }
1895
+ }
1896
+ if (iter>=max_iter)
1897
+ info("Exceeds max_iter in multiclass_prob\n");
1898
+ for(t=0;t<k;t++) free(Q[t]);
1899
+ free(Q);
1900
+ free(Qp);
1901
+ }
1902
+
1903
+ // Cross-validation decision values for probability estimates
1904
+ void svm_binary_svc_probability(
1905
+ const svm_problem *prob, const svm_parameter *param,
1906
+ double Cp, double Cn, double& probA, double& probB)
1907
+ {
1908
+ int i;
1909
+ int nr_fold = 5;
1910
+ int *perm = Malloc(int,prob->l);
1911
+ double *dec_values = Malloc(double,prob->l);
1912
+
1913
+ // random shuffle
1914
+ for(i=0;i<prob->l;i++) perm[i]=i;
1915
+ for(i=0;i<prob->l;i++)
1916
+ {
1917
+ int j = i+rand()%(prob->l-i);
1918
+ swap(perm[i],perm[j]);
1919
+ }
1920
+ for(i=0;i<nr_fold;i++)
1921
+ {
1922
+ int begin = i*prob->l/nr_fold;
1923
+ int end = (i+1)*prob->l/nr_fold;
1924
+ int j,k;
1925
+ struct svm_problem subprob;
1926
+
1927
+ subprob.l = prob->l-(end-begin);
1928
+ subprob.x = Malloc(struct svm_node*,subprob.l);
1929
+ subprob.y = Malloc(double,subprob.l);
1930
+
1931
+ k=0;
1932
+ for(j=0;j<begin;j++)
1933
+ {
1934
+ subprob.x[k] = prob->x[perm[j]];
1935
+ subprob.y[k] = prob->y[perm[j]];
1936
+ ++k;
1937
+ }
1938
+ for(j=end;j<prob->l;j++)
1939
+ {
1940
+ subprob.x[k] = prob->x[perm[j]];
1941
+ subprob.y[k] = prob->y[perm[j]];
1942
+ ++k;
1943
+ }
1944
+ int p_count=0,n_count=0;
1945
+ for(j=0;j<k;j++)
1946
+ if(subprob.y[j]>0)
1947
+ p_count++;
1948
+ else
1949
+ n_count++;
1950
+
1951
+ if(p_count==0 && n_count==0)
1952
+ for(j=begin;j<end;j++)
1953
+ dec_values[perm[j]] = 0;
1954
+ else if(p_count > 0 && n_count == 0)
1955
+ for(j=begin;j<end;j++)
1956
+ dec_values[perm[j]] = 1;
1957
+ else if(p_count == 0 && n_count > 0)
1958
+ for(j=begin;j<end;j++)
1959
+ dec_values[perm[j]] = -1;
1960
+ else
1961
+ {
1962
+ svm_parameter subparam = *param;
1963
+ subparam.probability=0;
1964
+ subparam.C=1.0;
1965
+ subparam.nr_weight=2;
1966
+ subparam.weight_label = Malloc(int,2);
1967
+ subparam.weight = Malloc(double,2);
1968
+ subparam.weight_label[0]=+1;
1969
+ subparam.weight_label[1]=-1;
1970
+ subparam.weight[0]=Cp;
1971
+ subparam.weight[1]=Cn;
1972
+ struct svm_model *submodel = svm_train(&subprob,&subparam);
1973
+ for(j=begin;j<end;j++)
1974
+ {
1975
+ svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
1976
+ // ensure +1 -1 order; this is why the CV subroutine is not used here
1977
+ dec_values[perm[j]] *= submodel->label[0];
1978
+ }
1979
+ svm_destroy_model(submodel);
1980
+ svm_destroy_param(&subparam);
1981
+ }
1982
+ free(subprob.x);
1983
+ free(subprob.y);
1984
+ }
1985
+ sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
1986
+ free(dec_values);
1987
+ free(perm);
1988
+ }
1989
+
1990
+ // Return parameter of a Laplace distribution
1991
+ double svm_svr_probability(
1992
+ const svm_problem *prob, const svm_parameter *param)
1993
+ {
1994
+ int i;
1995
+ int nr_fold = 5;
1996
+ double *ymv = Malloc(double,prob->l);
1997
+ double mae = 0;
1998
+
1999
+ svm_parameter newparam = *param;
2000
+ newparam.probability = 0;
2001
+ svm_cross_validation(prob,&newparam,nr_fold,ymv);
2002
+ for(i=0;i<prob->l;i++)
2003
+ {
2004
+ ymv[i]=prob->y[i]-ymv[i];
2005
+ mae += fabs(ymv[i]);
2006
+ }
2007
+ mae /= prob->l;
2008
+ double std=sqrt(2*mae*mae);
2009
+ int count=0;
2010
+ mae=0;
2011
+ for(i=0;i<prob->l;i++)
2012
+ if (fabs(ymv[i]) > 5*std)
2013
+ count=count+1;
2014
+ else
2015
+ mae+=fabs(ymv[i]);
2016
+ mae /= (prob->l-count);
2017
+ info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2018
+ free(ymv);
2019
+ return mae;
2020
+ }
2021
+
2022
+
2023
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2024
+ // perm, length l, must be allocated before calling this subroutine
2025
+ void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2026
+ {
2027
+ int l = prob->l;
2028
+ int max_nr_class = 16;
2029
+ int nr_class = 0;
2030
+ int *label = Malloc(int,max_nr_class);
2031
+ int *count = Malloc(int,max_nr_class);
2032
+ int *data_label = Malloc(int,l);
2033
+ int i;
2034
+
2035
+ for(i=0;i<l;i++)
2036
+ {
2037
+ int this_label = (int)prob->y[i];
2038
+ int j;
2039
+ for(j=0;j<nr_class;j++)
2040
+ {
2041
+ if(this_label == label[j])
2042
+ {
2043
+ ++count[j];
2044
+ break;
2045
+ }
2046
+ }
2047
+ data_label[i] = j;
2048
+ if(j == nr_class)
2049
+ {
2050
+ if(nr_class == max_nr_class)
2051
+ {
2052
+ max_nr_class *= 2;
2053
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2054
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2055
+ }
2056
+ label[nr_class] = this_label;
2057
+ count[nr_class] = 1;
2058
+ ++nr_class;
2059
+ }
2060
+ }
2061
+
2062
+ int *start = Malloc(int,nr_class);
2063
+ start[0] = 0;
2064
+ for(i=1;i<nr_class;i++)
2065
+ start[i] = start[i-1]+count[i-1];
2066
+ for(i=0;i<l;i++)
2067
+ {
2068
+ perm[start[data_label[i]]] = i;
2069
+ ++start[data_label[i]];
2070
+ }
2071
+ start[0] = 0;
2072
+ for(i=1;i<nr_class;i++)
2073
+ start[i] = start[i-1]+count[i-1];
2074
+
2075
+ *nr_class_ret = nr_class;
2076
+ *label_ret = label;
2077
+ *start_ret = start;
2078
+ *count_ret = count;
2079
+ free(data_label);
2080
+ }
2081
+
2082
+ //
2083
+ // Interface functions
2084
+ //
2085
+ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2086
+ {
2087
+ svm_model *model = Malloc(svm_model,1);
2088
+ model->param = *param;
2089
+ model->free_sv = 0; // XXX
2090
+
2091
+ if(param->svm_type == ONE_CLASS ||
2092
+ param->svm_type == EPSILON_SVR ||
2093
+ param->svm_type == NU_SVR)
2094
+ {
2095
+ // regression or one-class-svm
2096
+ model->nr_class = 2;
2097
+ model->label = NULL;
2098
+ model->nSV = NULL;
2099
+ model->probA = NULL; model->probB = NULL;
2100
+ model->sv_coef = Malloc(double *,1);
2101
+
2102
+ if(param->probability &&
2103
+ (param->svm_type == EPSILON_SVR ||
2104
+ param->svm_type == NU_SVR))
2105
+ {
2106
+ model->probA = Malloc(double,1);
2107
+ model->probA[0] = svm_svr_probability(prob,param);
2108
+ }
2109
+
2110
+ decision_function f = svm_train_one(prob,param,0,0);
2111
+ model->rho = Malloc(double,1);
2112
+ model->rho[0] = f.rho;
2113
+
2114
+ int nSV = 0;
2115
+ int i;
2116
+ for(i=0;i<prob->l;i++)
2117
+ if(fabs(f.alpha[i]) > 0) ++nSV;
2118
+ model->l = nSV;
2119
+ model->SV = Malloc(svm_node *,nSV);
2120
+ model->sv_coef[0] = Malloc(double,nSV);
2121
+ int j = 0;
2122
+ for(i=0;i<prob->l;i++)
2123
+ if(fabs(f.alpha[i]) > 0)
2124
+ {
2125
+ model->SV[j] = prob->x[i];
2126
+ model->sv_coef[0][j] = f.alpha[i];
2127
+ ++j;
2128
+ }
2129
+
2130
+ free(f.alpha);
2131
+ }
2132
+ else
2133
+ {
2134
+ // classification
2135
+ int l = prob->l;
2136
+ int nr_class;
2137
+ int *label = NULL;
2138
+ int *start = NULL;
2139
+ int *count = NULL;
2140
+ int *perm = Malloc(int,l);
2141
+
2142
+ // group training data of the same class
2143
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2144
+ svm_node **x = Malloc(svm_node *,l);
2145
+ int i;
2146
+ for(i=0;i<l;i++)
2147
+ x[i] = prob->x[perm[i]];
2148
+
2149
+ // calculate weighted C
2150
+
2151
+ double *weighted_C = Malloc(double, nr_class);
2152
+ for(i=0;i<nr_class;i++)
2153
+ weighted_C[i] = param->C;
2154
+ for(i=0;i<param->nr_weight;i++)
2155
+ {
2156
+ int j;
2157
+ for(j=0;j<nr_class;j++)
2158
+ if(param->weight_label[i] == label[j])
2159
+ break;
2160
+ if(j == nr_class)
2161
+ fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
2162
+ else
2163
+ weighted_C[j] *= param->weight[i];
2164
+ }
2165
+
2166
+ // train k*(k-1)/2 models
2167
+
2168
+ bool *nonzero = Malloc(bool,l);
2169
+ for(i=0;i<l;i++)
2170
+ nonzero[i] = false;
2171
+ decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2172
+
2173
+ double *probA=NULL,*probB=NULL;
2174
+ if (param->probability)
2175
+ {
2176
+ probA=Malloc(double,nr_class*(nr_class-1)/2);
2177
+ probB=Malloc(double,nr_class*(nr_class-1)/2);
2178
+ }
2179
+
2180
+ int p = 0;
2181
+ for(i=0;i<nr_class;i++)
2182
+ for(int j=i+1;j<nr_class;j++)
2183
+ {
2184
+ svm_problem sub_prob;
2185
+ int si = start[i], sj = start[j];
2186
+ int ci = count[i], cj = count[j];
2187
+ sub_prob.l = ci+cj;
2188
+ sub_prob.x = Malloc(svm_node *,sub_prob.l);
2189
+ sub_prob.y = Malloc(double,sub_prob.l);
2190
+ int k;
2191
+ for(k=0;k<ci;k++)
2192
+ {
2193
+ sub_prob.x[k] = x[si+k];
2194
+ sub_prob.y[k] = +1;
2195
+ }
2196
+ for(k=0;k<cj;k++)
2197
+ {
2198
+ sub_prob.x[ci+k] = x[sj+k];
2199
+ sub_prob.y[ci+k] = -1;
2200
+ }
2201
+
2202
+ if(param->probability)
2203
+ svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2204
+
2205
+ f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2206
+ for(k=0;k<ci;k++)
2207
+ if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2208
+ nonzero[si+k] = true;
2209
+ for(k=0;k<cj;k++)
2210
+ if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2211
+ nonzero[sj+k] = true;
2212
+ free(sub_prob.x);
2213
+ free(sub_prob.y);
2214
+ ++p;
2215
+ }
2216
+
2217
+ // build output
2218
+
2219
+ model->nr_class = nr_class;
2220
+
2221
+ model->label = Malloc(int,nr_class);
2222
+ for(i=0;i<nr_class;i++)
2223
+ model->label[i] = label[i];
2224
+
2225
+ model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2226
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2227
+ model->rho[i] = f[i].rho;
2228
+
2229
+ if(param->probability)
2230
+ {
2231
+ model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2232
+ model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2233
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2234
+ {
2235
+ model->probA[i] = probA[i];
2236
+ model->probB[i] = probB[i];
2237
+ }
2238
+ }
2239
+ else
2240
+ {
2241
+ model->probA=NULL;
2242
+ model->probB=NULL;
2243
+ }
2244
+
2245
+ int total_sv = 0;
2246
+ int *nz_count = Malloc(int,nr_class);
2247
+ model->nSV = Malloc(int,nr_class);
2248
+ for(i=0;i<nr_class;i++)
2249
+ {
2250
+ int nSV = 0;
2251
+ for(int j=0;j<count[i];j++)
2252
+ if(nonzero[start[i]+j])
2253
+ {
2254
+ ++nSV;
2255
+ ++total_sv;
2256
+ }
2257
+ model->nSV[i] = nSV;
2258
+ nz_count[i] = nSV;
2259
+ }
2260
+
2261
+ info("Total nSV = %d\n",total_sv);
2262
+
2263
+ model->l = total_sv;
2264
+ model->SV = Malloc(svm_node *,total_sv);
2265
+ p = 0;
2266
+ for(i=0;i<l;i++)
2267
+ if(nonzero[i]) model->SV[p++] = x[i];
2268
+
2269
+ int *nz_start = Malloc(int,nr_class);
2270
+ nz_start[0] = 0;
2271
+ for(i=1;i<nr_class;i++)
2272
+ nz_start[i] = nz_start[i-1]+nz_count[i-1];
2273
+
2274
+ model->sv_coef = Malloc(double *,nr_class-1);
2275
+ for(i=0;i<nr_class-1;i++)
2276
+ model->sv_coef[i] = Malloc(double,total_sv);
2277
+
2278
+ p = 0;
2279
+ for(i=0;i<nr_class;i++)
2280
+ for(int j=i+1;j<nr_class;j++)
2281
+ {
2282
+ // classifier (i,j): coefficients with
2283
+ // i are in sv_coef[j-1][nz_start[i]...],
2284
+ // j are in sv_coef[i][nz_start[j]...]
2285
+
2286
+ int si = start[i];
2287
+ int sj = start[j];
2288
+ int ci = count[i];
2289
+ int cj = count[j];
2290
+
2291
+ int q = nz_start[i];
2292
+ int k;
2293
+ for(k=0;k<ci;k++)
2294
+ if(nonzero[si+k])
2295
+ model->sv_coef[j-1][q++] = f[p].alpha[k];
2296
+ q = nz_start[j];
2297
+ for(k=0;k<cj;k++)
2298
+ if(nonzero[sj+k])
2299
+ model->sv_coef[i][q++] = f[p].alpha[ci+k];
2300
+ ++p;
2301
+ }
2302
+
2303
+ free(label);
2304
+ free(probA);
2305
+ free(probB);
2306
+ free(count);
2307
+ free(perm);
2308
+ free(start);
2309
+ free(x);
2310
+ free(weighted_C);
2311
+ free(nonzero);
2312
+ for(i=0;i<nr_class*(nr_class-1)/2;i++)
2313
+ free(f[i].alpha);
2314
+ free(f);
2315
+ free(nz_count);
2316
+ free(nz_start);
2317
+ }
2318
+ return model;
2319
+ }
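+ // Illustrative usage sketch (not part of libsvm; "prob" is assumed to be an
+ // svm_problem already filled with index-terminated svm_node rows):
+ //
+ //   svm_parameter param;
+ //   param.svm_type = C_SVC;      param.kernel_type = RBF;
+ //   param.degree = 3;            param.gamma = 0.5;         param.coef0 = 0;
+ //   param.cache_size = 100;      param.eps = 1e-3;          param.C = 1;
+ //   param.nu = 0.5;              param.p = 0.1;
+ //   param.shrinking = 1;         param.probability = 0;
+ //   param.nr_weight = 0;         param.weight_label = NULL; param.weight = NULL;
+ //   const char *err = svm_check_parameter(&prob, &param);
+ //   if(err == NULL)
+ //   {
+ //       svm_model *m = svm_train(&prob, &param);
+ //       double y = svm_predict(m, prob.x[0]);
+ //       svm_destroy_model(m);
+ //   }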
2320
+
2321
+ // Stratified cross validation
2322
+ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2323
+ {
2324
+ int i;
2325
+ int *fold_start = Malloc(int,nr_fold+1);
2326
+ int l = prob->l;
2327
+ int *perm = Malloc(int,l);
2328
+ int nr_class;
2329
+
2330
+ // stratified CV may not give the leave-one-out rate:
2331
+ // splitting each class into l folds can leave some folds with zero elements
2332
+ if((param->svm_type == C_SVC ||
2333
+ param->svm_type == NU_SVC) && nr_fold < l)
2334
+ {
2335
+ int *start = NULL;
2336
+ int *label = NULL;
2337
+ int *count = NULL;
2338
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2339
+
2340
+ // randomly shuffle within each class, then group the data by fold using the perm array
2341
+ int *fold_count = Malloc(int,nr_fold);
2342
+ int c;
2343
+ int *index = Malloc(int,l);
2344
+ for(i=0;i<l;i++)
2345
+ index[i]=perm[i];
2346
+ for (c=0; c<nr_class; c++)
2347
+ for(i=0;i<count[c];i++)
2348
+ {
2349
+ int j = i+rand()%(count[c]-i);
2350
+ swap(index[start[c]+j],index[start[c]+i]);
2351
+ }
2352
+ for(i=0;i<nr_fold;i++)
2353
+ {
2354
+ fold_count[i] = 0;
2355
+ for (c=0; c<nr_class;c++)
2356
+ fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2357
+ }
2358
+ fold_start[0]=0;
2359
+ for (i=1;i<=nr_fold;i++)
2360
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2361
+ for (c=0; c<nr_class;c++)
2362
+ for(i=0;i<nr_fold;i++)
2363
+ {
2364
+ int begin = start[c]+i*count[c]/nr_fold;
2365
+ int end = start[c]+(i+1)*count[c]/nr_fold;
2366
+ for(int j=begin;j<end;j++)
2367
+ {
2368
+ perm[fold_start[i]] = index[j];
2369
+ fold_start[i]++;
2370
+ }
2371
+ }
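+ // the loop above advanced fold_start[] while scattering indices into perm, so rebuild it below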
2372
+ fold_start[0]=0;
2373
+ for (i=1;i<=nr_fold;i++)
2374
+ fold_start[i] = fold_start[i-1]+fold_count[i-1];
2375
+ free(start);
2376
+ free(label);
2377
+ free(count);
2378
+ free(index);
2379
+ free(fold_count);
2380
+ }
2381
+ else
2382
+ {
2383
+ for(i=0;i<l;i++) perm[i]=i;
2384
+ for(i=0;i<l;i++)
2385
+ {
2386
+ int j = i+rand()%(l-i);
2387
+ swap(perm[i],perm[j]);
2388
+ }
2389
+ for(i=0;i<=nr_fold;i++)
2390
+ fold_start[i]=i*l/nr_fold;
2391
+ }
2392
+
2393
+ for(i=0;i<nr_fold;i++)
2394
+ {
2395
+ int begin = fold_start[i];
2396
+ int end = fold_start[i+1];
2397
+ int j,k;
2398
+ struct svm_problem subprob;
2399
+
2400
+ subprob.l = l-(end-begin);
2401
+ subprob.x = Malloc(struct svm_node*,subprob.l);
2402
+ subprob.y = Malloc(double,subprob.l);
2403
+
2404
+ k=0;
2405
+ for(j=0;j<begin;j++)
2406
+ {
2407
+ subprob.x[k] = prob->x[perm[j]];
2408
+ subprob.y[k] = prob->y[perm[j]];
2409
+ ++k;
2410
+ }
2411
+ for(j=end;j<l;j++)
2412
+ {
2413
+ subprob.x[k] = prob->x[perm[j]];
2414
+ subprob.y[k] = prob->y[perm[j]];
2415
+ ++k;
2416
+ }
2417
+ struct svm_model *submodel = svm_train(&subprob,param);
2418
+ if(param->probability &&
2419
+ (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2420
+ {
2421
+ double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2422
+ for(j=begin;j<end;j++)
2423
+ target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2424
+ free(prob_estimates);
2425
+ }
2426
+ else
2427
+ for(j=begin;j<end;j++)
2428
+ target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2429
+ svm_destroy_model(submodel);
2430
+ free(subprob.x);
2431
+ free(subprob.y);
2432
+ }
2433
+ free(fold_start);
2434
+ free(perm);
2435
+ }
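+ // Illustrative usage sketch (not part of libsvm; prob and param are assumed to
+ // be a filled svm_problem / svm_parameter as in the sketch after svm_train):
+ //
+ //   double *target = Malloc(double, prob.l);
+ //   svm_cross_validation(&prob, &param, 5, target);   // 5-fold CV predictions
+ //   int correct = 0;
+ //   for(int i = 0; i < prob.l; i++)
+ //       if(target[i] == prob.y[i]) ++correct;          // classification accuracy
+ //   free(target);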
2436
+
2437
+
2438
+ int svm_get_svm_type(const svm_model *model)
2439
+ {
2440
+ return model->param.svm_type;
2441
+ }
2442
+
2443
+ int svm_get_nr_class(const svm_model *model)
2444
+ {
2445
+ return model->nr_class;
2446
+ }
2447
+
2448
+ void svm_get_labels(const svm_model *model, int* label)
2449
+ {
2450
+ if (model->label != NULL)
2451
+ for(int i=0;i<model->nr_class;i++)
2452
+ label[i] = model->label[i];
2453
+ }
2454
+
2455
+ double svm_get_svr_probability(const svm_model *model)
2456
+ {
2457
+ if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2458
+ model->probA!=NULL)
2459
+ return model->probA[0];
2460
+ else
2461
+ {
2462
+ fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2463
+ return 0;
2464
+ }
2465
+ }
2466
+
2467
+ void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2468
+ {
2469
+ if(model->param.svm_type == ONE_CLASS ||
2470
+ model->param.svm_type == EPSILON_SVR ||
2471
+ model->param.svm_type == NU_SVR)
2472
+ {
2473
+ double *sv_coef = model->sv_coef[0];
2474
+ double sum = 0;
2475
+ for(int i=0;i<model->l;i++)
2476
+ sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2477
+ sum -= model->rho[0];
2478
+ *dec_values = sum;
2479
+ }
2480
+ else
2481
+ {
2482
+ int i;
2483
+ int nr_class = model->nr_class;
2484
+ int l = model->l;
2485
+
2486
+ double *kvalue = Malloc(double,l);
2487
+ for(i=0;i<l;i++)
2488
+ kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2489
+
2490
+ int *start = Malloc(int,nr_class);
2491
+ start[0] = 0;
2492
+ for(i=1;i<nr_class;i++)
2493
+ start[i] = start[i-1]+model->nSV[i-1];
2494
+
2495
+ int p=0;
2496
+ for(i=0;i<nr_class;i++)
2497
+ for(int j=i+1;j<nr_class;j++)
2498
+ {
2499
+ double sum = 0;
2500
+ int si = start[i];
2501
+ int sj = start[j];
2502
+ int ci = model->nSV[i];
2503
+ int cj = model->nSV[j];
2504
+
2505
+ int k;
2506
+ double *coef1 = model->sv_coef[j-1];
2507
+ double *coef2 = model->sv_coef[i];
2508
+ for(k=0;k<ci;k++)
2509
+ sum += coef1[si+k] * kvalue[si+k];
2510
+ for(k=0;k<cj;k++)
2511
+ sum += coef2[sj+k] * kvalue[sj+k];
2512
+ sum -= model->rho[p];
2513
+ dec_values[p] = sum;
2514
+ p++;
2515
+ }
2516
+
2517
+ free(kvalue);
2518
+ free(start);
2519
+ }
2520
+ }
2521
+
2522
+ double svm_predict(const svm_model *model, const svm_node *x)
2523
+ {
2524
+ if(model->param.svm_type == ONE_CLASS ||
2525
+ model->param.svm_type == EPSILON_SVR ||
2526
+ model->param.svm_type == NU_SVR)
2527
+ {
2528
+ double res;
2529
+ svm_predict_values(model, x, &res);
2530
+
2531
+ if(model->param.svm_type == ONE_CLASS)
2532
+ return (res>0)?1:-1;
2533
+ else
2534
+ return res;
2535
+ }
2536
+ else
2537
+ {
2538
+ int i;
2539
+ int nr_class = model->nr_class;
2540
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2541
+ svm_predict_values(model, x, dec_values);
2542
+
2543
+ int *vote = Malloc(int,nr_class);
2544
+ for(i=0;i<nr_class;i++)
2545
+ vote[i] = 0;
2546
+ int pos=0;
2547
+ for(i=0;i<nr_class;i++)
2548
+ for(int j=i+1;j<nr_class;j++)
2549
+ {
2550
+ if(dec_values[pos++] > 0)
2551
+ ++vote[i];
2552
+ else
2553
+ ++vote[j];
2554
+ }
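+ // one-vs-one voting: the class with the most pairwise wins is predicted (ties resolved toward the lower index)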
2555
+
2556
+ int vote_max_idx = 0;
2557
+ for(i=1;i<nr_class;i++)
2558
+ if(vote[i] > vote[vote_max_idx])
2559
+ vote_max_idx = i;
2560
+ free(vote);
2561
+ free(dec_values);
2562
+ return model->label[vote_max_idx];
2563
+ }
2564
+ }
2565
+
2566
+ double svm_predict_probability(
2567
+ const svm_model *model, const svm_node *x, double *prob_estimates)
2568
+ {
2569
+ if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2570
+ model->probA!=NULL && model->probB!=NULL)
2571
+ {
2572
+ int i;
2573
+ int nr_class = model->nr_class;
2574
+ double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2575
+ svm_predict_values(model, x, dec_values);
2576
+
2577
+ double min_prob=1e-7;
2578
+ double **pairwise_prob=Malloc(double *,nr_class);
2579
+ for(i=0;i<nr_class;i++)
2580
+ pairwise_prob[i]=Malloc(double,nr_class);
2581
+ int k=0;
2582
+ for(i=0;i<nr_class;i++)
2583
+ for(int j=i+1;j<nr_class;j++)
2584
+ {
2585
+ pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2586
+ pairwise_prob[j][i]=1-pairwise_prob[i][j];
2587
+ k++;
2588
+ }
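+ // couple the clipped pairwise probabilities into a single per-class estimate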
2589
+ multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2590
+
2591
+ int prob_max_idx = 0;
2592
+ for(i=1;i<nr_class;i++)
2593
+ if(prob_estimates[i] > prob_estimates[prob_max_idx])
2594
+ prob_max_idx = i;
2595
+ for(i=0;i<nr_class;i++)
2596
+ free(pairwise_prob[i]);
2597
+ free(dec_values);
2598
+ free(pairwise_prob);
2599
+ return model->label[prob_max_idx];
2600
+ }
2601
+ else
2602
+ return svm_predict(model, x);
2603
+ }
2604
+
2605
+ const char *svm_type_table[] =
2606
+ {
2607
+ "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2608
+ };
2609
+
2610
+ const char *kernel_type_table[]=
2611
+ {
2612
+ "linear","polynomial","rbf","sigmoid","precomputed",NULL
2613
+ };
2614
+
2615
+ int svm_save_model(const char *model_file_name, const svm_model *model)
2616
+ {
2617
+ FILE *fp = fopen(model_file_name,"w");
2618
+ if(fp==NULL) return -1;
2619
+
2620
+ const svm_parameter& param = model->param;
2621
+
2622
+ fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2623
+ fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2624
+
2625
+ if(param.kernel_type == POLY)
2626
+ fprintf(fp,"degree %d\n", param.degree);
2627
+
2628
+ if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2629
+ fprintf(fp,"gamma %g\n", param.gamma);
2630
+
2631
+ if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2632
+ fprintf(fp,"coef0 %g\n", param.coef0);
2633
+
2634
+ int nr_class = model->nr_class;
2635
+ int l = model->l;
2636
+ fprintf(fp, "nr_class %d\n", nr_class);
2637
+ fprintf(fp, "total_sv %d\n",l);
2638
+
2639
+ {
2640
+ fprintf(fp, "rho");
2641
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2642
+ fprintf(fp," %g",model->rho[i]);
2643
+ fprintf(fp, "\n");
2644
+ }
2645
+
2646
+ if(model->label)
2647
+ {
2648
+ fprintf(fp, "label");
2649
+ for(int i=0;i<nr_class;i++)
2650
+ fprintf(fp," %d",model->label[i]);
2651
+ fprintf(fp, "\n");
2652
+ }
2653
+
2654
+ if(model->probA) // regression has probA only
2655
+ {
2656
+ fprintf(fp, "probA");
2657
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2658
+ fprintf(fp," %g",model->probA[i]);
2659
+ fprintf(fp, "\n");
2660
+ }
2661
+ if(model->probB)
2662
+ {
2663
+ fprintf(fp, "probB");
2664
+ for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2665
+ fprintf(fp," %g",model->probB[i]);
2666
+ fprintf(fp, "\n");
2667
+ }
2668
+
2669
+ if(model->nSV)
2670
+ {
2671
+ fprintf(fp, "nr_sv");
2672
+ for(int i=0;i<nr_class;i++)
2673
+ fprintf(fp," %d",model->nSV[i]);
2674
+ fprintf(fp, "\n");
2675
+ }
2676
+
2677
+ fprintf(fp, "SV\n");
2678
+ const double * const *sv_coef = model->sv_coef;
2679
+ const svm_node * const *SV = model->SV;
2680
+
2681
+ for(int i=0;i<l;i++)
2682
+ {
2683
+ for(int j=0;j<nr_class-1;j++)
2684
+ fprintf(fp, "%.16g ",sv_coef[j][i]);
2685
+
2686
+ const svm_node *p = SV[i];
2687
+
2688
+ if(param.kernel_type == PRECOMPUTED)
2689
+ fprintf(fp,"0:%d ",(int)(p->value));
2690
+ else
2691
+ while(p->index != -1)
2692
+ {
2693
+ fprintf(fp,"%d:%.8g ",p->index,p->value);
2694
+ p++;
2695
+ }
2696
+ fprintf(fp, "\n");
2697
+ }
2698
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2699
+ else return 0;
2700
+ }
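+ // the text format written above is what svm_load_model() below parses back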
2701
+
2702
+ static char *line = NULL;
2703
+ static int max_line_len;
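+ // readline() grows this buffer until it holds an entire line of the model file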
2704
+
2705
+ static char* readline(FILE *input)
2706
+ {
2707
+ int len;
2708
+
2709
+ if(fgets(line,max_line_len,input) == NULL)
2710
+ return NULL;
2711
+
2712
+ while(strrchr(line,'\n') == NULL)
2713
+ {
2714
+ max_line_len *= 2;
2715
+ line = (char *) realloc(line,max_line_len);
2716
+ len = (int) strlen(line);
2717
+ if(fgets(line+len,max_line_len-len,input) == NULL)
2718
+ break;
2719
+ }
2720
+ return line;
2721
+ }
2722
+
2723
+ svm_model *svm_load_model(const char *model_file_name)
2724
+ {
2725
+ FILE *fp = fopen(model_file_name,"rb");
2726
+ if(fp==NULL) return NULL;
2727
+
2728
+ // read parameters
2729
+
2730
+ svm_model *model = Malloc(svm_model,1);
2731
+ svm_parameter& param = model->param;
2732
+ model->rho = NULL;
2733
+ model->probA = NULL;
2734
+ model->probB = NULL;
2735
+ model->label = NULL;
2736
+ model->nSV = NULL;
2737
+
2738
+ char cmd[81];
2739
+ while(1)
2740
+ {
2741
+ fscanf(fp,"%80s",cmd);
2742
+
2743
+ if(strcmp(cmd,"svm_type")==0)
2744
+ {
2745
+ fscanf(fp,"%80s",cmd);
2746
+ int i;
2747
+ for(i=0;svm_type_table[i];i++)
2748
+ {
2749
+ if(strcmp(svm_type_table[i],cmd)==0)
2750
+ {
2751
+ param.svm_type=i;
2752
+ break;
2753
+ }
2754
+ }
2755
+ if(svm_type_table[i] == NULL)
2756
+ {
2757
+ fprintf(stderr,"unknown svm type.\n");
2758
+ free(model->rho);
2759
+ free(model->label);
2760
+ free(model->nSV);
2761
+ free(model);
2762
+ return NULL;
2763
+ }
2764
+ }
2765
+ else if(strcmp(cmd,"kernel_type")==0)
2766
+ {
2767
+ fscanf(fp,"%80s",cmd);
2768
+ int i;
2769
+ for(i=0;kernel_type_table[i];i++)
2770
+ {
2771
+ if(strcmp(kernel_type_table[i],cmd)==0)
2772
+ {
2773
+ param.kernel_type=i;
2774
+ break;
2775
+ }
2776
+ }
2777
+ if(kernel_type_table[i] == NULL)
2778
+ {
2779
+ fprintf(stderr,"unknown kernel function.\n");
2780
+ free(model->rho);
2781
+ free(model->label);
2782
+ free(model->nSV);
2783
+ free(model);
2784
+ return NULL;
2785
+ }
2786
+ }
2787
+ else if(strcmp(cmd,"degree")==0)
2788
+ fscanf(fp,"%d",&param.degree);
2789
+ else if(strcmp(cmd,"gamma")==0)
2790
+ fscanf(fp,"%lf",&param.gamma);
2791
+ else if(strcmp(cmd,"coef0")==0)
2792
+ fscanf(fp,"%lf",&param.coef0);
2793
+ else if(strcmp(cmd,"nr_class")==0)
2794
+ fscanf(fp,"%d",&model->nr_class);
2795
+ else if(strcmp(cmd,"total_sv")==0)
2796
+ fscanf(fp,"%d",&model->l);
2797
+ else if(strcmp(cmd,"rho")==0)
2798
+ {
2799
+ int n = model->nr_class * (model->nr_class-1)/2;
2800
+ model->rho = Malloc(double,n);
2801
+ for(int i=0;i<n;i++)
2802
+ fscanf(fp,"%lf",&model->rho[i]);
2803
+ }
2804
+ else if(strcmp(cmd,"label")==0)
2805
+ {
2806
+ int n = model->nr_class;
2807
+ model->label = Malloc(int,n);
2808
+ for(int i=0;i<n;i++)
2809
+ fscanf(fp,"%d",&model->label[i]);
2810
+ }
2811
+ else if(strcmp(cmd,"probA")==0)
2812
+ {
2813
+ int n = model->nr_class * (model->nr_class-1)/2;
2814
+ model->probA = Malloc(double,n);
2815
+ for(int i=0;i<n;i++)
2816
+ fscanf(fp,"%lf",&model->probA[i]);
2817
+ }
2818
+ else if(strcmp(cmd,"probB")==0)
2819
+ {
2820
+ int n = model->nr_class * (model->nr_class-1)/2;
2821
+ model->probB = Malloc(double,n);
2822
+ for(int i=0;i<n;i++)
2823
+ fscanf(fp,"%lf",&model->probB[i]);
2824
+ }
2825
+ else if(strcmp(cmd,"nr_sv")==0)
2826
+ {
2827
+ int n = model->nr_class;
2828
+ model->nSV = Malloc(int,n);
2829
+ for(int i=0;i<n;i++)
2830
+ fscanf(fp,"%d",&model->nSV[i]);
2831
+ }
2832
+ else if(strcmp(cmd,"SV")==0)
2833
+ {
2834
+ while(1)
2835
+ {
2836
+ int c = getc(fp);
2837
+ if(c==EOF || c=='\n') break;
2838
+ }
2839
+ break;
2840
+ }
2841
+ else
2842
+ {
2843
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2844
+ free(model->rho);
2845
+ free(model->label);
2846
+ free(model->nSV);
2847
+ free(model);
2848
+ return NULL;
2849
+ }
2850
+ }
2851
+
2852
+ // read sv_coef and SV
2853
+
2854
+ int elements = 0;
2855
+ long pos = ftell(fp);
2856
+
2857
+ max_line_len = 1024;
2858
+ line = Malloc(char,max_line_len);
2859
+ char *p,*endptr,*idx,*val;
2860
+
2861
+ while(readline(fp)!=NULL)
2862
+ {
2863
+ p = strtok(line,":");
2864
+ while(1)
2865
+ {
2866
+ p = strtok(NULL,":");
2867
+ if(p == NULL)
2868
+ break;
2869
+ ++elements;
2870
+ }
2871
+ }
2872
+ elements += model->l;
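+ // the pass above counted one svm_node per "index:value" token; add one terminator node (index = -1) per SV row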
2873
+
2874
+ fseek(fp,pos,SEEK_SET);
2875
+
2876
+ int m = model->nr_class - 1;
2877
+ int l = model->l;
2878
+ model->sv_coef = Malloc(double *,m);
2879
+ int i;
2880
+ for(i=0;i<m;i++)
2881
+ model->sv_coef[i] = Malloc(double,l);
2882
+ model->SV = Malloc(svm_node*,l);
2883
+ svm_node *x_space = NULL;
2884
+ if(l>0) x_space = Malloc(svm_node,elements);
2885
+
2886
+ int j=0;
2887
+ for(i=0;i<l;i++)
2888
+ {
2889
+ readline(fp);
2890
+ model->SV[i] = &x_space[j];
2891
+
2892
+ p = strtok(line, " \t");
2893
+ model->sv_coef[0][i] = strtod(p,&endptr);
2894
+ for(int k=1;k<m;k++)
2895
+ {
2896
+ p = strtok(NULL, " \t");
2897
+ model->sv_coef[k][i] = strtod(p,&endptr);
2898
+ }
2899
+
2900
+ while(1)
2901
+ {
2902
+ idx = strtok(NULL, ":");
2903
+ val = strtok(NULL, " \t");
2904
+
2905
+ if(val == NULL)
2906
+ break;
2907
+ x_space[j].index = (int) strtol(idx,&endptr,10);
2908
+ x_space[j].value = strtod(val,&endptr);
2909
+
2910
+ ++j;
2911
+ }
2912
+ x_space[j++].index = -1;
2913
+ }
2914
+ free(line);
2915
+
2916
+ if (ferror(fp) != 0 || fclose(fp) != 0)
2917
+ return NULL;
2918
+
2919
+ model->free_sv = 1; // the model owns the SV nodes allocated above (x_space); svm_destroy_model frees them
2920
+ return model;
2921
+ }
2922
+
2923
+ void svm_destroy_model(svm_model* model)
2924
+ {
2925
+ if(model->free_sv && model->l > 0)
2926
+ free((void *)(model->SV[0]));
2927
+ for(int i=0;i<model->nr_class-1;i++)
2928
+ free(model->sv_coef[i]);
2929
+ free(model->SV);
2930
+ free(model->sv_coef);
2931
+ free(model->rho);
2932
+ free(model->label);
2933
+ free(model->probA);
2934
+ free(model->probB);
2935
+ free(model->nSV);
2936
+ free(model);
2937
+ }
2938
+
2939
+ void svm_destroy_param(svm_parameter* param)
2940
+ {
2941
+ free(param->weight_label);
2942
+ free(param->weight);
2943
+ }
2944
+
2945
+ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
2946
+ {
2947
+ // svm_type
2948
+
2949
+ int svm_type = param->svm_type;
2950
+ if(svm_type != C_SVC &&
2951
+ svm_type != NU_SVC &&
2952
+ svm_type != ONE_CLASS &&
2953
+ svm_type != EPSILON_SVR &&
2954
+ svm_type != NU_SVR)
2955
+ return "unknown svm type";
2956
+
2957
+ // kernel_type, degree
2958
+
2959
+ int kernel_type = param->kernel_type;
2960
+ if(kernel_type != LINEAR &&
2961
+ kernel_type != POLY &&
2962
+ kernel_type != RBF &&
2963
+ kernel_type != SIGMOID &&
2964
+ kernel_type != PRECOMPUTED)
2965
+ return "unknown kernel type";
2966
+
2967
+ if(param->degree < 0)
2968
+ return "degree of polynomial kernel < 0";
2969
+
2970
+ // cache_size,eps,C,nu,p,shrinking
2971
+
2972
+ if(param->cache_size <= 0)
2973
+ return "cache_size <= 0";
2974
+
2975
+ if(param->eps <= 0)
2976
+ return "eps <= 0";
2977
+
2978
+ if(svm_type == C_SVC ||
2979
+ svm_type == EPSILON_SVR ||
2980
+ svm_type == NU_SVR)
2981
+ if(param->C <= 0)
2982
+ return "C <= 0";
2983
+
2984
+ if(svm_type == NU_SVC ||
2985
+ svm_type == ONE_CLASS ||
2986
+ svm_type == NU_SVR)
2987
+ if(param->nu <= 0 || param->nu > 1)
2988
+ return "nu <= 0 or nu > 1";
2989
+
2990
+ if(svm_type == EPSILON_SVR)
2991
+ if(param->p < 0)
2992
+ return "p < 0";
2993
+
2994
+ if(param->shrinking != 0 &&
2995
+ param->shrinking != 1)
2996
+ return "shrinking != 0 and shrinking != 1";
2997
+
2998
+ if(param->probability != 0 &&
2999
+ param->probability != 1)
3000
+ return "probability != 0 and probability != 1";
3001
+
3002
+ if(param->probability == 1 &&
3003
+ svm_type == ONE_CLASS)
3004
+ return "one-class SVM probability output not supported yet";
3005
+
3006
+
3007
+ // check whether nu-svc is feasible
3008
+
3009
+ if(svm_type == NU_SVC)
3010
+ {
3011
+ int l = prob->l;
3012
+ int max_nr_class = 16;
3013
+ int nr_class = 0;
3014
+ int *label = Malloc(int,max_nr_class);
3015
+ int *count = Malloc(int,max_nr_class);
3016
+
3017
+ int i;
3018
+ for(i=0;i<l;i++)
3019
+ {
3020
+ int this_label = (int)prob->y[i];
3021
+ int j;
3022
+ for(j=0;j<nr_class;j++)
3023
+ if(this_label == label[j])
3024
+ {
3025
+ ++count[j];
3026
+ break;
3027
+ }
3028
+ if(j == nr_class)
3029
+ {
3030
+ if(nr_class == max_nr_class)
3031
+ {
3032
+ max_nr_class *= 2;
3033
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
3034
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
3035
+ }
3036
+ label[nr_class] = this_label;
3037
+ count[nr_class] = 1;
3038
+ ++nr_class;
3039
+ }
3040
+ }
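+ // nu is feasible only if nu*(n1+n2)/2 <= min(n1,n2) for every pair of classes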
3041
+
3042
+ for(i=0;i<nr_class;i++)
3043
+ {
3044
+ int n1 = count[i];
3045
+ for(int j=i+1;j<nr_class;j++)
3046
+ {
3047
+ int n2 = count[j];
3048
+ if(param->nu*(n1+n2)/2 > min(n1,n2))
3049
+ {
3050
+ free(label);
3051
+ free(count);
3052
+ return "specified nu is infeasible";
3053
+ }
3054
+ }
3055
+ }
3056
+ free(label);
3057
+ free(count);
3058
+ }
3059
+
3060
+ return NULL;
3061
+ }
3062
+
3063
+ int svm_check_probability_model(const svm_model *model)
3064
+ {
3065
+ return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3066
+ model->probA!=NULL && model->probB!=NULL) ||
3067
+ ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3068
+ model->probA!=NULL);
3069
+ }