svmlightcli 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4147 @@
1
+ /***********************************************************************/
2
+ /* */
3
+ /* svm_learn.c */
4
+ /* */
5
+ /* Learning module of Support Vector Machine. */
6
+ /* */
7
+ /* Author: Thorsten Joachims */
8
+ /* Date: 02.07.02 */
9
+ /* */
10
+ /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
11
+ /* */
12
+ /* This software is available for non-commercial use only. It must */
13
+ /* not be modified and distributed without prior permission of the */
14
+ /* author. The author is not responsible for implications from the */
15
+ /* use of this software. */
16
+ /* */
17
+ /***********************************************************************/
18
+
19
+
20
+ # include "svm_common.h"
21
+ # include "svm_learn.h"
22
+
23
+
24
+ /* interface to QP-solver */
25
+ double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
26
+
27
+ /*---------------------------------------------------------------------------*/
28
+
29
+ /* Learns an SVM classification model based on the training data in
30
+ docs/label. The resulting model is returned in the structure
31
+ model. */
32
+
33
+ void svm_learn_classification(DOC **docs, double *class, long int
34
+ totdoc, long int totwords,
35
+ LEARN_PARM *learn_parm,
36
+ KERNEL_PARM *kernel_parm,
37
+ KERNEL_CACHE *kernel_cache,
38
+ MODEL *model,
39
+ double *alpha)
40
+ /* docs: Training vectors (x-part) */
41
+ /* class: Training labels (y-part, zero if test example for
42
+ transduction) */
43
+ /* totdoc: Number of examples in docs/label */
44
+ /* totwords: Number of features (i.e. highest feature index) */
45
+ /* learn_parm: Learning paramenters */
46
+ /* kernel_parm: Kernel paramenters */
47
+ /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
48
+ NULL if linear.*/
49
+ /* model: Returns learning result (assumed empty before called) */
50
+ /* alpha: Start values for the alpha variables or NULL
51
+ pointer. The new alpha values are returned after
52
+ optimization if not NULL. Array must be of size totdoc. */
53
+ {
54
+ long *inconsistent,i,*label;
55
+ long inconsistentnum;
56
+ long misclassified,upsupvecnum;
57
+ double loss,model_length,example_length;
58
+ double maxdiff,*lin,*a,*c;
59
+ long runtime_start,runtime_end;
60
+ long iterations;
61
+ long *unlabeled,transduction;
62
+ long heldout;
63
+ long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
64
+ long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
65
+ double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
66
+ long *index,*index2dnum;
67
+ double *weights;
68
+ CFLOAT *aicache; /* buffer to keep one row of hessian */
69
+
70
+ double *xi_fullset; /* buffer for storing xi on full sample in loo */
71
+ double *a_fullset; /* buffer for storing alpha on full sample in loo */
72
+ TIMING timing_profile;
73
+ SHRINK_STATE shrink_state;
74
+
75
+ runtime_start=get_runtime();
76
+ timing_profile.time_kernel=0;
77
+ timing_profile.time_opti=0;
78
+ timing_profile.time_shrink=0;
79
+ timing_profile.time_update=0;
80
+ timing_profile.time_model=0;
81
+ timing_profile.time_check=0;
82
+ timing_profile.time_select=0;
83
+ kernel_cache_statistic=0;
84
+
85
+ learn_parm->totwords=totwords;
86
+
87
+ /* make sure -n value is reasonable */
88
+ if((learn_parm->svm_newvarsinqp < 2)
89
+ || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
90
+ learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
91
+ }
92
+
93
+ init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
94
+
95
+ label = (long *)my_malloc(sizeof(long)*totdoc);
96
+ inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
97
+ unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
98
+ c = (double *)my_malloc(sizeof(double)*totdoc);
99
+ a = (double *)my_malloc(sizeof(double)*totdoc);
100
+ a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
101
+ xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
102
+ lin = (double *)my_malloc(sizeof(double)*totdoc);
103
+ learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
104
+ model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
105
+ model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
106
+ model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
107
+
108
+ model->at_upper_bound=0;
109
+ model->b=0;
110
+ model->supvec[0]=0; /* element 0 reserved and empty for now */
111
+ model->alpha[0]=0;
112
+ model->lin_weights=NULL;
113
+ model->totwords=totwords;
114
+ model->totdoc=totdoc;
115
+ model->kernel_parm=(*kernel_parm);
116
+ model->sv_num=1;
117
+ model->loo_error=-1;
118
+ model->loo_recall=-1;
119
+ model->loo_precision=-1;
120
+ model->xa_error=-1;
121
+ model->xa_recall=-1;
122
+ model->xa_precision=-1;
123
+ inconsistentnum=0;
124
+ transduction=0;
125
+
126
+ r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
127
+ r_delta_sq=r_delta*r_delta;
128
+
129
+ r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
130
+ if(learn_parm->svm_c == 0.0) { /* default value for C */
131
+ learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
132
+ if(verbosity>=1)
133
+ printf("Setting default regularization parameter C=%.4f\n",
134
+ learn_parm->svm_c);
135
+ }
136
+
137
+ learn_parm->eps=-1.0; /* equivalent regression epsilon for
138
+ classification */
139
+
140
+ for(i=0;i<totdoc;i++) { /* various inits */
141
+ docs[i]->docnum=i;
142
+ inconsistent[i]=0;
143
+ a[i]=0;
144
+ lin[i]=0;
145
+ c[i]=0.0;
146
+ unlabeled[i]=0;
147
+ if(class[i] == 0) {
148
+ unlabeled[i]=1;
149
+ label[i]=0;
150
+ transduction=1;
151
+ }
152
+ if(class[i] > 0) {
153
+ learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
154
+ docs[i]->costfactor;
155
+ label[i]=1;
156
+ trainpos++;
157
+ }
158
+ else if(class[i] < 0) {
159
+ learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
160
+ label[i]=-1;
161
+ trainneg++;
162
+ }
163
+ else {
164
+ learn_parm->svm_cost[i]=0;
165
+ }
166
+ }
167
+ if(verbosity>=2) {
168
+ printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
169
+ }
170
+
171
+ /* caching makes no sense for linear kernel */
172
+ if(kernel_parm->kernel_type == LINEAR) {
173
+ kernel_cache = NULL;
174
+ }
175
+
176
+ /* compute starting state for initial alpha values */
177
+ if(alpha) {
178
+ if(verbosity>=1) {
179
+ printf("Computing starting state..."); fflush(stdout);
180
+ }
181
+ index = (long *)my_malloc(sizeof(long)*totdoc);
182
+ index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
183
+ weights=(double *)my_malloc(sizeof(double)*(totwords+1));
184
+ aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
185
+ for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
186
+ index[i]=1;
187
+ alpha[i]=fabs(alpha[i]);
188
+ if(alpha[i]<0) alpha[i]=0;
189
+ if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
190
+ }
191
+ if(kernel_parm->kernel_type != LINEAR) {
192
+ for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
193
+ if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
194
+ && (kernel_cache_space_available(kernel_cache)))
195
+ cache_kernel_row(kernel_cache,docs,i,kernel_parm);
196
+ for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
197
+ if((alpha[i]==learn_parm->svm_cost[i])
198
+ && (kernel_cache_space_available(kernel_cache)))
199
+ cache_kernel_row(kernel_cache,docs,i,kernel_parm);
200
+ }
201
+ (void)compute_index(index,totdoc,index2dnum);
202
+ update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
203
+ totwords,kernel_parm,kernel_cache,lin,aicache,
204
+ weights);
205
+ (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
206
+ learn_parm,index2dnum,index2dnum,model);
207
+ for(i=0;i<totdoc;i++) { /* copy initial alphas */
208
+ a[i]=alpha[i];
209
+ }
210
+ free(index);
211
+ free(index2dnum);
212
+ free(weights);
213
+ free(aicache);
214
+ if(verbosity>=1) {
215
+ printf("done.\n"); fflush(stdout);
216
+ }
217
+ }
218
+
219
+ if(transduction) {
220
+ learn_parm->svm_iter_to_shrink=99999999;
221
+ if(verbosity >= 1)
222
+ printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
223
+ }
224
+
225
+ if(transduction && learn_parm->compute_loo) {
226
+ learn_parm->compute_loo=0;
227
+ if(verbosity >= 1)
228
+ printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
229
+ }
230
+
231
+ if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
232
+ learn_parm->compute_loo=0;
233
+ printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
234
+ }
235
+
236
+ if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
237
+ learn_parm->compute_loo=0;
238
+ printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
239
+ }
240
+
241
+
242
+ if(verbosity==1) {
243
+ printf("Optimizing"); fflush(stdout);
244
+ }
245
+
246
+ /* train the svm */
247
+ iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
248
+ kernel_parm,kernel_cache,&shrink_state,model,
249
+ inconsistent,unlabeled,a,lin,
250
+ c,&timing_profile,
251
+ &maxdiff,(long)-1,
252
+ (long)1);
253
+
254
+ if(verbosity>=1) {
255
+ if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
256
+
257
+ misclassified=0;
258
+ for(i=0;(i<totdoc);i++) { /* get final statistic */
259
+ if((lin[i]-model->b)*(double)label[i] <= 0.0)
260
+ misclassified++;
261
+ }
262
+
263
+ printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
264
+ misclassified,maxdiff);
265
+
266
+ runtime_end=get_runtime();
267
+ if(verbosity>=2) {
268
+ printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
269
+ ((float)runtime_end-(float)runtime_start)/100.0,
270
+ (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
271
+ (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
272
+ (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
273
+ (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
274
+ (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
275
+ (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
276
+ (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
277
+ }
278
+ else {
279
+ printf("Runtime in cpu-seconds: %.2f\n",
280
+ (runtime_end-runtime_start)/100.0);
281
+ }
282
+
283
+ if(learn_parm->remove_inconsistent) {
284
+ inconsistentnum=0;
285
+ for(i=0;i<totdoc;i++)
286
+ if(inconsistent[i])
287
+ inconsistentnum++;
288
+ printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
289
+ model->sv_num-1,inconsistentnum);
290
+ }
291
+ else {
292
+ upsupvecnum=0;
293
+ for(i=1;i<model->sv_num;i++) {
294
+ if(fabs(model->alpha[i]) >=
295
+ (learn_parm->svm_cost[(model->supvec[i])->docnum]-
296
+ learn_parm->epsilon_a))
297
+ upsupvecnum++;
298
+ }
299
+ printf("Number of SV: %ld (including %ld at upper bound)\n",
300
+ model->sv_num-1,upsupvecnum);
301
+ }
302
+
303
+ if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
304
+ loss=0;
305
+ model_length=0;
306
+ for(i=0;i<totdoc;i++) {
307
+ if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
308
+ loss+=1.0-(lin[i]-model->b)*(double)label[i];
309
+ model_length+=a[i]*label[i]*lin[i];
310
+ }
311
+ model_length=sqrt(model_length);
312
+ fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
313
+ fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
314
+ example_length=estimate_sphere(model,kernel_parm);
315
+ fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
316
+ length_of_longest_document_vector(docs,totdoc,kernel_parm));
317
+ fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
318
+ estimate_margin_vcdim(model,model_length,example_length,
319
+ kernel_parm));
320
+ if((!learn_parm->remove_inconsistent) && (!transduction)) {
321
+ runtime_start_xa=get_runtime();
322
+ if(verbosity>=1) {
323
+ printf("Computing XiAlpha-estimates..."); fflush(stdout);
324
+ }
325
+ compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
326
+ kernel_parm,learn_parm,&(model->xa_error),
327
+ &(model->xa_recall),&(model->xa_precision));
328
+ if(verbosity>=1) {
329
+ printf("done\n");
330
+ }
331
+ printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
332
+ (get_runtime()-runtime_start_xa)/100.0);
333
+
334
+ fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
335
+ model->xa_error,learn_parm->rho,learn_parm->xa_depth);
336
+ fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
337
+ model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
338
+ fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
339
+ model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
340
+ }
341
+ else if(!learn_parm->remove_inconsistent) {
342
+ estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
343
+ }
344
+ }
345
+ if(verbosity>=1) {
346
+ printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
347
+ }
348
+ }
349
+
350
+
351
+ /* leave-one-out testing starts now */
352
+ if(learn_parm->compute_loo) {
353
+ /* save results of training on full dataset for leave-one-out */
354
+ runtime_start_loo=get_runtime();
355
+ for(i=0;i<totdoc;i++) {
356
+ xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
357
+ if(xi_fullset[i]<0) xi_fullset[i]=0;
358
+ a_fullset[i]=a[i];
359
+ }
360
+ if(verbosity>=1) {
361
+ printf("Computing leave-one-out");
362
+ }
363
+
364
+ /* repeat this loop for every held-out example */
365
+ for(heldout=0;(heldout<totdoc);heldout++) {
366
+ if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
367
+ < 1.0) {
368
+ /* guaranteed to not produce a leave-one-out error */
369
+ if(verbosity==1) {
370
+ printf("+"); fflush(stdout);
371
+ }
372
+ }
373
+ else if(xi_fullset[heldout] > 1.0) {
374
+ /* guaranteed to produce a leave-one-out error */
375
+ loo_count++;
376
+ if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
377
+ if(verbosity==1) {
378
+ printf("-"); fflush(stdout);
379
+ }
380
+ }
381
+ else {
382
+ loocomputed++;
383
+ heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
384
+ learn_parm->svm_cost[heldout]=0;
385
+ /* make sure heldout example is not currently */
386
+ /* shrunk away. Assumes that lin is up to date! */
387
+ shrink_state.active[heldout]=1;
388
+ if(verbosity>=2)
389
+ printf("\nLeave-One-Out test on example %ld\n",heldout);
390
+ if(verbosity>=1) {
391
+ printf("(?[%ld]",heldout); fflush(stdout);
392
+ }
393
+
394
+ optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
395
+ kernel_parm,
396
+ kernel_cache,&shrink_state,model,inconsistent,unlabeled,
397
+ a,lin,c,&timing_profile,
398
+ &maxdiff,heldout,(long)2);
399
+
400
+ /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
401
+
402
+ if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
403
+ loo_count++; /* there was a loo-error */
404
+ if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
405
+ if(verbosity>=1) {
406
+ printf("-)"); fflush(stdout);
407
+ }
408
+ }
409
+ else {
410
+ if(verbosity>=1) {
411
+ printf("+)"); fflush(stdout);
412
+ }
413
+ }
414
+ /* now we need to restore the original data set*/
415
+ learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
416
+ }
417
+ } /* end of leave-one-out loop */
418
+
419
+
420
+ if(verbosity>=1) {
421
+ printf("\nRetrain on full problem"); fflush(stdout);
422
+ }
423
+ optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
424
+ kernel_parm,
425
+ kernel_cache,&shrink_state,model,inconsistent,unlabeled,
426
+ a,lin,c,&timing_profile,
427
+ &maxdiff,(long)-1,(long)1);
428
+ if(verbosity >= 1)
429
+ printf("done.\n");
430
+
431
+
432
+ /* after all leave-one-out computed */
433
+ model->loo_error=100.0*loo_count/(double)totdoc;
434
+ model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
435
+ model->loo_precision=(trainpos-loo_count_pos)/
436
+ (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
437
+ if(verbosity >= 1) {
438
+ fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
439
+ model->loo_error);
440
+ fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
441
+ model->loo_recall);
442
+ fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
443
+ model->loo_precision);
444
+ fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n",
445
+ loocomputed,learn_parm->rho);
446
+ printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
447
+ (double)(get_runtime()-runtime_start_loo)/100.0);
448
+ }
449
+ }
450
+
451
+ if(learn_parm->alphafile[0])
452
+ write_alphas(learn_parm->alphafile,a,label,totdoc);
453
+
454
+ shrink_state_cleanup(&shrink_state);
455
+ free(label);
456
+ free(inconsistent);
457
+ free(unlabeled);
458
+ free(c);
459
+ free(a);
460
+ free(a_fullset);
461
+ free(xi_fullset);
462
+ free(lin);
463
+ free(learn_parm->svm_cost);
464
+ }
465
+
466
+
467
+ /* Learns an SVM regression model based on the training data in
468
+ docs/label. The resulting model is returned in the structure
469
+ model. */
470
+
471
+ void svm_learn_regression(DOC **docs, double *value, long int totdoc,
472
+ long int totwords, LEARN_PARM *learn_parm,
473
+ KERNEL_PARM *kernel_parm,
474
+ KERNEL_CACHE **kernel_cache, MODEL *model)
475
+ /* docs: Training vectors (x-part) */
476
+ /* class: Training value (y-part) */
477
+ /* totdoc: Number of examples in docs/label */
478
+ /* totwords: Number of features (i.e. highest feature index) */
479
+ /* learn_parm: Learning paramenters */
480
+ /* kernel_parm: Kernel paramenters */
481
+ /* kernel_cache:Initialized Cache, if using a kernel. NULL if
482
+ linear. Note that it will be free'd and reassigned */
483
+ /* model: Returns learning result (assumed empty before called) */
484
+ {
485
+ long *inconsistent,i,j;
486
+ long inconsistentnum;
487
+ long upsupvecnum;
488
+ double loss,model_length,example_length;
489
+ double maxdiff,*lin,*a,*c;
490
+ long runtime_start,runtime_end;
491
+ long iterations,kernel_cache_size;
492
+ long *unlabeled;
493
+ double r_delta_sq=0,r_delta,r_delta_avg;
494
+ double *xi_fullset; /* buffer for storing xi on full sample in loo */
495
+ double *a_fullset; /* buffer for storing alpha on full sample in loo */
496
+ TIMING timing_profile;
497
+ SHRINK_STATE shrink_state;
498
+ DOC **docs_org;
499
+ long *label;
500
+
501
+ /* set up regression problem in standard form */
502
+ docs_org=docs;
503
+ docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
504
+ label = (long *)my_malloc(sizeof(long)*2*totdoc);
505
+ c = (double *)my_malloc(sizeof(double)*2*totdoc);
506
+ for(i=0;i<totdoc;i++) {
507
+ j=2*totdoc-1-i;
508
+ docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
509
+ label[i]=+1;
510
+ c[i]=value[i];
511
+ docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
512
+ label[j]=-1;
513
+ c[j]=value[i];
514
+ }
515
+ totdoc*=2;
516
+
517
+ /* need to get a bigger kernel cache */
518
+ if(*kernel_cache) {
519
+ kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
520
+ kernel_cache_cleanup(*kernel_cache);
521
+ (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
522
+ }
523
+
524
+ runtime_start=get_runtime();
525
+ timing_profile.time_kernel=0;
526
+ timing_profile.time_opti=0;
527
+ timing_profile.time_shrink=0;
528
+ timing_profile.time_update=0;
529
+ timing_profile.time_model=0;
530
+ timing_profile.time_check=0;
531
+ timing_profile.time_select=0;
532
+ kernel_cache_statistic=0;
533
+
534
+ learn_parm->totwords=totwords;
535
+
536
+ /* make sure -n value is reasonable */
537
+ if((learn_parm->svm_newvarsinqp < 2)
538
+ || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
539
+ learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
540
+ }
541
+
542
+ init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
543
+
544
+ inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
545
+ unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
546
+ a = (double *)my_malloc(sizeof(double)*totdoc);
547
+ a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
548
+ xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
549
+ lin = (double *)my_malloc(sizeof(double)*totdoc);
550
+ learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
551
+ model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
552
+ model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
553
+ model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
554
+
555
+ model->at_upper_bound=0;
556
+ model->b=0;
557
+ model->supvec[0]=0; /* element 0 reserved and empty for now */
558
+ model->alpha[0]=0;
559
+ model->lin_weights=NULL;
560
+ model->totwords=totwords;
561
+ model->totdoc=totdoc;
562
+ model->kernel_parm=(*kernel_parm);
563
+ model->sv_num=1;
564
+ model->loo_error=-1;
565
+ model->loo_recall=-1;
566
+ model->loo_precision=-1;
567
+ model->xa_error=-1;
568
+ model->xa_recall=-1;
569
+ model->xa_precision=-1;
570
+ inconsistentnum=0;
571
+
572
+ r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
573
+ r_delta_sq=r_delta*r_delta;
574
+
575
+ r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
576
+ if(learn_parm->svm_c == 0.0) { /* default value for C */
577
+ learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
578
+ if(verbosity>=1)
579
+ printf("Setting default regularization parameter C=%.4f\n",
580
+ learn_parm->svm_c);
581
+ }
582
+
583
+ for(i=0;i<totdoc;i++) { /* various inits */
584
+ inconsistent[i]=0;
585
+ a[i]=0;
586
+ lin[i]=0;
587
+ unlabeled[i]=0;
588
+ if(label[i] > 0) {
589
+ learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
590
+ docs[i]->costfactor;
591
+ }
592
+ else if(label[i] < 0) {
593
+ learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
594
+ }
595
+ }
596
+
597
+ /* caching makes no sense for linear kernel */
598
+ if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
599
+ printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
600
+ }
601
+
602
+ if(verbosity==1) {
603
+ printf("Optimizing"); fflush(stdout);
604
+ }
605
+
606
+ /* train the svm */
607
+ iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
608
+ kernel_parm,*kernel_cache,&shrink_state,
609
+ model,inconsistent,unlabeled,a,lin,c,
610
+ &timing_profile,&maxdiff,(long)-1,
611
+ (long)1);
612
+
613
+ if(verbosity>=1) {
614
+ if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
615
+
616
+ printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
617
+
618
+ runtime_end=get_runtime();
619
+ if(verbosity>=2) {
620
+ printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
621
+ ((float)runtime_end-(float)runtime_start)/100.0,
622
+ (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
623
+ (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
624
+ (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
625
+ (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
626
+ (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
627
+ (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
628
+ (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
629
+ }
630
+ else {
631
+ printf("Runtime in cpu-seconds: %.2f\n",
632
+ (runtime_end-runtime_start)/100.0);
633
+ }
634
+
635
+ if(learn_parm->remove_inconsistent) {
636
+ inconsistentnum=0;
637
+ for(i=0;i<totdoc;i++)
638
+ if(inconsistent[i])
639
+ inconsistentnum++;
640
+ printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
641
+ model->sv_num-1,inconsistentnum);
642
+ }
643
+ else {
644
+ upsupvecnum=0;
645
+ for(i=1;i<model->sv_num;i++) {
646
+ if(fabs(model->alpha[i]) >=
647
+ (learn_parm->svm_cost[(model->supvec[i])->docnum]-
648
+ learn_parm->epsilon_a))
649
+ upsupvecnum++;
650
+ }
651
+ printf("Number of SV: %ld (including %ld at upper bound)\n",
652
+ model->sv_num-1,upsupvecnum);
653
+ }
654
+
655
+ if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
656
+ loss=0;
657
+ model_length=0;
658
+ for(i=0;i<totdoc;i++) {
659
+ if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
660
+ loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
661
+ model_length+=a[i]*label[i]*lin[i];
662
+ }
663
+ model_length=sqrt(model_length);
664
+ fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
665
+ fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
666
+ example_length=estimate_sphere(model,kernel_parm);
667
+ fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
668
+ length_of_longest_document_vector(docs,totdoc,kernel_parm));
669
+ }
670
+ if(verbosity>=1) {
671
+ printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
672
+ }
673
+ }
674
+
675
+ if(learn_parm->alphafile[0])
676
+ write_alphas(learn_parm->alphafile,a,label,totdoc);
677
+
678
+ /* this makes sure the model we return does not contain pointers to the
679
+ temporary documents */
680
+ for(i=1;i<model->sv_num;i++) {
681
+ j=model->supvec[i]->docnum;
682
+ if(j >= (totdoc/2)) {
683
+ j=totdoc-j-1;
684
+ }
685
+ model->supvec[i]=docs_org[j];
686
+ }
687
+
688
+ shrink_state_cleanup(&shrink_state);
689
+ for(i=0;i<totdoc;i++)
690
+ free_example(docs[i],0);
691
+ free(docs);
692
+ free(label);
693
+ free(inconsistent);
694
+ free(unlabeled);
695
+ free(c);
696
+ free(a);
697
+ free(a_fullset);
698
+ free(xi_fullset);
699
+ free(lin);
700
+ free(learn_parm->svm_cost);
701
+ }
702
+
703
+ void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
704
+ long int totwords, LEARN_PARM *learn_parm,
705
+ KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
706
+ MODEL *model)
707
+ /* docs: Training vectors (x-part) */
708
+ /* rankvalue: Training target values that determine the ranking */
709
+ /* totdoc: Number of examples in docs/label */
710
+ /* totwords: Number of features (i.e. highest feature index) */
711
+ /* learn_parm: Learning paramenters */
712
+ /* kernel_parm: Kernel paramenters */
713
+ /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
714
+ using a kernel. NULL if linear. NOTE: Cache is
715
+ getting reinitialized in this function */
716
+ /* model: Returns learning result (assumed empty before called) */
717
+ {
718
+ DOC **docdiff;
719
+ long i,j,k,totpair,kernel_cache_size;
720
+ double *target,*alpha,cost;
721
+ long *greater,*lesser;
722
+ MODEL *pairmodel;
723
+ SVECTOR *flow,*fhigh;
724
+
725
+ totpair=0;
726
+ for(i=0;i<totdoc;i++) {
727
+ for(j=i+1;j<totdoc;j++) {
728
+ if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
729
+ totpair++;
730
+ }
731
+ }
732
+ }
733
+
734
+ printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
735
+ docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
736
+ target=(double *)my_malloc(sizeof(double)*totpair);
737
+ greater=(long *)my_malloc(sizeof(long)*totpair);
738
+ lesser=(long *)my_malloc(sizeof(long)*totpair);
739
+
740
+ k=0;
741
+ for(i=0;i<totdoc;i++) {
742
+ for(j=i+1;j<totdoc;j++) {
743
+ if(docs[i]->queryid == docs[j]->queryid) {
744
+ cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
745
+ if(rankvalue[i] > rankvalue[j]) {
746
+ if(kernel_parm->kernel_type == LINEAR)
747
+ docdiff[k]=create_example(k,0,0,cost,
748
+ sub_ss(docs[i]->fvec,docs[j]->fvec));
749
+ else {
750
+ flow=copy_svector(docs[j]->fvec);
751
+ flow->factor=-1.0;
752
+ flow->next=NULL;
753
+ fhigh=copy_svector(docs[i]->fvec);
754
+ fhigh->factor=1.0;
755
+ fhigh->next=flow;
756
+ docdiff[k]=create_example(k,0,0,cost,fhigh);
757
+ }
758
+ target[k]=1;
759
+ greater[k]=i;
760
+ lesser[k]=j;
761
+ k++;
762
+ }
763
+ else if(rankvalue[i] < rankvalue[j]) {
764
+ if(kernel_parm->kernel_type == LINEAR)
765
+ docdiff[k]=create_example(k,0,0,cost,
766
+ sub_ss(docs[i]->fvec,docs[j]->fvec));
767
+ else {
768
+ flow=copy_svector(docs[j]->fvec);
769
+ flow->factor=-1.0;
770
+ flow->next=NULL;
771
+ fhigh=copy_svector(docs[i]->fvec);
772
+ fhigh->factor=1.0;
773
+ fhigh->next=flow;
774
+ docdiff[k]=create_example(k,0,0,cost,fhigh);
775
+ }
776
+ target[k]=-1;
777
+ greater[k]=i;
778
+ lesser[k]=j;
779
+ k++;
780
+ }
781
+ }
782
+ }
783
+ }
784
+ printf("done.\n"); fflush(stdout);
785
+
786
+ /* need to get a bigger kernel cache */
787
+ if(*kernel_cache) {
788
+ kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
789
+ kernel_cache_cleanup(*kernel_cache);
790
+ (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
791
+ }
792
+
793
+ /* must use unbiased hyperplane on difference vectors */
794
+ learn_parm->biased_hyperplane=0;
795
+ pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
796
+ svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
797
+ kernel_parm,(*kernel_cache),pairmodel,NULL);
798
+
799
+ /* Transfer the result into a more compact model. If you would like
800
+ to output the original model on pairs of documents, see below. */
801
+ alpha=(double *)my_malloc(sizeof(double)*totdoc);
802
+ for(i=0;i<totdoc;i++) {
803
+ alpha[i]=0;
804
+ }
805
+ for(i=1;i<pairmodel->sv_num;i++) {
806
+ alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
807
+ alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
808
+ }
809
+ model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
810
+ model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
811
+ model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
812
+ model->supvec[0]=0; /* element 0 reserved and empty for now */
813
+ model->alpha[0]=0;
814
+ model->sv_num=1;
815
+ for(i=0;i<totdoc;i++) {
816
+ if(alpha[i]) {
817
+ model->supvec[model->sv_num]=docs[i];
818
+ model->alpha[model->sv_num]=alpha[i];
819
+ model->index[i]=model->sv_num;
820
+ model->sv_num++;
821
+ }
822
+ else {
823
+ model->index[i]=-1;
824
+ }
825
+ }
826
+ model->at_upper_bound=0;
827
+ model->b=0;
828
+ model->lin_weights=NULL;
829
+ model->totwords=totwords;
830
+ model->totdoc=totdoc;
831
+ model->kernel_parm=(*kernel_parm);
832
+ model->loo_error=-1;
833
+ model->loo_recall=-1;
834
+ model->loo_precision=-1;
835
+ model->xa_error=-1;
836
+ model->xa_recall=-1;
837
+ model->xa_precision=-1;
838
+
839
+ free(alpha);
840
+ free(greater);
841
+ free(lesser);
842
+ free(target);
843
+
844
+ /* If you would like to output the original model on pairs of
845
+ document, replace the following lines with '(*model)=(*pairmodel);' */
846
+ for(i=0;i<totpair;i++)
847
+ free_example(docdiff[i],1);
848
+ free(docdiff);
849
+ free_model(pairmodel,0);
850
+ }
851
+
852
+
853
+ /* The following solves a freely defined and given set of
854
+ inequalities. The optimization problem is of the following form:
855
+
856
+ min 0.5 w*w + C sum_i C_i \xi_i
857
+ s.t. x_i * w > rhs_i - \xi_i
858
+
859
+ This corresponds to the -z o option. */
860
+
861
+ void svm_learn_optimization(DOC **docs, double *rhs, long int
862
+ totdoc, long int totwords,
863
+ LEARN_PARM *learn_parm,
864
+ KERNEL_PARM *kernel_parm,
865
+ KERNEL_CACHE *kernel_cache, MODEL *model,
866
+ double *alpha)
867
+ /* docs: Left-hand side of inequalities (x-part) */
868
+ /* rhs: Right-hand side of inequalities */
869
+ /* totdoc: Number of examples in docs/label */
870
+ /* totwords: Number of features (i.e. highest feature index) */
871
+ /* learn_parm: Learning parameters */
871
+ /* kernel_parm: Kernel parameters */
873
+ /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
874
+ NULL if linear.*/
875
+ /* model: Returns solution as SV expansion (assumed empty before called) */
876
+ /* alpha: Start values for the alpha variables or NULL
877
+ pointer. The new alpha values are returned after
878
+ optimization if not NULL. Array must be of size totdoc. */
879
+ {
880
+ long i,*label;
881
+ long misclassified,upsupvecnum;
882
+ double loss,model_length,example_length;
883
+ double maxdiff,*lin,*a,*c;
884
+ long runtime_start,runtime_end;
885
+ long iterations,maxslackid,svsetnum;
886
+ long *unlabeled,*inconsistent;
887
+ double r_delta_sq=0,r_delta,r_delta_avg;
888
+ long *index,*index2dnum;
889
+ double *weights,*slack,*alphaslack;
890
+ CFLOAT *aicache; /* buffer to keep one row of hessian */
891
+
892
+ TIMING timing_profile;
893
+ SHRINK_STATE shrink_state;
894
+
895
+ runtime_start=get_runtime();
896
+ timing_profile.time_kernel=0;
897
+ timing_profile.time_opti=0;
898
+ timing_profile.time_shrink=0;
899
+ timing_profile.time_update=0;
900
+ timing_profile.time_model=0;
901
+ timing_profile.time_check=0;
902
+ timing_profile.time_select=0;
903
+ kernel_cache_statistic=0;
904
+
905
+ learn_parm->totwords=totwords;
906
+
907
+ /* make sure -n value is reasonable */
908
+ if((learn_parm->svm_newvarsinqp < 2)
909
+ || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
910
+ learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
911
+ }
912
+
913
+ init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
914
+
915
+ label = (long *)my_malloc(sizeof(long)*totdoc);
916
+ unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
917
+ inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
918
+ c = (double *)my_malloc(sizeof(double)*totdoc);
919
+ a = (double *)my_malloc(sizeof(double)*totdoc);
920
+ lin = (double *)my_malloc(sizeof(double)*totdoc);
921
+ learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
922
+ model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
923
+ model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
924
+ model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
925
+
926
+ model->at_upper_bound=0;
927
+ model->b=0;
928
+ model->supvec[0]=0; /* element 0 reserved and empty for now */
929
+ model->alpha[0]=0;
930
+ model->lin_weights=NULL;
931
+ model->totwords=totwords;
932
+ model->totdoc=totdoc;
933
+ model->kernel_parm=(*kernel_parm);
934
+ model->sv_num=1;
935
+ model->loo_error=-1;
936
+ model->loo_recall=-1;
937
+ model->loo_precision=-1;
938
+ model->xa_error=-1;
939
+ model->xa_recall=-1;
940
+ model->xa_precision=-1;
941
+
942
+ r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
943
+ r_delta_sq=r_delta*r_delta;
944
+
945
+ r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
946
+ if(learn_parm->svm_c == 0.0) { /* default value for C */
947
+ learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
948
+ if(verbosity>=1)
949
+ printf("Setting default regularization parameter C=%.4f\n",
950
+ learn_parm->svm_c);
951
+ }
952
+
953
+ learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */
954
+
955
+ learn_parm->eps=0.0; /* No margin, unless explicitly handcoded
956
+ in the right-hand side in the training
957
+ set. */
958
+
959
+ for(i=0;i<totdoc;i++) { /* various inits */
960
+ docs[i]->docnum=i;
961
+ a[i]=0;
962
+ lin[i]=0;
963
+ c[i]=rhs[i]; /* set right-hand side */
964
+ unlabeled[i]=0;
965
+ inconsistent[i]=0;
966
+ learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
967
+ docs[i]->costfactor;
968
+ label[i]=1;
969
+ }
970
+ if(learn_parm->sharedslack) /* if shared slacks are used, they must */
971
+ for(i=0;i<totdoc;i++) /* be used on every constraint */
972
+ if(!docs[i]->slackid) {
973
+ perror("Error: Missing shared slacks definitions in some of the examples.");
974
+ exit(0);
975
+ }
976
+
977
+ /* compute starting state for initial alpha values */
978
+ if(alpha) {
979
+ if(verbosity>=1) {
980
+ printf("Computing starting state..."); fflush(stdout);
981
+ }
982
+ index = (long *)my_malloc(sizeof(long)*totdoc);
983
+ index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
984
+ weights=(double *)my_malloc(sizeof(double)*(totwords+1));
985
+ aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
986
+ for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
987
+ index[i]=1;
988
+ alpha[i]=fabs(alpha[i]);
989
+ if(alpha[i]<0) alpha[i]=0;
990
+ if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
991
+ }
992
+ if(kernel_parm->kernel_type != LINEAR) {
993
+ for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
994
+ if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
995
+ && (kernel_cache_space_available(kernel_cache)))
996
+ cache_kernel_row(kernel_cache,docs,i,kernel_parm);
997
+ for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
998
+ if((alpha[i]==learn_parm->svm_cost[i])
999
+ && (kernel_cache_space_available(kernel_cache)))
1000
+ cache_kernel_row(kernel_cache,docs,i,kernel_parm);
1001
+ }
1002
+ (void)compute_index(index,totdoc,index2dnum);
1003
+ update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
1004
+ totwords,kernel_parm,kernel_cache,lin,aicache,
1005
+ weights);
1006
+ (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
1007
+ learn_parm,index2dnum,index2dnum,model);
1008
+ for(i=0;i<totdoc;i++) { /* copy initial alphas */
1009
+ a[i]=alpha[i];
1010
+ }
1011
+ free(index);
1012
+ free(index2dnum);
1013
+ free(weights);
1014
+ free(aicache);
1015
+ if(verbosity>=1) {
1016
+ printf("done.\n"); fflush(stdout);
1017
+ }
1018
+ }
1019
+
1020
+ /* removing inconsistent does not work for general optimization problem */
1021
+ if(learn_parm->remove_inconsistent) {
1022
+ learn_parm->remove_inconsistent = 0;
1023
+ printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
1024
+ }
1025
+
1026
+ /* caching makes no sense for linear kernel */
1027
+ if(kernel_parm->kernel_type == LINEAR) {
1028
+ kernel_cache = NULL;
1029
+ }
1030
+
1031
+ if(verbosity==1) {
1032
+ printf("Optimizing"); fflush(stdout);
1033
+ }
1034
+
1035
+ /* train the svm */
1036
+ if(learn_parm->sharedslack)
1037
+ iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
1038
+ totwords,learn_parm,kernel_parm,
1039
+ kernel_cache,&shrink_state,model,
1040
+ a,lin,c,&timing_profile,
1041
+ &maxdiff);
1042
+ else
1043
+ iterations=optimize_to_convergence(docs,label,totdoc,
1044
+ totwords,learn_parm,kernel_parm,
1045
+ kernel_cache,&shrink_state,model,
1046
+ inconsistent,unlabeled,
1047
+ a,lin,c,&timing_profile,
1048
+ &maxdiff,(long)-1,(long)1);
1049
+
1050
+ if(verbosity>=1) {
1051
+ if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
1052
+
1053
+ misclassified=0;
1054
+ for(i=0;(i<totdoc);i++) { /* get final statistic */
1055
+ if((lin[i]-model->b)*(double)label[i] <= 0.0)
1056
+ misclassified++;
1057
+ }
1058
+
1059
+ printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
1060
+
1061
+ runtime_end=get_runtime();
1062
+ if(verbosity>=2) {
1063
+ printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
1064
+ ((float)runtime_end-(float)runtime_start)/100.0,
1065
+ (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
1066
+ (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
1067
+ (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
1068
+ (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
1069
+ (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
1070
+ (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
1071
+ (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
1072
+ }
1073
+ else {
1074
+ printf("Runtime in cpu-seconds: %.2f\n",
1075
+ (runtime_end-runtime_start)/100.0);
1076
+ }
1077
+ }
1078
+ if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
1079
+ loss=0;
1080
+ model_length=0;
1081
+ for(i=0;i<totdoc;i++) {
1082
+ if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
1083
+ loss+=c[i]-(lin[i]-model->b)*(double)label[i];
1084
+ model_length+=a[i]*label[i]*lin[i];
1085
+ }
1086
+ model_length=sqrt(model_length);
1087
+ fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
1088
+ }
1089
+
1090
+ if(learn_parm->sharedslack) {
1091
+ index = (long *)my_malloc(sizeof(long)*totdoc);
1092
+ index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1093
+ maxslackid=0;
1094
+ for(i=0;i<totdoc;i++) { /* create full index */
1095
+ index[i]=1;
1096
+ if(maxslackid<docs[i]->slackid)
1097
+ maxslackid=docs[i]->slackid;
1098
+ }
1099
+ (void)compute_index(index,totdoc,index2dnum);
1100
+ slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1101
+ alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1102
+ for(i=0;i<=maxslackid;i++) { /* init shared slacks */
1103
+ slack[i]=0;
1104
+ alphaslack[i]=0;
1105
+ }
1106
+ compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
1107
+ slack,alphaslack);
1108
+ loss=0;
1109
+ model->at_upper_bound=0;
1110
+ svsetnum=0;
1111
+ for(i=0;i<=maxslackid;i++) { /* create full index */
1112
+ loss+=slack[i];
1113
+ if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
1114
+ model->at_upper_bound++;
1115
+ if(alphaslack[i] > learn_parm->epsilon_a)
1116
+ svsetnum++;
1117
+ }
1118
+ free(index);
1119
+ free(index2dnum);
1120
+ free(slack);
1121
+ free(alphaslack);
1122
+ }
1123
+
1124
+ if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
1125
+ if(learn_parm->sharedslack) {
1126
+ printf("Number of SV: %ld\n",
1127
+ model->sv_num-1);
1128
+ printf("Number of non-zero slack variables: %ld (out of %ld)\n",
1129
+ model->at_upper_bound,svsetnum);
1130
+ fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
1131
+ }
1132
+ else {
1133
+ upsupvecnum=0;
1134
+ for(i=1;i<model->sv_num;i++) {
1135
+ if(fabs(model->alpha[i]) >=
1136
+ (learn_parm->svm_cost[(model->supvec[i])->docnum]-
1137
+ learn_parm->epsilon_a))
1138
+ upsupvecnum++;
1139
+ }
1140
+ printf("Number of SV: %ld (including %ld at upper bound)\n",
1141
+ model->sv_num-1,upsupvecnum);
1142
+ fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
1143
+ }
1144
+ example_length=estimate_sphere(model,kernel_parm);
1145
+ fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
1146
+ length_of_longest_document_vector(docs,totdoc,kernel_parm));
1147
+ }
1148
+ if(verbosity>=1) {
1149
+ printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
1150
+ }
1151
+
1152
+ if(alpha) {
1153
+ for(i=0;i<totdoc;i++) { /* copy final alphas */
1154
+ alpha[i]=a[i];
1155
+ }
1156
+ }
1157
+
1158
+ if(learn_parm->alphafile[0])
1159
+ write_alphas(learn_parm->alphafile,a,label,totdoc);
1160
+
1161
+ shrink_state_cleanup(&shrink_state);
1162
+ free(label);
1163
+ free(unlabeled);
1164
+ free(inconsistent);
1165
+ free(c);
1166
+ free(a);
1167
+ free(lin);
1168
+ free(learn_parm->svm_cost);
1169
+ }
1170
+
1171
+
1172
+ long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
1172
+ long int totwords, LEARN_PARM *learn_parm,
1173
+ KERNEL_PARM *kernel_parm,
1174
+ KERNEL_CACHE *kernel_cache,
1175
+ SHRINK_STATE *shrink_state, MODEL *model,
1176
+ long int *inconsistent, long int *unlabeled,
1177
+ double *a, double *lin, double *c,
1178
+ TIMING *timing_profile, double *maxdiff,
1179
+ long int heldout, long int retrain)
1180
+ /* Decomposition loop: each pass picks a working set, optimizes it
     (the optimize_svm() call is skipped when retrain==2), refreshes lin
     and the model, and calls check_optimality(). The loop repeats while
     retrain is non-zero and the no-progress watchdog has not fired.
     Returns the iteration counter; also stores *maxdiff in
     model->maxdiff. learn_parm->totwords and learn_parm->epsilon_shrink
     are overwritten; learn_parm->epsilon_crit is changed during the run
     and reset from epsilon_crit_org before returning.

     docs: Training vectors (x-part) */
1181
+ /* label: Training labels/value (y-part, zero if test example for
1182
+ transduction) */
1183
+ /* totdoc: Number of examples in docs/label */
1184
+ /* totwords: Number of features (i.e. highest feature index) */
1185
+ /* learn_parm: Learning parameters */
1186
+ /* kernel_parm: Kernel parameters */
1187
+ /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
1188
+ NULL if linear. */
1189
+ /* shrink_state: State of active variables */
1190
+ /* model: Returns learning result */
1191
+ /* inconsistent: examples thrown out as inconsistent */
1192
+ /* unlabeled: test examples for transduction */
1193
+ /* a: alphas */
1194
+ /* lin: linear component of gradient */
1195
+ /* c: right hand side of inequalities (margin) */
1196
+ /* maxdiff: returns maximum violation of KT-conditions */
1197
+ /* heldout: marks held-out example for leave-one-out (or -1) */
1198
+ /* retrain: selects training mode (1=regular / 2=holdout) */
1199
+ {
1200
+ long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
1201
+ long inconsistentnum,choosenum,already_chosen=0,iteration;
1202
+ long misclassified,supvecnum=0,*active2dnum,inactivenum;
1203
+ long *working2dnum,*selexam;
1204
+ long activenum;
1205
+ double criterion,eq;
1206
+ double *a_old;
1207
+ long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
1208
+ long transductcycle;
1209
+ long transduction;
1210
+ double epsilon_crit_org;
1211
+ double bestmaxdiff;
1212
+ long bestmaxdiffiter,terminate;
1213
+
1214
+ double *selcrit; /* buffer for sorting */
1215
+ CFLOAT *aicache; /* buffer to keep one row of hessian */
1216
+ double *weights; /* buffer for weight vector in linear case */
1217
+ QP qp; /* buffer for one quadratic program */
1218
+
1219
+ epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
1220
+ if(kernel_parm->kernel_type == LINEAR) {
1221
+ learn_parm->epsilon_crit=2.0; /* loose start for the linear kernel; halved toward epsilon_crit_org near the end of the loop body */
1222
+ kernel_cache=NULL; /* caching makes no sense for linear kernel */
1223
+ }
1224
+ learn_parm->epsilon_shrink=2;
1225
+ (*maxdiff)=1;
1226
+
1227
+ learn_parm->totwords=totwords;
1228
+
1229
+ chosen = (long *)my_malloc(sizeof(long)*totdoc);
1230
+ last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
1231
+ key = (long *)my_malloc(sizeof(long)*(totdoc+11));
1232
+ selcrit = (double *)my_malloc(sizeof(double)*totdoc);
1233
+ selexam = (long *)my_malloc(sizeof(long)*totdoc);
1234
+ a_old = (double *)my_malloc(sizeof(double)*totdoc);
1235
+ aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
1236
+ working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1237
+ active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1238
+ qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1239
+ qp.opt_ce0 = (double *)my_malloc(sizeof(double));
1240
+ qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
1241
+ *learn_parm->svm_maxqpsize);
1242
+ qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1243
+ qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1244
+ qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1245
+ qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1246
+ weights=(double *)my_malloc(sizeof(double)*(totwords+1));
1247
+
1248
+ choosenum=0;
1249
+ inconsistentnum=0;
1250
+ transductcycle=0;
1251
+ transduction=0;
1252
+ if(!retrain) retrain=1; /* the loop condition tests retrain, so 0 would skip the loop entirely */
1253
+ iteration=1;
1254
+ bestmaxdiffiter=1;
1255
+ bestmaxdiff=999999999; /* large sentinel so the first smaller *maxdiff becomes the best */
1256
+ terminate=0;
1257
+
1258
+ if(kernel_cache) {
1259
+ kernel_cache->time=iteration; /* for lru cache */
1260
+ kernel_cache_reset_lru(kernel_cache);
1261
+ }
1262
+
1263
+ for(i=0;i<totdoc;i++) { /* various inits */
1264
+ chosen[i]=0;
1265
+ a_old[i]=a[i];
1266
+ last_suboptimal_at[i]=1;
1267
+ if(inconsistent[i])
1268
+ inconsistentnum++;
1269
+ if(unlabeled[i]) {
1270
+ transduction=1; /* any unlabeled example enables the transduction branch below */
1271
+ }
1272
+ }
1273
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1274
+ inactivenum=totdoc-activenum;
1275
+ clear_index(working2dnum);
1276
+
1277
+ /* repeat this loop until we have convergence */
1278
+ for(;retrain && (!terminate);iteration++) {
1279
+
1280
+ if(kernel_cache)
1281
+ kernel_cache->time=iteration; /* for lru cache */
1282
+ if(verbosity>=2) {
1283
+ printf(
1284
+ "Iteration %ld: ",iteration); fflush(stdout);
1285
+ }
1286
+ else if(verbosity==1) {
1287
+ printf("."); fflush(stdout);
1288
+ }
1289
+
1290
+ if(verbosity>=2) t0=get_runtime();
1291
+ if(verbosity>=3) {
1292
+ printf("\nSelecting working set... "); fflush(stdout);
1293
+ }
1294
+
1295
+ if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
1296
+ learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
1297
+
1298
+ i=0;
1299
+ for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
1300
+ if((chosen[j]>=(learn_parm->svm_maxqpsize/
1301
+ minl(learn_parm->svm_maxqpsize,
1302
+ learn_parm->svm_newvarsinqp)))
1303
+ || (inconsistent[j])
1304
+ || (j == heldout)) {
1305
+ chosen[j]=0;
1306
+ choosenum--;
1307
+ }
1308
+ else {
1309
+ chosen[j]++; /* chosen[] counts how many passes j has stayed in the working set */
1310
+ working2dnum[i++]=j; /* compact the surviving entries in place */
1311
+ }
1312
+ }
1313
+ working2dnum[i]=-1; /* negative entry terminates the index list (see loop condition above) */
1314
+
1315
+ if(retrain == 2) {
1316
+ choosenum=0;
1317
+ for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
1318
+ chosen[j]=0;
1319
+ }
1320
+ clear_index(working2dnum);
1321
+ for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
1322
+ if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
1323
+ chosen[i]=99999; /* non-zero marker; the working set is rebuilt from chosen[] by compute_index() below */
1324
+ choosenum++;
1325
+ a[i]=0;
1326
+ }
1327
+ }
1328
+ if(learn_parm->biased_hyperplane) {
1329
+ eq=0;
1330
+ for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
1331
+ eq+=a[i]*label[i];
1332
+ }
1333
+ for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
1334
+ if((eq*label[i] > 0) && (a[i] > 0)) {
1335
+ chosen[i]=88888; /* non-zero marker, as above */
1336
+ choosenum++;
1337
+ if((eq*label[i]) > a[i]) {
1338
+ eq-=(a[i]*label[i]);
1339
+ a[i]=0;
1340
+ }
1341
+ else {
1342
+ a[i]-=(eq*label[i]);
1343
+ eq=0;
1344
+ }
1345
+ }
1346
+ }
1347
+ }
1348
+ compute_index(chosen,totdoc,working2dnum);
1349
+ }
1350
+ else { /* select working set according to steepest gradient */
1351
+ if(iteration % 101) { /* every 101st iteration falls through to the random selection in the else branch */
1352
+ already_chosen=0;
1353
+ if((minl(learn_parm->svm_newvarsinqp,
1354
+ learn_parm->svm_maxqpsize-choosenum)>=4)
1355
+ && (kernel_parm->kernel_type != LINEAR)) {
1356
+ /* select part of the working set from cache */
1357
+ already_chosen=select_next_qp_subproblem_grad(
1358
+ label,unlabeled,a,lin,c,totdoc,
1359
+ (long)(minl(learn_parm->svm_maxqpsize-choosenum,
1360
+ learn_parm->svm_newvarsinqp)
1361
+ /2),
1362
+ learn_parm,inconsistent,active2dnum,
1363
+ working2dnum,selcrit,selexam,kernel_cache,1,
1364
+ key,chosen);
1365
+ choosenum+=already_chosen;
1366
+ }
1367
+ choosenum+=select_next_qp_subproblem_grad(
1368
+ label,unlabeled,a,lin,c,totdoc,
1369
+ minl(learn_parm->svm_maxqpsize-choosenum,
1370
+ learn_parm->svm_newvarsinqp-already_chosen),
1371
+ learn_parm,inconsistent,active2dnum,
1372
+ working2dnum,selcrit,selexam,kernel_cache,0,key,
1373
+ chosen);
1374
+ }
1375
+ else { /* once in a while, select a somewhat random working set
1376
+ to get unlocked of infinite loops due to numerical
1377
+ inaccuracies in the core qp-solver */
1378
+ choosenum+=select_next_qp_subproblem_rand(
1379
+ label,unlabeled,a,lin,c,totdoc,
1380
+ minl(learn_parm->svm_maxqpsize-choosenum,
1381
+ learn_parm->svm_newvarsinqp),
1382
+ learn_parm,inconsistent,active2dnum,
1383
+ working2dnum,selcrit,selexam,kernel_cache,key,
1384
+ chosen,iteration);
1385
+ }
1386
+ }
1387
+
1388
+ if(verbosity>=2) {
1389
+ printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
1390
+ }
1391
+
1392
+ if(verbosity>=2) t1=get_runtime();
1393
+
1394
+ if(kernel_cache)
1395
+ cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
1396
+ choosenum,kernel_parm);
1397
+
1398
+ if(verbosity>=2) t2=get_runtime();
1399
+ if(retrain != 2) { /* in mode 2 the alphas were already adjusted above, so no QP is solved on this pass */
1400
+ optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
1401
+ model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
1402
+ aicache,kernel_parm,&qp,&epsilon_crit_org);
1403
+ }
1404
+
1405
+ if(verbosity>=2) t3=get_runtime();
1406
+ update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
1407
+ totwords,kernel_parm,kernel_cache,lin,aicache,
1408
+ weights);
1409
+
1410
+ if(verbosity>=2) t4=get_runtime();
1411
+ supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
1412
+ learn_parm,working2dnum,active2dnum,model);
1413
+
1414
+ if(verbosity>=2) t5=get_runtime();
1415
+
1416
+ /* The following computation of the objective function works only */
1417
+ /* relative to the active variables */
1418
+ if(verbosity>=3) {
1419
+ criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
1420
+ active2dnum);
1421
+ printf("Objective function (over active variables): %.16f\n",criterion);
1422
+ fflush(stdout);
1423
+ }
1424
+
1425
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) {
1426
+ a_old[i]=a[i]; /* only working-set alphas are synced into a_old here */
1427
+ }
1428
+
1429
+ if(retrain == 2) { /* reset inconsistent unlabeled examples */
1430
+ for(i=0;(i<totdoc);i++) {
1431
+ if(inconsistent[i] && unlabeled[i]) {
1432
+ inconsistent[i]=0;
1433
+ label[i]=0;
1434
+ }
1435
+ }
1436
+ }
1437
+
1438
+ retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
1439
+ maxdiff,epsilon_crit_org,&misclassified,
1440
+ inconsistent,active2dnum,last_suboptimal_at,
1441
+ iteration,kernel_parm);
1442
+
1443
+ if(verbosity>=2) {
1444
+ t6=get_runtime();
1445
+ timing_profile->time_select+=t1-t0;
1446
+ timing_profile->time_kernel+=t2-t1;
1447
+ timing_profile->time_opti+=t3-t2;
1448
+ timing_profile->time_update+=t4-t3;
1449
+ timing_profile->time_model+=t5-t4;
1450
+ timing_profile->time_check+=t6-t5;
1451
+ }
1452
+
1453
+ /* checking whether optimizer got stuck */
1454
+ if((*maxdiff) < bestmaxdiff) {
1455
+ bestmaxdiff=(*maxdiff);
1456
+ bestmaxdiffiter=iteration;
1457
+ }
1458
+ if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
1459
+ /* long time no progress? */
1460
+ terminate=1;
1461
+ retrain=0;
1462
+ if(verbosity>=1)
1463
+ printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
1464
+ }
1465
+
1466
+ noshrink=0; /* set to 1 below to skip shrink_problem() on this pass */
1467
+ if((!retrain) && (inactivenum>0)
1468
+ && ((!learn_parm->skip_final_opt_check)
1469
+ || (kernel_parm->kernel_type == LINEAR))) {
1470
+ if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1471
+ || (verbosity>=2)) {
1472
+ if(verbosity==1) {
1473
+ printf("\n");
1474
+ }
1475
+ printf(" Checking optimality of inactive variables...");
1476
+ fflush(stdout);
1477
+ }
1478
+ t1=get_runtime(); /* t1 is reused here; the select/kernel timings were already accumulated above */
1479
+ reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
1480
+ totwords,iteration,learn_parm,inconsistent,
1481
+ docs,kernel_parm,kernel_cache,model,aicache,
1482
+ weights,maxdiff);
1483
+ /* Update to new active variables. */
1484
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1485
+ inactivenum=totdoc-activenum;
1486
+ /* reset watchdog */
1487
+ bestmaxdiff=(*maxdiff);
1488
+ bestmaxdiffiter=iteration;
1489
+ /* termination criterion */
1490
+ noshrink=1;
1491
+ retrain=0;
1492
+ if((*maxdiff) > learn_parm->epsilon_crit)
1493
+ retrain=1;
1494
+ timing_profile->time_shrink+=get_runtime()-t1;
1495
+ if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1496
+ || (verbosity>=2)) {
1497
+ printf("done.\n"); fflush(stdout);
1498
+ printf(" Number of inactive variables = %ld\n",inactivenum);
1499
+ }
1500
+ }
1501
+
1502
+ if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
1503
+ learn_parm->epsilon_crit=(*maxdiff);
1504
+ if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
1505
+ learn_parm->epsilon_crit/=2.0; /* tighten the criterion and keep training until it reaches epsilon_crit_org */
1506
+ retrain=1;
1507
+ noshrink=1;
1508
+ }
1509
+ if(learn_parm->epsilon_crit<epsilon_crit_org)
1510
+ learn_parm->epsilon_crit=epsilon_crit_org; /* never go below the saved criterion */
1511
+
1512
+ if(verbosity>=2) {
1513
+ printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
1514
+ supvecnum,model->at_upper_bound,(*maxdiff));
1515
+ fflush(stdout);
1516
+ }
1517
+ if(verbosity>=3) {
1518
+ printf("\n");
1519
+ }
1520
+
1521
+ if((!retrain) && (transduction)) {
1522
+ for(i=0;(i<totdoc);i++) {
1523
+ shrink_state->active[i]=1;
1524
+ }
1525
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1526
+ inactivenum=0;
1527
+ if(verbosity==1) printf("done\n");
1528
+ retrain=incorporate_unlabeled_examples(model,label,inconsistent,
1529
+ unlabeled,a,lin,totdoc,
1530
+ selcrit,selexam,key,
1531
+ transductcycle,kernel_parm,
1532
+ learn_parm);
1533
+ epsilon_crit_org=learn_parm->epsilon_crit; /* re-save: this value is what gets restored at function exit */
1534
+ if(kernel_parm->kernel_type == LINEAR)
1535
+ learn_parm->epsilon_crit=1;
1536
+ transductcycle++;
1537
+ /* reset watchdog */
1538
+ bestmaxdiff=(*maxdiff);
1539
+ bestmaxdiffiter=iteration;
1540
+ }
1541
+ else if(((iteration % 10) == 0) && (!noshrink)) { /* attempt shrinking only on every 10th iteration */
1542
+ activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
1543
+ active2dnum,last_suboptimal_at,iteration,totdoc,
1544
+ maxl((long)(activenum/10),
1545
+ maxl((long)(totdoc/500),100)),
1546
+ a,inconsistent);
1547
+ inactivenum=totdoc-activenum;
1548
+ if((kernel_cache)
1549
+ && (supvecnum>kernel_cache->max_elems)
1550
+ && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
1551
+ kernel_cache_shrink(kernel_cache,totdoc,
1552
+ minl((kernel_cache->activenum-activenum),
1553
+ (kernel_cache->activenum-supvecnum)),
1554
+ shrink_state->active);
1555
+ }
1556
+ }
1557
+
1558
+ if((!retrain) && learn_parm->remove_inconsistent) {
1559
+ if(verbosity>=1) {
1560
+ printf(" Moving training errors to inconsistent examples...");
1561
+ fflush(stdout);
1562
+ }
1563
+ if(learn_parm->remove_inconsistent == 1) {
1564
+ retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
1565
+ &inconsistentnum,inconsistent);
1566
+ }
1567
+ else if(learn_parm->remove_inconsistent == 2) {
1568
+ retrain=identify_misclassified(lin,label,unlabeled,totdoc,
1569
+ model,&inconsistentnum,inconsistent);
1570
+ }
1571
+ else if(learn_parm->remove_inconsistent == 3) {
1572
+ retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
1573
+ model,&inconsistentnum,inconsistent);
1574
+ }
1575
+ if(retrain) {
1576
+ if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
1577
+ learn_parm->epsilon_crit=2.0;
1578
+ }
1579
+ }
1580
+ if(verbosity>=1) {
1581
+ printf("done.\n");
1582
+ if(retrain) {
1583
+ printf(" Now %ld inconsistent examples.\n",inconsistentnum);
1584
+ }
1585
+ }
1586
+ }
1587
+ } /* end of loop */
1588
+
1589
+ free(chosen);
1590
+ free(last_suboptimal_at);
1591
+ free(key);
1592
+ free(selcrit);
1593
+ free(selexam);
1594
+ free(a_old);
1595
+ free(aicache);
1596
+ free(working2dnum);
1597
+ free(active2dnum);
1598
+ free(qp.opt_ce);
1599
+ free(qp.opt_ce0);
1600
+ free(qp.opt_g);
1601
+ free(qp.opt_g0);
1602
+ free(qp.opt_xinit);
1603
+ free(qp.opt_low);
1604
+ free(qp.opt_up);
1605
+ free(weights);
1606
+
1607
+ learn_parm->epsilon_crit=epsilon_crit_org; /* restore org (or the value re-saved in the transduction branch) */
1608
+ model->maxdiff=(*maxdiff);
1609
+
1610
+ return(iteration); /* the for-loop increment has already run once after the final pass */
1611
+ }
1613
+
1614
+ long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
1615
+ long int totdoc,
1616
+ long int totwords, LEARN_PARM *learn_parm,
1617
+ KERNEL_PARM *kernel_parm,
1618
+ KERNEL_CACHE *kernel_cache,
1619
+ SHRINK_STATE *shrink_state, MODEL *model,
1620
+ double *a, double *lin, double *c,
1621
+ TIMING *timing_profile, double *maxdiff)
1622
+ /* docs: Training vectors (x-part) */
1623
+ /* label: Training labels/value (y-part, zero if test example for
1624
+ transduction) */
1625
+ /* totdoc: Number of examples in docs/label */
1626
+ /* totwords: Number of features (i.e. highest feature index) */
1627
+ /* learn_parm: Learning paramenters */
1628
+ /* kernel_parm: Kernel paramenters */
1629
+ /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
1630
+ NULL if linear. */
1631
+ /* shrink_state: State of active variables */
1632
+ /* model: Returns learning result */
1633
+ /* a: alphas */
1634
+ /* lin: linear component of gradient */
1635
+ /* c: right hand side of inequalities (margin) */
1636
+ /* maxdiff: returns maximum violation of KT-conditions */
1637
+ {
1638
+ long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
1639
+ long *inconsistent,choosenum,already_chosen=0,iteration;
1640
+ long misclassified,supvecnum=0,*active2dnum,inactivenum;
1641
+ long *working2dnum,*selexam,*ignore;
1642
+ long activenum,retrain,maxslackid,slackset,jointstep;
1643
+ double criterion,eq_target;
1644
+ double *a_old,*alphaslack;
1645
+ long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
1646
+ double epsilon_crit_org,maxsharedviol;
1647
+ double bestmaxdiff;
1648
+ long bestmaxdiffiter,terminate;
1649
+
1650
+ double *selcrit; /* buffer for sorting */
1651
+ CFLOAT *aicache; /* buffer to keep one row of hessian */
1652
+ double *weights; /* buffer for weight vector in linear case */
1653
+ QP qp; /* buffer for one quadratic program */
1654
+ double *slack; /* vector of slack variables for optimization with
1655
+ shared slacks */
1656
+
1657
+ epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
1658
+ if(kernel_parm->kernel_type == LINEAR) {
1659
+ learn_parm->epsilon_crit=2.0;
1660
+ kernel_cache=NULL; /* caching makes no sense for linear kernel */
1661
+ }
1662
+ learn_parm->epsilon_shrink=2;
1663
+ (*maxdiff)=1;
1664
+
1665
+ learn_parm->totwords=totwords;
1666
+
1667
+ chosen = (long *)my_malloc(sizeof(long)*totdoc);
1668
+ unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
1669
+ inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
1670
+ ignore = (long *)my_malloc(sizeof(long)*totdoc);
1671
+ key = (long *)my_malloc(sizeof(long)*(totdoc+11));
1672
+ selcrit = (double *)my_malloc(sizeof(double)*totdoc);
1673
+ selexam = (long *)my_malloc(sizeof(long)*totdoc);
1674
+ a_old = (double *)my_malloc(sizeof(double)*totdoc);
1675
+ aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
1676
+ working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1677
+ active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1678
+ qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1679
+ qp.opt_ce0 = (double *)my_malloc(sizeof(double));
1680
+ qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
1681
+ *learn_parm->svm_maxqpsize);
1682
+ qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1683
+ qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1684
+ qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1685
+ qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1686
+ weights=(double *)my_malloc(sizeof(double)*(totwords+1));
1687
+ maxslackid=0;
1688
+ for(i=0;i<totdoc;i++) { /* determine size of slack array */
1689
+ if(maxslackid<docs[i]->slackid)
1690
+ maxslackid=docs[i]->slackid;
1691
+ }
1692
+ slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1693
+ alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1694
+ last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
1695
+ for(i=0;i<=maxslackid;i++) { /* init shared slacks */
1696
+ slack[i]=0;
1697
+ alphaslack[i]=0;
1698
+ last_suboptimal_at[i]=1;
1699
+ }
1700
+
1701
+ choosenum=0;
1702
+ retrain=1;
1703
+ iteration=1;
1704
+ bestmaxdiffiter=1;
1705
+ bestmaxdiff=999999999;
1706
+ terminate=0;
1707
+
1708
+ if(kernel_cache) {
1709
+ kernel_cache->time=iteration; /* for lru cache */
1710
+ kernel_cache_reset_lru(kernel_cache);
1711
+ }
1712
+
1713
+ for(i=0;i<totdoc;i++) { /* various inits */
1714
+ chosen[i]=0;
1715
+ unlabeled[i]=0;
1716
+ inconsistent[i]=0;
1717
+ ignore[i]=0;
1718
+ a_old[i]=a[i];
1719
+ }
1720
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1721
+ inactivenum=totdoc-activenum;
1722
+ clear_index(working2dnum);
1723
+
1724
+ /* call to init slack and alphaslack */
1725
+ compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
1726
+ slack,alphaslack);
1727
+
1728
+ /* repeat this loop until we have convergence */
1729
+ for(;retrain && (!terminate);iteration++) {
1730
+
1731
+ if(kernel_cache)
1732
+ kernel_cache->time=iteration; /* for lru cache */
1733
+ if(verbosity>=2) {
1734
+ printf(
1735
+ "Iteration %ld: ",iteration); fflush(stdout);
1736
+ }
1737
+ else if(verbosity==1) {
1738
+ printf("."); fflush(stdout);
1739
+ }
1740
+
1741
+ if(verbosity>=2) t0=get_runtime();
1742
+ if(verbosity>=3) {
1743
+ printf("\nSelecting working set... "); fflush(stdout);
1744
+ }
1745
+
1746
+ if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
1747
+ learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
1748
+
1749
+ /* select working set according to steepest gradient */
1750
+ jointstep=0;
1751
+ eq_target=0;
1752
+ if(iteration % 101) {
1753
+ slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
1754
+ learn_parm,active2dnum,&maxsharedviol);
1755
+ if((iteration % 2)
1756
+ || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
1757
+ /* do a step with examples from different slack sets */
1758
+ if(verbosity >= 2) {
1759
+ printf("(i-step)"); fflush(stdout);
1760
+ }
1761
+ i=0;
1762
+ for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
1763
+ if((chosen[j]>=(learn_parm->svm_maxqpsize/
1764
+ minl(learn_parm->svm_maxqpsize,
1765
+ learn_parm->svm_newvarsinqp)))) {
1766
+ chosen[j]=0;
1767
+ choosenum--;
1768
+ }
1769
+ else {
1770
+ chosen[j]++;
1771
+ working2dnum[i++]=j;
1772
+ }
1773
+ }
1774
+ working2dnum[i]=-1;
1775
+
1776
+ already_chosen=0;
1777
+ if((minl(learn_parm->svm_newvarsinqp,
1778
+ learn_parm->svm_maxqpsize-choosenum)>=4)
1779
+ && (kernel_parm->kernel_type != LINEAR)) {
1780
+ /* select part of the working set from cache */
1781
+ already_chosen=select_next_qp_subproblem_grad(
1782
+ label,unlabeled,a,lin,c,totdoc,
1783
+ (long)(minl(learn_parm->svm_maxqpsize-choosenum,
1784
+ learn_parm->svm_newvarsinqp)
1785
+ /2),
1786
+ learn_parm,inconsistent,active2dnum,
1787
+ working2dnum,selcrit,selexam,kernel_cache,
1788
+ (long)1,key,chosen);
1789
+ choosenum+=already_chosen;
1790
+ }
1791
+ choosenum+=select_next_qp_subproblem_grad(
1792
+ label,unlabeled,a,lin,c,totdoc,
1793
+ minl(learn_parm->svm_maxqpsize-choosenum,
1794
+ learn_parm->svm_newvarsinqp-already_chosen),
1795
+ learn_parm,inconsistent,active2dnum,
1796
+ working2dnum,selcrit,selexam,kernel_cache,
1797
+ (long)0,key,chosen);
1798
+ }
1799
+ else { /* do a step with all examples from same slack set */
1800
+ if(verbosity >= 2) {
1801
+ printf("(j-step on %ld)",slackset); fflush(stdout);
1802
+ }
1803
+ jointstep=1;
1804
+ for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
1805
+ chosen[j]=0;
1806
+ }
1807
+ working2dnum[0]=-1;
1808
+ eq_target=alphaslack[slackset];
1809
+ for(j=0;j<totdoc;j++) { /* mask all but slackset */
1810
+ /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
1811
+ if(docs[j]->slackid != slackset)
1812
+ ignore[j]=1;
1813
+ else {
1814
+ ignore[j]=0;
1815
+ learn_parm->svm_cost[j]=learn_parm->svm_c;
1816
+ /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
1817
+ }
1818
+ }
1819
+ learn_parm->biased_hyperplane=1;
1820
+ choosenum=select_next_qp_subproblem_grad(
1821
+ label,unlabeled,a,lin,c,totdoc,
1822
+ learn_parm->svm_maxqpsize,
1823
+ learn_parm,ignore,active2dnum,
1824
+ working2dnum,selcrit,selexam,kernel_cache,
1825
+ (long)0,key,chosen);
1826
+ learn_parm->biased_hyperplane=0;
1827
+ }
1828
+ }
1829
+ else { /* once in a while, select a somewhat random working set
1830
+ to get unlocked of infinite loops due to numerical
1831
+ inaccuracies in the core qp-solver */
1832
+ choosenum+=select_next_qp_subproblem_rand(
1833
+ label,unlabeled,a,lin,c,totdoc,
1834
+ minl(learn_parm->svm_maxqpsize-choosenum,
1835
+ learn_parm->svm_newvarsinqp),
1836
+ learn_parm,inconsistent,active2dnum,
1837
+ working2dnum,selcrit,selexam,kernel_cache,key,
1838
+ chosen,iteration);
1839
+ }
1840
+
1841
+ if(verbosity>=2) {
1842
+ printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
1843
+ }
1844
+
1845
+ if(verbosity>=2) t1=get_runtime();
1846
+
1847
+ if(kernel_cache)
1848
+ cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
1849
+ choosenum,kernel_parm);
1850
+
1851
+ if(verbosity>=2) t2=get_runtime();
1852
+ if(jointstep) learn_parm->biased_hyperplane=1;
1853
+ optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
1854
+ model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
1855
+ aicache,kernel_parm,&qp,&epsilon_crit_org);
1856
+ learn_parm->biased_hyperplane=0;
1857
+
1858
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */
1859
+ alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
1860
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
1861
+ constraints */
1862
+ if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
1863
+ if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
1864
+ alphaslack[docs[i]->slackid]-=a[i];
1865
+ a[i]=0;
1866
+ }
1867
+ else {
1868
+ a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
1869
+ alphaslack[docs[i]->slackid]=learn_parm->svm_c;
1870
+ }
1871
+ }
1872
+ }
1873
+ for(jj=0;(i=active2dnum[jj])>=0;jj++)
1874
+ learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
1875
+ -alphaslack[docs[i]->slackid]);
1876
+
1877
+ if(verbosity>=2) t3=get_runtime();
1878
+ update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
1879
+ totwords,kernel_parm,kernel_cache,lin,aicache,
1880
+ weights);
1881
+ compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
1882
+ slack,alphaslack);
1883
+
1884
+ if(verbosity>=2) t4=get_runtime();
1885
+ supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
1886
+ learn_parm,working2dnum,active2dnum,model);
1887
+
1888
+ if(verbosity>=2) t5=get_runtime();
1889
+
1890
+ /* The following computation of the objective function works only */
1891
+ /* relative to the active variables */
1892
+ if(verbosity>=3) {
1893
+ criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
1894
+ active2dnum);
1895
+ printf("Objective function (over active variables): %.16f\n",criterion);
1896
+ fflush(stdout);
1897
+ }
1898
+
1899
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) {
1900
+ a_old[i]=a[i];
1901
+ }
1902
+
1903
+ retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
1904
+ slack,alphaslack,totdoc,learn_parm,
1905
+ maxdiff,epsilon_crit_org,&misclassified,
1906
+ active2dnum,last_suboptimal_at,
1907
+ iteration,kernel_parm);
1908
+
1909
+ if(verbosity>=2) {
1910
+ t6=get_runtime();
1911
+ timing_profile->time_select+=t1-t0;
1912
+ timing_profile->time_kernel+=t2-t1;
1913
+ timing_profile->time_opti+=t3-t2;
1914
+ timing_profile->time_update+=t4-t3;
1915
+ timing_profile->time_model+=t5-t4;
1916
+ timing_profile->time_check+=t6-t5;
1917
+ }
1918
+
1919
+ /* checking whether optimizer got stuck */
1920
+ if((*maxdiff) < bestmaxdiff) {
1921
+ bestmaxdiff=(*maxdiff);
1922
+ bestmaxdiffiter=iteration;
1923
+ }
1924
+ if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
1925
+ /* long time no progress? */
1926
+ terminate=1;
1927
+ retrain=0;
1928
+ if(verbosity>=1)
1929
+ printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
1930
+ }
1931
+
1932
+ noshrink=0;
1933
+
1934
+ if((!retrain) && (inactivenum>0)
1935
+ && ((!learn_parm->skip_final_opt_check)
1936
+ || (kernel_parm->kernel_type == LINEAR))) {
1937
+ if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1938
+ || (verbosity>=2)) {
1939
+ if(verbosity==1) {
1940
+ printf("\n");
1941
+ }
1942
+ printf(" Checking optimality of inactive variables...");
1943
+ fflush(stdout);
1944
+ }
1945
+ t1=get_runtime();
1946
+ reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
1947
+ totwords,iteration,learn_parm,inconsistent,
1948
+ docs,kernel_parm,kernel_cache,model,aicache,
1949
+ weights,maxdiff);
1950
+ /* Update to new active variables. */
1951
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1952
+ inactivenum=totdoc-activenum;
1953
+ /* check optimality, since check in reactivate does not work for
1954
+ sharedslacks */
1955
+ retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
1956
+ slack,alphaslack,totdoc,learn_parm,
1957
+ maxdiff,epsilon_crit_org,&misclassified,
1958
+ active2dnum,last_suboptimal_at,
1959
+ iteration,kernel_parm);
1960
+
1961
+ /* reset watchdog */
1962
+ bestmaxdiff=(*maxdiff);
1963
+ bestmaxdiffiter=iteration;
1964
+ /* termination criterion */
1965
+ noshrink=1;
1966
+ retrain=0;
1967
+ if((*maxdiff) > learn_parm->epsilon_crit)
1968
+ retrain=1;
1969
+ timing_profile->time_shrink+=get_runtime()-t1;
1970
+ if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1971
+ || (verbosity>=2)) {
1972
+ printf("done.\n"); fflush(stdout);
1973
+ printf(" Number of inactive variables = %ld\n",inactivenum);
1974
+ }
1975
+ }
1976
+
1977
+ if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
1978
+ learn_parm->epsilon_crit=(*maxdiff);
1979
+ if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
1980
+ learn_parm->epsilon_crit/=2.0;
1981
+ retrain=1;
1982
+ noshrink=1;
1983
+ }
1984
+ if(learn_parm->epsilon_crit<epsilon_crit_org)
1985
+ learn_parm->epsilon_crit=epsilon_crit_org;
1986
+
1987
+ if(verbosity>=2) {
1988
+ printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
1989
+ supvecnum,model->at_upper_bound,(*maxdiff));
1990
+ fflush(stdout);
1991
+ }
1992
+ if(verbosity>=3) {
1993
+ printf("\n");
1994
+ }
1995
+
1996
+ if(((iteration % 10) == 0) && (!noshrink)) {
1997
+ activenum=shrink_problem(docs,learn_parm,shrink_state,
1998
+ kernel_parm,active2dnum,
1999
+ last_suboptimal_at,iteration,totdoc,
2000
+ maxl((long)(activenum/10),
2001
+ maxl((long)(totdoc/500),100)),
2002
+ a,inconsistent);
2003
+ inactivenum=totdoc-activenum;
2004
+ if((kernel_cache)
2005
+ && (supvecnum>kernel_cache->max_elems)
2006
+ && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
2007
+ kernel_cache_shrink(kernel_cache,totdoc,
2008
+ minl((kernel_cache->activenum-activenum),
2009
+ (kernel_cache->activenum-supvecnum)),
2010
+ shrink_state->active);
2011
+ }
2012
+ }
2013
+
2014
+ } /* end of loop */
2015
+
2016
+
2017
+ free(alphaslack);
2018
+ free(slack);
2019
+ free(chosen);
2020
+ free(unlabeled);
2021
+ free(inconsistent);
2022
+ free(ignore);
2023
+ free(last_suboptimal_at);
2024
+ free(key);
2025
+ free(selcrit);
2026
+ free(selexam);
2027
+ free(a_old);
2028
+ free(aicache);
2029
+ free(working2dnum);
2030
+ free(active2dnum);
2031
+ free(qp.opt_ce);
2032
+ free(qp.opt_ce0);
2033
+ free(qp.opt_g);
2034
+ free(qp.opt_g0);
2035
+ free(qp.opt_xinit);
2036
+ free(qp.opt_low);
2037
+ free(qp.opt_up);
2038
+ free(weights);
2039
+
2040
+ learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
2041
+ model->maxdiff=(*maxdiff);
2042
+
2043
+ return(iteration);
2044
+ }
2045
+
2046
+
2047
/* Evaluates the objective function over the active variables only.

   a:           current alpha values, indexed by example
   lin:         linear component per example
   c:           per-example constant entering the linear term
   eps:         constant added to every linear coefficient
   label:       label per example
   active2dnum: list of active example indices, terminated by a
                negative entry

   Returns the sum over all listed examples of
     (eps - label*c)*a + 0.5*a*label*lin.
   Examples not listed in active2dnum do not contribute. */
double compute_objective_function(double *a, double *lin, double *c,
                                  double eps, long int *label,
                                  long int *active2dnum)
{
  long pos, doc;
  double objective = 0;

  for (pos = 0; (doc = active2dnum[pos]) >= 0; pos++) {
    /* keep this as one left-to-right sum so rounding stays unchanged */
    objective = objective
                + (eps - (double)label[doc] * c[doc]) * a[doc]
                + 0.5 * a[doc] * label[doc] * lin[doc];
  }

  return objective;
}
2063
+
2064
/* Empties an index list by writing the -1 terminator into its first
   slot. Entries after the first slot are left untouched. */
void clear_index(long int *index)
{
  *index = -1;
}
2069
+
2070
/* Appends elem to a -1 terminated index list.

   The list is scanned for its -1 terminator, elem is stored in that
   slot and a new -1 terminator is written into the following slot.
   The caller must provide room for the extra entry. */
void add_to_index(long int *index, long int elem)
{
  register long end = 0;

  while (index[end] != -1) {
    end++;
  }
  index[end] = elem;
  index[end + 1] = -1;
}
2078
+
2079
/* Builds the list of positions at which binfeature is non-zero.

   binfeature: flag array with `range` entries
   range:      number of entries of binfeature to inspect
   index:      output list; receives the positions of all non-zero
               flags in ascending order, followed by four -1 entries

   Returns the number of positions stored (terminators not counted).
   index must have room for that many entries plus four. */
long compute_index(long int *binfeature, long int range, long int *index)
{
  register long pos, pad;
  register long count = 0;

  for (pos = 0; pos < range; pos++) {
    if (!binfeature[pos]) {
      continue;
    }
    index[count++] = pos;
  }

  /* four terminators are written, not just one */
  for (pad = 0; pad < 4; pad++) {
    index[count + pad] = -1;
  }

  return count;
}
2096
+
2097
+
2098
+ void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
2099
+ long int *exclude_from_eq_const, double eq_target,
2100
+ long int *chosen, long int *active2dnum, MODEL *model,
2101
+ long int totdoc, long int *working2dnum, long int varnum,
2102
+ double *a, double *lin, double *c, LEARN_PARM *learn_parm,
2103
+ CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
2104
+ double *epsilon_crit_target)
2105
+ /* Do optimization on the working set. */
2106
+ {
2107
+ long i;
2108
+ double *a_v;
2109
+
2110
+ compute_matrices_for_optimization(docs,label,unlabeled,
2111
+ exclude_from_eq_const,eq_target,chosen,
2112
+ active2dnum,working2dnum,model,a,lin,c,
2113
+ varnum,totdoc,learn_parm,aicache,
2114
+ kernel_parm,qp);
2115
+
2116
+ if(verbosity>=3) {
2117
+ printf("Running optimizer..."); fflush(stdout);
2118
+ }
2119
+ /* call the qp-subsolver */
2120
+ a_v=optimize_qp(qp,epsilon_crit_target,
2121
+ learn_parm->svm_maxqpsize,
2122
+ &(model->b), /* in case the optimizer gives us */
2123
+ /* the threshold for free. otherwise */
2124
+ /* b is calculated in calculate_model. */
2125
+ learn_parm);
2126
+ if(verbosity>=3) {
2127
+ printf("done\n");
2128
+ }
2129
+
2130
+ for(i=0;i<varnum;i++) {
2131
+ a[working2dnum[i]]=a_v[i];
2132
+ /*
2133
+ if(a_v[i]<=(0+learn_parm->epsilon_a)) {
2134
+ a[working2dnum[i]]=0;
2135
+ }
2136
+ else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
2137
+ a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
2138
+ }
2139
+ */
2140
+ }
2141
+ }
2142
+
2143
+ void compute_matrices_for_optimization(DOC **docs, long int *label,
2144
+ long int *unlabeled, long *exclude_from_eq_const, double eq_target,
2145
+ long int *chosen, long int *active2dnum,
2146
+ long int *key, MODEL *model, double *a, double *lin, double *c,
2147
+ long int varnum, long int totdoc, LEARN_PARM *learn_parm,
2148
+ CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
2149
+ {
2150
+ register long ki,kj,i,j;
2151
+ register double kernel_temp;
2152
+
2153
+ if(verbosity>=3) {
2154
+ fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
2155
+ fflush(stdout);
2156
+ }
2157
+
2158
+ qp->opt_n=varnum;
2159
+ qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
2160
+ for(j=1;j<model->sv_num;j++) { /* start at 1 */
2161
+ if((!chosen[(model->supvec[j])->docnum])
2162
+ && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
2163
+ qp->opt_ce0[0]+=model->alpha[j];
2164
+ }
2165
+ }
2166
+ if(learn_parm->biased_hyperplane)
2167
+ qp->opt_m=1;
2168
+ else
2169
+ qp->opt_m=0; /* eq-constraint will be ignored */
2170
+
2171
+ /* init linear part of objective function */
2172
+ for(i=0;i<varnum;i++) {
2173
+ qp->opt_g0[i]=lin[key[i]];
2174
+ }
2175
+
2176
+ for(i=0;i<varnum;i++) {
2177
+ ki=key[i];
2178
+
2179
+ /* Compute the matrix for equality constraints */
2180
+ qp->opt_ce[i]=label[ki];
2181
+ qp->opt_low[i]=0;
2182
+ qp->opt_up[i]=learn_parm->svm_cost[ki];
2183
+
2184
+ kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
2185
+ /* compute linear part of objective function */
2186
+ qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
2187
+ /* compute quadratic part of objective function */
2188
+ qp->opt_g[varnum*i+i]=kernel_temp;
2189
+ for(j=i+1;j<varnum;j++) {
2190
+ kj=key[j];
2191
+ kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
2192
+ /* compute linear part of objective function */
2193
+ qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
2194
+ qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
2195
+ /* compute quadratic part of objective function */
2196
+ qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
2197
+ qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
2198
+ }
2199
+
2200
+ if(verbosity>=3) {
2201
+ if(i % 20 == 0) {
2202
+ fprintf(stdout,"%ld..",i); fflush(stdout);
2203
+ }
2204
+ }
2205
+ }
2206
+
2207
+ for(i=0;i<varnum;i++) {
2208
+ /* assure starting at feasible point */
2209
+ qp->opt_xinit[i]=a[key[i]];
2210
+ /* set linear part of objective function */
2211
+ qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
2212
+ }
2213
+
2214
+ if(verbosity>=3) {
2215
+ fprintf(stdout,"done\n");
2216
+ }
2217
+ }
2218
+
2219
+ long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
2220
+ double *lin, double *a, double *a_old, double *c,
2221
+ LEARN_PARM *learn_parm, long int *working2dnum,
2222
+ long int *active2dnum, MODEL *model)
2223
+ /* Compute decision function based on current values */
2224
+ /* of alpha. */
2225
+ {
2226
+ long i,ii,pos,b_calculated=0,first_low,first_high;
2227
+ double ex_c,b_temp,b_low,b_high;
2228
+
2229
+ if(verbosity>=3) {
2230
+ printf("Calculating model..."); fflush(stdout);
2231
+ }
2232
+
2233
+ if(!learn_parm->biased_hyperplane) {
2234
+ model->b=0;
2235
+ b_calculated=1;
2236
+ }
2237
+
2238
+ for(ii=0;(i=working2dnum[ii])>=0;ii++) {
2239
+ if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
2240
+ pos=model->index[i];
2241
+ model->index[i]=-1;
2242
+ (model->sv_num)--;
2243
+ model->supvec[pos]=model->supvec[model->sv_num];
2244
+ model->alpha[pos]=model->alpha[model->sv_num];
2245
+ model->index[(model->supvec[pos])->docnum]=pos;
2246
+ }
2247
+ else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
2248
+ model->supvec[model->sv_num]=docs[i];
2249
+ model->alpha[model->sv_num]=a[i]*(double)label[i];
2250
+ model->index[i]=model->sv_num;
2251
+ (model->sv_num)++;
2252
+ }
2253
+ else if(a_old[i]==a[i]) { /* nothing to do */
2254
+ }
2255
+ else { /* just update alpha */
2256
+ model->alpha[model->index[i]]=a[i]*(double)label[i];
2257
+ }
2258
+
2259
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2260
+ if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
2261
+ (model->at_upper_bound)--;
2262
+ }
2263
+ else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
2264
+ (model->at_upper_bound)++;
2265
+ }
2266
+
2267
+ if((!b_calculated)
2268
+ && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
2269
+ model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
2270
+ /* model->b=(-(double)label[i]+lin[i]); */
2271
+ b_calculated=1;
2272
+ }
2273
+ }
2274
+
2275
+ /* No alpha in the working set not at bounds, so b was not
2276
+ calculated in the usual way. The following handles this special
2277
+ case. */
2278
+ if(learn_parm->biased_hyperplane
2279
+ && (!b_calculated)
2280
+ && (model->sv_num-1 == model->at_upper_bound)) {
2281
+ first_low=1;
2282
+ first_high=1;
2283
+ b_low=0;
2284
+ b_high=0;
2285
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2286
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2287
+ if(a[i]<ex_c) {
2288
+ if(label[i]>0) {
2289
+ b_temp=-(learn_parm->eps-c[i]+lin[i]);
2290
+ if((b_temp>b_low) || (first_low)) {
2291
+ b_low=b_temp;
2292
+ first_low=0;
2293
+ }
2294
+ }
2295
+ else {
2296
+ b_temp=-(-learn_parm->eps-c[i]+lin[i]);
2297
+ if((b_temp<b_high) || (first_high)) {
2298
+ b_high=b_temp;
2299
+ first_high=0;
2300
+ }
2301
+ }
2302
+ }
2303
+ else {
2304
+ if(label[i]<0) {
2305
+ b_temp=-(-learn_parm->eps-c[i]+lin[i]);
2306
+ if((b_temp>b_low) || (first_low)) {
2307
+ b_low=b_temp;
2308
+ first_low=0;
2309
+ }
2310
+ }
2311
+ else {
2312
+ b_temp=-(learn_parm->eps-c[i]+lin[i]);
2313
+ if((b_temp<b_high) || (first_high)) {
2314
+ b_high=b_temp;
2315
+ first_high=0;
2316
+ }
2317
+ }
2318
+ }
2319
+ }
2320
+ if(first_high) {
2321
+ model->b=-b_low;
2322
+ }
2323
+ else if(first_low) {
2324
+ model->b=-b_high;
2325
+ }
2326
+ else {
2327
+ model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
2328
+ /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
2329
+ }
2330
+ }
2331
+
2332
+ if(verbosity>=3) {
2333
+ printf("done\n"); fflush(stdout);
2334
+ }
2335
+
2336
+ return(model->sv_num-1); /* have to substract one, since element 0 is empty*/
2337
+ }
2338
+
2339
+ long check_optimality(MODEL *model, long int *label, long int *unlabeled,
2340
+ double *a, double *lin, double *c, long int totdoc,
2341
+ LEARN_PARM *learn_parm, double *maxdiff,
2342
+ double epsilon_crit_org, long int *misclassified,
2343
+ long int *inconsistent, long int *active2dnum,
2344
+ long int *last_suboptimal_at,
2345
+ long int iteration, KERNEL_PARM *kernel_parm)
2346
+ /* Check KT-conditions */
2347
+ {
2348
+ long i,ii,retrain;
2349
+ double dist,ex_c,target;
2350
+
2351
+ if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
2352
+ learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
2353
+ }
2354
+ else { /* be conservative */
2355
+ learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
2356
+ }
2357
+ retrain=0;
2358
+ (*maxdiff)=0;
2359
+ (*misclassified)=0;
2360
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2361
+ if((!inconsistent[i]) && label[i]) {
2362
+ dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
2363
+ hyperplane*/
2364
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
2365
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2366
+ if(dist <= 0) {
2367
+ (*misclassified)++; /* does not work due to deactivation of var */
2368
+ }
2369
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2370
+ if((dist-target)>(*maxdiff)) /* largest violation */
2371
+ (*maxdiff)=dist-target;
2372
+ }
2373
+ else if((a[i]<ex_c) && (dist < target)) {
2374
+ if((target-dist)>(*maxdiff)) /* largest violation */
2375
+ (*maxdiff)=target-dist;
2376
+ }
2377
+ /* Count how long a variable was at lower/upper bound (and optimal).*/
2378
+ /* Variables, which were at the bound and optimal for a long */
2379
+ /* time are unlikely to become support vectors. In case our */
2380
+ /* cache is filled up, those variables are excluded to save */
2381
+ /* kernel evaluations. (See chapter 'Shrinking').*/
2382
+ if((a[i]>(learn_parm->epsilon_a))
2383
+ && (a[i]<ex_c)) {
2384
+ last_suboptimal_at[i]=iteration; /* not at bound */
2385
+ }
2386
+ else if((a[i]<=(learn_parm->epsilon_a))
2387
+ && (dist < (target+learn_parm->epsilon_shrink))) {
2388
+ last_suboptimal_at[i]=iteration; /* not likely optimal */
2389
+ }
2390
+ else if((a[i]>=ex_c)
2391
+ && (dist > (target-learn_parm->epsilon_shrink))) {
2392
+ last_suboptimal_at[i]=iteration; /* not likely optimal */
2393
+ }
2394
+ }
2395
+ }
2396
+ /* termination criterion */
2397
+ if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
2398
+ retrain=1;
2399
+ }
2400
+ return(retrain);
2401
+ }
2402
+
2403
+ long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
2404
+ double *a, double *lin, double *c, double *slack,
2405
+ double *alphaslack,
2406
+ long int totdoc,
2407
+ LEARN_PARM *learn_parm, double *maxdiff,
2408
+ double epsilon_crit_org, long int *misclassified,
2409
+ long int *active2dnum,
2410
+ long int *last_suboptimal_at,
2411
+ long int iteration, KERNEL_PARM *kernel_parm)
2412
+ /* Check KT-conditions */
2413
+ {
2414
+ long i,ii,retrain;
2415
+ double dist,ex_c=0,target;
2416
+
2417
+ if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
2418
+ learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
2419
+ }
2420
+ else { /* be conservative */
2421
+ learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
2422
+ }
2423
+
2424
+ retrain=0;
2425
+ (*maxdiff)=0;
2426
+ (*misclassified)=0;
2427
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2428
+ /* 'distance' from hyperplane*/
2429
+ dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
2430
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
2431
+ ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
2432
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2433
+ if((dist-target)>(*maxdiff)) { /* largest violation */
2434
+ (*maxdiff)=dist-target;
2435
+ if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
2436
+ if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
2437
+ }
2438
+ }
2439
+ if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
2440
+ if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
2441
+ (*maxdiff)=slack[docs[i]->slackid];
2442
+ if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
2443
+ if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
2444
+ }
2445
+ }
2446
+ /* Count how long a variable was at lower/upper bound (and optimal).*/
2447
+ /* Variables, which were at the bound and optimal for a long */
2448
+ /* time are unlikely to become support vectors. In case our */
2449
+ /* cache is filled up, those variables are excluded to save */
2450
+ /* kernel evaluations. (See chapter 'Shrinking').*/
2451
+ if((a[i]>(learn_parm->epsilon_a))
2452
+ && (a[i]<ex_c)) {
2453
+ last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */
2454
+ }
2455
+ else if((a[i]<=(learn_parm->epsilon_a))
2456
+ && (dist < (target+learn_parm->epsilon_shrink))) {
2457
+ last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
2458
+ }
2459
+ else if((a[i]>=ex_c)
2460
+ && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) {
2461
+ last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
2462
+ }
2463
+ }
2464
+ /* termination criterion */
2465
+ if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
2466
+ retrain=1;
2467
+ }
2468
+ return(retrain);
2469
+ }
2470
+
2471
+ void compute_shared_slacks(DOC **docs, long int *label,
2472
+ double *a, double *lin,
2473
+ double *c, long int *active2dnum,
2474
+ LEARN_PARM *learn_parm,
2475
+ double *slack, double *alphaslack)
2476
+ /* compute the value of shared slacks and the joint alphas */
2477
+ {
2478
+ long jj,i;
2479
+ double dist,target;
2480
+
2481
+ for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
2482
+ slack[docs[i]->slackid]=0.0;
2483
+ alphaslack[docs[i]->slackid]=0.0;
2484
+ }
2485
+ for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
2486
+ dist=(lin[i])*(double)label[i];
2487
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
2488
+ if((target-dist) > slack[docs[i]->slackid])
2489
+ slack[docs[i]->slackid]=target-dist;
2490
+ alphaslack[docs[i]->slackid]+=a[i];
2491
+ }
2492
+ }
2493
+
2494
+
2495
+ long identify_inconsistent(double *a, long int *label,
2496
+ long int *unlabeled, long int totdoc,
2497
+ LEARN_PARM *learn_parm,
2498
+ long int *inconsistentnum, long int *inconsistent)
2499
+ {
2500
+ long i,retrain;
2501
+
2502
+ /* Throw out examples with multipliers at upper bound. This */
2503
+ /* corresponds to the -i 1 option. */
2504
+ /* ATTENTION: this is just a heuristic for finding a close */
2505
+ /* to minimum number of examples to exclude to */
2506
+ /* make the problem separable with desired margin */
2507
+ retrain=0;
2508
+ for(i=0;i<totdoc;i++) {
2509
+ if((!inconsistent[i]) && (!unlabeled[i])
2510
+ && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
2511
+ (*inconsistentnum)++;
2512
+ inconsistent[i]=1; /* never choose again */
2513
+ retrain=2; /* start over */
2514
+ if(verbosity>=3) {
2515
+ printf("inconsistent(%ld)..",i); fflush(stdout);
2516
+ }
2517
+ }
2518
+ }
2519
+ return(retrain);
2520
+ }
2521
+
2522
+ long identify_misclassified(double *lin, long int *label,
2523
+ long int *unlabeled, long int totdoc,
2524
+ MODEL *model, long int *inconsistentnum,
2525
+ long int *inconsistent)
2526
+ {
2527
+ long i,retrain;
2528
+ double dist;
2529
+
2530
+ /* Throw out misclassified examples. This */
2531
+ /* corresponds to the -i 2 option. */
2532
+ /* ATTENTION: this is just a heuristic for finding a close */
2533
+ /* to minimum number of examples to exclude to */
2534
+ /* make the problem separable with desired margin */
2535
+ retrain=0;
2536
+ for(i=0;i<totdoc;i++) {
2537
+ dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
2538
+ if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
2539
+ (*inconsistentnum)++;
2540
+ inconsistent[i]=1; /* never choose again */
2541
+ retrain=2; /* start over */
2542
+ if(verbosity>=3) {
2543
+ printf("inconsistent(%ld)..",i); fflush(stdout);
2544
+ }
2545
+ }
2546
+ }
2547
+ return(retrain);
2548
+ }
2549
+
2550
+ long identify_one_misclassified(double *lin, long int *label,
2551
+ long int *unlabeled,
2552
+ long int totdoc, MODEL *model,
2553
+ long int *inconsistentnum,
2554
+ long int *inconsistent)
2555
+ {
2556
+ long i,retrain,maxex=-1;
2557
+ double dist,maxdist=0;
2558
+
2559
+ /* Throw out the 'most misclassified' example. This */
2560
+ /* corresponds to the -i 3 option. */
2561
+ /* ATTENTION: this is just a heuristic for finding a close */
2562
+ /* to minimum number of examples to exclude to */
2563
+ /* make the problem separable with desired margin */
2564
+ retrain=0;
2565
+ for(i=0;i<totdoc;i++) {
2566
+ if((!inconsistent[i]) && (!unlabeled[i])) {
2567
+ dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
2568
+ if(dist<maxdist) {
2569
+ maxdist=dist;
2570
+ maxex=i;
2571
+ }
2572
+ }
2573
+ }
2574
+ if(maxex>=0) {
2575
+ (*inconsistentnum)++;
2576
+ inconsistent[maxex]=1; /* never choose again */
2577
+ retrain=2; /* start over */
2578
+ if(verbosity>=3) {
2579
+ printf("inconsistent(%ld)..",i); fflush(stdout);
2580
+ }
2581
+ }
2582
+ return(retrain);
2583
+ }
2584
+
2585
+ void update_linear_component(DOC **docs, long int *label,
2586
+ long int *active2dnum, double *a,
2587
+ double *a_old, long int *working2dnum,
2588
+ long int totdoc, long int totwords,
2589
+ KERNEL_PARM *kernel_parm,
2590
+ KERNEL_CACHE *kernel_cache,
2591
+ double *lin, CFLOAT *aicache, double *weights)
2592
+ /* keep track of the linear component */
2593
+ /* lin of the gradient etc. by updating */
2594
+ /* based on the change of the variables */
2595
+ /* in the current working set */
2596
+ {
2597
+ register long i,ii,j,jj;
2598
+ register double tec;
2599
+ SVECTOR *f;
2600
+
2601
+ if(kernel_parm->kernel_type==0) { /* special linear case */
2602
+ clear_vector_n(weights,totwords);
2603
+ for(ii=0;(i=working2dnum[ii])>=0;ii++) {
2604
+ if(a[i] != a_old[i]) {
2605
+ for(f=docs[i]->fvec;f;f=f->next)
2606
+ add_vector_ns(weights,f,
2607
+ f->factor*((a[i]-a_old[i])*(double)label[i]));
2608
+ }
2609
+ }
2610
+ for(jj=0;(j=active2dnum[jj])>=0;jj++) {
2611
+ for(f=docs[j]->fvec;f;f=f->next)
2612
+ lin[j]+=f->factor*sprod_ns(weights,f);
2613
+ }
2614
+ }
2615
+ else { /* general case */
2616
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) {
2617
+ if(a[i] != a_old[i]) {
2618
+ get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
2619
+ kernel_parm);
2620
+ for(ii=0;(j=active2dnum[ii])>=0;ii++) {
2621
+ tec=aicache[j];
2622
+ lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
2623
+ }
2624
+ }
2625
+ }
2626
+ }
2627
+ }
2628
+
2629
+
2630
+ long incorporate_unlabeled_examples(MODEL *model, long int *label,
2631
+ long int *inconsistent,
2632
+ long int *unlabeled,
2633
+ double *a, double *lin,
2634
+ long int totdoc, double *selcrit,
2635
+ long int *select, long int *key,
2636
+ long int transductcycle,
2637
+ KERNEL_PARM *kernel_parm,
2638
+ LEARN_PARM *learn_parm)
2639
+ {
2640
+ long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
2641
+ long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
2642
+ double dist,model_length,posratio,negratio;
2643
+ long check_every=2;
2644
+ double loss;
2645
+ static double switchsens=0.0,switchsensorg=0.0;
2646
+ double umin,umax,sumalpha;
2647
+ long imin=0,imax=0;
2648
+ static long switchnum=0;
2649
+
2650
+ switchsens/=1.2;
2651
+
2652
+ /* assumes that lin[] is up to date -> no inactive vars */
2653
+
2654
+ orgpos=0;
2655
+ orgneg=0;
2656
+ newpos=0;
2657
+ newneg=0;
2658
+ nolabel=0;
2659
+ allunlab=0;
2660
+ for(i=0;i<totdoc;i++) {
2661
+ if(!unlabeled[i]) {
2662
+ if(label[i] > 0) {
2663
+ orgpos++;
2664
+ }
2665
+ else {
2666
+ orgneg++;
2667
+ }
2668
+ }
2669
+ else {
2670
+ allunlab++;
2671
+ if(unlabeled[i]) {
2672
+ if(label[i] > 0) {
2673
+ newpos++;
2674
+ }
2675
+ else if(label[i] < 0) {
2676
+ newneg++;
2677
+ }
2678
+ }
2679
+ }
2680
+ if(label[i]==0) {
2681
+ nolabel++;
2682
+ }
2683
+ }
2684
+
2685
+ if(learn_parm->transduction_posratio >= 0) {
2686
+ posratio=learn_parm->transduction_posratio;
2687
+ }
2688
+ else {
2689
+ posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
2690
+ } /* in training data */
2691
+ negratio=1.0-posratio;
2692
+
2693
+ learn_parm->svm_costratio=1.0; /* global */
2694
+ if(posratio>0) {
2695
+ learn_parm->svm_costratio_unlab=negratio/posratio;
2696
+ }
2697
+ else {
2698
+ learn_parm->svm_costratio_unlab=1.0;
2699
+ }
2700
+
2701
+ pos=0;
2702
+ neg=0;
2703
+ upos=0;
2704
+ uneg=0;
2705
+ for(i=0;i<totdoc;i++) {
2706
+ dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2707
+ if(dist>0) {
2708
+ pos++;
2709
+ }
2710
+ else {
2711
+ neg++;
2712
+ }
2713
+ if(unlabeled[i]) {
2714
+ if(dist>0) {
2715
+ upos++;
2716
+ }
2717
+ else {
2718
+ uneg++;
2719
+ }
2720
+ }
2721
+ if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
2722
+ /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
2723
+ }
2724
+ }
2725
+ if(verbosity>=2) {
2726
+ printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
2727
+ printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
2728
+ printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
2729
+ fflush(stdout);
2730
+ }
2731
+
2732
+ if(transductcycle == 0) {
2733
+ j1=0;
2734
+ j2=0;
2735
+ j4=0;
2736
+ for(i=0;i<totdoc;i++) {
2737
+ dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2738
+ if((label[i]==0) && (unlabeled[i])) {
2739
+ selcrit[j4]=dist;
2740
+ key[j4]=i;
2741
+ j4++;
2742
+ }
2743
+ }
2744
+ unsupaddnum1=0;
2745
+ unsupaddnum2=0;
2746
+ select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
2747
+ for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
2748
+ i=key[select[k]];
2749
+ label[i]=1;
2750
+ unsupaddnum1++;
2751
+ j1++;
2752
+ }
2753
+ for(i=0;i<totdoc;i++) {
2754
+ if((label[i]==0) && (unlabeled[i])) {
2755
+ label[i]=-1;
2756
+ j2++;
2757
+ unsupaddnum2++;
2758
+ }
2759
+ }
2760
+ for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2761
+ if(unlabeled[i]) {
2762
+ if(label[i] == 1) {
2763
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2764
+ learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2765
+ }
2766
+ else if(label[i] == -1) {
2767
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2768
+ learn_parm->svm_unlabbound;
2769
+ }
2770
+ }
2771
+ }
2772
+ if(verbosity>=1) {
2773
+ /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
2774
+ learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
2775
+ learn_parm->svm_unlabbound); */
2776
+ printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
2777
+ unsupaddnum1,unsupaddnum2);
2778
+ fflush(stdout);
2779
+ }
2780
+ if(verbosity >= 1)
2781
+ printf("Retraining.");
2782
+ if(verbosity >= 2) printf("\n");
2783
+ return((long)3);
2784
+ }
2785
+ if((transductcycle % check_every) == 0) {
2786
+ if(verbosity >= 1)
2787
+ printf("Retraining.");
2788
+ if(verbosity >= 2) printf("\n");
2789
+ j1=0;
2790
+ j2=0;
2791
+ unsupaddnum1=0;
2792
+ unsupaddnum2=0;
2793
+ for(i=0;i<totdoc;i++) {
2794
+ if((unlabeled[i] == 2)) {
2795
+ unlabeled[i]=1;
2796
+ label[i]=1;
2797
+ j1++;
2798
+ unsupaddnum1++;
2799
+ }
2800
+ else if((unlabeled[i] == 3)) {
2801
+ unlabeled[i]=1;
2802
+ label[i]=-1;
2803
+ j2++;
2804
+ unsupaddnum2++;
2805
+ }
2806
+ }
2807
+ for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2808
+ if(unlabeled[i]) {
2809
+ if(label[i] == 1) {
2810
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2811
+ learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2812
+ }
2813
+ else if(label[i] == -1) {
2814
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2815
+ learn_parm->svm_unlabbound;
2816
+ }
2817
+ }
2818
+ }
2819
+
2820
+ if(verbosity>=2) {
2821
+ /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
2822
+ learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
2823
+ learn_parm->svm_unlabbound); */
2824
+ printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
2825
+ upos,unsupaddnum1,unsupaddnum2);
2826
+ fflush(stdout);
2827
+ }
2828
+
2829
+ if(learn_parm->svm_unlabbound == 1) {
2830
+ learn_parm->epsilon_crit=0.001; /* do the last run right */
2831
+ }
2832
+ else {
2833
+ learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
2834
+ }
2835
+
2836
+ return((long)3);
2837
+ }
2838
+ else if(((transductcycle % check_every) < check_every)) {
2839
+ model_length=0;
2840
+ sumalpha=0;
2841
+ loss=0;
2842
+ for(i=0;i<totdoc;i++) {
2843
+ model_length+=a[i]*label[i]*lin[i];
2844
+ sumalpha+=a[i];
2845
+ dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2846
+ if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
2847
+ loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
2848
+ }
2849
+ }
2850
+ model_length=sqrt(model_length);
2851
+ if(verbosity>=2) {
2852
+ printf("Model-length = %f (%f), loss = %f, objective = %f\n",
2853
+ model_length,sumalpha,loss,loss+0.5*model_length*model_length);
2854
+ fflush(stdout);
2855
+ }
2856
+ j1=0;
2857
+ j2=0;
2858
+ j3=0;
2859
+ j4=0;
2860
+ unsupaddnum1=0;
2861
+ unsupaddnum2=0;
2862
+ umin=99999;
2863
+ umax=-99999;
2864
+ j4=1;
2865
+ while(j4) {
2866
+ umin=99999;
2867
+ umax=-99999;
2868
+ for(i=0;(i<totdoc);i++) {
2869
+ dist=(lin[i]-model->b);
2870
+ if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
2871
+ && (dist<umin)) {
2872
+ umin=dist;
2873
+ imin=i;
2874
+ }
2875
+ if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
2876
+ && (dist>umax)) {
2877
+ umax=dist;
2878
+ imax=i;
2879
+ }
2880
+ }
2881
+ if((umin < (umax+switchsens-1E-4))) {
2882
+ j1++;
2883
+ j2++;
2884
+ unsupaddnum1++;
2885
+ unlabeled[imin]=3;
2886
+ inconsistent[imin]=1;
2887
+ unsupaddnum2++;
2888
+ unlabeled[imax]=2;
2889
+ inconsistent[imax]=1;
2890
+ }
2891
+ else
2892
+ j4=0;
2893
+ j4=0;
2894
+ }
2895
+ for(j=0;(j<totdoc);j++) {
2896
+ if(unlabeled[j] && (!inconsistent[j])) {
2897
+ if(label[j]>0) {
2898
+ unlabeled[j]=2;
2899
+ }
2900
+ else if(label[j]<0) {
2901
+ unlabeled[j]=3;
2902
+ }
2903
+ /* inconsistent[j]=1; */
2904
+ j3++;
2905
+ }
2906
+ }
2907
+ switchnum+=unsupaddnum1+unsupaddnum2;
2908
+
2909
+ /* stop and print out current margin
2910
+ printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
2911
+ if(switchnum == 2*kernel_parm->poly_degree) {
2912
+ learn_parm->svm_unlabbound=1;
2913
+ }
2914
+ */
2915
+
2916
+ if((!unsupaddnum1) && (!unsupaddnum2)) {
2917
+ if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
2918
+ for(j=0;(j<totdoc);j++) {
2919
+ inconsistent[j]=0;
2920
+ if(unlabeled[j]) unlabeled[j]=1;
2921
+ }
2922
+ write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
2923
+ totdoc,learn_parm);
2924
+ if(verbosity>=1)
2925
+ printf("Number of switches: %ld\n",switchnum);
2926
+ return((long)0);
2927
+ }
2928
+ switchsens=switchsensorg;
2929
+ learn_parm->svm_unlabbound*=1.5;
2930
+ if(learn_parm->svm_unlabbound>1) {
2931
+ learn_parm->svm_unlabbound=1;
2932
+ }
2933
+ model->at_upper_bound=0; /* since upper bound increased */
2934
+ if(verbosity>=1)
2935
+ printf("Increasing influence of unlabeled examples to %f%% .",
2936
+ learn_parm->svm_unlabbound*100.0);
2937
+ }
2938
+ else if(verbosity>=1) {
2939
+ printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
2940
+ upos,unsupaddnum1,unsupaddnum2);
2941
+ fflush(stdout);
2942
+ }
2943
+
2944
+ if(verbosity >= 2) printf("\n");
2945
+
2946
+ learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
2947
+
2948
+ for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2949
+ if(unlabeled[i]) {
2950
+ if(label[i] == 1) {
2951
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2952
+ learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2953
+ }
2954
+ else if(label[i] == -1) {
2955
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2956
+ learn_parm->svm_unlabbound;
2957
+ }
2958
+ }
2959
+ }
2960
+
2961
+ return((long)2);
2962
+ }
2963
+
2964
+ return((long)0);
2965
+ }
2966
+
2967
+ /*************************** Working set selection ***************************/
2968
+
2969
+ long select_next_qp_subproblem_grad(long int *label,
2970
+ long int *unlabeled,
2971
+ double *a, double *lin,
2972
+ double *c, long int totdoc,
2973
+ long int qp_size,
2974
+ LEARN_PARM *learn_parm,
2975
+ long int *inconsistent,
2976
+ long int *active2dnum,
2977
+ long int *working2dnum,
2978
+ double *selcrit,
2979
+ long int *select,
2980
+ KERNEL_CACHE *kernel_cache,
2981
+ long int cache_only,
2982
+ long int *key, long int *chosen)
2983
+ /* Use the feasible direction approach to select the next
2984
+ qp-subproblem (see chapter 'Selecting a good working set'). If
2985
+ 'cache_only' is true, then the variables are selected only among
2986
+ those for which the kernel evaluations are cached. */
2987
+ {
2988
+ long choosenum,i,j,k,activedoc,inum,valid;
2989
+ double s;
2990
+
2991
+ for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
2992
+ choosenum=0;
2993
+ activedoc=0;
2994
+ for(i=0;(j=active2dnum[i])>=0;i++) {
2995
+ s=-label[j];
2996
+ if(kernel_cache && cache_only)
2997
+ valid=(kernel_cache->index[j]>=0);
2998
+ else
2999
+ valid=1;
3000
+ if(valid
3001
+ && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3002
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3003
+ && (s>0)))
3004
+ && (!chosen[j])
3005
+ && (label[j])
3006
+ && (!inconsistent[j]))
3007
+ {
3008
+ selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
3009
+ /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
3010
+ key[activedoc]=j;
3011
+ activedoc++;
3012
+ }
3013
+ }
3014
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3015
+ for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
3016
+ /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
3017
+ i=key[select[k]];
3018
+ chosen[i]=1;
3019
+ working2dnum[inum+choosenum]=i;
3020
+ choosenum+=1;
3021
+ if(kernel_cache)
3022
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get
3023
+ kicked out of cache */
3024
+ /* } */
3025
+ }
3026
+
3027
+ activedoc=0;
3028
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3029
+ s=label[j];
3030
+ if(kernel_cache && cache_only)
3031
+ valid=(kernel_cache->index[j]>=0);
3032
+ else
3033
+ valid=1;
3034
+ if(valid
3035
+ && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3036
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3037
+ && (s>0)))
3038
+ && (!chosen[j])
3039
+ && (label[j])
3040
+ && (!inconsistent[j]))
3041
+ {
3042
+ selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
3043
+ /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
3044
+ key[activedoc]=j;
3045
+ activedoc++;
3046
+ }
3047
+ }
3048
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3049
+ for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
3050
+ /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
3051
+ i=key[select[k]];
3052
+ chosen[i]=1;
3053
+ working2dnum[inum+choosenum]=i;
3054
+ choosenum+=1;
3055
+ if(kernel_cache)
3056
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get
3057
+ kicked out of cache */
3058
+ /* } */
3059
+ }
3060
+ working2dnum[inum+choosenum]=-1; /* complete index */
3061
+ return(choosenum);
3062
+ }
3063
+
3064
+ long select_next_qp_subproblem_rand(long int *label,
3065
+ long int *unlabeled,
3066
+ double *a, double *lin,
3067
+ double *c, long int totdoc,
3068
+ long int qp_size,
3069
+ LEARN_PARM *learn_parm,
3070
+ long int *inconsistent,
3071
+ long int *active2dnum,
3072
+ long int *working2dnum,
3073
+ double *selcrit,
3074
+ long int *select,
3075
+ KERNEL_CACHE *kernel_cache,
3076
+ long int *key,
3077
+ long int *chosen,
3078
+ long int iteration)
3079
+ /* Use the feasible direction approach to select the next
3080
+ qp-subproblem (see section 'Selecting a good working set'). Chooses
3081
+ a feasible direction at (pseudo) random to help jump over numerical
3082
+ problem. */
3083
+ {
3084
+ long choosenum,i,j,k,activedoc,inum;
3085
+ double s;
3086
+
3087
+ for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
3088
+ choosenum=0;
3089
+ activedoc=0;
3090
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3091
+ s=-label[j];
3092
+ if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3093
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3094
+ && (s>0)))
3095
+ && (!inconsistent[j])
3096
+ && (label[j])
3097
+ && (!chosen[j])) {
3098
+ selcrit[activedoc]=(j+iteration) % totdoc;
3099
+ key[activedoc]=j;
3100
+ activedoc++;
3101
+ }
3102
+ }
3103
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3104
+ for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
3105
+ i=key[select[k]];
3106
+ chosen[i]=1;
3107
+ working2dnum[inum+choosenum]=i;
3108
+ choosenum+=1;
3109
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
3110
+ /* out of cache */
3111
+ }
3112
+
3113
+ activedoc=0;
3114
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3115
+ s=label[j];
3116
+ if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3117
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3118
+ && (s>0)))
3119
+ && (!inconsistent[j])
3120
+ && (label[j])
3121
+ && (!chosen[j])) {
3122
+ selcrit[activedoc]=(j+iteration) % totdoc;
3123
+ key[activedoc]=j;
3124
+ activedoc++;
3125
+ }
3126
+ }
3127
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3128
+ for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
3129
+ i=key[select[k]];
3130
+ chosen[i]=1;
3131
+ working2dnum[inum+choosenum]=i;
3132
+ choosenum+=1;
3133
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
3134
+ /* out of cache */
3135
+ }
3136
+ working2dnum[inum+choosenum]=-1; /* complete index */
3137
+ return(choosenum);
3138
+ }
3139
+
3140
+ long select_next_qp_slackset(DOC **docs, long int *label,
3141
+ double *a, double *lin,
3142
+ double *slack, double *alphaslack,
3143
+ double *c,
3144
+ LEARN_PARM *learn_parm,
3145
+ long int *active2dnum, double *maxviol)
3146
+ /* returns the slackset with the largest internal violation */
3147
+ {
3148
+ long i,ii,maxdiffid;
3149
+ double dist,target,maxdiff,ex_c;
3150
+
3151
+ maxdiff=0;
3152
+ maxdiffid=0;
3153
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
3154
+ ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
3155
+ if(alphaslack[docs[i]->slackid] >= ex_c) {
3156
+ dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
3157
+ target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
3158
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
3159
+ if((dist-target)>maxdiff) { /* largest violation */
3160
+ maxdiff=dist-target;
3161
+ maxdiffid=docs[i]->slackid;
3162
+ }
3163
+ }
3164
+ }
3165
+ }
3166
+ (*maxviol)=maxdiff;
3167
+ return(maxdiffid);
3168
+ }
3169
+
3170
+
3171
void select_top_n(double *selcrit, long int range, long int *select,
                  long int n)
/* Writes into select[] the indices of the (up to) n largest values
   among selcrit[0..range-1], ordered by decreasing value.  Of equal
   values the one with the smaller index comes first.  If range is
   smaller than n, only the first 'range' entries of select[] are
   written; nothing is written for n<=0. */
{
  long cand,slot,prefix;

  if(n <= 0)
    return;

  /* insertion sort of the first min(n,range) elements */
  prefix=(n < range) ? n : range;
  for(cand=0;cand<prefix;cand++) {
    slot=cand;
    while((slot>0) && (selcrit[select[slot-1]]<selcrit[cand])) {
      select[slot]=select[slot-1];
      slot--;
    }
    select[slot]=cand;
  }

  /* each remaining element replaces the current minimum if larger */
  for(cand=n;cand<range;cand++) {
    if(selcrit[cand]>selcrit[select[n-1]]) {
      slot=n-1;
      while((slot>0) && (selcrit[select[slot-1]]<selcrit[cand])) {
        select[slot]=select[slot-1];
        slot--;
      }
      select[slot]=cand;
    }
  }
}
3203
+
3204
+
3205
+ /******************************** Shrinking *********************************/
3206
+
3207
+ void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
3208
+ long int maxhistory)
3209
+ {
3210
+ long i;
3211
+
3212
+ shrink_state->deactnum=0;
3213
+ shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
3214
+ shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
3215
+ shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
3216
+ shrink_state->maxhistory=maxhistory;
3217
+ shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
3218
+ shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
3219
+
3220
+ for(i=0;i<totdoc;i++) {
3221
+ shrink_state->active[i]=1;
3222
+ shrink_state->inactive_since[i]=0;
3223
+ shrink_state->last_a[i]=0;
3224
+ shrink_state->last_lin[i]=0;
3225
+ }
3226
+ }
3227
+
3228
+ void shrink_state_cleanup(SHRINK_STATE *shrink_state)
3229
+ {
3230
+ free(shrink_state->active);
3231
+ free(shrink_state->inactive_since);
3232
+ if(shrink_state->deactnum > 0)
3233
+ free(shrink_state->a_history[shrink_state->deactnum-1]);
3234
+ free(shrink_state->a_history);
3235
+ free(shrink_state->last_a);
3236
+ free(shrink_state->last_lin);
3237
+ }
3238
+
3239
+ long shrink_problem(DOC **docs,
3240
+ LEARN_PARM *learn_parm,
3241
+ SHRINK_STATE *shrink_state,
3242
+ KERNEL_PARM *kernel_parm,
3243
+ long int *active2dnum,
3244
+ long int *last_suboptimal_at,
3245
+ long int iteration,
3246
+ long int totdoc,
3247
+ long int minshrink,
3248
+ double *a,
3249
+ long int *inconsistent)
3250
+ /* Shrink some variables away. Do the shrinking only if at least
3251
+ minshrink variables can be removed. */
3252
+ {
3253
+ long i,ii,change,activenum,lastiter;
3254
+ double *a_old;
3255
+
3256
+ activenum=0;
3257
+ change=0;
3258
+ for(ii=0;active2dnum[ii]>=0;ii++) {
3259
+ i=active2dnum[ii];
3260
+ activenum++;
3261
+ if(learn_parm->sharedslack)
3262
+ lastiter=last_suboptimal_at[docs[i]->slackid];
3263
+ else
3264
+ lastiter=last_suboptimal_at[i];
3265
+ if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
3266
+ || (inconsistent[i])) {
3267
+ change++;
3268
+ }
3269
+ }
3270
+ if((change>=minshrink) /* shrink only if sufficiently many candidates */
3271
+ && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
3272
+ /* Shrink problem by removing those variables which are */
3273
+ /* optimal at a bound for a minimum number of iterations */
3274
+ if(verbosity>=2) {
3275
+ printf(" Shrinking..."); fflush(stdout);
3276
+ }
3277
+ if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */
3278
+ a_old=(double *)my_malloc(sizeof(double)*totdoc);
3279
+ shrink_state->a_history[shrink_state->deactnum]=a_old;
3280
+ for(i=0;i<totdoc;i++) {
3281
+ a_old[i]=a[i];
3282
+ }
3283
+ }
3284
+ for(ii=0;active2dnum[ii]>=0;ii++) {
3285
+ i=active2dnum[ii];
3286
+ if(learn_parm->sharedslack)
3287
+ lastiter=last_suboptimal_at[docs[i]->slackid];
3288
+ else
3289
+ lastiter=last_suboptimal_at[i];
3290
+ if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
3291
+ || (inconsistent[i])) {
3292
+ shrink_state->active[i]=0;
3293
+ shrink_state->inactive_since[i]=shrink_state->deactnum;
3294
+ }
3295
+ }
3296
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
3297
+ shrink_state->deactnum++;
3298
+ if(kernel_parm->kernel_type == LINEAR) {
3299
+ shrink_state->deactnum=0;
3300
+ }
3301
+ if(verbosity>=2) {
3302
+ printf("done.\n"); fflush(stdout);
3303
+ printf(" Number of inactive variables = %ld\n",totdoc-activenum);
3304
+ }
3305
+ }
3306
+ return(activenum);
3307
+ }
3308
+
3309
+
3310
+ void reactivate_inactive_examples(long int *label,
3311
+ long int *unlabeled,
3312
+ double *a,
3313
+ SHRINK_STATE *shrink_state,
3314
+ double *lin,
3315
+ double *c,
3316
+ long int totdoc,
3317
+ long int totwords,
3318
+ long int iteration,
3319
+ LEARN_PARM *learn_parm,
3320
+ long int *inconsistent,
3321
+ DOC **docs,
3322
+ KERNEL_PARM *kernel_parm,
3323
+ KERNEL_CACHE *kernel_cache,
3324
+ MODEL *model,
3325
+ CFLOAT *aicache,
3326
+ double *weights,
3327
+ double *maxdiff)
3328
+ /* Make all variables active again which had been removed by
3329
+ shrinking. */
3330
+ /* Computes lin for those variables from scratch. */
3331
+ {
3332
+ register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
3333
+ long *changed,*inactive;
3334
+ register double kernel_val,*a_old,dist;
3335
+ double ex_c,target;
3336
+ SVECTOR *f;
3337
+
3338
+ if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
3339
+ a_old=shrink_state->last_a;
3340
+ clear_vector_n(weights,totwords);
3341
+ for(i=0;i<totdoc;i++) {
3342
+ if(a[i] != a_old[i]) {
3343
+ for(f=docs[i]->fvec;f;f=f->next)
3344
+ add_vector_ns(weights,f,
3345
+ f->factor*((a[i]-a_old[i])*(double)label[i]));
3346
+ a_old[i]=a[i];
3347
+ }
3348
+ }
3349
+ for(i=0;i<totdoc;i++) {
3350
+ if(!shrink_state->active[i]) {
3351
+ for(f=docs[i]->fvec;f;f=f->next)
3352
+ lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
3353
+ }
3354
+ shrink_state->last_lin[i]=lin[i];
3355
+ }
3356
+ }
3357
+ else {
3358
+ changed=(long *)my_malloc(sizeof(long)*totdoc);
3359
+ changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
3360
+ inactive=(long *)my_malloc(sizeof(long)*totdoc);
3361
+ inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
3362
+ for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
3363
+ if(verbosity>=2) {
3364
+ printf("%ld..",t); fflush(stdout);
3365
+ }
3366
+ a_old=shrink_state->a_history[t];
3367
+ for(i=0;i<totdoc;i++) {
3368
+ inactive[i]=((!shrink_state->active[i])
3369
+ && (shrink_state->inactive_since[i] == t));
3370
+ changed[i]= (a[i] != a_old[i]);
3371
+ }
3372
+ compute_index(inactive,totdoc,inactive2dnum);
3373
+ compute_index(changed,totdoc,changed2dnum);
3374
+
3375
+ for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
3376
+ get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
3377
+ kernel_parm);
3378
+ for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
3379
+ kernel_val=aicache[j];
3380
+ lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
3381
+ }
3382
+ }
3383
+ }
3384
+ free(changed);
3385
+ free(changed2dnum);
3386
+ free(inactive);
3387
+ free(inactive2dnum);
3388
+ }
3389
+ (*maxdiff)=0;
3390
+ for(i=0;i<totdoc;i++) {
3391
+ shrink_state->inactive_since[i]=shrink_state->deactnum-1;
3392
+ if(!inconsistent[i]) {
3393
+ dist=(lin[i]-model->b)*(double)label[i];
3394
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
3395
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
3396
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
3397
+ if((dist-target)>(*maxdiff)) /* largest violation */
3398
+ (*maxdiff)=dist-target;
3399
+ }
3400
+ else if((a[i]<ex_c) && (dist < target)) {
3401
+ if((target-dist)>(*maxdiff)) /* largest violation */
3402
+ (*maxdiff)=target-dist;
3403
+ }
3404
+ if((a[i]>(0+learn_parm->epsilon_a))
3405
+ && (a[i]<ex_c)) {
3406
+ shrink_state->active[i]=1; /* not at bound */
3407
+ }
3408
+ else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
3409
+ shrink_state->active[i]=1;
3410
+ }
3411
+ else if((a[i]>=ex_c)
3412
+ && (dist > (target-learn_parm->epsilon_shrink))) {
3413
+ shrink_state->active[i]=1;
3414
+ }
3415
+ else if(learn_parm->sharedslack) { /* make all active when sharedslack */
3416
+ shrink_state->active[i]=1;
3417
+ }
3418
+ }
3419
+ }
3420
+ if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
3421
+ for(i=0;i<totdoc;i++) {
3422
+ (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
3423
+ }
3424
+ for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
3425
+ free(shrink_state->a_history[t]);
3426
+ shrink_state->a_history[t]=0;
3427
+ }
3428
+ }
3429
+ }
3430
+
3431
+ /****************************** Cache handling *******************************/
3432
+
3433
+ void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
3434
+ long int docnum, long int totdoc,
3435
+ long int *active2dnum, CFLOAT *buffer,
3436
+ KERNEL_PARM *kernel_parm)
3437
+ /* Get's a row of the matrix of kernel values This matrix has the
3438
+ same form as the Hessian, just that the elements are not
3439
+ multiplied by */
3440
+ /* y_i * y_j * a_i * a_j */
3441
+ /* Takes the values from the cache if available. */
3442
+ {
3443
+ register long i,j,start;
3444
+ DOC *ex;
3445
+
3446
+ ex=docs[docnum];
3447
+
3448
+ if(kernel_cache->index[docnum] != -1) { /* row is cached? */
3449
+ kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
3450
+ start=kernel_cache->activenum*kernel_cache->index[docnum];
3451
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3452
+ if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
3453
+ buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
3454
+ }
3455
+ else {
3456
+ buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
3457
+ }
3458
+ }
3459
+ }
3460
+ else {
3461
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3462
+ buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
3463
+ }
3464
+ }
3465
+ }
3466
+
3467
+
3468
+ void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
3469
+ long int m, KERNEL_PARM *kernel_parm)
3470
+ /* Fills cache for the row m */
3471
+ {
3472
+ register DOC *ex;
3473
+ register long j,k,l;
3474
+ register CFLOAT *cache;
3475
+
3476
+ if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/
3477
+ cache = kernel_cache_clean_and_malloc(kernel_cache,m);
3478
+ if(cache) {
3479
+ l=kernel_cache->totdoc2active[m];
3480
+ ex=docs[m];
3481
+ for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
3482
+ k=kernel_cache->active2totdoc[j];
3483
+ if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
3484
+ cache[j]=kernel_cache->buffer[kernel_cache->activenum
3485
+ *kernel_cache->index[k]+l];
3486
+ }
3487
+ else {
3488
+ cache[j]=kernel(kernel_parm,ex,docs[k]);
3489
+ }
3490
+ }
3491
+ }
3492
+ else {
3493
+ perror("Error: Kernel cache full! => increase cache size");
3494
+ }
3495
+ }
3496
+ }
3497
+
3498
+
3499
+ void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs,
3500
+ long int *key, long int varnum,
3501
+ KERNEL_PARM *kernel_parm)
3502
+ /* Fills cache for the rows in key */
3503
+ {
3504
+ register long i;
3505
+
3506
+ for(i=0;i<varnum;i++) { /* fill up kernel cache */
3507
+ cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
3508
+ }
3509
+ }
3510
+
3511
+
3512
+ void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc,
3513
+ long int numshrink, long int *after)
3514
+ /* Remove numshrink columns in the cache which correspond to
3515
+ examples marked 0 in after. */
3516
+ {
3517
+ register long i,j,jj,from=0,to=0,scount;
3518
+ long *keep;
3519
+
3520
+ if(verbosity>=2) {
3521
+ printf(" Reorganizing cache..."); fflush(stdout);
3522
+ }
3523
+
3524
+ keep=(long *)my_malloc(sizeof(long)*totdoc);
3525
+ for(j=0;j<totdoc;j++) {
3526
+ keep[j]=1;
3527
+ }
3528
+ scount=0;
3529
+ for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) {
3530
+ j=kernel_cache->active2totdoc[jj];
3531
+ if(!after[j]) {
3532
+ scount++;
3533
+ keep[j]=0;
3534
+ }
3535
+ }
3536
+
3537
+ for(i=0;i<kernel_cache->max_elems;i++) {
3538
+ for(jj=0;jj<kernel_cache->activenum;jj++) {
3539
+ j=kernel_cache->active2totdoc[jj];
3540
+ if(!keep[j]) {
3541
+ from++;
3542
+ }
3543
+ else {
3544
+ kernel_cache->buffer[to]=kernel_cache->buffer[from];
3545
+ to++;
3546
+ from++;
3547
+ }
3548
+ }
3549
+ }
3550
+
3551
+ kernel_cache->activenum=0;
3552
+ for(j=0;j<totdoc;j++) {
3553
+ if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) {
3554
+ kernel_cache->active2totdoc[kernel_cache->activenum]=j;
3555
+ kernel_cache->totdoc2active[j]=kernel_cache->activenum;
3556
+ kernel_cache->activenum++;
3557
+ }
3558
+ else {
3559
+ kernel_cache->totdoc2active[j]=-1;
3560
+ }
3561
+ }
3562
+
3563
+ kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum);
3564
+ if(kernel_cache->max_elems>totdoc) {
3565
+ kernel_cache->max_elems=totdoc;
3566
+ }
3567
+
3568
+ free(keep);
3569
+
3570
+ if(verbosity>=2) {
3571
+ printf("done.\n"); fflush(stdout);
3572
+ printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
3573
+ }
3574
+ }
3575
+
3576
+ KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize)
3577
+ {
3578
+ long i;
3579
+ KERNEL_CACHE *kernel_cache;
3580
+
3581
+ kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE));
3582
+ kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc);
3583
+ kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc);
3584
+ kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc);
3585
+ kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc);
3586
+ kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc);
3587
+ kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc);
3588
+ kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024);
3589
+
3590
+ kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024);
3591
+
3592
+ kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc);
3593
+ if(kernel_cache->max_elems>totdoc) {
3594
+ kernel_cache->max_elems=totdoc;
3595
+ }
3596
+
3597
+ if(verbosity>=2) {
3598
+ printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
3599
+ printf(" Kernel evals so far: %ld\n",kernel_cache_statistic);
3600
+ }
3601
+
3602
+ kernel_cache->elems=0; /* initialize cache */
3603
+ for(i=0;i<totdoc;i++) {
3604
+ kernel_cache->index[i]=-1;
3605
+ kernel_cache->lru[i]=0;
3606
+ }
3607
+ for(i=0;i<totdoc;i++) {
3608
+ kernel_cache->occu[i]=0;
3609
+ kernel_cache->invindex[i]=-1;
3610
+ }
3611
+
3612
+ kernel_cache->activenum=totdoc;;
3613
+ for(i=0;i<totdoc;i++) {
3614
+ kernel_cache->active2totdoc[i]=i;
3615
+ kernel_cache->totdoc2active[i]=i;
3616
+ }
3617
+
3618
+ kernel_cache->time=0;
3619
+
3620
+ return(kernel_cache);
3621
+ }
3622
+
3623
+ void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
3624
+ {
3625
+ long maxlru=0,k;
3626
+
3627
+ for(k=0;k<kernel_cache->max_elems;k++) {
3628
+ if(maxlru < kernel_cache->lru[k])
3629
+ maxlru=kernel_cache->lru[k];
3630
+ }
3631
+ for(k=0;k<kernel_cache->max_elems;k++) {
3632
+ kernel_cache->lru[k]-=maxlru;
3633
+ }
3634
+ }
3635
+
3636
+ void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
3637
+ {
3638
+ free(kernel_cache->index);
3639
+ free(kernel_cache->occu);
3640
+ free(kernel_cache->lru);
3641
+ free(kernel_cache->invindex);
3642
+ free(kernel_cache->active2totdoc);
3643
+ free(kernel_cache->totdoc2active);
3644
+ free(kernel_cache->buffer);
3645
+ free(kernel_cache);
3646
+ }
3647
+
3648
+ long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
3649
+ {
3650
+ long i;
3651
+
3652
+ if(kernel_cache_space_available(kernel_cache)) {
3653
+ for(i=0;i<kernel_cache->max_elems;i++) {
3654
+ if(!kernel_cache->occu[i]) {
3655
+ kernel_cache->occu[i]=1;
3656
+ kernel_cache->elems++;
3657
+ return(i);
3658
+ }
3659
+ }
3660
+ }
3661
+ return(-1);
3662
+ }
3663
+
3664
+ void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
3665
+ {
3666
+ kernel_cache->occu[i]=0;
3667
+ kernel_cache->elems--;
3668
+ }
3669
+
3670
+ long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
3671
+ /* remove least recently used cache element */
3672
+ {
3673
+ register long k,least_elem=-1,least_time;
3674
+
3675
+ least_time=kernel_cache->time+1;
3676
+ for(k=0;k<kernel_cache->max_elems;k++) {
3677
+ if(kernel_cache->invindex[k] != -1) {
3678
+ if(kernel_cache->lru[k]<least_time) {
3679
+ least_time=kernel_cache->lru[k];
3680
+ least_elem=k;
3681
+ }
3682
+ }
3683
+ }
3684
+ if(least_elem != -1) {
3685
+ kernel_cache_free(kernel_cache,least_elem);
3686
+ kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
3687
+ kernel_cache->invindex[least_elem]=-1;
3688
+ return(1);
3689
+ }
3690
+ return(0);
3691
+ }
3692
+
3693
+
3694
+ CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
3695
+ long int docnum)
3696
+ /* Get a free cache entry. In case cache is full, the lru element
3697
+ is removed. */
3698
+ {
3699
+ long result;
3700
+ if((result = kernel_cache_malloc(kernel_cache)) == -1) {
3701
+ if(kernel_cache_free_lru(kernel_cache)) {
3702
+ result = kernel_cache_malloc(kernel_cache);
3703
+ }
3704
+ }
3705
+ kernel_cache->index[docnum]=result;
3706
+ if(result == -1) {
3707
+ return(0);
3708
+ }
3709
+ kernel_cache->invindex[result]=docnum;
3710
+ kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
3711
+ return((CFLOAT *)((long)kernel_cache->buffer
3712
+ +(kernel_cache->activenum*sizeof(CFLOAT)*
3713
+ kernel_cache->index[docnum])));
3714
+ }
3715
+
3716
+ long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
3717
+ /* Update lru time to avoid removal from cache. */
3718
+ {
3719
+ if(kernel_cache && kernel_cache->index[docnum] != -1) {
3720
+ kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
3721
+ return(1);
3722
+ }
3723
+ return(0);
3724
+ }
3725
+
3726
+ long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
3727
+ /* Is that row cached? */
3728
+ {
3729
+ return(kernel_cache->index[docnum] != -1);
3730
+ }
3731
+
3732
+ long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
3733
+ /* Is there room for one more row? */
3734
+ {
3735
+ return(kernel_cache->elems < kernel_cache->max_elems);
3736
+ }
3737
+
3738
+ /************************** Compute estimates ******************************/
3739
+
3740
+ void compute_xa_estimates(MODEL *model, long int *label,
3741
+ long int *unlabeled, long int totdoc,
3742
+ DOC **docs, double *lin, double *a,
3743
+ KERNEL_PARM *kernel_parm,
3744
+ LEARN_PARM *learn_parm, double *error,
3745
+ double *recall, double *precision)
3746
+ /* Computes xa-estimate of error rate, recall, and precision. See
3747
+ T. Joachims, Estimating the Generalization Performance of an SVM
3748
+ Efficiently, IMCL, 2000. */
3749
+ {
3750
+ long i,looerror,looposerror,loonegerror;
3751
+ long totex,totposex;
3752
+ double xi,r_delta,r_delta_sq,sim=0;
3753
+ long *sv2dnum=NULL,*sv=NULL,svnum;
3754
+
3755
+ r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
3756
+ r_delta_sq=r_delta*r_delta;
3757
+
3758
+ looerror=0;
3759
+ looposerror=0;
3760
+ loonegerror=0;
3761
+ totex=0;
3762
+ totposex=0;
3763
+ svnum=0;
3764
+
3765
+ if(learn_parm->xa_depth > 0) {
3766
+ sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
3767
+ for(i=0;i<totdoc;i++)
3768
+ sv[i]=0;
3769
+ for(i=1;i<model->sv_num;i++)
3770
+ if(a[model->supvec[i]->docnum]
3771
+ < (learn_parm->svm_cost[model->supvec[i]->docnum]
3772
+ -learn_parm->epsilon_a)) {
3773
+ sv[model->supvec[i]->docnum]=1;
3774
+ svnum++;
3775
+ }
3776
+ sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
3777
+ clear_index(sv2dnum);
3778
+ compute_index(sv,totdoc,sv2dnum);
3779
+ }
3780
+
3781
+ for(i=0;i<totdoc;i++) {
3782
+ if(unlabeled[i]) {
3783
+ /* ignore it */
3784
+ }
3785
+ else {
3786
+ xi=1.0-((lin[i]-model->b)*(double)label[i]);
3787
+ if(xi<0) xi=0;
3788
+ if(label[i]>0) {
3789
+ totposex++;
3790
+ }
3791
+ if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
3792
+ if(learn_parm->xa_depth > 0) { /* makes assumptions */
3793
+ sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
3794
+ kernel_parm,learn_parm,
3795
+ (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
3796
+ }
3797
+ if((learn_parm->xa_depth == 0) ||
3798
+ ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
3799
+ looerror++;
3800
+ if(label[i]>0) {
3801
+ looposerror++;
3802
+ }
3803
+ else {
3804
+ loonegerror++;
3805
+ }
3806
+ }
3807
+ }
3808
+ totex++;
3809
+ }
3810
+ }
3811
+
3812
+ (*error)=((double)looerror/(double)totex)*100.0;
3813
+ (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
3814
+ (*precision)=(((double)totposex-(double)looposerror)
3815
+ /((double)totposex-(double)looposerror+(double)loonegerror))*100.0;
3816
+
3817
+ free(sv);
3818
+ free(sv2dnum);
3819
+ }
3820
+
3821
+
3822
+ double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
3823
+ DOC **docs, double *a,
3824
+ long int docnum,
3825
+ long int *label,
3826
+ KERNEL_PARM *kernel_parm,
3827
+ LEARN_PARM *learn_parm, double thresh)
3828
+ /* Experimental Code improving plain XiAlpha Estimates by
3829
+ computing a better bound using a greedy optimzation strategy. */
3830
+ {
3831
+ long best_depth=0;
3832
+ long i,j,k,d,skip,allskip;
3833
+ double best,best_val[101],val,init_val_sq,init_val_lin;
3834
+ long best_ex[101];
3835
+ CFLOAT *cache,*trow;
3836
+
3837
+ cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
3838
+ trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);
3839
+
3840
+ for(k=0;k<svnum;k++) {
3841
+ trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
3842
+ }
3843
+
3844
+ init_val_sq=0;
3845
+ init_val_lin=0;
3846
+ best=0;
3847
+
3848
+ for(d=0;d<learn_parm->xa_depth;d++) {
3849
+ allskip=1;
3850
+ if(d>=1) {
3851
+ init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
3852
+ for(k=0;k<d-1;k++) {
3853
+ init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
3854
+ }
3855
+ init_val_lin+=trow[best_ex[d-1]];
3856
+ }
3857
+ for(i=0;i<svnum;i++) {
3858
+ skip=0;
3859
+ if(sv2dnum[i] == docnum) skip=1;
3860
+ for(j=0;j<d;j++) {
3861
+ if(i == best_ex[j]) skip=1;
3862
+ }
3863
+
3864
+ if(!skip) {
3865
+ val=init_val_sq;
3866
+ if(kernel_parm->kernel_type == LINEAR)
3867
+ val+=docs[sv2dnum[i]]->fvec->twonorm_sq;
3868
+ else
3869
+ val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
3870
+ for(j=0;j<d;j++) {
3871
+ val+=2.0*cache[i+j*svnum];
3872
+ }
3873
+ val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
3874
+ val-=((init_val_lin+trow[i])/(d+1.0));
3875
+
3876
+ if(allskip || (val < best_val[d])) {
3877
+ best_val[d]=val;
3878
+ best_ex[d]=i;
3879
+ }
3880
+ allskip=0;
3881
+ if(val < thresh) {
3882
+ i=svnum;
3883
+ /* printf("EARLY"); */
3884
+ }
3885
+ }
3886
+ }
3887
+ if(!allskip) {
3888
+ for(k=0;k<svnum;k++) {
3889
+ cache[d*svnum+k]=kernel(kernel_parm,
3890
+ docs[sv2dnum[best_ex[d]]],
3891
+ docs[sv2dnum[k]]);
3892
+ }
3893
+ }
3894
+ if((!allskip) && ((best_val[d] < best) || (d == 0))) {
3895
+ best=best_val[d];
3896
+ best_depth=d;
3897
+ }
3898
+ if(allskip || (best < thresh)) {
3899
+ d=learn_parm->xa_depth;
3900
+ }
3901
+ }
3902
+
3903
+ free(cache);
3904
+ free(trow);
3905
+
3906
+ /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
3907
+ return(best);
3908
+ }
3909
+
3910
+
3911
+ void estimate_transduction_quality(MODEL *model, long int *label,
3912
+ long int *unlabeled,
3913
+ long int totdoc, DOC **docs, double *lin)
3914
+ /* Loo-bound based on observation that loo-errors must have an
3915
+ equal distribution in both training and test examples, given
3916
+ that the test examples are classified correctly. Compare
3917
+ chapter "Constraints on the Transductive Hyperplane" in my
3918
+ Dissertation. */
3919
+ {
3920
+ long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
3921
+ double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
3922
+ double r_delta,r_delta_sq,xi,xisum=0,asum=0;
3923
+
3924
+ r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
3925
+ r_delta_sq=r_delta*r_delta;
3926
+
3927
+ for(j=0;j<totdoc;j++) {
3928
+ if(unlabeled[j]) {
3929
+ totulab++;
3930
+ }
3931
+ else {
3932
+ totlab++;
3933
+ if(label[j] > 0)
3934
+ totlabpos++;
3935
+ else
3936
+ totlabneg++;
3937
+ }
3938
+ }
3939
+ for(j=1;j<model->sv_num;j++) {
3940
+ i=model->supvec[j]->docnum;
3941
+ xi=1.0-((lin[i]-model->b)*(double)label[i]);
3942
+ if(xi<0) xi=0;
3943
+
3944
+ xisum+=xi;
3945
+ asum+=fabs(model->alpha[j]);
3946
+ if(unlabeled[i]) {
3947
+ ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
3948
+ }
3949
+ else {
3950
+ labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
3951
+ }
3952
+ if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
3953
+ l++;
3954
+ if(unlabeled[model->supvec[j]->docnum]) {
3955
+ ulab++;
3956
+ if(model->alpha[j] > 0)
3957
+ ulabpos++;
3958
+ else
3959
+ ulabneg++;
3960
+ }
3961
+ else {
3962
+ lab++;
3963
+ if(model->alpha[j] > 0)
3964
+ labpos++;
3965
+ else
3966
+ labneg++;
3967
+ }
3968
+ }
3969
+ }
3970
+ printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
3971
+ printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
3972
+ printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
3973
+ printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
3974
+ printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
3975
+ }
3976
+
3977
+ double estimate_margin_vcdim(MODEL *model, double w, double R,
3978
+ KERNEL_PARM *kernel_parm)
3979
+ /* optional: length of model vector in feature space */
3980
+ /* optional: radius of ball containing the data */
3981
+ {
3982
+ double h;
3983
+
3984
+ /* follows chapter 5.6.4 in [Vapnik/95] */
3985
+
3986
+ if(w<0) {
3987
+ w=model_length_s(model,kernel_parm);
3988
+ }
3989
+ if(R<0) {
3990
+ R=estimate_sphere(model,kernel_parm);
3991
+ }
3992
+ h = w*w * R*R +1;
3993
+ return(h);
3994
+ }
3995
+
3996
+ double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
3997
+ /* Approximates the radius of the ball containing */
3998
+ /* the support vectors by bounding it with the */
3999
+ { /* length of the longest support vector. This is */
4000
+ register long j; /* pretty good for text categorization, since all */
4001
+ double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
4002
+ DOC *nulldoc; /* assumes that the center of the ball is at the */
4003
+ WORD nullword; /* origin of the space. */
4004
+
4005
+ nullword.wnum=0;
4006
+ nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
4007
+
4008
+ for(j=1;j<model->sv_num;j++) {
4009
+ xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
4010
+ -2*kernel(kernel_parm,model->supvec[j],nulldoc)
4011
+ +kernel(kernel_parm,nulldoc,nulldoc));
4012
+ if(xlen>maxxlen) {
4013
+ maxxlen=xlen;
4014
+ }
4015
+ }
4016
+
4017
+ free_example(nulldoc,1);
4018
+ return(maxxlen);
4019
+ }
4020
+
4021
+ double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
4022
+ {
4023
+ long i;
4024
+ double maxxlen,xlen;
4025
+ DOC *nulldoc; /* assumes that the center of the ball is at the */
4026
+ WORD nullword; /* origin of the space. */
4027
+
4028
+ nullword.wnum=0;
4029
+ nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
4030
+
4031
+ maxxlen=0;
4032
+ for(i=0;i<totdoc;i++) {
4033
+ xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
4034
+ -2*kernel(kernel_parm,docs[i],nulldoc)
4035
+ +kernel(kernel_parm,nulldoc,nulldoc));
4036
+ if(xlen>maxxlen) {
4037
+ maxxlen=xlen;
4038
+ }
4039
+ }
4040
+
4041
+ free_example(nulldoc,1);
4042
+ return(maxxlen);
4043
+ }
4044
+
4045
+ double estimate_r_delta_average(DOC **docs, long int totdoc,
4046
+ KERNEL_PARM *kernel_parm)
4047
+ {
4048
+ long i;
4049
+ double avgxlen;
4050
+ DOC *nulldoc; /* assumes that the center of the ball is at the */
4051
+ WORD nullword; /* origin of the space. */
4052
+
4053
+ nullword.wnum=0;
4054
+ nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
4055
+
4056
+ avgxlen=0;
4057
+ for(i=0;i<totdoc;i++) {
4058
+ avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
4059
+ -2*kernel(kernel_parm,docs[i],nulldoc)
4060
+ +kernel(kernel_parm,nulldoc,nulldoc));
4061
+ }
4062
+
4063
+ free_example(nulldoc,1);
4064
+ return(avgxlen/totdoc);
4065
+ }
4066
+
4067
+ double length_of_longest_document_vector(DOC **docs, long int totdoc,
4068
+ KERNEL_PARM *kernel_parm)
4069
+ {
4070
+ long i;
4071
+ double maxxlen,xlen;
4072
+
4073
+ maxxlen=0;
4074
+ for(i=0;i<totdoc;i++) {
4075
+ xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
4076
+ if(xlen>maxxlen) {
4077
+ maxxlen=xlen;
4078
+ }
4079
+ }
4080
+
4081
+ return(maxxlen);
4082
+ }
4083
+
4084
+ /****************************** IO-handling **********************************/
4085
+
4086
+ void write_prediction(char *predfile, MODEL *model, double *lin,
4087
+ double *a, long int *unlabeled,
4088
+ long int *label, long int totdoc,
4089
+ LEARN_PARM *learn_parm)
4090
+ {
4091
+ FILE *predfl;
4092
+ long i;
4093
+ double dist,a_max;
4094
+
4095
+ if(verbosity>=1) {
4096
+ printf("Writing prediction file..."); fflush(stdout);
4097
+ }
4098
+ if ((predfl = fopen (predfile, "w")) == NULL)
4099
+ { perror (predfile); exit (1); }
4100
+ a_max=learn_parm->epsilon_a;
4101
+ for(i=0;i<totdoc;i++) {
4102
+ if((unlabeled[i]) && (a[i]>a_max)) {
4103
+ a_max=a[i];
4104
+ }
4105
+ }
4106
+ for(i=0;i<totdoc;i++) {
4107
+ if(unlabeled[i]) {
4108
+ if((a[i]>(learn_parm->epsilon_a))) {
4109
+ dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
4110
+ }
4111
+ else {
4112
+ dist=(lin[i]-model->b);
4113
+ }
4114
+ if(dist>0) {
4115
+ fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
4116
+ }
4117
+ else {
4118
+ fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
4119
+ }
4120
+ }
4121
+ }
4122
+ fclose(predfl);
4123
+ if(verbosity>=1) {
4124
+ printf("done\n");
4125
+ }
4126
+ }
4127
+
4128
+ void write_alphas(char *alphafile, double *a,
4129
+ long int *label, long int totdoc)
4130
+ {
4131
+ FILE *alphafl;
4132
+ long i;
4133
+
4134
+ if(verbosity>=1) {
4135
+ printf("Writing alpha file..."); fflush(stdout);
4136
+ }
4137
+ if ((alphafl = fopen (alphafile, "w")) == NULL)
4138
+ { perror (alphafile); exit (1); }
4139
+ for(i=0;i<totdoc;i++) {
4140
+ fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
4141
+ }
4142
+ fclose(alphafl);
4143
+ if(verbosity>=1) {
4144
+ printf("done\n");
4145
+ }
4146
+ }
4147
+