svmlightcli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4147 @@
+ /***********************************************************************/
+ /*                                                                     */
+ /*   svm_learn.c                                                       */
+ /*                                                                     */
+ /*   Learning module of Support Vector Machine.                        */
+ /*                                                                     */
+ /*   Author: Thorsten Joachims                                         */
+ /*   Date: 02.07.02                                                    */
+ /*                                                                     */
+ /*   Copyright (c) 2002 Thorsten Joachims - All rights reserved        */
+ /*                                                                     */
+ /*   This software is available for non-commercial use only. It must   */
+ /*   not be modified and distributed without prior permission of the   */
+ /*   author. The author is not responsible for implications from the   */
+ /*   use of this software.                                             */
+ /*                                                                     */
+ /***********************************************************************/
+
+
+ # include "svm_common.h"
+ # include "svm_learn.h"
+
+
+ /* interface to QP-solver */
+ double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
+
+ /*---------------------------------------------------------------------------*/
+
+ /* Learns an SVM classification model based on the training data in
+    docs/label. The resulting model is returned in the structure
+    model. */
+
+ void svm_learn_classification(DOC **docs, double *class, long int
+                               totdoc, long int totwords,
+                               LEARN_PARM *learn_parm,
+                               KERNEL_PARM *kernel_parm,
+                               KERNEL_CACHE *kernel_cache,
+                               MODEL *model,
+                               double *alpha)
+      /* docs:        Training vectors (x-part) */
+      /* class:       Training labels (y-part, zero if test example for
+                      transduction) */
+      /* totdoc:      Number of examples in docs/label */
+      /* totwords:    Number of features (i.e. highest feature index) */
+      /* learn_parm:  Learning parameters */
+      /* kernel_parm: Kernel parameters */
+      /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
+                      NULL if linear.*/
+      /* model:       Returns learning result (assumed empty before called) */
+      /* alpha:       Start values for the alpha variables or NULL
+                      pointer. The new alpha values are returned after
+                      optimization if not NULL. Array must be of size totdoc. */
+ {
+   long *inconsistent,i,*label;
+   long inconsistentnum;
+   long misclassified,upsupvecnum;
+   double loss,model_length,example_length;
+   double maxdiff,*lin,*a,*c;
+   long runtime_start,runtime_end;
+   long iterations;
+   long *unlabeled,transduction;
+   long heldout;
+   long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
+   long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
+   double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
+   long *index,*index2dnum;
+   double *weights;
+   CFLOAT *aicache;    /* buffer to keep one row of hessian */
+
+   double *xi_fullset; /* buffer for storing xi on full sample in loo */
+   double *a_fullset;  /* buffer for storing alpha on full sample in loo */
+   TIMING timing_profile;
+   SHRINK_STATE shrink_state;
+
+   runtime_start=get_runtime();
+   timing_profile.time_kernel=0;
+   timing_profile.time_opti=0;
+   timing_profile.time_shrink=0;
+   timing_profile.time_update=0;
+   timing_profile.time_model=0;
+   timing_profile.time_check=0;
+   timing_profile.time_select=0;
+   kernel_cache_statistic=0;
+
+   learn_parm->totwords=totwords;
+
+   /* make sure -n value is reasonable */
+   if((learn_parm->svm_newvarsinqp < 2)
+      || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+     learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+   }
+
+   init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+   label = (long *)my_malloc(sizeof(long)*totdoc);
+   inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+   unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+   c = (double *)my_malloc(sizeof(double)*totdoc);
+   a = (double *)my_malloc(sizeof(double)*totdoc);
+   a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+   xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+   lin = (double *)my_malloc(sizeof(double)*totdoc);
+   learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+   model->at_upper_bound=0;
+   model->b=0;
+   model->supvec[0]=0;  /* element 0 reserved and empty for now */
+   model->alpha[0]=0;
+   model->lin_weights=NULL;
+   model->totwords=totwords;
+   model->totdoc=totdoc;
+   model->kernel_parm=(*kernel_parm);
+   model->sv_num=1;
+   model->loo_error=-1;
+   model->loo_recall=-1;
+   model->loo_precision=-1;
+   model->xa_error=-1;
+   model->xa_recall=-1;
+   model->xa_precision=-1;
+   inconsistentnum=0;
+   transduction=0;
+
+   r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+   r_delta_sq=r_delta*r_delta;
+
+   r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+   if(learn_parm->svm_c == 0.0) {  /* default value for C */
+     learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+     if(verbosity>=1)
+       printf("Setting default regularization parameter C=%.4f\n",
+              learn_parm->svm_c);
+   }
+
+   learn_parm->eps=-1.0;  /* equivalent regression epsilon for
+                             classification */
+
+   for(i=0;i<totdoc;i++) {  /* various inits */
+     docs[i]->docnum=i;
+     inconsistent[i]=0;
+     a[i]=0;
+     lin[i]=0;
+     c[i]=0.0;
+     unlabeled[i]=0;
+     if(class[i] == 0) {
+       unlabeled[i]=1;
+       label[i]=0;
+       transduction=1;
+     }
+     if(class[i] > 0) {
+       learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+                               docs[i]->costfactor;
+       label[i]=1;
+       trainpos++;
+     }
+     else if(class[i] < 0) {
+       learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+       label[i]=-1;
+       trainneg++;
+     }
+     else {
+       learn_parm->svm_cost[i]=0;
+     }
+   }
+   if(verbosity>=2) {
+     printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
+   }
+
+   /* caching makes no sense for linear kernel */
+   if(kernel_parm->kernel_type == LINEAR) {
+     kernel_cache = NULL;
+   }
+
+   /* compute starting state for initial alpha values */
+   if(alpha) {
+     if(verbosity>=1) {
+       printf("Computing starting state..."); fflush(stdout);
+     }
+     index = (long *)my_malloc(sizeof(long)*totdoc);
+     index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+     weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+     aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+     for(i=0;i<totdoc;i++) {  /* create full index and clip alphas */
+       index[i]=1;
+       alpha[i]=fabs(alpha[i]);
+       if(alpha[i]<0) alpha[i]=0;
+       if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+     }
+     if(kernel_parm->kernel_type != LINEAR) {
+       for(i=0;i<totdoc;i++)  /* fill kernel cache with unbounded SV */
+         if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
+            && (kernel_cache_space_available(kernel_cache)))
+           cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+       for(i=0;i<totdoc;i++)  /* fill rest of kernel cache with bounded SV */
+         if((alpha[i]==learn_parm->svm_cost[i])
+            && (kernel_cache_space_available(kernel_cache)))
+           cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+     }
+     (void)compute_index(index,totdoc,index2dnum);
+     update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
+                             totwords,kernel_parm,kernel_cache,lin,aicache,
+                             weights);
+     (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
+                               learn_parm,index2dnum,index2dnum,model);
+     for(i=0;i<totdoc;i++) {  /* copy initial alphas */
+       a[i]=alpha[i];
+     }
+     free(index);
+     free(index2dnum);
+     free(weights);
+     free(aicache);
+     if(verbosity>=1) {
+       printf("done.\n"); fflush(stdout);
+     }
+   }
+
+   if(transduction) {
+     learn_parm->svm_iter_to_shrink=99999999;
+     if(verbosity >= 1)
+       printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
+   }
+
+   if(transduction && learn_parm->compute_loo) {
+     learn_parm->compute_loo=0;
+     if(verbosity >= 1)
+       printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
+   }
+
+   if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
+     learn_parm->compute_loo=0;
+     printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
+   }
+
+   if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
+     learn_parm->compute_loo=0;
+     printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
+   }
+
+
+   if(verbosity==1) {
+     printf("Optimizing"); fflush(stdout);
+   }
+
+   /* train the svm */
+   iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                      kernel_parm,kernel_cache,&shrink_state,model,
+                                      inconsistent,unlabeled,a,lin,
+                                      c,&timing_profile,
+                                      &maxdiff,(long)-1,
+                                      (long)1);
+
+   if(verbosity>=1) {
+     if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+     misclassified=0;
+     for(i=0;(i<totdoc);i++) {  /* get final statistic */
+       if((lin[i]-model->b)*(double)label[i] <= 0.0)
+         misclassified++;
+     }
+
+     printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
+            misclassified,maxdiff);
+
+     runtime_end=get_runtime();
+     if(verbosity>=2) {
+       printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+              ((float)runtime_end-(float)runtime_start)/100.0,
+              (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
+     }
+     else {
+       printf("Runtime in cpu-seconds: %.2f\n",
+              (runtime_end-runtime_start)/100.0);
+     }
+
+     if(learn_parm->remove_inconsistent) {
+       inconsistentnum=0;
+       for(i=0;i<totdoc;i++)
+         if(inconsistent[i])
+           inconsistentnum++;
+       printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+              model->sv_num-1,inconsistentnum);
+     }
+     else {
+       upsupvecnum=0;
+       for(i=1;i<model->sv_num;i++) {
+         if(fabs(model->alpha[i]) >=
+            (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+             learn_parm->epsilon_a))
+           upsupvecnum++;
+       }
+       printf("Number of SV: %ld (including %ld at upper bound)\n",
+              model->sv_num-1,upsupvecnum);
+     }
+
+     if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+       loss=0;
+       model_length=0;
+       for(i=0;i<totdoc;i++) {
+         if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
+           loss+=1.0-(lin[i]-model->b)*(double)label[i];
+         model_length+=a[i]*label[i]*lin[i];
+       }
+       model_length=sqrt(model_length);
+       fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+       fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+       example_length=estimate_sphere(model,kernel_parm);
+       fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+               length_of_longest_document_vector(docs,totdoc,kernel_parm));
+       fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
+               estimate_margin_vcdim(model,model_length,example_length,
+                                     kernel_parm));
+       if((!learn_parm->remove_inconsistent) && (!transduction)) {
+         runtime_start_xa=get_runtime();
+         if(verbosity>=1) {
+           printf("Computing XiAlpha-estimates..."); fflush(stdout);
+         }
+         compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
+                              kernel_parm,learn_parm,&(model->xa_error),
+                              &(model->xa_recall),&(model->xa_precision));
+         if(verbosity>=1) {
+           printf("done\n");
+         }
+         printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
+                (get_runtime()-runtime_start_xa)/100.0);
+
+         fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
+                 model->xa_error,learn_parm->rho,learn_parm->xa_depth);
+         fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
+                 model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
+         fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
+                 model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
+       }
+       else if(!learn_parm->remove_inconsistent) {
+         estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
+       }
+     }
+     if(verbosity>=1) {
+       printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+     }
+   }
+
+
+   /* leave-one-out testing starts now */
+   if(learn_parm->compute_loo) {
+     /* save results of training on full dataset for leave-one-out */
+     runtime_start_loo=get_runtime();
+     for(i=0;i<totdoc;i++) {
+       xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
+       if(xi_fullset[i]<0) xi_fullset[i]=0;
+       a_fullset[i]=a[i];
+     }
+     if(verbosity>=1) {
+       printf("Computing leave-one-out");
+     }
+
+     /* repeat this loop for every held-out example */
+     for(heldout=0;(heldout<totdoc);heldout++) {
+       if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
+          < 1.0) {
+         /* guaranteed to not produce a leave-one-out error */
+         if(verbosity==1) {
+           printf("+"); fflush(stdout);
+         }
+       }
+       else if(xi_fullset[heldout] > 1.0) {
+         /* guaranteed to produce a leave-one-out error */
+         loo_count++;
+         if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
+         if(verbosity==1) {
+           printf("-"); fflush(stdout);
+         }
+       }
+       else {
+         loocomputed++;
+         heldout_c=learn_parm->svm_cost[heldout];  /* set upper bound to zero */
+         learn_parm->svm_cost[heldout]=0;
+         /* make sure heldout example is not currently */
+         /* shrunk away. Assumes that lin is up to date! */
+         shrink_state.active[heldout]=1;
+         if(verbosity>=2)
+           printf("\nLeave-One-Out test on example %ld\n",heldout);
+         if(verbosity>=1) {
+           printf("(?[%ld]",heldout); fflush(stdout);
+         }
+
+         optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                 kernel_parm,
+                                 kernel_cache,&shrink_state,model,inconsistent,unlabeled,
+                                 a,lin,c,&timing_profile,
+                                 &maxdiff,heldout,(long)2);
+
+         /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
+
+         if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
+           loo_count++;  /* there was a loo-error */
+           if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
+           if(verbosity>=1) {
+             printf("-)"); fflush(stdout);
+           }
+         }
+         else {
+           if(verbosity>=1) {
+             printf("+)"); fflush(stdout);
+           }
+         }
+         /* now we need to restore the original data set */
+         learn_parm->svm_cost[heldout]=heldout_c;  /* restore upper bound */
+       }
+     }  /* end of leave-one-out loop */
+
+
+     if(verbosity>=1) {
+       printf("\nRetrain on full problem"); fflush(stdout);
+     }
+     optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                             kernel_parm,
+                             kernel_cache,&shrink_state,model,inconsistent,unlabeled,
+                             a,lin,c,&timing_profile,
+                             &maxdiff,(long)-1,(long)1);
+     if(verbosity >= 1)
+       printf("done.\n");
+
+
+     /* after all leave-one-out computed */
+     model->loo_error=100.0*loo_count/(double)totdoc;
+     model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
+     model->loo_precision=(trainpos-loo_count_pos)/
+                          (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
+     if(verbosity >= 1) {
+       fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
+               model->loo_error);
+       fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
+               model->loo_recall);
+       fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
+               model->loo_precision);
+       fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n",
+               loocomputed,learn_parm->rho);
+       printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
+              (double)(get_runtime()-runtime_start_loo)/100.0);
+     }
+   }
+
+   if(learn_parm->alphafile[0])
+     write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+   shrink_state_cleanup(&shrink_state);
+   free(label);
+   free(inconsistent);
+   free(unlabeled);
+   free(c);
+   free(a);
+   free(a_fullset);
+   free(xi_fullset);
+   free(lin);
+   free(learn_parm->svm_cost);
+ }
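
For orientation, here is a minimal calling sketch for this entry point. It is not part of the package; it assumes the caller has already parsed the training set into docs[]/y[] and filled learn_parm/kernel_parm (e.g. from SVM-light's usual command-line options), and cache_size_mb is a hypothetical local:

    /* Hypothetical driver sketch -- docs, y, totdoc, totwords,
       learn_parm and kernel_parm are assumed to be set up already. */
    MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));
    KERNEL_CACHE *kernel_cache=NULL;
    long cache_size_mb=40;                   /* assumed cache size in MB */
    if(kernel_parm->kernel_type != LINEAR)   /* a cache only pays off for kernels */
      kernel_cache=kernel_cache_init(totdoc,cache_size_mb);
    svm_learn_classification(docs,y,totdoc,totwords,learn_parm,
                             kernel_parm,kernel_cache,model,NULL);
    if(kernel_cache)
      kernel_cache_cleanup(kernel_cache);
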
+
+
+ /* Learns an SVM regression model based on the training data in
+    docs/label. The resulting model is returned in the structure
+    model. */
+
+ void svm_learn_regression(DOC **docs, double *value, long int totdoc,
+                           long int totwords, LEARN_PARM *learn_parm,
+                           KERNEL_PARM *kernel_parm,
+                           KERNEL_CACHE **kernel_cache, MODEL *model)
+      /* docs:        Training vectors (x-part) */
+      /* value:       Training values (y-part) */
+      /* totdoc:      Number of examples in docs/label */
+      /* totwords:    Number of features (i.e. highest feature index) */
+      /* learn_parm:  Learning parameters */
+      /* kernel_parm: Kernel parameters */
+      /* kernel_cache:Initialized Cache, if using a kernel. NULL if
+                      linear. Note that it will be free'd and reassigned */
+      /* model:       Returns learning result (assumed empty before called) */
+ {
+   long *inconsistent,i,j;
+   long inconsistentnum;
+   long upsupvecnum;
+   double loss,model_length,example_length;
+   double maxdiff,*lin,*a,*c;
+   long runtime_start,runtime_end;
+   long iterations,kernel_cache_size;
+   long *unlabeled;
+   double r_delta_sq=0,r_delta,r_delta_avg;
+   double *xi_fullset; /* buffer for storing xi on full sample in loo */
+   double *a_fullset;  /* buffer for storing alpha on full sample in loo */
+   TIMING timing_profile;
+   SHRINK_STATE shrink_state;
+   DOC **docs_org;
+   long *label;
+
+   /* set up regression problem in standard form */
+   docs_org=docs;
+   docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
+   label = (long *)my_malloc(sizeof(long)*2*totdoc);
+   c = (double *)my_malloc(sizeof(double)*2*totdoc);
+   for(i=0;i<totdoc;i++) {
+     j=2*totdoc-1-i;
+     docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
+     label[i]=+1;
+     c[i]=value[i];
+     docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
+     label[j]=-1;
+     c[j]=value[i];
+   }
+   totdoc*=2;
+
+   /* need to get a bigger kernel cache */
+   if(*kernel_cache) {
+     kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+     kernel_cache_cleanup(*kernel_cache);
+     (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
+   }
+
+   runtime_start=get_runtime();
+   timing_profile.time_kernel=0;
+   timing_profile.time_opti=0;
+   timing_profile.time_shrink=0;
+   timing_profile.time_update=0;
+   timing_profile.time_model=0;
+   timing_profile.time_check=0;
+   timing_profile.time_select=0;
+   kernel_cache_statistic=0;
+
+   learn_parm->totwords=totwords;
+
+   /* make sure -n value is reasonable */
+   if((learn_parm->svm_newvarsinqp < 2)
+      || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+     learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+   }
+
+   init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+   inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+   unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+   a = (double *)my_malloc(sizeof(double)*totdoc);
+   a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+   xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+   lin = (double *)my_malloc(sizeof(double)*totdoc);
+   learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+   model->at_upper_bound=0;
+   model->b=0;
+   model->supvec[0]=0;  /* element 0 reserved and empty for now */
+   model->alpha[0]=0;
+   model->lin_weights=NULL;
+   model->totwords=totwords;
+   model->totdoc=totdoc;
+   model->kernel_parm=(*kernel_parm);
+   model->sv_num=1;
+   model->loo_error=-1;
+   model->loo_recall=-1;
+   model->loo_precision=-1;
+   model->xa_error=-1;
+   model->xa_recall=-1;
+   model->xa_precision=-1;
+   inconsistentnum=0;
+
+   r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+   r_delta_sq=r_delta*r_delta;
+
+   r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+   if(learn_parm->svm_c == 0.0) {  /* default value for C */
+     learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+     if(verbosity>=1)
+       printf("Setting default regularization parameter C=%.4f\n",
+              learn_parm->svm_c);
+   }
+
+   for(i=0;i<totdoc;i++) {  /* various inits */
+     inconsistent[i]=0;
+     a[i]=0;
+     lin[i]=0;
+     unlabeled[i]=0;
+     if(label[i] > 0) {
+       learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+                               docs[i]->costfactor;
+     }
+     else if(label[i] < 0) {
+       learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+     }
+   }
+
+   /* caching makes no sense for linear kernel */
+   if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
+     printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
+   }
+
+   if(verbosity==1) {
+     printf("Optimizing"); fflush(stdout);
+   }
+
+   /* train the svm */
+   iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                      kernel_parm,*kernel_cache,&shrink_state,
+                                      model,inconsistent,unlabeled,a,lin,c,
+                                      &timing_profile,&maxdiff,(long)-1,
+                                      (long)1);
+
+   if(verbosity>=1) {
+     if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+     printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
+
+     runtime_end=get_runtime();
+     if(verbosity>=2) {
+       printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+              ((float)runtime_end-(float)runtime_start)/100.0,
+              (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
+     }
+     else {
+       printf("Runtime in cpu-seconds: %.2f\n",
+              (runtime_end-runtime_start)/100.0);
+     }
+
+     if(learn_parm->remove_inconsistent) {
+       inconsistentnum=0;
+       for(i=0;i<totdoc;i++)
+         if(inconsistent[i])
+           inconsistentnum++;
+       printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+              model->sv_num-1,inconsistentnum);
+     }
+     else {
+       upsupvecnum=0;
+       for(i=1;i<model->sv_num;i++) {
+         if(fabs(model->alpha[i]) >=
+            (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+             learn_parm->epsilon_a))
+           upsupvecnum++;
+       }
+       printf("Number of SV: %ld (including %ld at upper bound)\n",
+              model->sv_num-1,upsupvecnum);
+     }
+
+     if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+       loss=0;
+       model_length=0;
+       for(i=0;i<totdoc;i++) {
+         if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
+           loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
+         model_length+=a[i]*label[i]*lin[i];
+       }
+       model_length=sqrt(model_length);
+       fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+       fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+       example_length=estimate_sphere(model,kernel_parm);
+       fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+               length_of_longest_document_vector(docs,totdoc,kernel_parm));
+     }
+     if(verbosity>=1) {
+       printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+     }
+   }
+
+   if(learn_parm->alphafile[0])
+     write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+   /* this makes sure the model we return does not contain pointers to the
+      temporary documents */
+   for(i=1;i<model->sv_num;i++) {
+     j=model->supvec[i]->docnum;
+     if(j >= (totdoc/2)) {
+       j=totdoc-j-1;
+     }
+     model->supvec[i]=docs_org[j];
+   }
+
+   shrink_state_cleanup(&shrink_state);
+   for(i=0;i<totdoc;i++)
+     free_example(docs[i],0);
+   free(docs);
+   free(label);
+   free(inconsistent);
+   free(unlabeled);
+   free(c);
+   free(a);
+   free(a_fullset);
+   free(xi_fullset);
+   free(lin);
+   free(learn_parm->svm_cost);
+ }
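
For reference, the set-doubling at the top of this function is the usual reduction of eps-insensitive regression to two one-sided constraints per example. In the code's notation (eps = learn_parm->eps, c[i] = value[i]), each original example (x_i, value_i) contributes, up to slack:

     (w*x_i - b) >=  value_i - eps - \xi_i     (copy with label +1)
    -(w*x_i - b) >= -value_i - eps - \xi_i'    (copy with label -1)

Together these keep the prediction inside the tube |(w*x_i - b) - value_i| <= eps, which is exactly the condition tested in the L1-loss computation above.
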
+
+ void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
+                        long int totwords, LEARN_PARM *learn_parm,
+                        KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
+                        MODEL *model)
+      /* docs:        Training vectors (x-part) */
+      /* rankvalue:   Training target values that determine the ranking */
+      /* totdoc:      Number of examples in docs/label */
+      /* totwords:    Number of features (i.e. highest feature index) */
+      /* learn_parm:  Learning parameters */
+      /* kernel_parm: Kernel parameters */
+      /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
+                      using a kernel. NULL if linear. NOTE: Cache is
+                      getting reinitialized in this function */
+      /* model:       Returns learning result (assumed empty before called) */
+ {
+   DOC **docdiff;
+   long i,j,k,totpair,kernel_cache_size;
+   double *target,*alpha,cost;
+   long *greater,*lesser;
+   MODEL *pairmodel;
+   SVECTOR *flow,*fhigh;
+
+   totpair=0;
+   for(i=0;i<totdoc;i++) {
+     for(j=i+1;j<totdoc;j++) {
+       if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
+         totpair++;
+       }
+     }
+   }
+
+   printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
+   docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
+   target=(double *)my_malloc(sizeof(double)*totpair);
+   greater=(long *)my_malloc(sizeof(long)*totpair);
+   lesser=(long *)my_malloc(sizeof(long)*totpair);
+
+   k=0;
+   for(i=0;i<totdoc;i++) {
+     for(j=i+1;j<totdoc;j++) {
+       if(docs[i]->queryid == docs[j]->queryid) {
+         cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
+         if(rankvalue[i] > rankvalue[j]) {
+           if(kernel_parm->kernel_type == LINEAR)
+             docdiff[k]=create_example(k,0,0,cost,
+                                       sub_ss(docs[i]->fvec,docs[j]->fvec));
+           else {
+             flow=copy_svector(docs[j]->fvec);
+             flow->factor=-1.0;
+             flow->next=NULL;
+             fhigh=copy_svector(docs[i]->fvec);
+             fhigh->factor=1.0;
+             fhigh->next=flow;
+             docdiff[k]=create_example(k,0,0,cost,fhigh);
+           }
+           target[k]=1;
+           greater[k]=i;
+           lesser[k]=j;
+           k++;
+         }
+         else if(rankvalue[i] < rankvalue[j]) {
+           if(kernel_parm->kernel_type == LINEAR)
+             docdiff[k]=create_example(k,0,0,cost,
+                                       sub_ss(docs[i]->fvec,docs[j]->fvec));
+           else {
+             flow=copy_svector(docs[j]->fvec);
+             flow->factor=-1.0;
+             flow->next=NULL;
+             fhigh=copy_svector(docs[i]->fvec);
+             fhigh->factor=1.0;
+             fhigh->next=flow;
+             docdiff[k]=create_example(k,0,0,cost,fhigh);
+           }
+           target[k]=-1;
+           greater[k]=i;
+           lesser[k]=j;
+           k++;
+         }
+       }
+     }
+   }
+   printf("done.\n"); fflush(stdout);
+
+   /* need to get a bigger kernel cache */
+   if(*kernel_cache) {
+     kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+     kernel_cache_cleanup(*kernel_cache);
+     (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
+   }
+
+   /* must use unbiased hyperplane on difference vectors */
+   learn_parm->biased_hyperplane=0;
+   pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
+   svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
+                            kernel_parm,(*kernel_cache),pairmodel,NULL);
+
+   /* Transfer the result into a more compact model. If you would like
+      to output the original model on pairs of documents, see below. */
+   alpha=(double *)my_malloc(sizeof(double)*totdoc);
+   for(i=0;i<totdoc;i++) {
+     alpha[i]=0;
+   }
+   for(i=1;i<pairmodel->sv_num;i++) {
+     alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
+     alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
+   }
+   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+   model->supvec[0]=0;  /* element 0 reserved and empty for now */
+   model->alpha[0]=0;
+   model->sv_num=1;
+   for(i=0;i<totdoc;i++) {
+     if(alpha[i]) {
+       model->supvec[model->sv_num]=docs[i];
+       model->alpha[model->sv_num]=alpha[i];
+       model->index[i]=model->sv_num;
+       model->sv_num++;
+     }
+     else {
+       model->index[i]=-1;
+     }
+   }
+   model->at_upper_bound=0;
+   model->b=0;
+   model->lin_weights=NULL;
+   model->totwords=totwords;
+   model->totdoc=totdoc;
+   model->kernel_parm=(*kernel_parm);
+   model->loo_error=-1;
+   model->loo_recall=-1;
+   model->loo_precision=-1;
+   model->xa_error=-1;
+   model->xa_recall=-1;
+   model->xa_precision=-1;
+
+   free(alpha);
+   free(greater);
+   free(lesser);
+   free(target);
+
+   /* If you would like to output the original model on pairs of
+      documents, replace the following lines with '(*model)=(*pairmodel);' */
+   for(i=0;i<totpair;i++)
+     free_example(docdiff[i],1);
+   free(docdiff);
+   free_model(pairmodel,0);
+ }
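
In effect this function trains a ranking SVM on pairwise preferences: for every pair i,j from the same query (docs[i]->queryid == docs[j]->queryid) with rankvalue[i] > rankvalue[j], it builds the difference vector and learns a classifier on those pairs with an unbiased hyperplane, i.e. roughly

    min  0.5 w*w + C sum_(i,j) \xi_ij
    s.t. w*(x_i - x_j) >= 1 - \xi_ij    for all such pairs,

after which the pair alphas are folded back onto the individual documents (the alpha[lesser]/alpha[greater] loop above) to yield a compact model over single documents.
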
+
+
+ /* The following solves a freely defined and given set of
+    inequalities. The optimization problem is of the following form:
+
+    min 0.5 w*w + C sum_i C_i \xi_i
+    s.t. x_i * w > rhs_i - \xi_i
+
+    This corresponds to the -z o option. */
+
+ void svm_learn_optimization(DOC **docs, double *rhs, long int
+                             totdoc, long int totwords,
+                             LEARN_PARM *learn_parm,
+                             KERNEL_PARM *kernel_parm,
+                             KERNEL_CACHE *kernel_cache, MODEL *model,
+                             double *alpha)
+      /* docs:        Left-hand side of inequalities (x-part) */
+      /* rhs:         Right-hand side of inequalities */
+      /* totdoc:      Number of examples in docs/label */
+      /* totwords:    Number of features (i.e. highest feature index) */
+      /* learn_parm:  Learning parameters */
+      /* kernel_parm: Kernel parameters */
+      /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
+                      NULL if linear.*/
+      /* model:       Returns solution as SV expansion (assumed empty before called) */
+      /* alpha:       Start values for the alpha variables or NULL
+                      pointer. The new alpha values are returned after
+                      optimization if not NULL. Array must be of size totdoc. */
+ {
+   long i,*label;
+   long misclassified,upsupvecnum;
+   double loss,model_length,example_length;
+   double maxdiff,*lin,*a,*c;
+   long runtime_start,runtime_end;
+   long iterations,maxslackid,svsetnum;
+   long *unlabeled,*inconsistent;
+   double r_delta_sq=0,r_delta,r_delta_avg;
+   long *index,*index2dnum;
+   double *weights,*slack,*alphaslack;
+   CFLOAT *aicache;  /* buffer to keep one row of hessian */
+
+   TIMING timing_profile;
+   SHRINK_STATE shrink_state;
+
+   runtime_start=get_runtime();
+   timing_profile.time_kernel=0;
+   timing_profile.time_opti=0;
+   timing_profile.time_shrink=0;
+   timing_profile.time_update=0;
+   timing_profile.time_model=0;
+   timing_profile.time_check=0;
+   timing_profile.time_select=0;
+   kernel_cache_statistic=0;
+
+   learn_parm->totwords=totwords;
+
+   /* make sure -n value is reasonable */
+   if((learn_parm->svm_newvarsinqp < 2)
+      || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+     learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+   }
+
+   init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+   label = (long *)my_malloc(sizeof(long)*totdoc);
+   unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+   inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+   c = (double *)my_malloc(sizeof(double)*totdoc);
+   a = (double *)my_malloc(sizeof(double)*totdoc);
+   lin = (double *)my_malloc(sizeof(double)*totdoc);
+   learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+   model->at_upper_bound=0;
+   model->b=0;
+   model->supvec[0]=0;  /* element 0 reserved and empty for now */
+   model->alpha[0]=0;
+   model->lin_weights=NULL;
+   model->totwords=totwords;
+   model->totdoc=totdoc;
+   model->kernel_parm=(*kernel_parm);
+   model->sv_num=1;
+   model->loo_error=-1;
+   model->loo_recall=-1;
+   model->loo_precision=-1;
+   model->xa_error=-1;
+   model->xa_recall=-1;
+   model->xa_precision=-1;
+
+   r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+   r_delta_sq=r_delta*r_delta;
+
+   r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+   if(learn_parm->svm_c == 0.0) {  /* default value for C */
+     learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+     if(verbosity>=1)
+       printf("Setting default regularization parameter C=%.4f\n",
+              learn_parm->svm_c);
+   }
+
+   learn_parm->biased_hyperplane=0;  /* learn an unbiased hyperplane */
+
+   learn_parm->eps=0.0;  /* No margin, unless explicitly handcoded
+                            in the right-hand side in the training
+                            set. */
+
+   for(i=0;i<totdoc;i++) {  /* various inits */
+     docs[i]->docnum=i;
+     a[i]=0;
+     lin[i]=0;
+     c[i]=rhs[i];  /* set right-hand side */
+     unlabeled[i]=0;
+     inconsistent[i]=0;
+     learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+                             docs[i]->costfactor;
+     label[i]=1;
+   }
+   if(learn_parm->sharedslack)  /* if shared slacks are used, they must */
+     for(i=0;i<totdoc;i++)      /* be used on every constraint */
+       if(!docs[i]->slackid) {
+         perror("Error: Missing shared slacks definitions in some of the examples.");
+         exit(0);
+       }
+
+   /* compute starting state for initial alpha values */
+   if(alpha) {
+     if(verbosity>=1) {
+       printf("Computing starting state..."); fflush(stdout);
+     }
+     index = (long *)my_malloc(sizeof(long)*totdoc);
+     index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+     weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+     aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+     for(i=0;i<totdoc;i++) {  /* create full index and clip alphas */
+       index[i]=1;
+       alpha[i]=fabs(alpha[i]);
+       if(alpha[i]<0) alpha[i]=0;
+       if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+     }
+     if(kernel_parm->kernel_type != LINEAR) {
+       for(i=0;i<totdoc;i++)  /* fill kernel cache with unbounded SV */
+         if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
+            && (kernel_cache_space_available(kernel_cache)))
+           cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+       for(i=0;i<totdoc;i++)  /* fill rest of kernel cache with bounded SV */
+         if((alpha[i]==learn_parm->svm_cost[i])
+            && (kernel_cache_space_available(kernel_cache)))
+           cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+     }
+     (void)compute_index(index,totdoc,index2dnum);
+     update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
+                             totwords,kernel_parm,kernel_cache,lin,aicache,
+                             weights);
+     (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
+                               learn_parm,index2dnum,index2dnum,model);
+     for(i=0;i<totdoc;i++) {  /* copy initial alphas */
+       a[i]=alpha[i];
+     }
+     free(index);
+     free(index2dnum);
+     free(weights);
+     free(aicache);
+     if(verbosity>=1) {
+       printf("done.\n"); fflush(stdout);
+     }
+   }
+
+   /* removing inconsistent does not work for general optimization problem */
+   if(learn_parm->remove_inconsistent) {
+     learn_parm->remove_inconsistent = 0;
+     printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
+   }
+
+   /* caching makes no sense for linear kernel */
+   if(kernel_parm->kernel_type == LINEAR) {
+     kernel_cache = NULL;
+   }
+
+   if(verbosity==1) {
+     printf("Optimizing"); fflush(stdout);
+   }
+
+   /* train the svm */
+   if(learn_parm->sharedslack)
+     iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
+                                                    totwords,learn_parm,kernel_parm,
+                                                    kernel_cache,&shrink_state,model,
+                                                    a,lin,c,&timing_profile,
+                                                    &maxdiff);
+   else
+     iterations=optimize_to_convergence(docs,label,totdoc,
+                                        totwords,learn_parm,kernel_parm,
+                                        kernel_cache,&shrink_state,model,
+                                        inconsistent,unlabeled,
+                                        a,lin,c,&timing_profile,
+                                        &maxdiff,(long)-1,(long)1);
+
+   if(verbosity>=1) {
+     if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+     misclassified=0;
+     for(i=0;(i<totdoc);i++) {  /* get final statistic */
+       if((lin[i]-model->b)*(double)label[i] <= 0.0)
+         misclassified++;
+     }
+
+     printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
+
+     runtime_end=get_runtime();
+     if(verbosity>=2) {
+       printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+              ((float)runtime_end-(float)runtime_start)/100.0,
+              (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
+              (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
+     }
+     else {
+       printf("Runtime in cpu-seconds: %.2f\n",
+              (runtime_end-runtime_start)/100.0);
+     }
+   }
+   if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+     loss=0;
+     model_length=0;
+     for(i=0;i<totdoc;i++) {
+       if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
+         loss+=c[i]-(lin[i]-model->b)*(double)label[i];
+       model_length+=a[i]*label[i]*lin[i];
+     }
+     model_length=sqrt(model_length);
+     fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+   }
+
+   if(learn_parm->sharedslack) {
+     index = (long *)my_malloc(sizeof(long)*totdoc);
+     index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+     maxslackid=0;
+     for(i=0;i<totdoc;i++) {  /* create full index */
+       index[i]=1;
+       if(maxslackid<docs[i]->slackid)
+         maxslackid=docs[i]->slackid;
+     }
+     (void)compute_index(index,totdoc,index2dnum);
+     slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
+     alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
+     for(i=0;i<=maxslackid;i++) {  /* init shared slacks */
+       slack[i]=0;
+       alphaslack[i]=0;
+     }
+     compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
+                           slack,alphaslack);
+     loss=0;
+     model->at_upper_bound=0;
+     svsetnum=0;
+     for(i=0;i<=maxslackid;i++) {  /* sum up slacks and count SV sets */
+       loss+=slack[i];
+       if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
+         model->at_upper_bound++;
+       if(alphaslack[i] > learn_parm->epsilon_a)
+         svsetnum++;
+     }
+     free(index);
+     free(index2dnum);
+     free(slack);
+     free(alphaslack);
+   }
+
+   if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+     if(learn_parm->sharedslack) {
+       printf("Number of SV: %ld\n",
+              model->sv_num-1);
+       printf("Number of non-zero slack variables: %ld (out of %ld)\n",
+              model->at_upper_bound,svsetnum);
+       fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+     }
+     else {
+       upsupvecnum=0;
+       for(i=1;i<model->sv_num;i++) {
+         if(fabs(model->alpha[i]) >=
+            (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+             learn_parm->epsilon_a))
+           upsupvecnum++;
+       }
+       printf("Number of SV: %ld (including %ld at upper bound)\n",
+              model->sv_num-1,upsupvecnum);
+       fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+     }
+     example_length=estimate_sphere(model,kernel_parm);
+     fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+             length_of_longest_document_vector(docs,totdoc,kernel_parm));
+   }
+   if(verbosity>=1) {
+     printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+   }
+
+   if(alpha) {
+     for(i=0;i<totdoc;i++) {  /* copy final alphas */
+       alpha[i]=a[i];
+     }
+   }
+
+   if(learn_parm->alphafile[0])
+     write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+   shrink_state_cleanup(&shrink_state);
+   free(label);
+   free(unlabeled);
+   free(inconsistent);
+   free(c);
+   free(a);
+   free(lin);
+   free(learn_parm->svm_cost);
+ }
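
As a usage illustration, a minimal, hypothetical driver for this mode (the caller supplies the left-hand sides in docs[] and the parameter structs; rhs values of 1.0 request a margin of one on every constraint):

    /* Hypothetical sketch -- not part of the package. */
    double *rhs=(double *)my_malloc(sizeof(double)*totdoc);
    MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));
    long i;
    for(i=0;i<totdoc;i++)
      rhs[i]=1.0;                             /* x_i * w > 1 - \xi_i */
    svm_learn_optimization(docs,rhs,totdoc,totwords,learn_parm,
                           kernel_parm,NULL,model,NULL);  /* NULL cache: linear */
    free(rhs);
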
+
+
+ long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
+                              long int totwords, LEARN_PARM *learn_parm,
+                              KERNEL_PARM *kernel_parm,
+                              KERNEL_CACHE *kernel_cache,
+                              SHRINK_STATE *shrink_state, MODEL *model,
+                              long int *inconsistent, long int *unlabeled,
+                              double *a, double *lin, double *c,
+                              TIMING *timing_profile, double *maxdiff,
+                              long int heldout, long int retrain)
+      /* docs:         Training vectors (x-part) */
+      /* label:        Training labels/value (y-part, zero if test example for
+                       transduction) */
+      /* totdoc:       Number of examples in docs/label */
+      /* totwords:     Number of features (i.e. highest feature index) */
+      /* learn_parm:   Learning parameters */
+      /* kernel_parm:  Kernel parameters */
+      /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+                       NULL if linear. */
+      /* shrink_state: State of active variables */
+      /* model:        Returns learning result */
+      /* inconsistent: examples thrown out as inconsistent */
+      /* unlabeled:    test examples for transduction */
+      /* a:            alphas */
+      /* lin:          linear component of gradient */
+      /* c:            right hand side of inequalities (margin) */
+      /* maxdiff:      returns maximum violation of KT-conditions */
+      /* heldout:      marks held-out example for leave-one-out (or -1) */
+      /* retrain:      selects training mode (1=regular / 2=holdout) */
+ {
+   long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
+   long inconsistentnum,choosenum,already_chosen=0,iteration;
+   long misclassified,supvecnum=0,*active2dnum,inactivenum;
+   long *working2dnum,*selexam;
+   long activenum;
+   double criterion,eq;
+   double *a_old;
+   long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0;  /* timing */
+   long transductcycle;
+   long transduction;
+   double epsilon_crit_org;
+   double bestmaxdiff;
+   long bestmaxdiffiter,terminate;
+
+   double *selcrit;  /* buffer for sorting */
+   CFLOAT *aicache;  /* buffer to keep one row of hessian */
+   double *weights;  /* buffer for weight vector in linear case */
+   QP qp;            /* buffer for one quadratic program */
+
+   epsilon_crit_org=learn_parm->epsilon_crit;  /* save org */
+   if(kernel_parm->kernel_type == LINEAR) {
+     learn_parm->epsilon_crit=2.0;
+     kernel_cache=NULL;  /* caching makes no sense for linear kernel */
+   }
+   learn_parm->epsilon_shrink=2;
+   (*maxdiff)=1;
+
+   learn_parm->totwords=totwords;
+
+   chosen = (long *)my_malloc(sizeof(long)*totdoc);
+   last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
+   key = (long *)my_malloc(sizeof(long)*(totdoc+11));
+   selcrit = (double *)my_malloc(sizeof(double)*totdoc);
+   selexam = (long *)my_malloc(sizeof(long)*totdoc);
+   a_old = (double *)my_malloc(sizeof(double)*totdoc);
+   aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+   working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+   active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+   qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+   qp.opt_ce0 = (double *)my_malloc(sizeof(double));
+   qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
+                                  *learn_parm->svm_maxqpsize);
+   qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+   qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+   qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+   qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+   weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+
+   choosenum=0;
+   inconsistentnum=0;
+   transductcycle=0;
+   transduction=0;
+   if(!retrain) retrain=1;
+   iteration=1;
+   bestmaxdiffiter=1;
+   bestmaxdiff=999999999;
+   terminate=0;
+
+   if(kernel_cache) {
+     kernel_cache->time=iteration;  /* for lru cache */
+     kernel_cache_reset_lru(kernel_cache);
+   }
+
+   for(i=0;i<totdoc;i++) {  /* various inits */
+     chosen[i]=0;
+     a_old[i]=a[i];
+     last_suboptimal_at[i]=1;
+     if(inconsistent[i])
+       inconsistentnum++;
+     if(unlabeled[i]) {
+       transduction=1;
+     }
+   }
+   activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+   inactivenum=totdoc-activenum;
+   clear_index(working2dnum);
+
+   /* repeat this loop until we have convergence */
+   for(;retrain && (!terminate);iteration++) {
+
+     if(kernel_cache)
+       kernel_cache->time=iteration;  /* for lru cache */
+     if(verbosity>=2) {
+       printf(
+              "Iteration %ld: ",iteration); fflush(stdout);
+     }
+     else if(verbosity==1) {
+       printf("."); fflush(stdout);
+     }
+
+     if(verbosity>=2) t0=get_runtime();
+     if(verbosity>=3) {
+       printf("\nSelecting working set... "); fflush(stdout);
+     }
+
+     if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
+       learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+
+     i=0;
+     for(jj=0;(j=working2dnum[jj])>=0;jj++) {  /* clear working set */
+       if((chosen[j]>=(learn_parm->svm_maxqpsize/
+                       minl(learn_parm->svm_maxqpsize,
+                            learn_parm->svm_newvarsinqp)))
+          || (inconsistent[j])
+          || (j == heldout)) {
+         chosen[j]=0;
+         choosenum--;
+       }
+       else {
+         chosen[j]++;
+         working2dnum[i++]=j;
+       }
+     }
+     working2dnum[i]=-1;
+
+     if(retrain == 2) {
+       choosenum=0;
+       for(jj=0;(j=working2dnum[jj])>=0;jj++) {  /* fully clear working set */
+         chosen[j]=0;
+       }
+       clear_index(working2dnum);
+       for(i=0;i<totdoc;i++) {  /* set inconsistent examples to zero (-i 1) */
+         if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
+           chosen[i]=99999;
+           choosenum++;
+           a[i]=0;
+         }
+       }
+       if(learn_parm->biased_hyperplane) {
+         eq=0;
+         for(i=0;i<totdoc;i++) {  /* make sure we fulfill equality constraint */
+           eq+=a[i]*label[i];
+         }
+         for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
+           if((eq*label[i] > 0) && (a[i] > 0)) {
+             chosen[i]=88888;
+             choosenum++;
+             if((eq*label[i]) > a[i]) {
+               eq-=(a[i]*label[i]);
+               a[i]=0;
+             }
+             else {
+               a[i]-=(eq*label[i]);
+               eq=0;
+             }
+           }
+         }
+       }
+       compute_index(chosen,totdoc,working2dnum);
+     }
+     else {  /* select working set according to steepest gradient */
+       if(iteration % 101) {
+         already_chosen=0;
+         if((minl(learn_parm->svm_newvarsinqp,
+                  learn_parm->svm_maxqpsize-choosenum)>=4)
+            && (kernel_parm->kernel_type != LINEAR)) {
+           /* select part of the working set from cache */
+           already_chosen=select_next_qp_subproblem_grad(
+                              label,unlabeled,a,lin,c,totdoc,
+                              (long)(minl(learn_parm->svm_maxqpsize-choosenum,
+                                          learn_parm->svm_newvarsinqp)
+                                     /2),
+                              learn_parm,inconsistent,active2dnum,
+                              working2dnum,selcrit,selexam,kernel_cache,1,
+                              key,chosen);
+           choosenum+=already_chosen;
+         }
+         choosenum+=select_next_qp_subproblem_grad(
+                        label,unlabeled,a,lin,c,totdoc,
+                        minl(learn_parm->svm_maxqpsize-choosenum,
+                             learn_parm->svm_newvarsinqp-already_chosen),
+                        learn_parm,inconsistent,active2dnum,
+                        working2dnum,selcrit,selexam,kernel_cache,0,key,
+                        chosen);
+       }
+       else {  /* once in a while, select a somewhat random working set
+                  to get unlocked of infinite loops due to numerical
+                  inaccuracies in the core qp-solver */
+         choosenum+=select_next_qp_subproblem_rand(
+                        label,unlabeled,a,lin,c,totdoc,
+                        minl(learn_parm->svm_maxqpsize-choosenum,
+                             learn_parm->svm_newvarsinqp),
+                        learn_parm,inconsistent,active2dnum,
+                        working2dnum,selcrit,selexam,kernel_cache,key,
+                        chosen,iteration);
+       }
+     }
+
+     if(verbosity>=2) {
+       printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
+     }
+
+     if(verbosity>=2) t1=get_runtime();
+
+     if(kernel_cache)
+       cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
+                                  choosenum,kernel_parm);
+
+     if(verbosity>=2) t2=get_runtime();
+     if(retrain != 2) {
+       optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
+                    model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
+                    aicache,kernel_parm,&qp,&epsilon_crit_org);
+     }
+
+     if(verbosity>=2) t3=get_runtime();
+     update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
+                             totwords,kernel_parm,kernel_cache,lin,aicache,
+                             weights);
+
+     if(verbosity>=2) t4=get_runtime();
+     supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
+                                   learn_parm,working2dnum,active2dnum,model);
+
+     if(verbosity>=2) t5=get_runtime();
+
+     /* The following computation of the objective function works only */
+     /* relative to the active variables */
+     if(verbosity>=3) {
+       criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
+                                            active2dnum);
+       printf("Objective function (over active variables): %.16f\n",criterion);
+       fflush(stdout);
+     }
+
+     for(jj=0;(i=working2dnum[jj])>=0;jj++) {
+       a_old[i]=a[i];
+     }
+
+     if(retrain == 2) {  /* reset inconsistent unlabeled examples */
+       for(i=0;(i<totdoc);i++) {
+         if(inconsistent[i] && unlabeled[i]) {
+           inconsistent[i]=0;
+           label[i]=0;
+         }
+       }
+     }
+
+     retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
+                              maxdiff,epsilon_crit_org,&misclassified,
+                              inconsistent,active2dnum,last_suboptimal_at,
+                              iteration,kernel_parm);
+
+     if(verbosity>=2) {
+       t6=get_runtime();
+       timing_profile->time_select+=t1-t0;
+       timing_profile->time_kernel+=t2-t1;
+       timing_profile->time_opti+=t3-t2;
+       timing_profile->time_update+=t4-t3;
+       timing_profile->time_model+=t5-t4;
+       timing_profile->time_check+=t6-t5;
+     }
+
+     /* checking whether optimizer got stuck */
+     if((*maxdiff) < bestmaxdiff) {
+       bestmaxdiff=(*maxdiff);
+       bestmaxdiffiter=iteration;
+     }
+     if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
+       /* long time no progress? */
+       terminate=1;
+       retrain=0;
+       if(verbosity>=1)
+         printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
+     }
+
+     noshrink=0;
+     if((!retrain) && (inactivenum>0)
+        && ((!learn_parm->skip_final_opt_check)
+            || (kernel_parm->kernel_type == LINEAR))) {
+       if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
+          || (verbosity>=2)) {
+         if(verbosity==1) {
+           printf("\n");
+         }
+         printf(" Checking optimality of inactive variables...");
+         fflush(stdout);
+       }
+       t1=get_runtime();
+       reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
+                                    totwords,iteration,learn_parm,inconsistent,
+                                    docs,kernel_parm,kernel_cache,model,aicache,
+                                    weights,maxdiff);
+       /* Update to new active variables. */
+       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+       inactivenum=totdoc-activenum;
+       /* reset watchdog */
+       bestmaxdiff=(*maxdiff);
+       bestmaxdiffiter=iteration;
+       /* termination criterion */
+       noshrink=1;
+       retrain=0;
+       if((*maxdiff) > learn_parm->epsilon_crit)
+         retrain=1;
+       timing_profile->time_shrink+=get_runtime()-t1;
+       if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
+          || (verbosity>=2)) {
+         printf("done.\n"); fflush(stdout);
+         printf(" Number of inactive variables = %ld\n",inactivenum);
+       }
+     }
+
+     if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
+       learn_parm->epsilon_crit=(*maxdiff);
+     if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
+       learn_parm->epsilon_crit/=2.0;
+       retrain=1;
+       noshrink=1;
+     }
+     if(learn_parm->epsilon_crit<epsilon_crit_org)
+       learn_parm->epsilon_crit=epsilon_crit_org;
+
+     if(verbosity>=2) {
+       printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
+              supvecnum,model->at_upper_bound,(*maxdiff));
+       fflush(stdout);
+     }
+     if(verbosity>=3) {
+       printf("\n");
+     }
+
+     if((!retrain) && (transduction)) {
+       for(i=0;(i<totdoc);i++) {
+         shrink_state->active[i]=1;
+       }
+       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+       inactivenum=0;
+       if(verbosity==1) printf("done\n");
+       retrain=incorporate_unlabeled_examples(model,label,inconsistent,
+                                              unlabeled,a,lin,totdoc,
+                                              selcrit,selexam,key,
+                                              transductcycle,kernel_parm,
+                                              learn_parm);
+       epsilon_crit_org=learn_parm->epsilon_crit;
+       if(kernel_parm->kernel_type == LINEAR)
+         learn_parm->epsilon_crit=1;
+       transductcycle++;
+       /* reset watchdog */
+       bestmaxdiff=(*maxdiff);
+       bestmaxdiffiter=iteration;
+     }
+     else if(((iteration % 10) == 0) && (!noshrink)) {
+       activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
+                                active2dnum,last_suboptimal_at,iteration,totdoc,
+                                maxl((long)(activenum/10),
+                                     maxl((long)(totdoc/500),100)),
+                                a,inconsistent);
+       inactivenum=totdoc-activenum;
+       if((kernel_cache)
+          && (supvecnum>kernel_cache->max_elems)
+          && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
+         kernel_cache_shrink(kernel_cache,totdoc,
+                             minl((kernel_cache->activenum-activenum),
+                                  (kernel_cache->activenum-supvecnum)),
+                             shrink_state->active);
+       }
+     }
+
+     if((!retrain) && learn_parm->remove_inconsistent) {
+       if(verbosity>=1) {
+         printf(" Moving training errors to inconsistent examples...");
+         fflush(stdout);
+       }
+       if(learn_parm->remove_inconsistent == 1) {
+         retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
+                                       &inconsistentnum,inconsistent);
+       }
+       else if(learn_parm->remove_inconsistent == 2) {
+         retrain=identify_misclassified(lin,label,unlabeled,totdoc,
+                                        model,&inconsistentnum,inconsistent);
+       }
+       else if(learn_parm->remove_inconsistent == 3) {
+         retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
+                                            model,&inconsistentnum,inconsistent);
+       }
+       if(retrain) {
+         if(kernel_parm->kernel_type == LINEAR) {  /* reinit shrinking */
+           learn_parm->epsilon_crit=2.0;
+         }
+       }
+       if(verbosity>=1) {
+         printf("done.\n");
+         if(retrain) {
+           printf(" Now %ld inconsistent examples.\n",inconsistentnum);
+         }
+       }
+     }
+   }  /* end of loop */
+
+   free(chosen);
+   free(last_suboptimal_at);
+   free(key);
+   free(selcrit);
+   free(selexam);
+   free(a_old);
+   free(aicache);
+   free(working2dnum);
+   free(active2dnum);
+   free(qp.opt_ce);
+   free(qp.opt_ce0);
+   free(qp.opt_g);
+   free(qp.opt_g0);
+   free(qp.opt_xinit);
+   free(qp.opt_low);
+   free(qp.opt_up);
+   free(weights);
+
+   learn_parm->epsilon_crit=epsilon_crit_org;  /* restore org */
+   model->maxdiff=(*maxdiff);
+
+   return(iteration);
+ }
1613
+
1614
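+ /* Variant of the optimization loop above for problems in which several
+    examples share one slack variable (docs[i]->slackid): the alphas of
+    each slack set are accumulated in alphaslack[] and jointly bounded
+    by C, and working sets alternate between a mix across slack sets
+    ('i-step') and the single most violated slack set ('j-step'). */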
+ long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
1615
+ long int totdoc,
1616
+ long int totwords, LEARN_PARM *learn_parm,
1617
+ KERNEL_PARM *kernel_parm,
1618
+ KERNEL_CACHE *kernel_cache,
1619
+ SHRINK_STATE *shrink_state, MODEL *model,
1620
+ double *a, double *lin, double *c,
1621
+ TIMING *timing_profile, double *maxdiff)
1622
+ /* docs: Training vectors (x-part) */
1623
+ /* label: Training labels/value (y-part, zero if test example for
1624
+ transduction) */
1625
+ /* totdoc: Number of examples in docs/label */
1626
+ /* totwords: Number of features (i.e. highest feature index) */
1627
+ /* learn_parm: Learning parameters */
1628
+ /* kernel_parm: Kernel parameters */
1629
+ /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
1630
+ NULL if linear. */
1631
+ /* shrink_state: State of active variables */
1632
+ /* model: Returns learning result */
1633
+ /* a: alphas */
1634
+ /* lin: linear component of gradient */
1635
+ /* c: right hand side of inequalities (margin) */
1636
+ /* maxdiff: returns maximum violation of KT-conditions */
1637
+ {
1638
+ long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
1639
+ long *inconsistent,choosenum,already_chosen=0,iteration;
1640
+ long misclassified,supvecnum=0,*active2dnum,inactivenum;
1641
+ long *working2dnum,*selexam,*ignore;
1642
+ long activenum,retrain,maxslackid,slackset,jointstep;
1643
+ double criterion,eq_target;
1644
+ double *a_old,*alphaslack;
1645
+ long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
1646
+ double epsilon_crit_org,maxsharedviol;
1647
+ double bestmaxdiff;
1648
+ long bestmaxdiffiter,terminate;
1649
+
1650
+ double *selcrit; /* buffer for sorting */
1651
+ CFLOAT *aicache; /* buffer to keep one row of hessian */
1652
+ double *weights; /* buffer for weight vector in linear case */
1653
+ QP qp; /* buffer for one quadratic program */
1654
+ double *slack; /* vector of slack variables for optimization with
1655
+ shared slacks */
1656
+
1657
+ epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
1658
+ if(kernel_parm->kernel_type == LINEAR) {
1659
+ learn_parm->epsilon_crit=2.0;
1660
+ kernel_cache=NULL; /* caching makes no sense for linear kernel */
1661
+ }
1662
+ learn_parm->epsilon_shrink=2;
1663
+ (*maxdiff)=1;
1664
+
1665
+ learn_parm->totwords=totwords;
1666
+
1667
+ chosen = (long *)my_malloc(sizeof(long)*totdoc);
1668
+ unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
1669
+ inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
1670
+ ignore = (long *)my_malloc(sizeof(long)*totdoc);
1671
+ key = (long *)my_malloc(sizeof(long)*(totdoc+11));
1672
+ selcrit = (double *)my_malloc(sizeof(double)*totdoc);
1673
+ selexam = (long *)my_malloc(sizeof(long)*totdoc);
1674
+ a_old = (double *)my_malloc(sizeof(double)*totdoc);
1675
+ aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
1676
+ working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1677
+ active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1678
+ qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1679
+ qp.opt_ce0 = (double *)my_malloc(sizeof(double));
1680
+ qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
1681
+ *learn_parm->svm_maxqpsize);
1682
+ qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1683
+ qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1684
+ qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1685
+ qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1686
+ weights=(double *)my_malloc(sizeof(double)*(totwords+1));
1687
+ maxslackid=0;
1688
+ for(i=0;i<totdoc;i++) { /* determine size of slack array */
1689
+ if(maxslackid<docs[i]->slackid)
1690
+ maxslackid=docs[i]->slackid;
1691
+ }
1692
+ slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1693
+ alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1694
+ last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
1695
+ for(i=0;i<=maxslackid;i++) { /* init shared slacks */
1696
+ slack[i]=0;
1697
+ alphaslack[i]=0;
1698
+ last_suboptimal_at[i]=1;
1699
+ }
1700
+
1701
+ choosenum=0;
1702
+ retrain=1;
1703
+ iteration=1;
1704
+ bestmaxdiffiter=1;
1705
+ bestmaxdiff=999999999;
1706
+ terminate=0;
1707
+
1708
+ if(kernel_cache) {
1709
+ kernel_cache->time=iteration; /* for lru cache */
1710
+ kernel_cache_reset_lru(kernel_cache);
1711
+ }
1712
+
1713
+ for(i=0;i<totdoc;i++) { /* various inits */
1714
+ chosen[i]=0;
1715
+ unlabeled[i]=0;
1716
+ inconsistent[i]=0;
1717
+ ignore[i]=0;
1718
+ a_old[i]=a[i];
1719
+ }
1720
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1721
+ inactivenum=totdoc-activenum;
1722
+ clear_index(working2dnum);
1723
+
1724
+ /* call to init slack and alphaslack */
1725
+ compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
1726
+ slack,alphaslack);
1727
+
1728
+ /* repeat this loop until we have convergence */
1729
+ for(;retrain && (!terminate);iteration++) {
1730
+
1731
+ if(kernel_cache)
1732
+ kernel_cache->time=iteration; /* for lru cache */
1733
+ if(verbosity>=2) {
1734
+ printf(
1735
+ "Iteration %ld: ",iteration); fflush(stdout);
1736
+ }
1737
+ else if(verbosity==1) {
1738
+ printf("."); fflush(stdout);
1739
+ }
1740
+
1741
+ if(verbosity>=2) t0=get_runtime();
1742
+ if(verbosity>=3) {
1743
+ printf("\nSelecting working set... "); fflush(stdout);
1744
+ }
1745
+
1746
+ if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
1747
+ learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
1748
+
1749
+ /* select working set according to steepest gradient */
1750
+ jointstep=0;
1751
+ eq_target=0;
1752
+ if(iteration % 101) {
1753
+ slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
1754
+ learn_parm,active2dnum,&maxsharedviol);
1755
+ if((iteration % 2)
1756
+ || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
1757
+ /* do a step with examples from different slack sets */
1758
+ if(verbosity >= 2) {
1759
+ printf("(i-step)"); fflush(stdout);
1760
+ }
1761
+ i=0;
1762
+ for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
1763
+ if((chosen[j]>=(learn_parm->svm_maxqpsize/
1764
+ minl(learn_parm->svm_maxqpsize,
1765
+ learn_parm->svm_newvarsinqp)))) {
1766
+ chosen[j]=0;
1767
+ choosenum--;
1768
+ }
1769
+ else {
1770
+ chosen[j]++;
1771
+ working2dnum[i++]=j;
1772
+ }
1773
+ }
1774
+ working2dnum[i]=-1;
1775
+
1776
+ already_chosen=0;
1777
+ if((minl(learn_parm->svm_newvarsinqp,
1778
+ learn_parm->svm_maxqpsize-choosenum)>=4)
1779
+ && (kernel_parm->kernel_type != LINEAR)) {
1780
+ /* select part of the working set from cache */
1781
+ already_chosen=select_next_qp_subproblem_grad(
1782
+ label,unlabeled,a,lin,c,totdoc,
1783
+ (long)(minl(learn_parm->svm_maxqpsize-choosenum,
1784
+ learn_parm->svm_newvarsinqp)
1785
+ /2),
1786
+ learn_parm,inconsistent,active2dnum,
1787
+ working2dnum,selcrit,selexam,kernel_cache,
1788
+ (long)1,key,chosen);
1789
+ choosenum+=already_chosen;
1790
+ }
1791
+ choosenum+=select_next_qp_subproblem_grad(
1792
+ label,unlabeled,a,lin,c,totdoc,
1793
+ minl(learn_parm->svm_maxqpsize-choosenum,
1794
+ learn_parm->svm_newvarsinqp-already_chosen),
1795
+ learn_parm,inconsistent,active2dnum,
1796
+ working2dnum,selcrit,selexam,kernel_cache,
1797
+ (long)0,key,chosen);
1798
+ }
1799
+ else { /* do a step with all examples from same slack set */
1800
+ if(verbosity >= 2) {
1801
+ printf("(j-step on %ld)",slackset); fflush(stdout);
1802
+ }
1803
+ jointstep=1;
1804
+ for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
1805
+ chosen[j]=0;
1806
+ }
1807
+ working2dnum[0]=-1;
1808
+ eq_target=alphaslack[slackset];
1809
+ for(j=0;j<totdoc;j++) { /* mask all but slackset */
1810
+ /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
1811
+ if(docs[j]->slackid != slackset)
1812
+ ignore[j]=1;
1813
+ else {
1814
+ ignore[j]=0;
1815
+ learn_parm->svm_cost[j]=learn_parm->svm_c;
1816
+ /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
1817
+ }
1818
+ }
1819
+ learn_parm->biased_hyperplane=1;
1820
+ choosenum=select_next_qp_subproblem_grad(
1821
+ label,unlabeled,a,lin,c,totdoc,
1822
+ learn_parm->svm_maxqpsize,
1823
+ learn_parm,ignore,active2dnum,
1824
+ working2dnum,selcrit,selexam,kernel_cache,
1825
+ (long)0,key,chosen);
1826
+ learn_parm->biased_hyperplane=0;
1827
+ }
1828
+ }
1829
+ else { /* once in a while, select a somewhat random working set
1830
+ to break out of infinite loops caused by numerical
1831
+ inaccuracies in the core qp-solver */
1832
+ choosenum+=select_next_qp_subproblem_rand(
1833
+ label,unlabeled,a,lin,c,totdoc,
1834
+ minl(learn_parm->svm_maxqpsize-choosenum,
1835
+ learn_parm->svm_newvarsinqp),
1836
+ learn_parm,inconsistent,active2dnum,
1837
+ working2dnum,selcrit,selexam,kernel_cache,key,
1838
+ chosen,iteration);
1839
+ }
1840
+
1841
+ if(verbosity>=2) {
1842
+ printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
1843
+ }
1844
+
1845
+ if(verbosity>=2) t1=get_runtime();
1846
+
1847
+ if(kernel_cache)
1848
+ cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
1849
+ choosenum,kernel_parm);
1850
+
1851
+ if(verbosity>=2) t2=get_runtime();
1852
+ if(jointstep) learn_parm->biased_hyperplane=1;
1853
+ optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
1854
+ model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
1855
+ aicache,kernel_parm,&qp,&epsilon_crit_org);
1856
+ learn_parm->biased_hyperplane=0;
1857
+
1858
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */
1859
+ alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
1860
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
1861
+ constraints */
1862
+ if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
1863
+ if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
1864
+ alphaslack[docs[i]->slackid]-=a[i];
1865
+ a[i]=0;
1866
+ }
1867
+ else {
1868
+ a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
1869
+ alphaslack[docs[i]->slackid]=learn_parm->svm_c;
1870
+ }
1871
+ }
1872
+ }
1873
+ for(jj=0;(i=active2dnum[jj])>=0;jj++)
1874
+ learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
1875
+ -alphaslack[docs[i]->slackid]);
1876
+
1877
+ if(verbosity>=2) t3=get_runtime();
1878
+ update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
1879
+ totwords,kernel_parm,kernel_cache,lin,aicache,
1880
+ weights);
1881
+ compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
1882
+ slack,alphaslack);
1883
+
1884
+ if(verbosity>=2) t4=get_runtime();
1885
+ supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
1886
+ learn_parm,working2dnum,active2dnum,model);
1887
+
1888
+ if(verbosity>=2) t5=get_runtime();
1889
+
1890
+ /* The following computation of the objective function works only */
1891
+ /* relative to the active variables */
1892
+ if(verbosity>=3) {
1893
+ criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
1894
+ active2dnum);
1895
+ printf("Objective function (over active variables): %.16f\n",criterion);
1896
+ fflush(stdout);
1897
+ }
1898
+
1899
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) {
1900
+ a_old[i]=a[i];
1901
+ }
1902
+
1903
+ retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
1904
+ slack,alphaslack,totdoc,learn_parm,
1905
+ maxdiff,epsilon_crit_org,&misclassified,
1906
+ active2dnum,last_suboptimal_at,
1907
+ iteration,kernel_parm);
1908
+
1909
+ if(verbosity>=2) {
1910
+ t6=get_runtime();
1911
+ timing_profile->time_select+=t1-t0;
1912
+ timing_profile->time_kernel+=t2-t1;
1913
+ timing_profile->time_opti+=t3-t2;
1914
+ timing_profile->time_update+=t4-t3;
1915
+ timing_profile->time_model+=t5-t4;
1916
+ timing_profile->time_check+=t6-t5;
1917
+ }
1918
+
1919
+ /* checking whether optimizer got stuck */
1920
+ if((*maxdiff) < bestmaxdiff) {
1921
+ bestmaxdiff=(*maxdiff);
1922
+ bestmaxdiffiter=iteration;
1923
+ }
1924
+ if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
1925
+ /* no progress for a long time? */
1926
+ terminate=1;
1927
+ retrain=0;
1928
+ if(verbosity>=1)
1929
+ printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
1930
+ }
1931
+
1932
+ noshrink=0;
1933
+
1934
+ if((!retrain) && (inactivenum>0)
1935
+ && ((!learn_parm->skip_final_opt_check)
1936
+ || (kernel_parm->kernel_type == LINEAR))) {
1937
+ if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1938
+ || (verbosity>=2)) {
1939
+ if(verbosity==1) {
1940
+ printf("\n");
1941
+ }
1942
+ printf(" Checking optimality of inactive variables...");
1943
+ fflush(stdout);
1944
+ }
1945
+ t1=get_runtime();
1946
+ reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
1947
+ totwords,iteration,learn_parm,inconsistent,
1948
+ docs,kernel_parm,kernel_cache,model,aicache,
1949
+ weights,maxdiff);
1950
+ /* Update to new active variables. */
1951
+ activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1952
+ inactivenum=totdoc-activenum;
1953
+ /* check optimality, since check in reactivate does not work for
1954
+ sharedslacks */
1955
+ retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
1956
+ slack,alphaslack,totdoc,learn_parm,
1957
+ maxdiff,epsilon_crit_org,&misclassified,
1958
+ active2dnum,last_suboptimal_at,
1959
+ iteration,kernel_parm);
1960
+
1961
+ /* reset watchdog */
1962
+ bestmaxdiff=(*maxdiff);
1963
+ bestmaxdiffiter=iteration;
1964
+ /* termination criterion */
1965
+ noshrink=1;
1966
+ retrain=0;
1967
+ if((*maxdiff) > learn_parm->epsilon_crit)
1968
+ retrain=1;
1969
+ timing_profile->time_shrink+=get_runtime()-t1;
1970
+ if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1971
+ || (verbosity>=2)) {
1972
+ printf("done.\n"); fflush(stdout);
1973
+ printf(" Number of inactive variables = %ld\n",inactivenum);
1974
+ }
1975
+ }
1976
+
1977
+ if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
1978
+ learn_parm->epsilon_crit=(*maxdiff);
1979
+ if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
1980
+ learn_parm->epsilon_crit/=2.0;
1981
+ retrain=1;
1982
+ noshrink=1;
1983
+ }
1984
+ if(learn_parm->epsilon_crit<epsilon_crit_org)
1985
+ learn_parm->epsilon_crit=epsilon_crit_org;
1986
+
1987
+ if(verbosity>=2) {
1988
+ printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
1989
+ supvecnum,model->at_upper_bound,(*maxdiff));
1990
+ fflush(stdout);
1991
+ }
1992
+ if(verbosity>=3) {
1993
+ printf("\n");
1994
+ }
1995
+
1996
+ if(((iteration % 10) == 0) && (!noshrink)) {
1997
+ activenum=shrink_problem(docs,learn_parm,shrink_state,
1998
+ kernel_parm,active2dnum,
1999
+ last_suboptimal_at,iteration,totdoc,
2000
+ maxl((long)(activenum/10),
2001
+ maxl((long)(totdoc/500),100)),
2002
+ a,inconsistent);
2003
+ inactivenum=totdoc-activenum;
2004
+ if((kernel_cache)
2005
+ && (supvecnum>kernel_cache->max_elems)
2006
+ && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
2007
+ kernel_cache_shrink(kernel_cache,totdoc,
2008
+ minl((kernel_cache->activenum-activenum),
2009
+ (kernel_cache->activenum-supvecnum)),
2010
+ shrink_state->active);
2011
+ }
2012
+ }
2013
+
2014
+ } /* end of loop */
2015
+
2016
+
2017
+ free(alphaslack);
2018
+ free(slack);
2019
+ free(chosen);
2020
+ free(unlabeled);
2021
+ free(inconsistent);
2022
+ free(ignore);
2023
+ free(last_suboptimal_at);
2024
+ free(key);
2025
+ free(selcrit);
2026
+ free(selexam);
2027
+ free(a_old);
2028
+ free(aicache);
2029
+ free(working2dnum);
2030
+ free(active2dnum);
2031
+ free(qp.opt_ce);
2032
+ free(qp.opt_ce0);
2033
+ free(qp.opt_g);
2034
+ free(qp.opt_g0);
2035
+ free(qp.opt_xinit);
2036
+ free(qp.opt_low);
2037
+ free(qp.opt_up);
2038
+ free(weights);
2039
+
2040
+ learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
2041
+ model->maxdiff=(*maxdiff);
2042
+
2043
+ return(iteration);
2044
+ }
2045
+
2046
+
2047
+ double compute_objective_function(double *a, double *lin, double *c,
2048
+ double eps, long int *label,
2049
+ long int *active2dnum)
2050
+ /* Return value of objective function. */
2051
+ /* Works only relative to the active variables! */
2052
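+ /* In terms of the dual this computes (over the active i only):
+      W(a) = sum_i (eps - y_i*c_i)*a_i + 0.5*sum_i a_i*y_i*lin_i
+    where lin_i = sum_j a_j*y_j*K(x_i,x_j) is maintained incrementally
+    by update_linear_component(). */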
+ {
2053
+ long i,ii;
2054
+ double criterion;
2055
+ /* calculate value of objective function */
2056
+ criterion=0;
2057
+ for(ii=0;active2dnum[ii]>=0;ii++) {
2058
+ i=active2dnum[ii];
2059
+ criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
2060
+ }
2061
+ return(criterion);
2062
+ }
2063
+
2064
+ void clear_index(long int *index)
2065
+ /* initializes and empties index */
2066
+ {
2067
+ index[0]=-1;
2068
+ }
2069
+
2070
+ void add_to_index(long int *index, long int elem)
2071
+ /* appends elem to the end of index */
2072
+ {
2073
+ register long i;
2074
+ for(i=0;index[i] != -1;i++);
2075
+ index[i]=elem;
2076
+ index[i+1]=-1;
2077
+ }
2078
+
2079
+ long compute_index(long int *binfeature, long int range, long int *index)
2080
+ /* create an inverted index of binfeature */
2081
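+ /* Example: binfeature=[0,1,1,0,1] with range=5 yields
+    index=[1,2,4,-1,...] and a return value of 3; four -1 terminators
+    are written, apparently as padding for callers that read ahead. */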
+ {
2082
+ register long i,ii;
2083
+
2084
+ ii=0;
2085
+ for(i=0;i<range;i++) {
2086
+ if(binfeature[i]) {
2087
+ index[ii]=i;
2088
+ ii++;
2089
+ }
2090
+ }
2091
+ for(i=0;i<4;i++) {
2092
+ index[ii+i]=-1;
2093
+ }
2094
+ return(ii);
2095
+ }
2096
+
2097
+
2098
+ void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
2099
+ long int *exclude_from_eq_const, double eq_target,
2100
+ long int *chosen, long int *active2dnum, MODEL *model,
2101
+ long int totdoc, long int *working2dnum, long int varnum,
2102
+ double *a, double *lin, double *c, LEARN_PARM *learn_parm,
2103
+ CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
2104
+ double *epsilon_crit_target)
2105
+ /* Do optimization on the working set. */
2106
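+ /* Assembles the qp over the current working set via
+    compute_matrices_for_optimization(), passes it to the external
+    optimize_qp() solver, and copies the resulting alphas back into a[]. */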
+ {
2107
+ long i;
2108
+ double *a_v;
2109
+
2110
+ compute_matrices_for_optimization(docs,label,unlabeled,
2111
+ exclude_from_eq_const,eq_target,chosen,
2112
+ active2dnum,working2dnum,model,a,lin,c,
2113
+ varnum,totdoc,learn_parm,aicache,
2114
+ kernel_parm,qp);
2115
+
2116
+ if(verbosity>=3) {
2117
+ printf("Running optimizer..."); fflush(stdout);
2118
+ }
2119
+ /* call the qp-subsolver */
2120
+ a_v=optimize_qp(qp,epsilon_crit_target,
2121
+ learn_parm->svm_maxqpsize,
2122
+ &(model->b), /* in case the optimizer gives us */
2123
+ /* the threshold for free. otherwise */
2124
+ /* b is calculated in calculate_svm_model(). */
2125
+ learn_parm);
2126
+ if(verbosity>=3) {
2127
+ printf("done\n");
2128
+ }
2129
+
2130
+ for(i=0;i<varnum;i++) {
2131
+ a[working2dnum[i]]=a_v[i];
2132
+ /*
2133
+ if(a_v[i]<=(0+learn_parm->epsilon_a)) {
2134
+ a[working2dnum[i]]=0;
2135
+ }
2136
+ else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
2137
+ a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
2138
+ }
2139
+ */
2140
+ }
2141
+ }
2142
+
2143
+ void compute_matrices_for_optimization(DOC **docs, long int *label,
2144
+ long int *unlabeled, long *exclude_from_eq_const, double eq_target,
2145
+ long int *chosen, long int *active2dnum,
2146
+ long int *key, MODEL *model, double *a, double *lin, double *c,
2147
+ long int varnum, long int totdoc, LEARN_PARM *learn_parm,
2148
+ CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
2149
+ {
2150
+ register long ki,kj,i,j;
2151
+ register double kernel_temp;
2152
+
2153
+ if(verbosity>=3) {
2154
+ fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
2155
+ fflush(stdout);
2156
+ }
2157
+
2158
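+ /* The pieces assembled below form the qp-subproblem in the variables
+    of the working set: opt_g0/opt_g hold the linear and quadratic parts
+    of the objective, opt_ce/opt_ce0 the equality constraint induced by
+    the fixed variables (skipped when the hyperplane is unbiased,
+    opt_m==0), and opt_low/opt_up the box constraints
+    0 <= a_i <= svm_cost[i]. */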
+ qp->opt_n=varnum;
2159
+ qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
2160
+ for(j=1;j<model->sv_num;j++) { /* start at 1 */
2161
+ if((!chosen[(model->supvec[j])->docnum])
2162
+ && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
2163
+ qp->opt_ce0[0]+=model->alpha[j];
2164
+ }
2165
+ }
2166
+ if(learn_parm->biased_hyperplane)
2167
+ qp->opt_m=1;
2168
+ else
2169
+ qp->opt_m=0; /* eq-constraint will be ignored */
2170
+
2171
+ /* init linear part of objective function */
2172
+ for(i=0;i<varnum;i++) {
2173
+ qp->opt_g0[i]=lin[key[i]];
2174
+ }
2175
+
2176
+ for(i=0;i<varnum;i++) {
2177
+ ki=key[i];
2178
+
2179
+ /* Compute the matrix for equality constraints */
2180
+ qp->opt_ce[i]=label[ki];
2181
+ qp->opt_low[i]=0;
2182
+ qp->opt_up[i]=learn_parm->svm_cost[ki];
2183
+
2184
+ kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
2185
+ /* compute linear part of objective function */
2186
+ qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
2187
+ /* compute quadratic part of objective function */
2188
+ qp->opt_g[varnum*i+i]=kernel_temp;
2189
+ for(j=i+1;j<varnum;j++) {
2190
+ kj=key[j];
2191
+ kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
2192
+ /* compute linear part of objective function */
2193
+ qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
2194
+ qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
2195
+ /* compute quadratic part of objective function */
2196
+ qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
2197
+ qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
2198
+ }
2199
+
2200
+ if(verbosity>=3) {
2201
+ if(i % 20 == 0) {
2202
+ fprintf(stdout,"%ld..",i); fflush(stdout);
2203
+ }
2204
+ }
2205
+ }
2206
+
2207
+ for(i=0;i<varnum;i++) {
2208
+ /* ensure the optimizer starts at a feasible point */
2209
+ qp->opt_xinit[i]=a[key[i]];
2210
+ /* set linear part of objective function */
2211
+ qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
2212
+ }
2213
+
2214
+ if(verbosity>=3) {
2215
+ fprintf(stdout,"done\n");
2216
+ }
2217
+ }
2218
+
2219
+ long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
2220
+ double *lin, double *a, double *a_old, double *c,
2221
+ LEARN_PARM *learn_parm, long int *working2dnum,
2222
+ long int *active2dnum, MODEL *model)
2223
+ /* Compute decision function based on current values */
2224
+ /* of alpha. */
2225
+ {
2226
+ long i,ii,pos,b_calculated=0,first_low,first_high;
2227
+ double ex_c,b_temp,b_low,b_high;
2228
+
2229
+ if(verbosity>=3) {
2230
+ printf("Calculating model..."); fflush(stdout);
2231
+ }
2232
+
2233
+ if(!learn_parm->biased_hyperplane) {
2234
+ model->b=0;
2235
+ b_calculated=1;
2236
+ }
2237
+
2238
+ for(ii=0;(i=working2dnum[ii])>=0;ii++) {
2239
+ if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
2240
+ pos=model->index[i];
2241
+ model->index[i]=-1;
2242
+ (model->sv_num)--;
2243
+ model->supvec[pos]=model->supvec[model->sv_num];
2244
+ model->alpha[pos]=model->alpha[model->sv_num];
2245
+ model->index[(model->supvec[pos])->docnum]=pos;
2246
+ }
2247
+ else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
2248
+ model->supvec[model->sv_num]=docs[i];
2249
+ model->alpha[model->sv_num]=a[i]*(double)label[i];
2250
+ model->index[i]=model->sv_num;
2251
+ (model->sv_num)++;
2252
+ }
2253
+ else if(a_old[i]==a[i]) { /* nothing to do */
2254
+ }
2255
+ else { /* just update alpha */
2256
+ model->alpha[model->index[i]]=a[i]*(double)label[i];
2257
+ }
2258
+
2259
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2260
+ if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
2261
+ (model->at_upper_bound)--;
2262
+ }
2263
+ else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
2264
+ (model->at_upper_bound)++;
2265
+ }
2266
+
2267
+ if((!b_calculated)
2268
+ && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
2269
+ model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
2270
+ /* model->b=(-(double)label[i]+lin[i]); */
2271
+ b_calculated=1;
2272
+ }
2273
+ }
2274
+
2275
+ /* Every alpha in the working set is at a bound, so b could not be
2276
+ calculated in the usual way. The following handles this special
2277
+ case. */
2278
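+ /* With every alpha at a bound, b is only determined up to an interval:
+    each example contributes a one-sided bound on b via its KT-condition,
+    collected in b_low/b_high below, and b is placed mid-interval. */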
+ if(learn_parm->biased_hyperplane
2279
+ && (!b_calculated)
2280
+ && (model->sv_num-1 == model->at_upper_bound)) {
2281
+ first_low=1;
2282
+ first_high=1;
2283
+ b_low=0;
2284
+ b_high=0;
2285
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2286
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2287
+ if(a[i]<ex_c) {
2288
+ if(label[i]>0) {
2289
+ b_temp=-(learn_parm->eps-c[i]+lin[i]);
2290
+ if((b_temp>b_low) || (first_low)) {
2291
+ b_low=b_temp;
2292
+ first_low=0;
2293
+ }
2294
+ }
2295
+ else {
2296
+ b_temp=-(-learn_parm->eps-c[i]+lin[i]);
2297
+ if((b_temp<b_high) || (first_high)) {
2298
+ b_high=b_temp;
2299
+ first_high=0;
2300
+ }
2301
+ }
2302
+ }
2303
+ else {
2304
+ if(label[i]<0) {
2305
+ b_temp=-(-learn_parm->eps-c[i]+lin[i]);
2306
+ if((b_temp>b_low) || (first_low)) {
2307
+ b_low=b_temp;
2308
+ first_low=0;
2309
+ }
2310
+ }
2311
+ else {
2312
+ b_temp=-(learn_parm->eps-c[i]+lin[i]);
2313
+ if((b_temp<b_high) || (first_high)) {
2314
+ b_high=b_temp;
2315
+ first_high=0;
2316
+ }
2317
+ }
2318
+ }
2319
+ }
2320
+ if(first_high) {
2321
+ model->b=-b_low;
2322
+ }
2323
+ else if(first_low) {
2324
+ model->b=-b_high;
2325
+ }
2326
+ else {
2327
+ model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
2328
+ /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
2329
+ }
2330
+ }
2331
+
2332
+ if(verbosity>=3) {
2333
+ printf("done\n"); fflush(stdout);
2334
+ }
2335
+
2336
+ return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
2337
+ }
2338
+
2339
+ long check_optimality(MODEL *model, long int *label, long int *unlabeled,
2340
+ double *a, double *lin, double *c, long int totdoc,
2341
+ LEARN_PARM *learn_parm, double *maxdiff,
2342
+ double epsilon_crit_org, long int *misclassified,
2343
+ long int *inconsistent, long int *active2dnum,
2344
+ long int *last_suboptimal_at,
2345
+ long int iteration, KERNEL_PARM *kernel_parm)
2346
+ /* Check KT-conditions */
2347
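+ /* With dist = y_i*(lin_i - b) and target_i = -(eps - y_i*c_i), the
+    KT-conditions checked below are:
+      a_i = 0        =>  dist >= target_i
+      0 < a_i < C_i  =>  dist  = target_i      (C_i = svm_cost[i])
+      a_i = C_i      =>  dist <= target_i
+    maxdiff records the largest violation over the active examples. */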
+ {
2348
+ long i,ii,retrain;
2349
+ double dist,ex_c,target;
2350
+
2351
+ if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
2352
+ learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
2353
+ }
2354
+ else { /* be conservative */
2355
+ learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
2356
+ }
2357
+ retrain=0;
2358
+ (*maxdiff)=0;
2359
+ (*misclassified)=0;
2360
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2361
+ if((!inconsistent[i]) && label[i]) {
2362
+ dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
2363
+ hyperplane*/
2364
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
2365
+ ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2366
+ if(dist <= 0) {
2367
+ (*misclassified)++; /* does not work due to deactivation of var */
2368
+ }
2369
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2370
+ if((dist-target)>(*maxdiff)) /* largest violation */
2371
+ (*maxdiff)=dist-target;
2372
+ }
2373
+ else if((a[i]<ex_c) && (dist < target)) {
2374
+ if((target-dist)>(*maxdiff)) /* largest violation */
2375
+ (*maxdiff)=target-dist;
2376
+ }
2377
+ /* Count how long a variable was at lower/upper bound (and optimal).*/
2378
+ /* Variables which were at the bound and optimal for a long */
2379
+ /* time are unlikely to become support vectors. In case our */
2380
+ /* cache is filled up, those variables are excluded to save */
2381
+ /* kernel evaluations. (See chapter 'Shrinking').*/
2382
+ if((a[i]>(learn_parm->epsilon_a))
2383
+ && (a[i]<ex_c)) {
2384
+ last_suboptimal_at[i]=iteration; /* not at bound */
2385
+ }
2386
+ else if((a[i]<=(learn_parm->epsilon_a))
2387
+ && (dist < (target+learn_parm->epsilon_shrink))) {
2388
+ last_suboptimal_at[i]=iteration; /* not likely optimal */
2389
+ }
2390
+ else if((a[i]>=ex_c)
2391
+ && (dist > (target-learn_parm->epsilon_shrink))) {
2392
+ last_suboptimal_at[i]=iteration; /* not likely optimal */
2393
+ }
2394
+ }
2395
+ }
2396
+ /* termination criterion */
2397
+ if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
2398
+ retrain=1;
2399
+ }
2400
+ return(retrain);
2401
+ }
2402
+
2403
+ long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
2404
+ double *a, double *lin, double *c, double *slack,
2405
+ double *alphaslack,
2406
+ long int totdoc,
2407
+ LEARN_PARM *learn_parm, double *maxdiff,
2408
+ double epsilon_crit_org, long int *misclassified,
2409
+ long int *active2dnum,
2410
+ long int *last_suboptimal_at,
2411
+ long int iteration, KERNEL_PARM *kernel_parm)
2412
+ /* Check KT-conditions */
2413
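+ /* Two violation types are tracked below: a 'single' violation, where
+    an example with a_i>0 exceeds its target even after adding the
+    shared slack, and a 'joint' violation, where a slack set keeps a
+    positive slack although its summed alpha is still below C. */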
+ {
2414
+ long i,ii,retrain;
2415
+ double dist,ex_c=0,target;
2416
+
2417
+ if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
2418
+ learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
2419
+ }
2420
+ else { /* be conservative */
2421
+ learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
2422
+ }
2423
+
2424
+ retrain=0;
2425
+ (*maxdiff)=0;
2426
+ (*misclassified)=0;
2427
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2428
+ /* 'distance' from hyperplane*/
2429
+ dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
2430
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
2431
+ ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
2432
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2433
+ if((dist-target)>(*maxdiff)) { /* largest violation */
2434
+ (*maxdiff)=dist-target;
2435
+ if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
2436
+ if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
2437
+ }
2438
+ }
2439
+ if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
2440
+ if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
2441
+ (*maxdiff)=slack[docs[i]->slackid];
2442
+ if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
2443
+ if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
2444
+ }
2445
+ }
2446
+ /* Count how long a variable was at lower/upper bound (and optimal).*/
2447
+ /* Variables which were at the bound and optimal for a long */
2448
+ /* time are unlikely to become support vectors. In case our */
2449
+ /* cache is filled up, those variables are excluded to save */
2450
+ /* kernel evaluations. (See chapter 'Shrinking').*/
2451
+ if((a[i]>(learn_parm->epsilon_a))
2452
+ && (a[i]<ex_c)) {
2453
+ last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */
2454
+ }
2455
+ else if((a[i]<=(learn_parm->epsilon_a))
2456
+ && (dist < (target+learn_parm->epsilon_shrink))) {
2457
+ last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
2458
+ }
2459
+ else if((a[i]>=ex_c)
2460
+ && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) {
2461
+ last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
2462
+ }
2463
+ }
2464
+ /* termination criterion */
2465
+ if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
2466
+ retrain=1;
2467
+ }
2468
+ return(retrain);
2469
+ }
2470
+
2471
+ void compute_shared_slacks(DOC **docs, long int *label,
2472
+ double *a, double *lin,
2473
+ double *c, long int *active2dnum,
2474
+ LEARN_PARM *learn_parm,
2475
+ double *slack, double *alphaslack)
2476
+ /* compute the value of shared slacks and the joint alphas */
2477
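+ /* For each slack set s, over the active examples:
+      slack[s]      = max(0, max_{i: slackid(i)=s} (target_i - y_i*lin_i))
+      alphaslack[s] = sum_{i: slackid(i)=s} a[i]
+    with target_i = -(eps - y_i*c_i). */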
+ {
2478
+ long jj,i;
2479
+ double dist,target;
2480
+
2481
+ for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
2482
+ slack[docs[i]->slackid]=0.0;
2483
+ alphaslack[docs[i]->slackid]=0.0;
2484
+ }
2485
+ for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
2486
+ dist=(lin[i])*(double)label[i];
2487
+ target=-(learn_parm->eps-(double)label[i]*c[i]);
2488
+ if((target-dist) > slack[docs[i]->slackid])
2489
+ slack[docs[i]->slackid]=target-dist;
2490
+ alphaslack[docs[i]->slackid]+=a[i];
2491
+ }
2492
+ }
2493
+
2494
+
2495
+ long identify_inconsistent(double *a, long int *label,
2496
+ long int *unlabeled, long int totdoc,
2497
+ LEARN_PARM *learn_parm,
2498
+ long int *inconsistentnum, long int *inconsistent)
2499
+ {
2500
+ long i,retrain;
2501
+
2502
+ /* Throw out examples with multipliers at upper bound. This */
2503
+ /* corresponds to the -i 1 option. */
2504
+ /* ATTENTION: this is just a heuristic for finding a nearly */
2505
+ /* minimal set of examples to exclude in order to */
2506
+ /* make the problem separable with the desired margin */
2507
+ retrain=0;
2508
+ for(i=0;i<totdoc;i++) {
2509
+ if((!inconsistent[i]) && (!unlabeled[i])
2510
+ && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
2511
+ (*inconsistentnum)++;
2512
+ inconsistent[i]=1; /* never choose again */
2513
+ retrain=2; /* start over */
2514
+ if(verbosity>=3) {
2515
+ printf("inconsistent(%ld)..",i); fflush(stdout);
2516
+ }
2517
+ }
2518
+ }
2519
+ return(retrain);
2520
+ }
2521
+
2522
+ long identify_misclassified(double *lin, long int *label,
2523
+ long int *unlabeled, long int totdoc,
2524
+ MODEL *model, long int *inconsistentnum,
2525
+ long int *inconsistent)
2526
+ {
2527
+ long i,retrain;
2528
+ double dist;
2529
+
2530
+ /* Throw out misclassified examples. This */
2531
+ /* corresponds to the -i 2 option. */
2532
+ /* ATTENTION: this is just a heuristic for finding a nearly */
2533
+ /* minimal set of examples to exclude in order to */
2534
+ /* make the problem separable with the desired margin */
2535
+ retrain=0;
2536
+ for(i=0;i<totdoc;i++) {
2537
+ dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
2538
+ if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
2539
+ (*inconsistentnum)++;
2540
+ inconsistent[i]=1; /* never choose again */
2541
+ retrain=2; /* start over */
2542
+ if(verbosity>=3) {
2543
+ printf("inconsistent(%ld)..",i); fflush(stdout);
2544
+ }
2545
+ }
2546
+ }
2547
+ return(retrain);
2548
+ }
2549
+
2550
+ long identify_one_misclassified(double *lin, long int *label,
2551
+ long int *unlabeled,
2552
+ long int totdoc, MODEL *model,
2553
+ long int *inconsistentnum,
2554
+ long int *inconsistent)
2555
+ {
2556
+ long i,retrain,maxex=-1;
2557
+ double dist,maxdist=0;
2558
+
2559
+ /* Throw out the 'most misclassified' example. This */
2560
+ /* corresponds to the -i 3 option. */
2561
+ /* ATTENTION: this is just a heuristic for finding a nearly */
2562
+ /* minimal set of examples to exclude in order to */
2563
+ /* make the problem separable with the desired margin */
2564
+ retrain=0;
2565
+ for(i=0;i<totdoc;i++) {
2566
+ if((!inconsistent[i]) && (!unlabeled[i])) {
2567
+ dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
2568
+ if(dist<maxdist) {
2569
+ maxdist=dist;
2570
+ maxex=i;
2571
+ }
2572
+ }
2573
+ }
2574
+ if(maxex>=0) {
2575
+ (*inconsistentnum)++;
2576
+ inconsistent[maxex]=1; /* never choose again */
2577
+ retrain=2; /* start over */
2578
+ if(verbosity>=3) {
2579
+ printf("inconsistent(%ld)..",i); fflush(stdout);
2580
+ }
2581
+ }
2582
+ return(retrain);
2583
+ }
2584
+
2585
+ void update_linear_component(DOC **docs, long int *label,
2586
+ long int *active2dnum, double *a,
2587
+ double *a_old, long int *working2dnum,
2588
+ long int totdoc, long int totwords,
2589
+ KERNEL_PARM *kernel_parm,
2590
+ KERNEL_CACHE *kernel_cache,
2591
+ double *lin, CFLOAT *aicache, double *weights)
2592
+ /* keep track of the linear component */
2593
+ /* lin of the gradient etc. by updating */
2594
+ /* based on the change of the variables */
2595
+ /* in the current working set */
2596
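+ /* Linear kernel: accumulate the change of the weight vector,
+    delta_w = sum_i (a_i - a_old_i)*y_i*x_i, in weights[], then update
+    lin[j] += delta_w'*x_j. General kernel: fetch the (cached) kernel
+    row of each changed i and update
+    lin[j] += (a_i - a_old_i)*y_i*K(x_i,x_j) directly. */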
+ {
2597
+ register long i,ii,j,jj;
2598
+ register double tec;
2599
+ SVECTOR *f;
2600
+
2601
+ if(kernel_parm->kernel_type==0) { /* special linear case */
2602
+ clear_vector_n(weights,totwords);
2603
+ for(ii=0;(i=working2dnum[ii])>=0;ii++) {
2604
+ if(a[i] != a_old[i]) {
2605
+ for(f=docs[i]->fvec;f;f=f->next)
2606
+ add_vector_ns(weights,f,
2607
+ f->factor*((a[i]-a_old[i])*(double)label[i]));
2608
+ }
2609
+ }
2610
+ for(jj=0;(j=active2dnum[jj])>=0;jj++) {
2611
+ for(f=docs[j]->fvec;f;f=f->next)
2612
+ lin[j]+=f->factor*sprod_ns(weights,f);
2613
+ }
2614
+ }
2615
+ else { /* general case */
2616
+ for(jj=0;(i=working2dnum[jj])>=0;jj++) {
2617
+ if(a[i] != a_old[i]) {
2618
+ get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
2619
+ kernel_parm);
2620
+ for(ii=0;(j=active2dnum[ii])>=0;ii++) {
2621
+ tec=aicache[j];
2622
+ lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
2623
+ }
2624
+ }
2625
+ }
2626
+ }
2627
+ }
2628
+
2629
+
2630
+ long incorporate_unlabeled_examples(MODEL *model, long int *label,
2631
+ long int *inconsistent,
2632
+ long int *unlabeled,
2633
+ double *a, double *lin,
2634
+ long int totdoc, double *selcrit,
2635
+ long int *select, long int *key,
2636
+ long int transductcycle,
2637
+ KERNEL_PARM *kernel_parm,
2638
+ LEARN_PARM *learn_parm)
2639
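+ /* Drives transductive training: on the first cycle the unlabeled
+    examples get tentative labels matching the desired positive ratio;
+    on later cycles pairs of tentative labels are switched and the
+    influence of the unlabeled examples (svm_unlabbound) is gradually
+    raised towards 1. A return value of 0 ends transduction, nonzero
+    values request another round of retraining. */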
+ {
2640
+ long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
2641
+ long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
2642
+ double dist,model_length,posratio,negratio;
2643
+ long check_every=2;
2644
+ double loss;
2645
+ static double switchsens=0.0,switchsensorg=0.0;
2646
+ double umin,umax,sumalpha;
2647
+ long imin=0,imax=0;
2648
+ static long switchnum=0;
2649
+
2650
+ switchsens/=1.2;
2651
+
2652
+ /* assumes that lin[] is up to date -> no inactive vars */
2653
+
2654
+ orgpos=0;
2655
+ orgneg=0;
2656
+ newpos=0;
2657
+ newneg=0;
2658
+ nolabel=0;
2659
+ allunlab=0;
2660
+ for(i=0;i<totdoc;i++) {
2661
+ if(!unlabeled[i]) {
2662
+ if(label[i] > 0) {
2663
+ orgpos++;
2664
+ }
2665
+ else {
2666
+ orgneg++;
2667
+ }
2668
+ }
2669
+ else {
2670
+ allunlab++;
2671
+ if(unlabeled[i]) {
2672
+ if(label[i] > 0) {
2673
+ newpos++;
2674
+ }
2675
+ else if(label[i] < 0) {
2676
+ newneg++;
2677
+ }
2678
+ }
2679
+ }
2680
+ if(label[i]==0) {
2681
+ nolabel++;
2682
+ }
2683
+ }
2684
+
2685
+ if(learn_parm->transduction_posratio >= 0) {
2686
+ posratio=learn_parm->transduction_posratio;
2687
+ }
2688
+ else {
2689
+ posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
2690
+ } /* in training data */
2691
+ negratio=1.0-posratio;
2692
+
2693
+ learn_parm->svm_costratio=1.0; /* global */
2694
+ if(posratio>0) {
2695
+ learn_parm->svm_costratio_unlab=negratio/posratio;
2696
+ }
2697
+ else {
2698
+ learn_parm->svm_costratio_unlab=1.0;
2699
+ }
2700
+
2701
+ pos=0;
2702
+ neg=0;
2703
+ upos=0;
2704
+ uneg=0;
2705
+ for(i=0;i<totdoc;i++) {
2706
+ dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2707
+ if(dist>0) {
2708
+ pos++;
2709
+ }
2710
+ else {
2711
+ neg++;
2712
+ }
2713
+ if(unlabeled[i]) {
2714
+ if(dist>0) {
2715
+ upos++;
2716
+ }
2717
+ else {
2718
+ uneg++;
2719
+ }
2720
+ }
2721
+ if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
2722
+ /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
2723
+ }
2724
+ }
2725
+ if(verbosity>=2) {
2726
+ printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
2727
+ printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
2728
+ printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
2729
+ fflush(stdout);
2730
+ }
2731
+
2732
+ if(transductcycle == 0) {
2733
+ j1=0;
2734
+ j2=0;
2735
+ j4=0;
2736
+ for(i=0;i<totdoc;i++) {
2737
+ dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2738
+ if((label[i]==0) && (unlabeled[i])) {
2739
+ selcrit[j4]=dist;
2740
+ key[j4]=i;
2741
+ j4++;
2742
+ }
2743
+ }
2744
+ unsupaddnum1=0;
2745
+ unsupaddnum2=0;
2746
+ select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
2747
+ for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
2748
+ i=key[select[k]];
2749
+ label[i]=1;
2750
+ unsupaddnum1++;
2751
+ j1++;
2752
+ }
2753
+ for(i=0;i<totdoc;i++) {
2754
+ if((label[i]==0) && (unlabeled[i])) {
2755
+ label[i]=-1;
2756
+ j2++;
2757
+ unsupaddnum2++;
2758
+ }
2759
+ }
2760
+ for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2761
+ if(unlabeled[i]) {
2762
+ if(label[i] == 1) {
2763
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2764
+ learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2765
+ }
2766
+ else if(label[i] == -1) {
2767
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2768
+ learn_parm->svm_unlabbound;
2769
+ }
2770
+ }
2771
+ }
2772
+ if(verbosity>=1) {
2773
+ /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
2774
+ learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
2775
+ learn_parm->svm_unlabbound); */
2776
+ printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
2777
+ unsupaddnum1,unsupaddnum2);
2778
+ fflush(stdout);
2779
+ }
2780
+ if(verbosity >= 1)
2781
+ printf("Retraining.");
2782
+ if(verbosity >= 2) printf("\n");
2783
+ return((long)3);
2784
+ }
2785
+ if((transductcycle % check_every) == 0) {
2786
+ if(verbosity >= 1)
2787
+ printf("Retraining.");
2788
+ if(verbosity >= 2) printf("\n");
2789
+ j1=0;
2790
+ j2=0;
2791
+ unsupaddnum1=0;
2792
+ unsupaddnum2=0;
2793
+ for(i=0;i<totdoc;i++) {
2794
+ if((unlabeled[i] == 2)) {
2795
+ unlabeled[i]=1;
2796
+ label[i]=1;
2797
+ j1++;
2798
+ unsupaddnum1++;
2799
+ }
2800
+ else if((unlabeled[i] == 3)) {
2801
+ unlabeled[i]=1;
2802
+ label[i]=-1;
2803
+ j2++;
2804
+ unsupaddnum2++;
2805
+ }
2806
+ }
2807
+ for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2808
+ if(unlabeled[i]) {
2809
+ if(label[i] == 1) {
2810
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2811
+ learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2812
+ }
2813
+ else if(label[i] == -1) {
2814
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2815
+ learn_parm->svm_unlabbound;
2816
+ }
2817
+ }
2818
+ }
2819
+
2820
+ if(verbosity>=2) {
2821
+ /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
2822
+ learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
2823
+ learn_parm->svm_unlabbound); */
2824
+ printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
2825
+ upos,unsupaddnum1,unsupaddnum2);
2826
+ fflush(stdout);
2827
+ }
2828
+
2829
+ if(learn_parm->svm_unlabbound == 1) {
2830
+ learn_parm->epsilon_crit=0.001; /* do the last run right */
2831
+ }
2832
+ else {
2833
+ learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
2834
+ }
2835
+
2836
+ return((long)3);
2837
+ }
2838
+ else if(((transductcycle % check_every) < check_every)) {
2839
+ model_length=0;
2840
+ sumalpha=0;
2841
+ loss=0;
2842
+ for(i=0;i<totdoc;i++) {
2843
+ model_length+=a[i]*label[i]*lin[i];
2844
+ sumalpha+=a[i];
2845
+ dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2846
+ if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
2847
+ loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
2848
+ }
2849
+ }
2850
+ model_length=sqrt(model_length);
2851
+ if(verbosity>=2) {
2852
+ printf("Model-length = %f (%f), loss = %f, objective = %f\n",
2853
+ model_length,sumalpha,loss,loss+0.5*model_length*model_length);
2854
+ fflush(stdout);
2855
+ }
2856
+ j1=0;
2857
+ j2=0;
2858
+ j3=0;
2859
+ j4=0;
2860
+ unsupaddnum1=0;
2861
+ unsupaddnum2=0;
2862
+ umin=99999;
2863
+ umax=-99999;
2864
+ j4=1;
2865
+ while(j4) {
2866
+ umin=99999;
2867
+ umax=-99999;
2868
+ for(i=0;(i<totdoc);i++) {
2869
+ dist=(lin[i]-model->b);
2870
+ if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
2871
+ && (dist<umin)) {
2872
+ umin=dist;
2873
+ imin=i;
2874
+ }
2875
+ if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
2876
+ && (dist>umax)) {
2877
+ umax=dist;
2878
+ imax=i;
2879
+ }
2880
+ }
2881
+ if((umin < (umax+switchsens-1E-4))) {
2882
+ j1++;
2883
+ j2++;
2884
+ unsupaddnum1++;
2885
+ unlabeled[imin]=3;
2886
+ inconsistent[imin]=1;
2887
+ unsupaddnum2++;
2888
+ unlabeled[imax]=2;
2889
+ inconsistent[imax]=1;
2890
+ }
2891
+ else
2892
+ j4=0;
2893
+ j4=0;
2894
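+ /* note: j4 is cleared unconditionally above, so this loop makes a
+    single pass and switches at most one pair of labels per call */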
+ }
2895
+ for(j=0;(j<totdoc);j++) {
2896
+ if(unlabeled[j] && (!inconsistent[j])) {
2897
+ if(label[j]>0) {
2898
+ unlabeled[j]=2;
2899
+ }
2900
+ else if(label[j]<0) {
2901
+ unlabeled[j]=3;
2902
+ }
2903
+ /* inconsistent[j]=1; */
2904
+ j3++;
2905
+ }
2906
+ }
2907
+ switchnum+=unsupaddnum1+unsupaddnum2;
2908
+
2909
+ /* stop and print out current margin
2910
+ printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
2911
+ if(switchnum == 2*kernel_parm->poly_degree) {
2912
+ learn_parm->svm_unlabbound=1;
2913
+ }
2914
+ */
2915
+
2916
+ if((!unsupaddnum1) && (!unsupaddnum2)) {
2917
+ if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
2918
+ for(j=0;(j<totdoc);j++) {
2919
+ inconsistent[j]=0;
2920
+ if(unlabeled[j]) unlabeled[j]=1;
2921
+ }
2922
+ write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
2923
+ totdoc,learn_parm);
2924
+ if(verbosity>=1)
2925
+ printf("Number of switches: %ld\n",switchnum);
2926
+ return((long)0);
2927
+ }
2928
+ switchsens=switchsensorg;
2929
+ learn_parm->svm_unlabbound*=1.5;
2930
+ if(learn_parm->svm_unlabbound>1) {
2931
+ learn_parm->svm_unlabbound=1;
2932
+ }
2933
+ model->at_upper_bound=0; /* since upper bound increased */
2934
+ if(verbosity>=1)
2935
+ printf("Increasing influence of unlabeled examples to %f%% .",
2936
+ learn_parm->svm_unlabbound*100.0);
2937
+ }
2938
+ else if(verbosity>=1) {
2939
+ printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
2940
+ upos,unsupaddnum1,unsupaddnum2);
2941
+ fflush(stdout);
2942
+ }
2943
+
2944
+ if(verbosity >= 2) printf("\n");
2945
+
2946
+ learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
2947
+
2948
+ for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2949
+ if(unlabeled[i]) {
2950
+ if(label[i] == 1) {
2951
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2952
+ learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2953
+ }
2954
+ else if(label[i] == -1) {
2955
+ learn_parm->svm_cost[i]=learn_parm->svm_c*
2956
+ learn_parm->svm_unlabbound;
2957
+ }
2958
+ }
2959
+ }
2960
+
2961
+ return((long)2);
2962
+ }
2963
+
2964
+ return((long)0);
2965
+ }
2966
+
2967
+ /*************************** Working set selection ***************************/
2968
+
2969
+ long select_next_qp_subproblem_grad(long int *label,
2970
+ long int *unlabeled,
2971
+ double *a, double *lin,
2972
+ double *c, long int totdoc,
2973
+ long int qp_size,
2974
+ LEARN_PARM *learn_parm,
2975
+ long int *inconsistent,
2976
+ long int *active2dnum,
2977
+ long int *working2dnum,
2978
+ double *selcrit,
2979
+ long int *select,
2980
+ KERNEL_CACHE *kernel_cache,
2981
+ long int cache_only,
2982
+ long int *key, long int *chosen)
2983
+ /* Use the feasible direction approach to select the next
2984
+ qp-subproblem (see chapter 'Selecting a good working set'). If
2985
+ 'cache_only' is true, then the variables are selected only among
2986
+ those for which the kernel evaluations are cached. */
2987
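+ /* Two passes: the first ranks candidates for direction d_i=-y_i, the
+    second for d_i=+y_i, each by the gradient criterion
+    selcrit = +/- y_i*(eps - y_i*c_i + y_i*lin_i), taking the top
+    qp_size/2 of each so the working set admits a steep feasible descent
+    step. Variables already at the bound blocking the direction are
+    skipped. */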
+ {
2988
+ long choosenum,i,j,k,activedoc,inum,valid;
2989
+ double s;
2990
+
2991
+ for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
2992
+ choosenum=0;
2993
+ activedoc=0;
2994
+ for(i=0;(j=active2dnum[i])>=0;i++) {
2995
+ s=-label[j];
2996
+ if(kernel_cache && cache_only)
2997
+ valid=(kernel_cache->index[j]>=0);
2998
+ else
2999
+ valid=1;
3000
+ if(valid
3001
+ && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3002
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3003
+ && (s>0)))
3004
+ && (!chosen[j])
3005
+ && (label[j])
3006
+ && (!inconsistent[j]))
3007
+ {
3008
+ selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
3009
+ /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
3010
+ key[activedoc]=j;
3011
+ activedoc++;
3012
+ }
3013
+ }
3014
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3015
+ for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
3016
+ /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
3017
+ i=key[select[k]];
3018
+ chosen[i]=1;
3019
+ working2dnum[inum+choosenum]=i;
3020
+ choosenum+=1;
3021
+ if(kernel_cache)
3022
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get
3023
+ kicked out of cache */
3024
+ /* } */
3025
+ }
3026
+
3027
+ activedoc=0;
3028
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3029
+ s=label[j];
3030
+ if(kernel_cache && cache_only)
3031
+ valid=(kernel_cache->index[j]>=0);
3032
+ else
3033
+ valid=1;
3034
+ if(valid
3035
+ && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3036
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3037
+ && (s>0)))
3038
+ && (!chosen[j])
3039
+ && (label[j])
3040
+ && (!inconsistent[j]))
3041
+ {
3042
+ selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
3043
+ /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
3044
+ key[activedoc]=j;
3045
+ activedoc++;
3046
+ }
3047
+ }
3048
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3049
+ for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
3050
+ /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
3051
+ i=key[select[k]];
3052
+ chosen[i]=1;
3053
+ working2dnum[inum+choosenum]=i;
3054
+ choosenum+=1;
3055
+ if(kernel_cache)
3056
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get
3057
+ kicked out of cache */
3058
+ /* } */
3059
+ }
3060
+ working2dnum[inum+choosenum]=-1; /* complete index */
3061
+ return(choosenum);
3062
+ }
3063
+
3064
+ long select_next_qp_subproblem_rand(long int *label,
3065
+ long int *unlabeled,
3066
+ double *a, double *lin,
3067
+ double *c, long int totdoc,
3068
+ long int qp_size,
3069
+ LEARN_PARM *learn_parm,
3070
+ long int *inconsistent,
3071
+ long int *active2dnum,
3072
+ long int *working2dnum,
3073
+ double *selcrit,
3074
+ long int *select,
3075
+ KERNEL_CACHE *kernel_cache,
3076
+ long int *key,
3077
+ long int *chosen,
3078
+ long int iteration)
3079
+ /* Use the feasible direction approach to select the next
3080
+ qp-subproblem (see section 'Selecting a good working set'). Chooses
3081
+ a feasible direction at (pseudo) random to help jump over numerical
3082
+ problems. */
3083
+ {
3084
+ long choosenum,i,j,k,activedoc,inum;
3085
+ double s;
3086
+
3087
+ for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
3088
+ choosenum=0;
3089
+ activedoc=0;
3090
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3091
+ s=-label[j];
3092
+ if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3093
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3094
+ && (s>0)))
3095
+ && (!inconsistent[j])
3096
+ && (label[j])
3097
+ && (!chosen[j])) {
3098
+ selcrit[activedoc]=(j+iteration) % totdoc;
3099
+ key[activedoc]=j;
3100
+ activedoc++;
3101
+ }
3102
+ }
3103
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3104
+ for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
3105
+ i=key[select[k]];
3106
+ chosen[i]=1;
3107
+ working2dnum[inum+choosenum]=i;
3108
+ choosenum+=1;
3109
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
3110
+ /* out of cache */
3111
+ }
3112
+
3113
+ activedoc=0;
3114
+ for(i=0;(j=active2dnum[i])>=0;i++) {
3115
+ s=label[j];
3116
+ if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3117
+ && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3118
+ && (s>0)))
3119
+ && (!inconsistent[j])
3120
+ && (label[j])
3121
+ && (!chosen[j])) {
3122
+ selcrit[activedoc]=(j+iteration) % totdoc;
3123
+ key[activedoc]=j;
3124
+ activedoc++;
3125
+ }
3126
+ }
3127
+ select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3128
+ for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
3129
+ i=key[select[k]];
3130
+ chosen[i]=1;
3131
+ working2dnum[inum+choosenum]=i;
3132
+ choosenum+=1;
3133
+ kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
3134
+ /* out of cache */
3135
+ }
3136
+ working2dnum[inum+choosenum]=-1; /* complete index */
3137
+ return(choosenum);
3138
+ }
3139
+
3140
+ long select_next_qp_slackset(DOC **docs, long int *label,
3141
+ double *a, double *lin,
3142
+ double *slack, double *alphaslack,
3143
+ double *c,
3144
+ LEARN_PARM *learn_parm,
3145
+ long int *active2dnum, double *maxviol)
3146
+ /* returns the slackset with the largest internal violation */
3147
+ {
3148
+ long i,ii,maxdiffid;
3149
+ double dist,target,maxdiff,ex_c;
3150
+
3151
+ maxdiff=0;
3152
+ maxdiffid=0;
3153
+ for(ii=0;(i=active2dnum[ii])>=0;ii++) {
3154
+ ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
3155
+ if(alphaslack[docs[i]->slackid] >= ex_c) {
3156
+ dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
3157
+ target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
3158
+ if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
3159
+ if((dist-target)>maxdiff) { /* largest violation */
3160
+ maxdiff=dist-target;
3161
+ maxdiffid=docs[i]->slackid;
3162
+ }
3163
+ }
3164
+ }
3165
+ }
3166
+ (*maxviol)=maxdiff;
3167
+ return(maxdiffid);
3168
+ }
3169
+
3170
+
3171
+ void select_top_n(double *selcrit, long int range, long int *select,
3172
+ long int n)
3173
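+ /* Writes the indices of the n largest entries of selcrit[0..range-1]
+    into select[0..n-1], ordered by decreasing criterion value, using
+    insertion-sort style bookkeeping. */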
+ {
3174
+ register long i,j;
3175
+
3176
+ for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
3177
+ for(j=i;j>=0;j--) {
3178
+ if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
3179
+ select[j]=select[j-1];
3180
+ }
3181
+ else {
3182
+ select[j]=i;
3183
+ j=-1;
3184
+ }
3185
+ }
3186
+ }
3187
+ if(n>0) {
3188
+ for(i=n;i<range;i++) {
3189
+ if(selcrit[i]>selcrit[select[n-1]]) {
3190
+ for(j=n-1;j>=0;j--) {
3191
+ if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
3192
+ select[j]=select[j-1];
3193
+ }
3194
+ else {
3195
+ select[j]=i;
3196
+ j=-1;
3197
+ }
3198
+ }
3199
+ }
3200
+ }
3201
+ }
3202
+ }
3203
+
3204
+
+ /******************************** Shrinking *********************************/
+
+ void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
+                        long int maxhistory)
+ {
+   long i;
+
+   shrink_state->deactnum=0;
+   shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
+   shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
+   shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
+   shrink_state->maxhistory=maxhistory;
+   shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
+   shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
+
+   for(i=0;i<totdoc;i++) {
+     shrink_state->active[i]=1;
+     shrink_state->inactive_since[i]=0;
+     shrink_state->last_a[i]=0;
+     shrink_state->last_lin[i]=0;
+   }
+ }
+
+ void shrink_state_cleanup(SHRINK_STATE *shrink_state)
+ {
+   free(shrink_state->active);
+   free(shrink_state->inactive_since);
+   if(shrink_state->deactnum > 0)
+     free(shrink_state->a_history[shrink_state->deactnum-1]);
+   free(shrink_state->a_history);
+   free(shrink_state->last_a);
+   free(shrink_state->last_lin);
+ }
+
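+ /* Sketch of the intended SHRINK_STATE lifecycle (the loop shown is a
+    placeholder, not code from this file, and the maxhistory argument
+    is an assumption):
+
+      SHRINK_STATE shrink_state;
+      init_shrink_state(&shrink_state,totdoc,maxhistory);
+      ... optimization loop, calling shrink_problem() ...
+      reactivate_inactive_examples(...);  before checking final optimality
+      shrink_state_cleanup(&shrink_state);
+ */
+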
+ long shrink_problem(DOC **docs,
+                     LEARN_PARM *learn_parm,
+                     SHRINK_STATE *shrink_state,
+                     KERNEL_PARM *kernel_parm,
+                     long int *active2dnum,
+                     long int *last_suboptimal_at,
+                     long int iteration,
+                     long int totdoc,
+                     long int minshrink,
+                     double *a,
+                     long int *inconsistent)
+      /* Shrink some variables away. Do the shrinking only if at least
+         minshrink variables can be removed. */
+ {
+   long i,ii,change,activenum,lastiter;
+   double *a_old;
+
+   activenum=0;
+   change=0;
+   for(ii=0;active2dnum[ii]>=0;ii++) {
+     i=active2dnum[ii];
+     activenum++;
+     if(learn_parm->sharedslack)
+       lastiter=last_suboptimal_at[docs[i]->slackid];
+     else
+       lastiter=last_suboptimal_at[i];
+     if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
+        || (inconsistent[i])) {
+       change++;
+     }
+   }
+   if((change>=minshrink) /* shrink only if sufficiently many candidates */
+      && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
+     /* Shrink problem by removing those variables which are */
+     /* optimal at a bound for a minimum number of iterations */
+     if(verbosity>=2) {
+       printf(" Shrinking..."); fflush(stdout);
+     }
+     if(kernel_parm->kernel_type != LINEAR) { /* non-linear case: save alphas */
+       a_old=(double *)my_malloc(sizeof(double)*totdoc);
+       shrink_state->a_history[shrink_state->deactnum]=a_old;
+       for(i=0;i<totdoc;i++) {
+         a_old[i]=a[i];
+       }
+     }
+     for(ii=0;active2dnum[ii]>=0;ii++) {
+       i=active2dnum[ii];
+       if(learn_parm->sharedslack)
+         lastiter=last_suboptimal_at[docs[i]->slackid];
+       else
+         lastiter=last_suboptimal_at[i];
+       if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
+          || (inconsistent[i])) {
+         shrink_state->active[i]=0;
+         shrink_state->inactive_since[i]=shrink_state->deactnum;
+       }
+     }
+     activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+     shrink_state->deactnum++;
+     if(kernel_parm->kernel_type == LINEAR) {
+       shrink_state->deactnum=0;
+     }
+     if(verbosity>=2) {
+       printf("done.\n"); fflush(stdout);
+       printf(" Number of inactive variables = %ld\n",totdoc-activenum);
+     }
+   }
+   return(activenum);
+ }
+
+
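+ /* The shrinking test above removes example i once it has looked
+    optimal-at-bound for more than svm_iter_to_shrink consecutive
+    iterations, or when it is marked inconsistent. A restatement of the
+    predicate as a tiny helper (illustrative only, not used elsewhere): */
+ static long demo_should_shrink(long iteration, long lastiter,
+                                long iter_to_shrink, long inconsistent)
+ {
+   /* e.g. iteration=351, lastiter=250, iter_to_shrink=100 -> shrink */
+   return(((iteration-lastiter) > iter_to_shrink) || inconsistent);
+ }
+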
+ void reactivate_inactive_examples(long int *label,
+                                   long int *unlabeled,
+                                   double *a,
+                                   SHRINK_STATE *shrink_state,
+                                   double *lin,
+                                   double *c,
+                                   long int totdoc,
+                                   long int totwords,
+                                   long int iteration,
+                                   LEARN_PARM *learn_parm,
+                                   long int *inconsistent,
+                                   DOC **docs,
+                                   KERNEL_PARM *kernel_parm,
+                                   KERNEL_CACHE *kernel_cache,
+                                   MODEL *model,
+                                   CFLOAT *aicache,
+                                   double *weights,
+                                   double *maxdiff)
+      /* Make all variables active again which had been removed by
+         shrinking. */
+      /* Computes lin for those variables from scratch. */
+ {
+   register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
+   long *changed,*inactive;
+   register double kernel_val,*a_old,dist;
+   double ex_c,target;
+   SVECTOR *f;
+
+   if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
+     a_old=shrink_state->last_a;
+     clear_vector_n(weights,totwords);
+     for(i=0;i<totdoc;i++) {
+       if(a[i] != a_old[i]) {
+         for(f=docs[i]->fvec;f;f=f->next)
+           add_vector_ns(weights,f,
+                         f->factor*((a[i]-a_old[i])*(double)label[i]));
+         a_old[i]=a[i];
+       }
+     }
+     for(i=0;i<totdoc;i++) {
+       if(!shrink_state->active[i]) {
+         for(f=docs[i]->fvec;f;f=f->next)
+           lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
+       }
+       shrink_state->last_lin[i]=lin[i];
+     }
+   }
+   else {
+     changed=(long *)my_malloc(sizeof(long)*totdoc);
+     changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
+     inactive=(long *)my_malloc(sizeof(long)*totdoc);
+     inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
+     for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
+       if(verbosity>=2) {
+         printf("%ld..",t); fflush(stdout);
+       }
+       a_old=shrink_state->a_history[t];
+       for(i=0;i<totdoc;i++) {
+         inactive[i]=((!shrink_state->active[i])
+                      && (shrink_state->inactive_since[i] == t));
+         changed[i]= (a[i] != a_old[i]);
+       }
+       compute_index(inactive,totdoc,inactive2dnum);
+       compute_index(changed,totdoc,changed2dnum);
+
+       for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
+         get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
+                        kernel_parm);
+         for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
+           kernel_val=aicache[j];
+           lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
+         }
+       }
+     }
+     free(changed);
+     free(changed2dnum);
+     free(inactive);
+     free(inactive2dnum);
+   }
+   (*maxdiff)=0;
+   for(i=0;i<totdoc;i++) {
+     shrink_state->inactive_since[i]=shrink_state->deactnum-1;
+     if(!inconsistent[i]) {
+       dist=(lin[i]-model->b)*(double)label[i];
+       target=-(learn_parm->eps-(double)label[i]*c[i]);
+       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
+       if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
+         if((dist-target)>(*maxdiff))  /* largest violation */
+           (*maxdiff)=dist-target;
+       }
+       else if((a[i]<ex_c) && (dist < target)) {
+         if((target-dist)>(*maxdiff))  /* largest violation */
+           (*maxdiff)=target-dist;
+       }
+       if((a[i]>(0+learn_parm->epsilon_a))
+          && (a[i]<ex_c)) {
+         shrink_state->active[i]=1;   /* not at bound */
+       }
+       else if((a[i]<=(0+learn_parm->epsilon_a))
+               && (dist < (target+learn_parm->epsilon_shrink))) {
+         shrink_state->active[i]=1;
+       }
+       else if((a[i]>=ex_c)
+               && (dist > (target-learn_parm->epsilon_shrink))) {
+         shrink_state->active[i]=1;
+       }
+       else if(learn_parm->sharedslack) { /* make all active when sharedslack */
+         shrink_state->active[i]=1;
+       }
+     }
+   }
+   if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
+     for(i=0;i<totdoc;i++) {
+       (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
+     }
+     for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
+       free(shrink_state->a_history[t]);
+       shrink_state->a_history[t]=0;
+     }
+   }
+ }
+
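+ /* In the linear branch above, lin[] for shrunk examples is refreshed
+    incrementally rather than from scratch: the weight delta
+    w_delta = sum_i (a[i]-a_old[i])*label[i]*x_i is accumulated once,
+    then lin[j] = last_lin[j] + w_delta*x_j for each inactive j. For
+    instance (made-up numbers), if only a[5] changed by +0.3 with
+    label[5]=+1, then w_delta = 0.3*x_5 and every shrunk example j gets
+    lin[j] = last_lin[j] + 0.3*(x_5*x_j). */
+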
+ /****************************** Cache handling *******************************/
+
+ void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
+                     long int docnum, long int totdoc,
+                     long int *active2dnum, CFLOAT *buffer,
+                     KERNEL_PARM *kernel_parm)
+      /* Gets a row of the matrix of kernel values. This matrix has the
+         same form as the Hessian, except that the elements are not
+         multiplied by y_i * y_j * a_i * a_j. */
+      /* Takes the values from the cache if available. */
+ {
+   register long i,j,start;
+   DOC *ex;
+
+   ex=docs[docnum];
+
+   if(kernel_cache->index[docnum] != -1) { /* row is cached? */
+     kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+     start=kernel_cache->activenum*kernel_cache->index[docnum];
+     for(i=0;(j=active2dnum[i])>=0;i++) {
+       if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
+         buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
+       }
+       else {
+         buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
+       }
+     }
+   }
+   else {
+     for(i=0;(j=active2dnum[i])>=0;i++) {
+       buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
+     }
+   }
+ }
+
+
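+ /* Cache addressing used above, with made-up numbers: cached rows are
+    stored back-to-back in kernel_cache->buffer, each activenum values
+    long, so the row assigned slot r starts at buffer[activenum*r].
+    With activenum=1000, index[docnum]=3 and totdoc2active[j]=42, the
+    cached value K(docnum,j) is read from buffer[3*1000+42]. */
+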
+ void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
+                       long int m, KERNEL_PARM *kernel_parm)
+      /* Fills cache for the row m */
+ {
+   register DOC *ex;
+   register long j,k,l;
+   register CFLOAT *cache;
+
+   if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet */
+     cache = kernel_cache_clean_and_malloc(kernel_cache,m);
+     if(cache) {
+       l=kernel_cache->totdoc2active[m];
+       ex=docs[m];
+       for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
+         k=kernel_cache->active2totdoc[j];
+         if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
+           cache[j]=kernel_cache->buffer[kernel_cache->activenum
+                                         *kernel_cache->index[k]+l];
+         }
+         else {
+           cache[j]=kernel(kernel_parm,ex,docs[k]);
+         }
+       }
+     }
+     else {
+       perror("Error: Kernel cache full! => increase cache size");
+     }
+   }
+ }
+
+
+ void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs,
+                                 long int *key, long int varnum,
+                                 KERNEL_PARM *kernel_parm)
+      /* Fills cache for the rows in key */
+ {
+   register long i;
+
+   for(i=0;i<varnum;i++) { /* fill up kernel cache */
+     cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
+   }
+ }
+
+
+ void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc,
+                          long int numshrink, long int *after)
+      /* Remove numshrink columns in the cache which correspond to
+         examples marked 0 in after. */
+ {
+   register long i,j,jj,from=0,to=0,scount;
+   long *keep;
+
+   if(verbosity>=2) {
+     printf(" Reorganizing cache..."); fflush(stdout);
+   }
+
+   keep=(long *)my_malloc(sizeof(long)*totdoc);
+   for(j=0;j<totdoc;j++) {
+     keep[j]=1;
+   }
+   scount=0;
+   for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) {
+     j=kernel_cache->active2totdoc[jj];
+     if(!after[j]) {
+       scount++;
+       keep[j]=0;
+     }
+   }
+
+   for(i=0;i<kernel_cache->max_elems;i++) {
+     for(jj=0;jj<kernel_cache->activenum;jj++) {
+       j=kernel_cache->active2totdoc[jj];
+       if(!keep[j]) {
+         from++;
+       }
+       else {
+         kernel_cache->buffer[to]=kernel_cache->buffer[from];
+         to++;
+         from++;
+       }
+     }
+   }
+
+   kernel_cache->activenum=0;
+   for(j=0;j<totdoc;j++) {
+     if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) {
+       kernel_cache->active2totdoc[kernel_cache->activenum]=j;
+       kernel_cache->totdoc2active[j]=kernel_cache->activenum;
+       kernel_cache->activenum++;
+     }
+     else {
+       kernel_cache->totdoc2active[j]=-1;
+     }
+   }
+
+   kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum);
+   if(kernel_cache->max_elems>totdoc) {
+     kernel_cache->max_elems=totdoc;
+   }
+
+   free(keep);
+
+   if(verbosity>=2) {
+     printf("done.\n"); fflush(stdout);
+     printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
+   }
+ }
+
+ KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize)
+ {
+   long i;
+   KERNEL_CACHE *kernel_cache;
+
+   kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE));
+   kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc);
+   kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc);
+   kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc);
+   kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc);
+   kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc);
+   kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc);
+   kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024);
+
+   kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024);
+
+   kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc);
+   if(kernel_cache->max_elems>totdoc) {
+     kernel_cache->max_elems=totdoc;
+   }
+
+   if(verbosity>=2) {
+     printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
+     printf(" Kernel evals so far: %ld\n",kernel_cache_statistic);
+   }
+
+   kernel_cache->elems=0;   /* initialize cache */
+   for(i=0;i<totdoc;i++) {
+     kernel_cache->index[i]=-1;
+     kernel_cache->lru[i]=0;
+   }
+   for(i=0;i<totdoc;i++) {
+     kernel_cache->occu[i]=0;
+     kernel_cache->invindex[i]=-1;
+   }
+
+   kernel_cache->activenum=totdoc;
+   for(i=0;i<totdoc;i++) {
+     kernel_cache->active2totdoc[i]=i;
+     kernel_cache->totdoc2active[i]=i;
+   }
+
+   kernel_cache->time=0;
+
+   return(kernel_cache);
+ }
+
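+ /* Sizing arithmetic above, with made-up numbers: buffsize is given in
+    MB, so with buffsize=40 and sizeof(CFLOAT)=4 the buffer holds
+    40*1024*1024/4 = 10485760 kernel values; with totdoc=5000 examples
+    that allows max_elems = 10485760/5000 = 2097 cached rows (capped at
+    totdoc, since more rows than examples are never needed). */
+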
+ void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
+ {
+   long maxlru=0,k;
+
+   for(k=0;k<kernel_cache->max_elems;k++) {
+     if(maxlru < kernel_cache->lru[k])
+       maxlru=kernel_cache->lru[k];
+   }
+   for(k=0;k<kernel_cache->max_elems;k++) {
+     kernel_cache->lru[k]-=maxlru;
+   }
+ }
+
+ void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
+ {
+   free(kernel_cache->index);
+   free(kernel_cache->occu);
+   free(kernel_cache->lru);
+   free(kernel_cache->invindex);
+   free(kernel_cache->active2totdoc);
+   free(kernel_cache->totdoc2active);
+   free(kernel_cache->buffer);
+   free(kernel_cache);
+ }
+
+ long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
+ {
+   long i;
+
+   if(kernel_cache_space_available(kernel_cache)) {
+     for(i=0;i<kernel_cache->max_elems;i++) {
+       if(!kernel_cache->occu[i]) {
+         kernel_cache->occu[i]=1;
+         kernel_cache->elems++;
+         return(i);
+       }
+     }
+   }
+   return(-1);
+ }
+
+ void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
+ {
+   kernel_cache->occu[i]=0;
+   kernel_cache->elems--;
+ }
+
+ long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
+      /* remove least recently used cache element */
+ {
+   register long k,least_elem=-1,least_time;
+
+   least_time=kernel_cache->time+1;
+   for(k=0;k<kernel_cache->max_elems;k++) {
+     if(kernel_cache->invindex[k] != -1) {
+       if(kernel_cache->lru[k]<least_time) {
+         least_time=kernel_cache->lru[k];
+         least_elem=k;
+       }
+     }
+   }
+   if(least_elem != -1) {
+     kernel_cache_free(kernel_cache,least_elem);
+     kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
+     kernel_cache->invindex[least_elem]=-1;
+     return(1);
+   }
+   return(0);
+ }
+
+
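+ /* LRU bookkeeping in a nutshell: each occupied slot k carries the
+    timestamp lru[k] of its last access, and kernel_cache_free_lru
+    frees the slot with the smallest timestamp. A sketch of the access
+    pattern (the docnum 7 is made up):
+      cache_kernel_row(kernel_cache,docs,7,kernel_parm);  caches row 7
+      kernel_cache_touch(kernel_cache,7);   refreshes row 7's timestamp
+    so when the cache is full, the least recently touched row goes. */
+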
+ CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
+                                       long int docnum)
+      /* Get a free cache entry. In case cache is full, the lru element
+         is removed. */
+ {
+   long result;
+   if((result = kernel_cache_malloc(kernel_cache)) == -1) {
+     if(kernel_cache_free_lru(kernel_cache)) {
+       result = kernel_cache_malloc(kernel_cache);
+     }
+   }
+   kernel_cache->index[docnum]=result;
+   if(result == -1) {
+     return(0);
+   }
+   kernel_cache->invindex[result]=docnum;
+   kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+   return((CFLOAT *)((long)kernel_cache->buffer
+                     +(kernel_cache->activenum*sizeof(CFLOAT)*
+                       kernel_cache->index[docnum])));
+ }
+
+ long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
+      /* Update lru time to avoid removal from cache. */
+ {
+   if(kernel_cache && kernel_cache->index[docnum] != -1) {
+     kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+     return(1);
+   }
+   return(0);
+ }
+
+ long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
+      /* Is that row cached? */
+ {
+   return(kernel_cache->index[docnum] != -1);
+ }
+
+ long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
+      /* Is there room for one more row? */
+ {
+   return(kernel_cache->elems < kernel_cache->max_elems);
+ }
+
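+ /* Typical pattern for keeping a working set resident (a sketch of how
+    this API can be combined; key/varnum stand for the working-set
+    index array and its size, as elsewhere in this file):
+
+    for(i=0;i<varnum;i++) {
+      if(!kernel_cache_check(kernel_cache,key[i]))
+        cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
+      kernel_cache_touch(kernel_cache,key[i]);
+    }
+ */
+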
+ /************************** Compute estimates ******************************/
+
+ void compute_xa_estimates(MODEL *model, long int *label,
+                           long int *unlabeled, long int totdoc,
+                           DOC **docs, double *lin, double *a,
+                           KERNEL_PARM *kernel_parm,
+                           LEARN_PARM *learn_parm, double *error,
+                           double *recall, double *precision)
+      /* Computes xa-estimate of error rate, recall, and precision. See
+         T. Joachims, Estimating the Generalization Performance of an SVM
+         Efficiently, ICML, 2000. */
+ {
+   long i,looerror,looposerror,loonegerror;
+   long totex,totposex;
+   double xi,r_delta,r_delta_sq,sim=0;
+   long *sv2dnum=NULL,*sv=NULL,svnum;
+
+   r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+   r_delta_sq=r_delta*r_delta;
+
+   looerror=0;
+   looposerror=0;
+   loonegerror=0;
+   totex=0;
+   totposex=0;
+   svnum=0;
+
+   if(learn_parm->xa_depth > 0) {
+     sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
+     for(i=0;i<totdoc;i++)
+       sv[i]=0;
+     for(i=1;i<model->sv_num;i++)
+       if(a[model->supvec[i]->docnum]
+          < (learn_parm->svm_cost[model->supvec[i]->docnum]
+             -learn_parm->epsilon_a)) {
+         sv[model->supvec[i]->docnum]=1;
+         svnum++;
+       }
+     sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+     clear_index(sv2dnum);
+     compute_index(sv,totdoc,sv2dnum);
+   }
+
+   for(i=0;i<totdoc;i++) {
+     if(unlabeled[i]) {
+       /* ignore it */
+     }
+     else {
+       xi=1.0-((lin[i]-model->b)*(double)label[i]);
+       if(xi<0) xi=0;
+       if(label[i]>0) {
+         totposex++;
+       }
+       if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
+         if(learn_parm->xa_depth > 0) { /* makes assumptions */
+           sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
+                                           kernel_parm,learn_parm,
+                                           (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
+         }
+         if((learn_parm->xa_depth == 0) ||
+            ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
+           looerror++;
+           if(label[i]>0) {
+             looposerror++;
+           }
+           else {
+             loonegerror++;
+           }
+         }
+       }
+       totex++;
+     }
+   }
+
+   (*error)=((double)looerror/(double)totex)*100.0;
+   (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
+   (*precision)=(((double)totposex-(double)looposerror)
+                 /((double)totposex-(double)looposerror+(double)loonegerror))*100.0;
+
+   free(sv);
+   free(sv2dnum);
+ }
+
+
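+ /* The xi/alpha rule above flags example i as a potential leave-one-out
+    error when rho*a[i]*r_delta_sq + xi >= 1. Worked example (made-up
+    numbers): with rho=1, a[i]=0.5, r_delta_sq=1.5 and slack xi=0.4 the
+    score is 0.5*1.5+0.4 = 1.15 >= 1, so i is counted; with a[i]=0.2 and
+    xi=0 the score is only 0.3, so i is not. */
+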
+ double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
+                                    DOC **docs, double *a,
+                                    long int docnum,
+                                    long int *label,
+                                    KERNEL_PARM *kernel_parm,
+                                    LEARN_PARM *learn_parm, double thresh)
+      /* Experimental code improving plain XiAlpha estimates by
+         computing a better bound using a greedy optimization strategy. */
+ {
+   long best_depth=0;
+   long i,j,k,d,skip,allskip;
+   double best,best_val[101],val,init_val_sq,init_val_lin;
+   long best_ex[101];
+   CFLOAT *cache,*trow;
+
+   cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
+   trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);
+
+   for(k=0;k<svnum;k++) {
+     trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
+   }
+
+   init_val_sq=0;
+   init_val_lin=0;
+   best=0;
+
+   for(d=0;d<learn_parm->xa_depth;d++) {
+     allskip=1;
+     if(d>=1) {
+       init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
+       for(k=0;k<d-1;k++) {
+         init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
+       }
+       init_val_lin+=trow[best_ex[d-1]];
+     }
+     for(i=0;i<svnum;i++) {
+       skip=0;
+       if(sv2dnum[i] == docnum) skip=1;
+       for(j=0;j<d;j++) {
+         if(i == best_ex[j]) skip=1;
+       }
+
+       if(!skip) {
+         val=init_val_sq;
+         if(kernel_parm->kernel_type == LINEAR)
+           val+=docs[sv2dnum[i]]->fvec->twonorm_sq;
+         else
+           val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
+         for(j=0;j<d;j++) {
+           val+=2.0*cache[i+j*svnum];
+         }
+         val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
+         val-=((init_val_lin+trow[i])/(d+1.0));
+
+         if(allskip || (val < best_val[d])) {
+           best_val[d]=val;
+           best_ex[d]=i;
+         }
+         allskip=0;
+         if(val < thresh) {
+           i=svnum;
+           /* printf("EARLY"); */
+         }
+       }
+     }
+     if(!allskip) {
+       for(k=0;k<svnum;k++) {
+         cache[d*svnum+k]=kernel(kernel_parm,
+                                 docs[sv2dnum[best_ex[d]]],
+                                 docs[sv2dnum[k]]);
+       }
+     }
+     if((!allskip) && ((best_val[d] < best) || (d == 0))) {
+       best=best_val[d];
+       best_depth=d;
+     }
+     if(allskip || (best < thresh)) {
+       d=learn_parm->xa_depth;
+     }
+   }
+
+   free(cache);
+   free(trow);
+
+   /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
+   return(best);
+ }
+
+
+ void estimate_transduction_quality(MODEL *model, long int *label,
+                                    long int *unlabeled,
+                                    long int totdoc, DOC **docs, double *lin)
+      /* Loo-bound based on observation that loo-errors must have an
+         equal distribution in both training and test examples, given
+         that the test examples are classified correctly. Compare
+         chapter "Constraints on the Transductive Hyperplane" in my
+         Dissertation. */
+ {
+   long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
+   double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
+   double r_delta,r_delta_sq,xi,xisum=0,asum=0;
+
+   r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
+   r_delta_sq=r_delta*r_delta;
+
+   for(j=0;j<totdoc;j++) {
+     if(unlabeled[j]) {
+       totulab++;
+     }
+     else {
+       totlab++;
+       if(label[j] > 0)
+         totlabpos++;
+       else
+         totlabneg++;
+     }
+   }
+   for(j=1;j<model->sv_num;j++) {
+     i=model->supvec[j]->docnum;
+     xi=1.0-((lin[i]-model->b)*(double)label[i]);
+     if(xi<0) xi=0;
+
+     xisum+=xi;
+     asum+=fabs(model->alpha[j]);
+     if(unlabeled[i]) {
+       ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
+     }
+     else {
+       labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
+     }
+     if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
+       l++;
+       if(unlabeled[model->supvec[j]->docnum]) {
+         ulab++;
+         if(model->alpha[j] > 0)
+           ulabpos++;
+         else
+           ulabneg++;
+       }
+       else {
+         lab++;
+         if(model->alpha[j] > 0)
+           labpos++;
+         else
+           labneg++;
+       }
+     }
+   }
+   printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
+   printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
+   printf("xacrit>=1: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
+   printf("xacritsum: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
+   printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
+ }
+
+ double estimate_margin_vcdim(MODEL *model, double w, double R,
+                              KERNEL_PARM *kernel_parm)
+      /* w: optional: length of model vector in feature space */
+      /* R: optional: radius of ball containing the data */
+ {
+   double h;
+
+   /* follows chapter 5.6.4 in [Vapnik/95] */
+
+   if(w<0) {
+     w=model_length_s(model,kernel_parm);
+   }
+   if(R<0) {
+     R=estimate_sphere(model,kernel_parm);
+   }
+   h = w*w * R*R +1;
+   return(h);
+ }
+
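+ /* Worked example of the bound h = w^2*R^2 + 1 (numbers made up): with
+    model length w=5 and ball radius R=1, as is typical for normalized
+    text vectors, the estimate is h = 25*1+1 = 26. */
+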
+ double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
+      /* Approximates the radius of the ball containing */
+      /* the support vectors by bounding it with the */
+ {                        /* length of the longest support vector. This is */
+   register long j;       /* pretty good for text categorization, since all */
+   double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
+   DOC *nulldoc;          /* assumes that the center of the ball is at the */
+   WORD nullword;         /* origin of the space. */
+
+   nullword.wnum=0;
+   nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+   for(j=1;j<model->sv_num;j++) {
+     xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
+               -2*kernel(kernel_parm,model->supvec[j],nulldoc)
+               +kernel(kernel_parm,nulldoc,nulldoc));
+     if(xlen>maxxlen) {
+       maxxlen=xlen;
+     }
+   }
+
+   free_example(nulldoc,1);
+   return(maxxlen);
+ }
+
+ double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
+ {
+   long i;
+   double maxxlen,xlen;
+   DOC *nulldoc;  /* assumes that the center of the ball is at the */
+   WORD nullword; /* origin of the space. */
+
+   nullword.wnum=0;
+   nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+   maxxlen=0;
+   for(i=0;i<totdoc;i++) {
+     xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
+               -2*kernel(kernel_parm,docs[i],nulldoc)
+               +kernel(kernel_parm,nulldoc,nulldoc));
+     if(xlen>maxxlen) {
+       maxxlen=xlen;
+     }
+   }
+
+   free_example(nulldoc,1);
+   return(maxxlen);
+ }
+
+ double estimate_r_delta_average(DOC **docs, long int totdoc,
+                                 KERNEL_PARM *kernel_parm)
+ {
+   long i;
+   double avgxlen;
+   DOC *nulldoc;  /* assumes that the center of the ball is at the */
+   WORD nullword; /* origin of the space. */
+
+   nullword.wnum=0;
+   nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+   avgxlen=0;
+   for(i=0;i<totdoc;i++) {
+     avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
+                   -2*kernel(kernel_parm,docs[i],nulldoc)
+                   +kernel(kernel_parm,nulldoc,nulldoc));
+   }
+
+   free_example(nulldoc,1);
+   return(avgxlen/totdoc);
+ }
+
+ double length_of_longest_document_vector(DOC **docs, long int totdoc,
+                                          KERNEL_PARM *kernel_parm)
+ {
+   long i;
+   double maxxlen,xlen;
+
+   maxxlen=0;
+   for(i=0;i<totdoc;i++) {
+     xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
+     if(xlen>maxxlen) {
+       maxxlen=xlen;
+     }
+   }
+
+   return(maxxlen);
+ }
+
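+ /* Note on kernels: for RBF kernels K(x,x)=1 for every document, so
+    length_of_longest_document_vector returns 1; for the linear kernel
+    it returns the largest Euclidean norm max_i ||x_i||. */
+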
+ /****************************** IO-handling **********************************/
+
+ void write_prediction(char *predfile, MODEL *model, double *lin,
+                       double *a, long int *unlabeled,
+                       long int *label, long int totdoc,
+                       LEARN_PARM *learn_parm)
+ {
+   FILE *predfl;
+   long i;
+   double dist,a_max;
+
+   if(verbosity>=1) {
+     printf("Writing prediction file..."); fflush(stdout);
+   }
+   if ((predfl = fopen (predfile, "w")) == NULL)
+   { perror (predfile); exit (1); }
+   a_max=learn_parm->epsilon_a;
+   for(i=0;i<totdoc;i++) {
+     if((unlabeled[i]) && (a[i]>a_max)) {
+       a_max=a[i];
+     }
+   }
+   for(i=0;i<totdoc;i++) {
+     if(unlabeled[i]) {
+       if((a[i]>(learn_parm->epsilon_a))) {
+         dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
+       }
+       else {
+         dist=(lin[i]-model->b);
+       }
+       if(dist>0) {
+         fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
+       }
+       else {
+         fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+       }
+     }
+   }
+   fclose(predfl);
+   if(verbosity>=1) {
+     printf("done\n");
+   }
+ }
+
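+ /* Resulting file format (values made up): one line per unlabeled
+    example, with the favored class first, e.g.
+      0.73421:+1 -0.73421:-1
+      1.20533:-1 -1.20533:+1                                            */
+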
+ void write_alphas(char *alphafile, double *a,
+                   long int *label, long int totdoc)
+ {
+   FILE *alphafl;
+   long i;
+
+   if(verbosity>=1) {
+     printf("Writing alpha file..."); fflush(stdout);
+   }
+   if ((alphafl = fopen (alphafile, "w")) == NULL)
+   { perror (alphafile); exit (1); }
+   for(i=0;i<totdoc;i++) {
+     fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
+   }
+   fclose(alphafl);
+   if(verbosity>=1) {
+     printf("done\n");
+   }
+ }
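+ /* Resulting file format (values made up): one signed multiplier per
+    training example, a[i]*label[i], e.g.
+      0.5
+      -0.25
+      0                                                                 */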
+