svmlightcli 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +85 -0
- data/README.md +49 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bin/svm_classify +23 -0
- data/bin/svm_learn +23 -0
- data/ext/svmlightcli/Makefile +95 -0
- data/ext/svmlightcli/extconf.rb +33 -0
- data/ext/svmlightcli/kernel.h +40 -0
- data/ext/svmlightcli/svm_classify.c +197 -0
- data/ext/svmlightcli/svm_common.c +985 -0
- data/ext/svmlightcli/svm_common.h +301 -0
- data/ext/svmlightcli/svm_hideo.c +1062 -0
- data/ext/svmlightcli/svm_learn.c +4147 -0
- data/ext/svmlightcli/svm_learn.h +169 -0
- data/ext/svmlightcli/svm_learn_main.c +397 -0
- data/lib/svmlightcli.rb +6 -0
- data/lib/svmlightcli/version.rb +3 -0
- data/svmlightcli.gemspec +28 -0
- metadata +113 -0
@@ -0,0 +1,4147 @@
|
|
1
|
+
/***********************************************************************/
|
2
|
+
/* */
|
3
|
+
/* svm_learn.c */
|
4
|
+
/* */
|
5
|
+
/* Learning module of Support Vector Machine. */
|
6
|
+
/* */
|
7
|
+
/* Author: Thorsten Joachims */
|
8
|
+
/* Date: 02.07.02 */
|
9
|
+
/* */
|
10
|
+
/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
|
11
|
+
/* */
|
12
|
+
/* This software is available for non-commercial use only. It must */
|
13
|
+
/* not be modified and distributed without prior permission of the */
|
14
|
+
/* author. The author is not responsible for implications from the */
|
15
|
+
/* use of this software. */
|
16
|
+
/* */
|
17
|
+
/***********************************************************************/
|
18
|
+
|
19
|
+
|
20
|
+
# include "svm_common.h"
|
21
|
+
# include "svm_learn.h"
|
22
|
+
|
23
|
+
|
24
|
+
/* interface to QP-solver */
|
25
|
+
double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
|
26
|
+
|
27
|
+
/*---------------------------------------------------------------------------*/
|
28
|
+
|
29
|
+
/* Learns an SVM classification model based on the training data in
|
30
|
+
docs/label. The resulting model is returned in the structure
|
31
|
+
model. */
|
32
|
+
|
33
|
+
void svm_learn_classification(DOC **docs, double *class, long int
|
34
|
+
totdoc, long int totwords,
|
35
|
+
LEARN_PARM *learn_parm,
|
36
|
+
KERNEL_PARM *kernel_parm,
|
37
|
+
KERNEL_CACHE *kernel_cache,
|
38
|
+
MODEL *model,
|
39
|
+
double *alpha)
|
40
|
+
/* docs: Training vectors (x-part) */
|
41
|
+
/* class: Training labels (y-part, zero if test example for
|
42
|
+
transduction) */
|
43
|
+
/* totdoc: Number of examples in docs/label */
|
44
|
+
/* totwords: Number of features (i.e. highest feature index) */
|
45
|
+
/* learn_parm: Learning paramenters */
|
46
|
+
/* kernel_parm: Kernel paramenters */
|
47
|
+
/* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
|
48
|
+
NULL if linear.*/
|
49
|
+
/* model: Returns learning result (assumed empty before called) */
|
50
|
+
/* alpha: Start values for the alpha variables or NULL
|
51
|
+
pointer. The new alpha values are returned after
|
52
|
+
optimization if not NULL. Array must be of size totdoc. */
|
53
|
+
{
|
54
|
+
long *inconsistent,i,*label;
|
55
|
+
long inconsistentnum;
|
56
|
+
long misclassified,upsupvecnum;
|
57
|
+
double loss,model_length,example_length;
|
58
|
+
double maxdiff,*lin,*a,*c;
|
59
|
+
long runtime_start,runtime_end;
|
60
|
+
long iterations;
|
61
|
+
long *unlabeled,transduction;
|
62
|
+
long heldout;
|
63
|
+
long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
|
64
|
+
long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
|
65
|
+
double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
|
66
|
+
long *index,*index2dnum;
|
67
|
+
double *weights;
|
68
|
+
CFLOAT *aicache; /* buffer to keep one row of hessian */
|
69
|
+
|
70
|
+
double *xi_fullset; /* buffer for storing xi on full sample in loo */
|
71
|
+
double *a_fullset; /* buffer for storing alpha on full sample in loo */
|
72
|
+
TIMING timing_profile;
|
73
|
+
SHRINK_STATE shrink_state;
|
74
|
+
|
75
|
+
runtime_start=get_runtime();
|
76
|
+
timing_profile.time_kernel=0;
|
77
|
+
timing_profile.time_opti=0;
|
78
|
+
timing_profile.time_shrink=0;
|
79
|
+
timing_profile.time_update=0;
|
80
|
+
timing_profile.time_model=0;
|
81
|
+
timing_profile.time_check=0;
|
82
|
+
timing_profile.time_select=0;
|
83
|
+
kernel_cache_statistic=0;
|
84
|
+
|
85
|
+
learn_parm->totwords=totwords;
|
86
|
+
|
87
|
+
/* make sure -n value is reasonable */
|
88
|
+
if((learn_parm->svm_newvarsinqp < 2)
|
89
|
+
|| (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
|
90
|
+
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
91
|
+
}
|
92
|
+
|
93
|
+
init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
|
94
|
+
|
95
|
+
label = (long *)my_malloc(sizeof(long)*totdoc);
|
96
|
+
inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
97
|
+
unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
98
|
+
c = (double *)my_malloc(sizeof(double)*totdoc);
|
99
|
+
a = (double *)my_malloc(sizeof(double)*totdoc);
|
100
|
+
a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
|
101
|
+
xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
|
102
|
+
lin = (double *)my_malloc(sizeof(double)*totdoc);
|
103
|
+
learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
|
104
|
+
model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
105
|
+
model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
106
|
+
model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
107
|
+
|
108
|
+
model->at_upper_bound=0;
|
109
|
+
model->b=0;
|
110
|
+
model->supvec[0]=0; /* element 0 reserved and empty for now */
|
111
|
+
model->alpha[0]=0;
|
112
|
+
model->lin_weights=NULL;
|
113
|
+
model->totwords=totwords;
|
114
|
+
model->totdoc=totdoc;
|
115
|
+
model->kernel_parm=(*kernel_parm);
|
116
|
+
model->sv_num=1;
|
117
|
+
model->loo_error=-1;
|
118
|
+
model->loo_recall=-1;
|
119
|
+
model->loo_precision=-1;
|
120
|
+
model->xa_error=-1;
|
121
|
+
model->xa_recall=-1;
|
122
|
+
model->xa_precision=-1;
|
123
|
+
inconsistentnum=0;
|
124
|
+
transduction=0;
|
125
|
+
|
126
|
+
r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
|
127
|
+
r_delta_sq=r_delta*r_delta;
|
128
|
+
|
129
|
+
r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
|
130
|
+
if(learn_parm->svm_c == 0.0) { /* default value for C */
|
131
|
+
learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
|
132
|
+
if(verbosity>=1)
|
133
|
+
printf("Setting default regularization parameter C=%.4f\n",
|
134
|
+
learn_parm->svm_c);
|
135
|
+
}
|
136
|
+
|
137
|
+
learn_parm->eps=-1.0; /* equivalent regression epsilon for
|
138
|
+
classification */
|
139
|
+
|
140
|
+
for(i=0;i<totdoc;i++) { /* various inits */
|
141
|
+
docs[i]->docnum=i;
|
142
|
+
inconsistent[i]=0;
|
143
|
+
a[i]=0;
|
144
|
+
lin[i]=0;
|
145
|
+
c[i]=0.0;
|
146
|
+
unlabeled[i]=0;
|
147
|
+
if(class[i] == 0) {
|
148
|
+
unlabeled[i]=1;
|
149
|
+
label[i]=0;
|
150
|
+
transduction=1;
|
151
|
+
}
|
152
|
+
if(class[i] > 0) {
|
153
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
|
154
|
+
docs[i]->costfactor;
|
155
|
+
label[i]=1;
|
156
|
+
trainpos++;
|
157
|
+
}
|
158
|
+
else if(class[i] < 0) {
|
159
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
|
160
|
+
label[i]=-1;
|
161
|
+
trainneg++;
|
162
|
+
}
|
163
|
+
else {
|
164
|
+
learn_parm->svm_cost[i]=0;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
if(verbosity>=2) {
|
168
|
+
printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
|
169
|
+
}
|
170
|
+
|
171
|
+
/* caching makes no sense for linear kernel */
|
172
|
+
if(kernel_parm->kernel_type == LINEAR) {
|
173
|
+
kernel_cache = NULL;
|
174
|
+
}
|
175
|
+
|
176
|
+
/* compute starting state for initial alpha values */
|
177
|
+
if(alpha) {
|
178
|
+
if(verbosity>=1) {
|
179
|
+
printf("Computing starting state..."); fflush(stdout);
|
180
|
+
}
|
181
|
+
index = (long *)my_malloc(sizeof(long)*totdoc);
|
182
|
+
index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
183
|
+
weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
184
|
+
aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
185
|
+
for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
|
186
|
+
index[i]=1;
|
187
|
+
alpha[i]=fabs(alpha[i]);
|
188
|
+
if(alpha[i]<0) alpha[i]=0;
|
189
|
+
if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
|
190
|
+
}
|
191
|
+
if(kernel_parm->kernel_type != LINEAR) {
|
192
|
+
for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
|
193
|
+
if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
|
194
|
+
&& (kernel_cache_space_available(kernel_cache)))
|
195
|
+
cache_kernel_row(kernel_cache,docs,i,kernel_parm);
|
196
|
+
for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
|
197
|
+
if((alpha[i]==learn_parm->svm_cost[i])
|
198
|
+
&& (kernel_cache_space_available(kernel_cache)))
|
199
|
+
cache_kernel_row(kernel_cache,docs,i,kernel_parm);
|
200
|
+
}
|
201
|
+
(void)compute_index(index,totdoc,index2dnum);
|
202
|
+
update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
|
203
|
+
totwords,kernel_parm,kernel_cache,lin,aicache,
|
204
|
+
weights);
|
205
|
+
(void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
|
206
|
+
learn_parm,index2dnum,index2dnum,model);
|
207
|
+
for(i=0;i<totdoc;i++) { /* copy initial alphas */
|
208
|
+
a[i]=alpha[i];
|
209
|
+
}
|
210
|
+
free(index);
|
211
|
+
free(index2dnum);
|
212
|
+
free(weights);
|
213
|
+
free(aicache);
|
214
|
+
if(verbosity>=1) {
|
215
|
+
printf("done.\n"); fflush(stdout);
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
if(transduction) {
|
220
|
+
learn_parm->svm_iter_to_shrink=99999999;
|
221
|
+
if(verbosity >= 1)
|
222
|
+
printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
|
223
|
+
}
|
224
|
+
|
225
|
+
if(transduction && learn_parm->compute_loo) {
|
226
|
+
learn_parm->compute_loo=0;
|
227
|
+
if(verbosity >= 1)
|
228
|
+
printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
|
229
|
+
}
|
230
|
+
|
231
|
+
if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
|
232
|
+
learn_parm->compute_loo=0;
|
233
|
+
printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
|
234
|
+
}
|
235
|
+
|
236
|
+
if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
|
237
|
+
learn_parm->compute_loo=0;
|
238
|
+
printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
|
239
|
+
}
|
240
|
+
|
241
|
+
|
242
|
+
if(verbosity==1) {
|
243
|
+
printf("Optimizing"); fflush(stdout);
|
244
|
+
}
|
245
|
+
|
246
|
+
/* train the svm */
|
247
|
+
iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
|
248
|
+
kernel_parm,kernel_cache,&shrink_state,model,
|
249
|
+
inconsistent,unlabeled,a,lin,
|
250
|
+
c,&timing_profile,
|
251
|
+
&maxdiff,(long)-1,
|
252
|
+
(long)1);
|
253
|
+
|
254
|
+
if(verbosity>=1) {
|
255
|
+
if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
|
256
|
+
|
257
|
+
misclassified=0;
|
258
|
+
for(i=0;(i<totdoc);i++) { /* get final statistic */
|
259
|
+
if((lin[i]-model->b)*(double)label[i] <= 0.0)
|
260
|
+
misclassified++;
|
261
|
+
}
|
262
|
+
|
263
|
+
printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
|
264
|
+
misclassified,maxdiff);
|
265
|
+
|
266
|
+
runtime_end=get_runtime();
|
267
|
+
if(verbosity>=2) {
|
268
|
+
printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
|
269
|
+
((float)runtime_end-(float)runtime_start)/100.0,
|
270
|
+
(100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
|
271
|
+
(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
|
272
|
+
(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
|
273
|
+
(100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
|
274
|
+
(100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
|
275
|
+
(100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
|
276
|
+
(100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
|
277
|
+
}
|
278
|
+
else {
|
279
|
+
printf("Runtime in cpu-seconds: %.2f\n",
|
280
|
+
(runtime_end-runtime_start)/100.0);
|
281
|
+
}
|
282
|
+
|
283
|
+
if(learn_parm->remove_inconsistent) {
|
284
|
+
inconsistentnum=0;
|
285
|
+
for(i=0;i<totdoc;i++)
|
286
|
+
if(inconsistent[i])
|
287
|
+
inconsistentnum++;
|
288
|
+
printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
|
289
|
+
model->sv_num-1,inconsistentnum);
|
290
|
+
}
|
291
|
+
else {
|
292
|
+
upsupvecnum=0;
|
293
|
+
for(i=1;i<model->sv_num;i++) {
|
294
|
+
if(fabs(model->alpha[i]) >=
|
295
|
+
(learn_parm->svm_cost[(model->supvec[i])->docnum]-
|
296
|
+
learn_parm->epsilon_a))
|
297
|
+
upsupvecnum++;
|
298
|
+
}
|
299
|
+
printf("Number of SV: %ld (including %ld at upper bound)\n",
|
300
|
+
model->sv_num-1,upsupvecnum);
|
301
|
+
}
|
302
|
+
|
303
|
+
if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
304
|
+
loss=0;
|
305
|
+
model_length=0;
|
306
|
+
for(i=0;i<totdoc;i++) {
|
307
|
+
if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
|
308
|
+
loss+=1.0-(lin[i]-model->b)*(double)label[i];
|
309
|
+
model_length+=a[i]*label[i]*lin[i];
|
310
|
+
}
|
311
|
+
model_length=sqrt(model_length);
|
312
|
+
fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
313
|
+
fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
|
314
|
+
example_length=estimate_sphere(model,kernel_parm);
|
315
|
+
fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
|
316
|
+
length_of_longest_document_vector(docs,totdoc,kernel_parm));
|
317
|
+
fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
|
318
|
+
estimate_margin_vcdim(model,model_length,example_length,
|
319
|
+
kernel_parm));
|
320
|
+
if((!learn_parm->remove_inconsistent) && (!transduction)) {
|
321
|
+
runtime_start_xa=get_runtime();
|
322
|
+
if(verbosity>=1) {
|
323
|
+
printf("Computing XiAlpha-estimates..."); fflush(stdout);
|
324
|
+
}
|
325
|
+
compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
|
326
|
+
kernel_parm,learn_parm,&(model->xa_error),
|
327
|
+
&(model->xa_recall),&(model->xa_precision));
|
328
|
+
if(verbosity>=1) {
|
329
|
+
printf("done\n");
|
330
|
+
}
|
331
|
+
printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
|
332
|
+
(get_runtime()-runtime_start_xa)/100.0);
|
333
|
+
|
334
|
+
fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
|
335
|
+
model->xa_error,learn_parm->rho,learn_parm->xa_depth);
|
336
|
+
fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
|
337
|
+
model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
|
338
|
+
fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
|
339
|
+
model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
|
340
|
+
}
|
341
|
+
else if(!learn_parm->remove_inconsistent) {
|
342
|
+
estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
|
343
|
+
}
|
344
|
+
}
|
345
|
+
if(verbosity>=1) {
|
346
|
+
printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
|
347
|
+
}
|
348
|
+
}
|
349
|
+
|
350
|
+
|
351
|
+
/* leave-one-out testing starts now */
|
352
|
+
if(learn_parm->compute_loo) {
|
353
|
+
/* save results of training on full dataset for leave-one-out */
|
354
|
+
runtime_start_loo=get_runtime();
|
355
|
+
for(i=0;i<totdoc;i++) {
|
356
|
+
xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
|
357
|
+
if(xi_fullset[i]<0) xi_fullset[i]=0;
|
358
|
+
a_fullset[i]=a[i];
|
359
|
+
}
|
360
|
+
if(verbosity>=1) {
|
361
|
+
printf("Computing leave-one-out");
|
362
|
+
}
|
363
|
+
|
364
|
+
/* repeat this loop for every held-out example */
|
365
|
+
for(heldout=0;(heldout<totdoc);heldout++) {
|
366
|
+
if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
|
367
|
+
< 1.0) {
|
368
|
+
/* guaranteed to not produce a leave-one-out error */
|
369
|
+
if(verbosity==1) {
|
370
|
+
printf("+"); fflush(stdout);
|
371
|
+
}
|
372
|
+
}
|
373
|
+
else if(xi_fullset[heldout] > 1.0) {
|
374
|
+
/* guaranteed to produce a leave-one-out error */
|
375
|
+
loo_count++;
|
376
|
+
if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
|
377
|
+
if(verbosity==1) {
|
378
|
+
printf("-"); fflush(stdout);
|
379
|
+
}
|
380
|
+
}
|
381
|
+
else {
|
382
|
+
loocomputed++;
|
383
|
+
heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
|
384
|
+
learn_parm->svm_cost[heldout]=0;
|
385
|
+
/* make sure heldout example is not currently */
|
386
|
+
/* shrunk away. Assumes that lin is up to date! */
|
387
|
+
shrink_state.active[heldout]=1;
|
388
|
+
if(verbosity>=2)
|
389
|
+
printf("\nLeave-One-Out test on example %ld\n",heldout);
|
390
|
+
if(verbosity>=1) {
|
391
|
+
printf("(?[%ld]",heldout); fflush(stdout);
|
392
|
+
}
|
393
|
+
|
394
|
+
optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
|
395
|
+
kernel_parm,
|
396
|
+
kernel_cache,&shrink_state,model,inconsistent,unlabeled,
|
397
|
+
a,lin,c,&timing_profile,
|
398
|
+
&maxdiff,heldout,(long)2);
|
399
|
+
|
400
|
+
/* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
|
401
|
+
|
402
|
+
if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
|
403
|
+
loo_count++; /* there was a loo-error */
|
404
|
+
if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
|
405
|
+
if(verbosity>=1) {
|
406
|
+
printf("-)"); fflush(stdout);
|
407
|
+
}
|
408
|
+
}
|
409
|
+
else {
|
410
|
+
if(verbosity>=1) {
|
411
|
+
printf("+)"); fflush(stdout);
|
412
|
+
}
|
413
|
+
}
|
414
|
+
/* now we need to restore the original data set*/
|
415
|
+
learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
|
416
|
+
}
|
417
|
+
} /* end of leave-one-out loop */
|
418
|
+
|
419
|
+
|
420
|
+
if(verbosity>=1) {
|
421
|
+
printf("\nRetrain on full problem"); fflush(stdout);
|
422
|
+
}
|
423
|
+
optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
|
424
|
+
kernel_parm,
|
425
|
+
kernel_cache,&shrink_state,model,inconsistent,unlabeled,
|
426
|
+
a,lin,c,&timing_profile,
|
427
|
+
&maxdiff,(long)-1,(long)1);
|
428
|
+
if(verbosity >= 1)
|
429
|
+
printf("done.\n");
|
430
|
+
|
431
|
+
|
432
|
+
/* after all leave-one-out computed */
|
433
|
+
model->loo_error=100.0*loo_count/(double)totdoc;
|
434
|
+
model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
|
435
|
+
model->loo_precision=(trainpos-loo_count_pos)/
|
436
|
+
(double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
|
437
|
+
if(verbosity >= 1) {
|
438
|
+
fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
|
439
|
+
model->loo_error);
|
440
|
+
fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
|
441
|
+
model->loo_recall);
|
442
|
+
fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
|
443
|
+
model->loo_precision);
|
444
|
+
fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n",
|
445
|
+
loocomputed,learn_parm->rho);
|
446
|
+
printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
|
447
|
+
(double)(get_runtime()-runtime_start_loo)/100.0);
|
448
|
+
}
|
449
|
+
}
|
450
|
+
|
451
|
+
if(learn_parm->alphafile[0])
|
452
|
+
write_alphas(learn_parm->alphafile,a,label,totdoc);
|
453
|
+
|
454
|
+
shrink_state_cleanup(&shrink_state);
|
455
|
+
free(label);
|
456
|
+
free(inconsistent);
|
457
|
+
free(unlabeled);
|
458
|
+
free(c);
|
459
|
+
free(a);
|
460
|
+
free(a_fullset);
|
461
|
+
free(xi_fullset);
|
462
|
+
free(lin);
|
463
|
+
free(learn_parm->svm_cost);
|
464
|
+
}
|
465
|
+
|
466
|
+
|
467
|
+
/* Learns an SVM regression model based on the training data in
|
468
|
+
docs/label. The resulting model is returned in the structure
|
469
|
+
model. */
|
470
|
+
|
471
|
+
void svm_learn_regression(DOC **docs, double *value, long int totdoc,
|
472
|
+
long int totwords, LEARN_PARM *learn_parm,
|
473
|
+
KERNEL_PARM *kernel_parm,
|
474
|
+
KERNEL_CACHE **kernel_cache, MODEL *model)
|
475
|
+
/* docs: Training vectors (x-part) */
|
476
|
+
/* class: Training value (y-part) */
|
477
|
+
/* totdoc: Number of examples in docs/label */
|
478
|
+
/* totwords: Number of features (i.e. highest feature index) */
|
479
|
+
/* learn_parm: Learning paramenters */
|
480
|
+
/* kernel_parm: Kernel paramenters */
|
481
|
+
/* kernel_cache:Initialized Cache, if using a kernel. NULL if
|
482
|
+
linear. Note that it will be free'd and reassigned */
|
483
|
+
/* model: Returns learning result (assumed empty before called) */
|
484
|
+
{
|
485
|
+
long *inconsistent,i,j;
|
486
|
+
long inconsistentnum;
|
487
|
+
long upsupvecnum;
|
488
|
+
double loss,model_length,example_length;
|
489
|
+
double maxdiff,*lin,*a,*c;
|
490
|
+
long runtime_start,runtime_end;
|
491
|
+
long iterations,kernel_cache_size;
|
492
|
+
long *unlabeled;
|
493
|
+
double r_delta_sq=0,r_delta,r_delta_avg;
|
494
|
+
double *xi_fullset; /* buffer for storing xi on full sample in loo */
|
495
|
+
double *a_fullset; /* buffer for storing alpha on full sample in loo */
|
496
|
+
TIMING timing_profile;
|
497
|
+
SHRINK_STATE shrink_state;
|
498
|
+
DOC **docs_org;
|
499
|
+
long *label;
|
500
|
+
|
501
|
+
/* set up regression problem in standard form */
|
502
|
+
docs_org=docs;
|
503
|
+
docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
|
504
|
+
label = (long *)my_malloc(sizeof(long)*2*totdoc);
|
505
|
+
c = (double *)my_malloc(sizeof(double)*2*totdoc);
|
506
|
+
for(i=0;i<totdoc;i++) {
|
507
|
+
j=2*totdoc-1-i;
|
508
|
+
docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
|
509
|
+
label[i]=+1;
|
510
|
+
c[i]=value[i];
|
511
|
+
docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
|
512
|
+
label[j]=-1;
|
513
|
+
c[j]=value[i];
|
514
|
+
}
|
515
|
+
totdoc*=2;
|
516
|
+
|
517
|
+
/* need to get a bigger kernel cache */
|
518
|
+
if(*kernel_cache) {
|
519
|
+
kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
|
520
|
+
kernel_cache_cleanup(*kernel_cache);
|
521
|
+
(*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
|
522
|
+
}
|
523
|
+
|
524
|
+
runtime_start=get_runtime();
|
525
|
+
timing_profile.time_kernel=0;
|
526
|
+
timing_profile.time_opti=0;
|
527
|
+
timing_profile.time_shrink=0;
|
528
|
+
timing_profile.time_update=0;
|
529
|
+
timing_profile.time_model=0;
|
530
|
+
timing_profile.time_check=0;
|
531
|
+
timing_profile.time_select=0;
|
532
|
+
kernel_cache_statistic=0;
|
533
|
+
|
534
|
+
learn_parm->totwords=totwords;
|
535
|
+
|
536
|
+
/* make sure -n value is reasonable */
|
537
|
+
if((learn_parm->svm_newvarsinqp < 2)
|
538
|
+
|| (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
|
539
|
+
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
540
|
+
}
|
541
|
+
|
542
|
+
init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
|
543
|
+
|
544
|
+
inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
545
|
+
unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
546
|
+
a = (double *)my_malloc(sizeof(double)*totdoc);
|
547
|
+
a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
|
548
|
+
xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
|
549
|
+
lin = (double *)my_malloc(sizeof(double)*totdoc);
|
550
|
+
learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
|
551
|
+
model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
552
|
+
model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
553
|
+
model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
554
|
+
|
555
|
+
model->at_upper_bound=0;
|
556
|
+
model->b=0;
|
557
|
+
model->supvec[0]=0; /* element 0 reserved and empty for now */
|
558
|
+
model->alpha[0]=0;
|
559
|
+
model->lin_weights=NULL;
|
560
|
+
model->totwords=totwords;
|
561
|
+
model->totdoc=totdoc;
|
562
|
+
model->kernel_parm=(*kernel_parm);
|
563
|
+
model->sv_num=1;
|
564
|
+
model->loo_error=-1;
|
565
|
+
model->loo_recall=-1;
|
566
|
+
model->loo_precision=-1;
|
567
|
+
model->xa_error=-1;
|
568
|
+
model->xa_recall=-1;
|
569
|
+
model->xa_precision=-1;
|
570
|
+
inconsistentnum=0;
|
571
|
+
|
572
|
+
r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
|
573
|
+
r_delta_sq=r_delta*r_delta;
|
574
|
+
|
575
|
+
r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
|
576
|
+
if(learn_parm->svm_c == 0.0) { /* default value for C */
|
577
|
+
learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
|
578
|
+
if(verbosity>=1)
|
579
|
+
printf("Setting default regularization parameter C=%.4f\n",
|
580
|
+
learn_parm->svm_c);
|
581
|
+
}
|
582
|
+
|
583
|
+
for(i=0;i<totdoc;i++) { /* various inits */
|
584
|
+
inconsistent[i]=0;
|
585
|
+
a[i]=0;
|
586
|
+
lin[i]=0;
|
587
|
+
unlabeled[i]=0;
|
588
|
+
if(label[i] > 0) {
|
589
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
|
590
|
+
docs[i]->costfactor;
|
591
|
+
}
|
592
|
+
else if(label[i] < 0) {
|
593
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
|
594
|
+
}
|
595
|
+
}
|
596
|
+
|
597
|
+
/* caching makes no sense for linear kernel */
|
598
|
+
if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
|
599
|
+
printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
|
600
|
+
}
|
601
|
+
|
602
|
+
if(verbosity==1) {
|
603
|
+
printf("Optimizing"); fflush(stdout);
|
604
|
+
}
|
605
|
+
|
606
|
+
/* train the svm */
|
607
|
+
iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
|
608
|
+
kernel_parm,*kernel_cache,&shrink_state,
|
609
|
+
model,inconsistent,unlabeled,a,lin,c,
|
610
|
+
&timing_profile,&maxdiff,(long)-1,
|
611
|
+
(long)1);
|
612
|
+
|
613
|
+
if(verbosity>=1) {
|
614
|
+
if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
|
615
|
+
|
616
|
+
printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
|
617
|
+
|
618
|
+
runtime_end=get_runtime();
|
619
|
+
if(verbosity>=2) {
|
620
|
+
printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
|
621
|
+
((float)runtime_end-(float)runtime_start)/100.0,
|
622
|
+
(100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
|
623
|
+
(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
|
624
|
+
(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
|
625
|
+
(100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
|
626
|
+
(100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
|
627
|
+
(100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
|
628
|
+
(100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
|
629
|
+
}
|
630
|
+
else {
|
631
|
+
printf("Runtime in cpu-seconds: %.2f\n",
|
632
|
+
(runtime_end-runtime_start)/100.0);
|
633
|
+
}
|
634
|
+
|
635
|
+
if(learn_parm->remove_inconsistent) {
|
636
|
+
inconsistentnum=0;
|
637
|
+
for(i=0;i<totdoc;i++)
|
638
|
+
if(inconsistent[i])
|
639
|
+
inconsistentnum++;
|
640
|
+
printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
|
641
|
+
model->sv_num-1,inconsistentnum);
|
642
|
+
}
|
643
|
+
else {
|
644
|
+
upsupvecnum=0;
|
645
|
+
for(i=1;i<model->sv_num;i++) {
|
646
|
+
if(fabs(model->alpha[i]) >=
|
647
|
+
(learn_parm->svm_cost[(model->supvec[i])->docnum]-
|
648
|
+
learn_parm->epsilon_a))
|
649
|
+
upsupvecnum++;
|
650
|
+
}
|
651
|
+
printf("Number of SV: %ld (including %ld at upper bound)\n",
|
652
|
+
model->sv_num-1,upsupvecnum);
|
653
|
+
}
|
654
|
+
|
655
|
+
if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
656
|
+
loss=0;
|
657
|
+
model_length=0;
|
658
|
+
for(i=0;i<totdoc;i++) {
|
659
|
+
if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
|
660
|
+
loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
|
661
|
+
model_length+=a[i]*label[i]*lin[i];
|
662
|
+
}
|
663
|
+
model_length=sqrt(model_length);
|
664
|
+
fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
665
|
+
fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
|
666
|
+
example_length=estimate_sphere(model,kernel_parm);
|
667
|
+
fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
|
668
|
+
length_of_longest_document_vector(docs,totdoc,kernel_parm));
|
669
|
+
}
|
670
|
+
if(verbosity>=1) {
|
671
|
+
printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
|
672
|
+
}
|
673
|
+
}
|
674
|
+
|
675
|
+
if(learn_parm->alphafile[0])
|
676
|
+
write_alphas(learn_parm->alphafile,a,label,totdoc);
|
677
|
+
|
678
|
+
/* this makes sure the model we return does not contain pointers to the
|
679
|
+
temporary documents */
|
680
|
+
for(i=1;i<model->sv_num;i++) {
|
681
|
+
j=model->supvec[i]->docnum;
|
682
|
+
if(j >= (totdoc/2)) {
|
683
|
+
j=totdoc-j-1;
|
684
|
+
}
|
685
|
+
model->supvec[i]=docs_org[j];
|
686
|
+
}
|
687
|
+
|
688
|
+
shrink_state_cleanup(&shrink_state);
|
689
|
+
for(i=0;i<totdoc;i++)
|
690
|
+
free_example(docs[i],0);
|
691
|
+
free(docs);
|
692
|
+
free(label);
|
693
|
+
free(inconsistent);
|
694
|
+
free(unlabeled);
|
695
|
+
free(c);
|
696
|
+
free(a);
|
697
|
+
free(a_fullset);
|
698
|
+
free(xi_fullset);
|
699
|
+
free(lin);
|
700
|
+
free(learn_parm->svm_cost);
|
701
|
+
}
|
702
|
+
|
703
|
+
void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
|
704
|
+
long int totwords, LEARN_PARM *learn_parm,
|
705
|
+
KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
|
706
|
+
MODEL *model)
|
707
|
+
/* docs: Training vectors (x-part) */
|
708
|
+
/* rankvalue: Training target values that determine the ranking */
|
709
|
+
/* totdoc: Number of examples in docs/label */
|
710
|
+
/* totwords: Number of features (i.e. highest feature index) */
|
711
|
+
/* learn_parm: Learning paramenters */
|
712
|
+
/* kernel_parm: Kernel paramenters */
|
713
|
+
/* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
|
714
|
+
using a kernel. NULL if linear. NOTE: Cache is
|
715
|
+
getting reinitialized in this function */
|
716
|
+
/* model: Returns learning result (assumed empty before called) */
|
717
|
+
{
|
718
|
+
DOC **docdiff;
|
719
|
+
long i,j,k,totpair,kernel_cache_size;
|
720
|
+
double *target,*alpha,cost;
|
721
|
+
long *greater,*lesser;
|
722
|
+
MODEL *pairmodel;
|
723
|
+
SVECTOR *flow,*fhigh;
|
724
|
+
|
725
|
+
totpair=0;
|
726
|
+
for(i=0;i<totdoc;i++) {
|
727
|
+
for(j=i+1;j<totdoc;j++) {
|
728
|
+
if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
|
729
|
+
totpair++;
|
730
|
+
}
|
731
|
+
}
|
732
|
+
}
|
733
|
+
|
734
|
+
printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
|
735
|
+
docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
|
736
|
+
target=(double *)my_malloc(sizeof(double)*totpair);
|
737
|
+
greater=(long *)my_malloc(sizeof(long)*totpair);
|
738
|
+
lesser=(long *)my_malloc(sizeof(long)*totpair);
|
739
|
+
|
740
|
+
k=0;
|
741
|
+
for(i=0;i<totdoc;i++) {
|
742
|
+
for(j=i+1;j<totdoc;j++) {
|
743
|
+
if(docs[i]->queryid == docs[j]->queryid) {
|
744
|
+
cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
|
745
|
+
if(rankvalue[i] > rankvalue[j]) {
|
746
|
+
if(kernel_parm->kernel_type == LINEAR)
|
747
|
+
docdiff[k]=create_example(k,0,0,cost,
|
748
|
+
sub_ss(docs[i]->fvec,docs[j]->fvec));
|
749
|
+
else {
|
750
|
+
flow=copy_svector(docs[j]->fvec);
|
751
|
+
flow->factor=-1.0;
|
752
|
+
flow->next=NULL;
|
753
|
+
fhigh=copy_svector(docs[i]->fvec);
|
754
|
+
fhigh->factor=1.0;
|
755
|
+
fhigh->next=flow;
|
756
|
+
docdiff[k]=create_example(k,0,0,cost,fhigh);
|
757
|
+
}
|
758
|
+
target[k]=1;
|
759
|
+
greater[k]=i;
|
760
|
+
lesser[k]=j;
|
761
|
+
k++;
|
762
|
+
}
|
763
|
+
else if(rankvalue[i] < rankvalue[j]) {
|
764
|
+
if(kernel_parm->kernel_type == LINEAR)
|
765
|
+
docdiff[k]=create_example(k,0,0,cost,
|
766
|
+
sub_ss(docs[i]->fvec,docs[j]->fvec));
|
767
|
+
else {
|
768
|
+
flow=copy_svector(docs[j]->fvec);
|
769
|
+
flow->factor=-1.0;
|
770
|
+
flow->next=NULL;
|
771
|
+
fhigh=copy_svector(docs[i]->fvec);
|
772
|
+
fhigh->factor=1.0;
|
773
|
+
fhigh->next=flow;
|
774
|
+
docdiff[k]=create_example(k,0,0,cost,fhigh);
|
775
|
+
}
|
776
|
+
target[k]=-1;
|
777
|
+
greater[k]=i;
|
778
|
+
lesser[k]=j;
|
779
|
+
k++;
|
780
|
+
}
|
781
|
+
}
|
782
|
+
}
|
783
|
+
}
|
784
|
+
printf("done.\n"); fflush(stdout);
|
785
|
+
|
786
|
+
/* need to get a bigger kernel cache */
|
787
|
+
if(*kernel_cache) {
|
788
|
+
kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
|
789
|
+
kernel_cache_cleanup(*kernel_cache);
|
790
|
+
(*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
|
791
|
+
}
|
792
|
+
|
793
|
+
/* must use unbiased hyperplane on difference vectors */
|
794
|
+
learn_parm->biased_hyperplane=0;
|
795
|
+
pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
|
796
|
+
svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
|
797
|
+
kernel_parm,(*kernel_cache),pairmodel,NULL);
|
798
|
+
|
799
|
+
/* Transfer the result into a more compact model. If you would like
|
800
|
+
to output the original model on pairs of documents, see below. */
|
801
|
+
alpha=(double *)my_malloc(sizeof(double)*totdoc);
|
802
|
+
for(i=0;i<totdoc;i++) {
|
803
|
+
alpha[i]=0;
|
804
|
+
}
|
805
|
+
for(i=1;i<pairmodel->sv_num;i++) {
|
806
|
+
alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
|
807
|
+
alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
|
808
|
+
}
|
809
|
+
model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
810
|
+
model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
811
|
+
model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
812
|
+
model->supvec[0]=0; /* element 0 reserved and empty for now */
|
813
|
+
model->alpha[0]=0;
|
814
|
+
model->sv_num=1;
|
815
|
+
for(i=0;i<totdoc;i++) {
|
816
|
+
if(alpha[i]) {
|
817
|
+
model->supvec[model->sv_num]=docs[i];
|
818
|
+
model->alpha[model->sv_num]=alpha[i];
|
819
|
+
model->index[i]=model->sv_num;
|
820
|
+
model->sv_num++;
|
821
|
+
}
|
822
|
+
else {
|
823
|
+
model->index[i]=-1;
|
824
|
+
}
|
825
|
+
}
|
826
|
+
model->at_upper_bound=0;
|
827
|
+
model->b=0;
|
828
|
+
model->lin_weights=NULL;
|
829
|
+
model->totwords=totwords;
|
830
|
+
model->totdoc=totdoc;
|
831
|
+
model->kernel_parm=(*kernel_parm);
|
832
|
+
model->loo_error=-1;
|
833
|
+
model->loo_recall=-1;
|
834
|
+
model->loo_precision=-1;
|
835
|
+
model->xa_error=-1;
|
836
|
+
model->xa_recall=-1;
|
837
|
+
model->xa_precision=-1;
|
838
|
+
|
839
|
+
free(alpha);
|
840
|
+
free(greater);
|
841
|
+
free(lesser);
|
842
|
+
free(target);
|
843
|
+
|
844
|
+
/* If you would like to output the original model on pairs of
|
845
|
+
document, replace the following lines with '(*model)=(*pairmodel);' */
|
846
|
+
for(i=0;i<totpair;i++)
|
847
|
+
free_example(docdiff[i],1);
|
848
|
+
free(docdiff);
|
849
|
+
free_model(pairmodel,0);
|
850
|
+
}
|
851
|
+
|
852
|
+
|
853
|
+
/* The following solves a freely defined and given set of
|
854
|
+
inequalities. The optimization problem is of the following form:
|
855
|
+
|
856
|
+
min 0.5 w*w + C sum_i C_i \xi_i
|
857
|
+
s.t. x_i * w > rhs_i - \xi_i
|
858
|
+
|
859
|
+
This corresponds to the -z o option. */
|
860
|
+
|
861
|
+
void svm_learn_optimization(DOC **docs, double *rhs, long int
|
862
|
+
totdoc, long int totwords,
|
863
|
+
LEARN_PARM *learn_parm,
|
864
|
+
KERNEL_PARM *kernel_parm,
|
865
|
+
KERNEL_CACHE *kernel_cache, MODEL *model,
|
866
|
+
double *alpha)
|
867
|
+
/* docs: Left-hand side of inequalities (x-part) */
|
868
|
+
/* rhs: Right-hand side of inequalities */
|
869
|
+
/* totdoc: Number of examples in docs/label */
|
870
|
+
/* totwords: Number of features (i.e. highest feature index) */
|
871
|
+
/* learn_parm: Learning paramenters */
|
872
|
+
/* kernel_parm: Kernel paramenters */
|
873
|
+
/* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
|
874
|
+
NULL if linear.*/
|
875
|
+
/* model: Returns solution as SV expansion (assumed empty before called) */
|
876
|
+
/* alpha: Start values for the alpha variables or NULL
|
877
|
+
pointer. The new alpha values are returned after
|
878
|
+
optimization if not NULL. Array must be of size totdoc. */
|
879
|
+
{
|
880
|
+
long i,*label;
|
881
|
+
long misclassified,upsupvecnum;
|
882
|
+
double loss,model_length,example_length;
|
883
|
+
double maxdiff,*lin,*a,*c;
|
884
|
+
long runtime_start,runtime_end;
|
885
|
+
long iterations,maxslackid,svsetnum;
|
886
|
+
long *unlabeled,*inconsistent;
|
887
|
+
double r_delta_sq=0,r_delta,r_delta_avg;
|
888
|
+
long *index,*index2dnum;
|
889
|
+
double *weights,*slack,*alphaslack;
|
890
|
+
CFLOAT *aicache; /* buffer to keep one row of hessian */
|
891
|
+
|
892
|
+
TIMING timing_profile;
|
893
|
+
SHRINK_STATE shrink_state;
|
894
|
+
|
895
|
+
runtime_start=get_runtime();
|
896
|
+
timing_profile.time_kernel=0;
|
897
|
+
timing_profile.time_opti=0;
|
898
|
+
timing_profile.time_shrink=0;
|
899
|
+
timing_profile.time_update=0;
|
900
|
+
timing_profile.time_model=0;
|
901
|
+
timing_profile.time_check=0;
|
902
|
+
timing_profile.time_select=0;
|
903
|
+
kernel_cache_statistic=0;
|
904
|
+
|
905
|
+
learn_parm->totwords=totwords;
|
906
|
+
|
907
|
+
/* make sure -n value is reasonable */
|
908
|
+
if((learn_parm->svm_newvarsinqp < 2)
|
909
|
+
|| (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
|
910
|
+
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
911
|
+
}
|
912
|
+
|
913
|
+
init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
|
914
|
+
|
915
|
+
label = (long *)my_malloc(sizeof(long)*totdoc);
|
916
|
+
unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
917
|
+
inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
918
|
+
c = (double *)my_malloc(sizeof(double)*totdoc);
|
919
|
+
a = (double *)my_malloc(sizeof(double)*totdoc);
|
920
|
+
lin = (double *)my_malloc(sizeof(double)*totdoc);
|
921
|
+
learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
|
922
|
+
model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
923
|
+
model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
924
|
+
model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
925
|
+
|
926
|
+
model->at_upper_bound=0;
|
927
|
+
model->b=0;
|
928
|
+
model->supvec[0]=0; /* element 0 reserved and empty for now */
|
929
|
+
model->alpha[0]=0;
|
930
|
+
model->lin_weights=NULL;
|
931
|
+
model->totwords=totwords;
|
932
|
+
model->totdoc=totdoc;
|
933
|
+
model->kernel_parm=(*kernel_parm);
|
934
|
+
model->sv_num=1;
|
935
|
+
model->loo_error=-1;
|
936
|
+
model->loo_recall=-1;
|
937
|
+
model->loo_precision=-1;
|
938
|
+
model->xa_error=-1;
|
939
|
+
model->xa_recall=-1;
|
940
|
+
model->xa_precision=-1;
|
941
|
+
|
942
|
+
r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
|
943
|
+
r_delta_sq=r_delta*r_delta;
|
944
|
+
|
945
|
+
r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
|
946
|
+
if(learn_parm->svm_c == 0.0) { /* default value for C */
|
947
|
+
learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
|
948
|
+
if(verbosity>=1)
|
949
|
+
printf("Setting default regularization parameter C=%.4f\n",
|
950
|
+
learn_parm->svm_c);
|
951
|
+
}
|
952
|
+
|
953
|
+
learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */
|
954
|
+
|
955
|
+
learn_parm->eps=0.0; /* No margin, unless explicitly handcoded
|
956
|
+
in the right-hand side in the training
|
957
|
+
set. */
|
958
|
+
|
959
|
+
for(i=0;i<totdoc;i++) { /* various inits */
|
960
|
+
docs[i]->docnum=i;
|
961
|
+
a[i]=0;
|
962
|
+
lin[i]=0;
|
963
|
+
c[i]=rhs[i]; /* set right-hand side */
|
964
|
+
unlabeled[i]=0;
|
965
|
+
inconsistent[i]=0;
|
966
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
|
967
|
+
docs[i]->costfactor;
|
968
|
+
label[i]=1;
|
969
|
+
}
|
970
|
+
if(learn_parm->sharedslack) /* if shared slacks are used, they must */
|
971
|
+
for(i=0;i<totdoc;i++) /* be used on every constraint */
|
972
|
+
if(!docs[i]->slackid) {
|
973
|
+
perror("Error: Missing shared slacks definitions in some of the examples.");
|
974
|
+
exit(0);
|
975
|
+
}
|
976
|
+
|
977
|
+
/* compute starting state for initial alpha values */
|
978
|
+
if(alpha) {
|
979
|
+
if(verbosity>=1) {
|
980
|
+
printf("Computing starting state..."); fflush(stdout);
|
981
|
+
}
|
982
|
+
index = (long *)my_malloc(sizeof(long)*totdoc);
|
983
|
+
index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
984
|
+
weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
985
|
+
aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
986
|
+
for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
|
987
|
+
index[i]=1;
|
988
|
+
alpha[i]=fabs(alpha[i]);
|
989
|
+
if(alpha[i]<0) alpha[i]=0;
|
990
|
+
if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
|
991
|
+
}
|
992
|
+
if(kernel_parm->kernel_type != LINEAR) {
|
993
|
+
for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
|
994
|
+
if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
|
995
|
+
&& (kernel_cache_space_available(kernel_cache)))
|
996
|
+
cache_kernel_row(kernel_cache,docs,i,kernel_parm);
|
997
|
+
for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
|
998
|
+
if((alpha[i]==learn_parm->svm_cost[i])
|
999
|
+
&& (kernel_cache_space_available(kernel_cache)))
|
1000
|
+
cache_kernel_row(kernel_cache,docs,i,kernel_parm);
|
1001
|
+
}
|
1002
|
+
(void)compute_index(index,totdoc,index2dnum);
|
1003
|
+
update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
|
1004
|
+
totwords,kernel_parm,kernel_cache,lin,aicache,
|
1005
|
+
weights);
|
1006
|
+
(void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
|
1007
|
+
learn_parm,index2dnum,index2dnum,model);
|
1008
|
+
for(i=0;i<totdoc;i++) { /* copy initial alphas */
|
1009
|
+
a[i]=alpha[i];
|
1010
|
+
}
|
1011
|
+
free(index);
|
1012
|
+
free(index2dnum);
|
1013
|
+
free(weights);
|
1014
|
+
free(aicache);
|
1015
|
+
if(verbosity>=1) {
|
1016
|
+
printf("done.\n"); fflush(stdout);
|
1017
|
+
}
|
1018
|
+
}
|
1019
|
+
|
1020
|
+
/* removing inconsistent does not work for general optimization problem */
|
1021
|
+
if(learn_parm->remove_inconsistent) {
|
1022
|
+
learn_parm->remove_inconsistent = 0;
|
1023
|
+
printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
|
1024
|
+
}
|
1025
|
+
|
1026
|
+
/* caching makes no sense for linear kernel */
|
1027
|
+
if(kernel_parm->kernel_type == LINEAR) {
|
1028
|
+
kernel_cache = NULL;
|
1029
|
+
}
|
1030
|
+
|
1031
|
+
if(verbosity==1) {
|
1032
|
+
printf("Optimizing"); fflush(stdout);
|
1033
|
+
}
|
1034
|
+
|
1035
|
+
/* train the svm */
|
1036
|
+
if(learn_parm->sharedslack)
|
1037
|
+
iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
|
1038
|
+
totwords,learn_parm,kernel_parm,
|
1039
|
+
kernel_cache,&shrink_state,model,
|
1040
|
+
a,lin,c,&timing_profile,
|
1041
|
+
&maxdiff);
|
1042
|
+
else
|
1043
|
+
iterations=optimize_to_convergence(docs,label,totdoc,
|
1044
|
+
totwords,learn_parm,kernel_parm,
|
1045
|
+
kernel_cache,&shrink_state,model,
|
1046
|
+
inconsistent,unlabeled,
|
1047
|
+
a,lin,c,&timing_profile,
|
1048
|
+
&maxdiff,(long)-1,(long)1);
|
1049
|
+
|
1050
|
+
if(verbosity>=1) {
|
1051
|
+
if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
|
1052
|
+
|
1053
|
+
misclassified=0;
|
1054
|
+
for(i=0;(i<totdoc);i++) { /* get final statistic */
|
1055
|
+
if((lin[i]-model->b)*(double)label[i] <= 0.0)
|
1056
|
+
misclassified++;
|
1057
|
+
}
|
1058
|
+
|
1059
|
+
printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
|
1060
|
+
|
1061
|
+
runtime_end=get_runtime();
|
1062
|
+
if(verbosity>=2) {
|
1063
|
+
printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
|
1064
|
+
((float)runtime_end-(float)runtime_start)/100.0,
|
1065
|
+
(100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
|
1066
|
+
(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
|
1067
|
+
(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
|
1068
|
+
(100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
|
1069
|
+
(100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
|
1070
|
+
(100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
|
1071
|
+
(100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
|
1072
|
+
}
|
1073
|
+
else {
|
1074
|
+
printf("Runtime in cpu-seconds: %.2f\n",
|
1075
|
+
(runtime_end-runtime_start)/100.0);
|
1076
|
+
}
|
1077
|
+
}
|
1078
|
+
if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
1079
|
+
loss=0;
|
1080
|
+
model_length=0;
|
1081
|
+
for(i=0;i<totdoc;i++) {
|
1082
|
+
if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
|
1083
|
+
loss+=c[i]-(lin[i]-model->b)*(double)label[i];
|
1084
|
+
model_length+=a[i]*label[i]*lin[i];
|
1085
|
+
}
|
1086
|
+
model_length=sqrt(model_length);
|
1087
|
+
fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
|
1088
|
+
}
|
1089
|
+
|
1090
|
+
if(learn_parm->sharedslack) {
|
1091
|
+
index = (long *)my_malloc(sizeof(long)*totdoc);
|
1092
|
+
index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1093
|
+
maxslackid=0;
|
1094
|
+
for(i=0;i<totdoc;i++) { /* create full index */
|
1095
|
+
index[i]=1;
|
1096
|
+
if(maxslackid<docs[i]->slackid)
|
1097
|
+
maxslackid=docs[i]->slackid;
|
1098
|
+
}
|
1099
|
+
(void)compute_index(index,totdoc,index2dnum);
|
1100
|
+
slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
1101
|
+
alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
1102
|
+
for(i=0;i<=maxslackid;i++) { /* init shared slacks */
|
1103
|
+
slack[i]=0;
|
1104
|
+
alphaslack[i]=0;
|
1105
|
+
}
|
1106
|
+
compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
|
1107
|
+
slack,alphaslack);
|
1108
|
+
loss=0;
|
1109
|
+
model->at_upper_bound=0;
|
1110
|
+
svsetnum=0;
|
1111
|
+
for(i=0;i<=maxslackid;i++) { /* create full index */
|
1112
|
+
loss+=slack[i];
|
1113
|
+
if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
|
1114
|
+
model->at_upper_bound++;
|
1115
|
+
if(alphaslack[i] > learn_parm->epsilon_a)
|
1116
|
+
svsetnum++;
|
1117
|
+
}
|
1118
|
+
free(index);
|
1119
|
+
free(index2dnum);
|
1120
|
+
free(slack);
|
1121
|
+
free(alphaslack);
|
1122
|
+
}
|
1123
|
+
|
1124
|
+
if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
1125
|
+
if(learn_parm->sharedslack) {
|
1126
|
+
printf("Number of SV: %ld\n",
|
1127
|
+
model->sv_num-1);
|
1128
|
+
printf("Number of non-zero slack variables: %ld (out of %ld)\n",
|
1129
|
+
model->at_upper_bound,svsetnum);
|
1130
|
+
fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
1131
|
+
}
|
1132
|
+
else {
|
1133
|
+
upsupvecnum=0;
|
1134
|
+
for(i=1;i<model->sv_num;i++) {
|
1135
|
+
if(fabs(model->alpha[i]) >=
|
1136
|
+
(learn_parm->svm_cost[(model->supvec[i])->docnum]-
|
1137
|
+
learn_parm->epsilon_a))
|
1138
|
+
upsupvecnum++;
|
1139
|
+
}
|
1140
|
+
printf("Number of SV: %ld (including %ld at upper bound)\n",
|
1141
|
+
model->sv_num-1,upsupvecnum);
|
1142
|
+
fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
1143
|
+
}
|
1144
|
+
example_length=estimate_sphere(model,kernel_parm);
|
1145
|
+
fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
|
1146
|
+
length_of_longest_document_vector(docs,totdoc,kernel_parm));
|
1147
|
+
}
|
1148
|
+
if(verbosity>=1) {
|
1149
|
+
printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
|
1150
|
+
}
|
1151
|
+
|
1152
|
+
if(alpha) {
|
1153
|
+
for(i=0;i<totdoc;i++) { /* copy final alphas */
|
1154
|
+
alpha[i]=a[i];
|
1155
|
+
}
|
1156
|
+
}
|
1157
|
+
|
1158
|
+
if(learn_parm->alphafile[0])
|
1159
|
+
write_alphas(learn_parm->alphafile,a,label,totdoc);
|
1160
|
+
|
1161
|
+
shrink_state_cleanup(&shrink_state);
|
1162
|
+
free(label);
|
1163
|
+
free(unlabeled);
|
1164
|
+
free(inconsistent);
|
1165
|
+
free(c);
|
1166
|
+
free(a);
|
1167
|
+
free(lin);
|
1168
|
+
free(learn_parm->svm_cost);
|
1169
|
+
}
|
1170
|
+
|
1171
|
+
|
1172
|
+
long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
|
1173
|
+
long int totwords, LEARN_PARM *learn_parm,
|
1174
|
+
KERNEL_PARM *kernel_parm,
|
1175
|
+
KERNEL_CACHE *kernel_cache,
|
1176
|
+
SHRINK_STATE *shrink_state, MODEL *model,
|
1177
|
+
long int *inconsistent, long int *unlabeled,
|
1178
|
+
double *a, double *lin, double *c,
|
1179
|
+
TIMING *timing_profile, double *maxdiff,
|
1180
|
+
long int heldout, long int retrain)
|
1181
|
+
/* docs: Training vectors (x-part) */
|
1182
|
+
/* label: Training labels/value (y-part, zero if test example for
|
1183
|
+
transduction) */
|
1184
|
+
/* totdoc: Number of examples in docs/label */
|
1185
|
+
/* totwords: Number of features (i.e. highest feature index) */
|
1186
|
+
/* laern_parm: Learning paramenters */
|
1187
|
+
/* kernel_parm: Kernel paramenters */
|
1188
|
+
/* kernel_cache: Initialized/partly filled Cache, if using a kernel.
|
1189
|
+
NULL if linear. */
|
1190
|
+
/* shrink_state: State of active variables */
|
1191
|
+
/* model: Returns learning result */
|
1192
|
+
/* inconsistent: examples thrown out as inconstistent */
|
1193
|
+
/* unlabeled: test examples for transduction */
|
1194
|
+
/* a: alphas */
|
1195
|
+
/* lin: linear component of gradient */
|
1196
|
+
/* c: right hand side of inequalities (margin) */
|
1197
|
+
/* maxdiff: returns maximum violation of KT-conditions */
|
1198
|
+
/* heldout: marks held-out example for leave-one-out (or -1) */
|
1199
|
+
/* retrain: selects training mode (1=regular / 2=holdout) */
|
1200
|
+
{
|
1201
|
+
long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
|
1202
|
+
long inconsistentnum,choosenum,already_chosen=0,iteration;
|
1203
|
+
long misclassified,supvecnum=0,*active2dnum,inactivenum;
|
1204
|
+
long *working2dnum,*selexam;
|
1205
|
+
long activenum;
|
1206
|
+
double criterion,eq;
|
1207
|
+
double *a_old;
|
1208
|
+
long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
|
1209
|
+
long transductcycle;
|
1210
|
+
long transduction;
|
1211
|
+
double epsilon_crit_org;
|
1212
|
+
double bestmaxdiff;
|
1213
|
+
long bestmaxdiffiter,terminate;
|
1214
|
+
|
1215
|
+
double *selcrit; /* buffer for sorting */
|
1216
|
+
CFLOAT *aicache; /* buffer to keep one row of hessian */
|
1217
|
+
double *weights; /* buffer for weight vector in linear case */
|
1218
|
+
QP qp; /* buffer for one quadratic program */
|
1219
|
+
|
1220
|
+
epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
|
1221
|
+
if(kernel_parm->kernel_type == LINEAR) {
|
1222
|
+
learn_parm->epsilon_crit=2.0;
|
1223
|
+
kernel_cache=NULL; /* caching makes no sense for linear kernel */
|
1224
|
+
}
|
1225
|
+
learn_parm->epsilon_shrink=2;
|
1226
|
+
(*maxdiff)=1;
|
1227
|
+
|
1228
|
+
learn_parm->totwords=totwords;
|
1229
|
+
|
1230
|
+
chosen = (long *)my_malloc(sizeof(long)*totdoc);
|
1231
|
+
last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
|
1232
|
+
key = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1233
|
+
selcrit = (double *)my_malloc(sizeof(double)*totdoc);
|
1234
|
+
selexam = (long *)my_malloc(sizeof(long)*totdoc);
|
1235
|
+
a_old = (double *)my_malloc(sizeof(double)*totdoc);
|
1236
|
+
aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
1237
|
+
working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1238
|
+
active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1239
|
+
qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1240
|
+
qp.opt_ce0 = (double *)my_malloc(sizeof(double));
|
1241
|
+
qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
|
1242
|
+
*learn_parm->svm_maxqpsize);
|
1243
|
+
qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1244
|
+
qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1245
|
+
qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1246
|
+
qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1247
|
+
weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
1248
|
+
|
1249
|
+
choosenum=0;
|
1250
|
+
inconsistentnum=0;
|
1251
|
+
transductcycle=0;
|
1252
|
+
transduction=0;
|
1253
|
+
if(!retrain) retrain=1;
|
1254
|
+
iteration=1;
|
1255
|
+
bestmaxdiffiter=1;
|
1256
|
+
bestmaxdiff=999999999;
|
1257
|
+
terminate=0;
|
1258
|
+
|
1259
|
+
if(kernel_cache) {
|
1260
|
+
kernel_cache->time=iteration; /* for lru cache */
|
1261
|
+
kernel_cache_reset_lru(kernel_cache);
|
1262
|
+
}
|
1263
|
+
|
1264
|
+
for(i=0;i<totdoc;i++) { /* various inits */
|
1265
|
+
chosen[i]=0;
|
1266
|
+
a_old[i]=a[i];
|
1267
|
+
last_suboptimal_at[i]=1;
|
1268
|
+
if(inconsistent[i])
|
1269
|
+
inconsistentnum++;
|
1270
|
+
if(unlabeled[i]) {
|
1271
|
+
transduction=1;
|
1272
|
+
}
|
1273
|
+
}
|
1274
|
+
activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
1275
|
+
inactivenum=totdoc-activenum;
|
1276
|
+
clear_index(working2dnum);
|
1277
|
+
|
1278
|
+
/* repeat this loop until we have convergence */
|
1279
|
+
for(;retrain && (!terminate);iteration++) {
|
1280
|
+
|
1281
|
+
if(kernel_cache)
|
1282
|
+
kernel_cache->time=iteration; /* for lru cache */
|
1283
|
+
if(verbosity>=2) {
|
1284
|
+
printf(
|
1285
|
+
"Iteration %ld: ",iteration); fflush(stdout);
|
1286
|
+
}
|
1287
|
+
else if(verbosity==1) {
|
1288
|
+
printf("."); fflush(stdout);
|
1289
|
+
}
|
1290
|
+
|
1291
|
+
if(verbosity>=2) t0=get_runtime();
|
1292
|
+
if(verbosity>=3) {
|
1293
|
+
printf("\nSelecting working set... "); fflush(stdout);
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
|
1297
|
+
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
1298
|
+
|
1299
|
+
i=0;
|
1300
|
+
for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
|
1301
|
+
if((chosen[j]>=(learn_parm->svm_maxqpsize/
|
1302
|
+
minl(learn_parm->svm_maxqpsize,
|
1303
|
+
learn_parm->svm_newvarsinqp)))
|
1304
|
+
|| (inconsistent[j])
|
1305
|
+
|| (j == heldout)) {
|
1306
|
+
chosen[j]=0;
|
1307
|
+
choosenum--;
|
1308
|
+
}
|
1309
|
+
else {
|
1310
|
+
chosen[j]++;
|
1311
|
+
working2dnum[i++]=j;
|
1312
|
+
}
|
1313
|
+
}
|
1314
|
+
working2dnum[i]=-1;
|
1315
|
+
|
1316
|
+
if(retrain == 2) {
|
1317
|
+
choosenum=0;
|
1318
|
+
for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
|
1319
|
+
chosen[j]=0;
|
1320
|
+
}
|
1321
|
+
clear_index(working2dnum);
|
1322
|
+
for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
|
1323
|
+
if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
|
1324
|
+
chosen[i]=99999;
|
1325
|
+
choosenum++;
|
1326
|
+
a[i]=0;
|
1327
|
+
}
|
1328
|
+
}
|
1329
|
+
if(learn_parm->biased_hyperplane) {
|
1330
|
+
eq=0;
|
1331
|
+
for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
|
1332
|
+
eq+=a[i]*label[i];
|
1333
|
+
}
|
1334
|
+
for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
|
1335
|
+
if((eq*label[i] > 0) && (a[i] > 0)) {
|
1336
|
+
chosen[i]=88888;
|
1337
|
+
choosenum++;
|
1338
|
+
if((eq*label[i]) > a[i]) {
|
1339
|
+
eq-=(a[i]*label[i]);
|
1340
|
+
a[i]=0;
|
1341
|
+
}
|
1342
|
+
else {
|
1343
|
+
a[i]-=(eq*label[i]);
|
1344
|
+
eq=0;
|
1345
|
+
}
|
1346
|
+
}
|
1347
|
+
}
|
1348
|
+
}
|
1349
|
+
compute_index(chosen,totdoc,working2dnum);
|
1350
|
+
}
|
1351
|
+
else { /* select working set according to steepest gradient */
|
1352
|
+
if(iteration % 101) {
|
1353
|
+
already_chosen=0;
|
1354
|
+
if((minl(learn_parm->svm_newvarsinqp,
|
1355
|
+
learn_parm->svm_maxqpsize-choosenum)>=4)
|
1356
|
+
&& (kernel_parm->kernel_type != LINEAR)) {
|
1357
|
+
/* select part of the working set from cache */
|
1358
|
+
already_chosen=select_next_qp_subproblem_grad(
|
1359
|
+
label,unlabeled,a,lin,c,totdoc,
|
1360
|
+
(long)(minl(learn_parm->svm_maxqpsize-choosenum,
|
1361
|
+
learn_parm->svm_newvarsinqp)
|
1362
|
+
/2),
|
1363
|
+
learn_parm,inconsistent,active2dnum,
|
1364
|
+
working2dnum,selcrit,selexam,kernel_cache,1,
|
1365
|
+
key,chosen);
|
1366
|
+
choosenum+=already_chosen;
|
1367
|
+
}
|
1368
|
+
choosenum+=select_next_qp_subproblem_grad(
|
1369
|
+
label,unlabeled,a,lin,c,totdoc,
|
1370
|
+
minl(learn_parm->svm_maxqpsize-choosenum,
|
1371
|
+
learn_parm->svm_newvarsinqp-already_chosen),
|
1372
|
+
learn_parm,inconsistent,active2dnum,
|
1373
|
+
working2dnum,selcrit,selexam,kernel_cache,0,key,
|
1374
|
+
chosen);
|
1375
|
+
}
|
1376
|
+
else { /* once in a while, select a somewhat random working set
|
1377
|
+
to get unlocked of infinite loops due to numerical
|
1378
|
+
inaccuracies in the core qp-solver */
|
1379
|
+
choosenum+=select_next_qp_subproblem_rand(
|
1380
|
+
label,unlabeled,a,lin,c,totdoc,
|
1381
|
+
minl(learn_parm->svm_maxqpsize-choosenum,
|
1382
|
+
learn_parm->svm_newvarsinqp),
|
1383
|
+
learn_parm,inconsistent,active2dnum,
|
1384
|
+
working2dnum,selcrit,selexam,kernel_cache,key,
|
1385
|
+
chosen,iteration);
|
1386
|
+
}
|
1387
|
+
}
|
1388
|
+
|
1389
|
+
if(verbosity>=2) {
|
1390
|
+
printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
|
1391
|
+
}
|
1392
|
+
|
1393
|
+
if(verbosity>=2) t1=get_runtime();
|
1394
|
+
|
1395
|
+
if(kernel_cache)
|
1396
|
+
cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
|
1397
|
+
choosenum,kernel_parm);
|
1398
|
+
|
1399
|
+
if(verbosity>=2) t2=get_runtime();
|
1400
|
+
if(retrain != 2) {
|
1401
|
+
optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
|
1402
|
+
model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
|
1403
|
+
aicache,kernel_parm,&qp,&epsilon_crit_org);
|
1404
|
+
}
|
1405
|
+
|
1406
|
+
if(verbosity>=2) t3=get_runtime();
|
1407
|
+
update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
|
1408
|
+
totwords,kernel_parm,kernel_cache,lin,aicache,
|
1409
|
+
weights);
|
1410
|
+
|
1411
|
+
if(verbosity>=2) t4=get_runtime();
|
1412
|
+
supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
|
1413
|
+
learn_parm,working2dnum,active2dnum,model);
|
1414
|
+
|
1415
|
+
if(verbosity>=2) t5=get_runtime();
|
1416
|
+
|
1417
|
+
/* The following computation of the objective function works only */
|
1418
|
+
/* relative to the active variables */
|
1419
|
+
if(verbosity>=3) {
|
1420
|
+
criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
|
1421
|
+
active2dnum);
|
1422
|
+
printf("Objective function (over active variables): %.16f\n",criterion);
|
1423
|
+
fflush(stdout);
|
1424
|
+
}
|
1425
|
+
|
1426
|
+
for(jj=0;(i=working2dnum[jj])>=0;jj++) {
|
1427
|
+
a_old[i]=a[i];
|
1428
|
+
}
|
1429
|
+
|
1430
|
+
if(retrain == 2) { /* reset inconsistent unlabeled examples */
|
1431
|
+
for(i=0;(i<totdoc);i++) {
|
1432
|
+
if(inconsistent[i] && unlabeled[i]) {
|
1433
|
+
inconsistent[i]=0;
|
1434
|
+
label[i]=0;
|
1435
|
+
}
|
1436
|
+
}
|
1437
|
+
}
|
1438
|
+
|
1439
|
+
retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
|
1440
|
+
maxdiff,epsilon_crit_org,&misclassified,
|
1441
|
+
inconsistent,active2dnum,last_suboptimal_at,
|
1442
|
+
iteration,kernel_parm);
|
1443
|
+
|
1444
|
+
if(verbosity>=2) {
|
1445
|
+
t6=get_runtime();
|
1446
|
+
timing_profile->time_select+=t1-t0;
|
1447
|
+
timing_profile->time_kernel+=t2-t1;
|
1448
|
+
timing_profile->time_opti+=t3-t2;
|
1449
|
+
timing_profile->time_update+=t4-t3;
|
1450
|
+
timing_profile->time_model+=t5-t4;
|
1451
|
+
timing_profile->time_check+=t6-t5;
|
1452
|
+
}
|
1453
|
+
|
1454
|
+
/* checking whether optimizer got stuck */
|
1455
|
+
if((*maxdiff) < bestmaxdiff) {
|
1456
|
+
bestmaxdiff=(*maxdiff);
|
1457
|
+
bestmaxdiffiter=iteration;
|
1458
|
+
}
|
1459
|
+
if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
|
1460
|
+
/* long time no progress? */
|
1461
|
+
terminate=1;
|
1462
|
+
retrain=0;
|
1463
|
+
if(verbosity>=1)
|
1464
|
+
printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
|
1465
|
+
}
|
1466
|
+
|
1467
|
+
noshrink=0;
|
1468
|
+
if((!retrain) && (inactivenum>0)
|
1469
|
+
&& ((!learn_parm->skip_final_opt_check)
|
1470
|
+
|| (kernel_parm->kernel_type == LINEAR))) {
|
1471
|
+
if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
1472
|
+
|| (verbosity>=2)) {
|
1473
|
+
if(verbosity==1) {
|
1474
|
+
printf("\n");
|
1475
|
+
}
|
1476
|
+
printf(" Checking optimality of inactive variables...");
|
1477
|
+
fflush(stdout);
|
1478
|
+
}
|
1479
|
+
t1=get_runtime();
|
1480
|
+
reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
|
1481
|
+
totwords,iteration,learn_parm,inconsistent,
|
1482
|
+
docs,kernel_parm,kernel_cache,model,aicache,
|
1483
|
+
weights,maxdiff);
|
1484
|
+
/* Update to new active variables. */
|
1485
|
+
activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
1486
|
+
inactivenum=totdoc-activenum;
|
1487
|
+
/* reset watchdog */
|
1488
|
+
bestmaxdiff=(*maxdiff);
|
1489
|
+
bestmaxdiffiter=iteration;
|
1490
|
+
/* termination criterion */
|
1491
|
+
noshrink=1;
|
1492
|
+
retrain=0;
|
1493
|
+
if((*maxdiff) > learn_parm->epsilon_crit)
|
1494
|
+
retrain=1;
|
1495
|
+
timing_profile->time_shrink+=get_runtime()-t1;
|
1496
|
+
if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
1497
|
+
|| (verbosity>=2)) {
|
1498
|
+
printf("done.\n"); fflush(stdout);
|
1499
|
+
printf(" Number of inactive variables = %ld\n",inactivenum);
|
1500
|
+
}
|
1501
|
+
}
|
1502
|
+
|
1503
|
+
if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
|
1504
|
+
learn_parm->epsilon_crit=(*maxdiff);
|
1505
|
+
if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
|
1506
|
+
learn_parm->epsilon_crit/=2.0;
|
1507
|
+
retrain=1;
|
1508
|
+
noshrink=1;
|
1509
|
+
}
|
1510
|
+
if(learn_parm->epsilon_crit<epsilon_crit_org)
|
1511
|
+
learn_parm->epsilon_crit=epsilon_crit_org;
|
1512
|
+
|
1513
|
+
if(verbosity>=2) {
|
1514
|
+
printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
|
1515
|
+
supvecnum,model->at_upper_bound,(*maxdiff));
|
1516
|
+
fflush(stdout);
|
1517
|
+
}
|
1518
|
+
if(verbosity>=3) {
|
1519
|
+
printf("\n");
|
1520
|
+
}
|
1521
|
+
|
1522
|
+
if((!retrain) && (transduction)) {
|
1523
|
+
for(i=0;(i<totdoc);i++) {
|
1524
|
+
shrink_state->active[i]=1;
|
1525
|
+
}
|
1526
|
+
activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
1527
|
+
inactivenum=0;
|
1528
|
+
if(verbosity==1) printf("done\n");
|
1529
|
+
retrain=incorporate_unlabeled_examples(model,label,inconsistent,
|
1530
|
+
unlabeled,a,lin,totdoc,
|
1531
|
+
selcrit,selexam,key,
|
1532
|
+
transductcycle,kernel_parm,
|
1533
|
+
learn_parm);
|
1534
|
+
epsilon_crit_org=learn_parm->epsilon_crit;
|
1535
|
+
if(kernel_parm->kernel_type == LINEAR)
|
1536
|
+
learn_parm->epsilon_crit=1;
|
1537
|
+
transductcycle++;
|
1538
|
+
/* reset watchdog */
|
1539
|
+
bestmaxdiff=(*maxdiff);
|
1540
|
+
bestmaxdiffiter=iteration;
|
1541
|
+
}
|
1542
|
+
else if(((iteration % 10) == 0) && (!noshrink)) {
|
1543
|
+
activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
|
1544
|
+
active2dnum,last_suboptimal_at,iteration,totdoc,
|
1545
|
+
maxl((long)(activenum/10),
|
1546
|
+
maxl((long)(totdoc/500),100)),
|
1547
|
+
a,inconsistent);
|
1548
|
+
inactivenum=totdoc-activenum;
|
1549
|
+
if((kernel_cache)
|
1550
|
+
&& (supvecnum>kernel_cache->max_elems)
|
1551
|
+
&& ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
|
1552
|
+
kernel_cache_shrink(kernel_cache,totdoc,
|
1553
|
+
minl((kernel_cache->activenum-activenum),
|
1554
|
+
(kernel_cache->activenum-supvecnum)),
|
1555
|
+
shrink_state->active);
|
1556
|
+
}
|
1557
|
+
}
|
1558
|
+
|
1559
|
+
if((!retrain) && learn_parm->remove_inconsistent) {
|
1560
|
+
if(verbosity>=1) {
|
1561
|
+
printf(" Moving training errors to inconsistent examples...");
|
1562
|
+
fflush(stdout);
|
1563
|
+
}
|
1564
|
+
if(learn_parm->remove_inconsistent == 1) {
|
1565
|
+
retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
|
1566
|
+
&inconsistentnum,inconsistent);
|
1567
|
+
}
|
1568
|
+
else if(learn_parm->remove_inconsistent == 2) {
|
1569
|
+
retrain=identify_misclassified(lin,label,unlabeled,totdoc,
|
1570
|
+
model,&inconsistentnum,inconsistent);
|
1571
|
+
}
|
1572
|
+
else if(learn_parm->remove_inconsistent == 3) {
|
1573
|
+
retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
|
1574
|
+
model,&inconsistentnum,inconsistent);
|
1575
|
+
}
|
1576
|
+
if(retrain) {
|
1577
|
+
if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
|
1578
|
+
learn_parm->epsilon_crit=2.0;
|
1579
|
+
}
|
1580
|
+
}
|
1581
|
+
if(verbosity>=1) {
|
1582
|
+
printf("done.\n");
|
1583
|
+
if(retrain) {
|
1584
|
+
printf(" Now %ld inconsistent examples.\n",inconsistentnum);
|
1585
|
+
}
|
1586
|
+
}
|
1587
|
+
}
|
1588
|
+
} /* end of loop */
|
1589
|
+
|
1590
|
+
free(chosen);
|
1591
|
+
free(last_suboptimal_at);
|
1592
|
+
free(key);
|
1593
|
+
free(selcrit);
|
1594
|
+
free(selexam);
|
1595
|
+
free(a_old);
|
1596
|
+
free(aicache);
|
1597
|
+
free(working2dnum);
|
1598
|
+
free(active2dnum);
|
1599
|
+
free(qp.opt_ce);
|
1600
|
+
free(qp.opt_ce0);
|
1601
|
+
free(qp.opt_g);
|
1602
|
+
free(qp.opt_g0);
|
1603
|
+
free(qp.opt_xinit);
|
1604
|
+
free(qp.opt_low);
|
1605
|
+
free(qp.opt_up);
|
1606
|
+
free(weights);
|
1607
|
+
|
1608
|
+
learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
|
1609
|
+
model->maxdiff=(*maxdiff);
|
1610
|
+
|
1611
|
+
return(iteration);
|
1612
|
+
}
|
1613
|
+
|
1614
|
+
long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
|
1615
|
+
long int totdoc,
|
1616
|
+
long int totwords, LEARN_PARM *learn_parm,
|
1617
|
+
KERNEL_PARM *kernel_parm,
|
1618
|
+
KERNEL_CACHE *kernel_cache,
|
1619
|
+
SHRINK_STATE *shrink_state, MODEL *model,
|
1620
|
+
double *a, double *lin, double *c,
|
1621
|
+
TIMING *timing_profile, double *maxdiff)
|
1622
|
+
/* docs: Training vectors (x-part) */
|
1623
|
+
/* label: Training labels/value (y-part, zero if test example for
|
1624
|
+
transduction) */
|
1625
|
+
/* totdoc: Number of examples in docs/label */
|
1626
|
+
/* totwords: Number of features (i.e. highest feature index) */
|
1627
|
+
/* learn_parm: Learning paramenters */
|
1628
|
+
/* kernel_parm: Kernel paramenters */
|
1629
|
+
/* kernel_cache: Initialized/partly filled Cache, if using a kernel.
|
1630
|
+
NULL if linear. */
|
1631
|
+
/* shrink_state: State of active variables */
|
1632
|
+
/* model: Returns learning result */
|
1633
|
+
/* a: alphas */
|
1634
|
+
/* lin: linear component of gradient */
|
1635
|
+
/* c: right hand side of inequalities (margin) */
|
1636
|
+
/* maxdiff: returns maximum violation of KT-conditions */
|
1637
|
+
{
|
1638
|
+
long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
|
1639
|
+
long *inconsistent,choosenum,already_chosen=0,iteration;
|
1640
|
+
long misclassified,supvecnum=0,*active2dnum,inactivenum;
|
1641
|
+
long *working2dnum,*selexam,*ignore;
|
1642
|
+
long activenum,retrain,maxslackid,slackset,jointstep;
|
1643
|
+
double criterion,eq_target;
|
1644
|
+
double *a_old,*alphaslack;
|
1645
|
+
long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
|
1646
|
+
double epsilon_crit_org,maxsharedviol;
|
1647
|
+
double bestmaxdiff;
|
1648
|
+
long bestmaxdiffiter,terminate;
|
1649
|
+
|
1650
|
+
double *selcrit; /* buffer for sorting */
|
1651
|
+
CFLOAT *aicache; /* buffer to keep one row of hessian */
|
1652
|
+
double *weights; /* buffer for weight vector in linear case */
|
1653
|
+
QP qp; /* buffer for one quadratic program */
|
1654
|
+
double *slack; /* vector of slack variables for optimization with
|
1655
|
+
shared slacks */
|
1656
|
+
|
1657
|
+
epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
|
1658
|
+
if(kernel_parm->kernel_type == LINEAR) {
|
1659
|
+
learn_parm->epsilon_crit=2.0;
|
1660
|
+
kernel_cache=NULL; /* caching makes no sense for linear kernel */
|
1661
|
+
}
|
1662
|
+
learn_parm->epsilon_shrink=2;
|
1663
|
+
(*maxdiff)=1;
|
1664
|
+
|
1665
|
+
learn_parm->totwords=totwords;
|
1666
|
+
|
1667
|
+
chosen = (long *)my_malloc(sizeof(long)*totdoc);
|
1668
|
+
unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
1669
|
+
inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
1670
|
+
ignore = (long *)my_malloc(sizeof(long)*totdoc);
|
1671
|
+
key = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1672
|
+
selcrit = (double *)my_malloc(sizeof(double)*totdoc);
|
1673
|
+
selexam = (long *)my_malloc(sizeof(long)*totdoc);
|
1674
|
+
a_old = (double *)my_malloc(sizeof(double)*totdoc);
|
1675
|
+
aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
1676
|
+
working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1677
|
+
active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
1678
|
+
qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1679
|
+
qp.opt_ce0 = (double *)my_malloc(sizeof(double));
|
1680
|
+
qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
|
1681
|
+
*learn_parm->svm_maxqpsize);
|
1682
|
+
qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1683
|
+
qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1684
|
+
qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1685
|
+
qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
1686
|
+
weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
1687
|
+
maxslackid=0;
|
1688
|
+
for(i=0;i<totdoc;i++) { /* determine size of slack array */
|
1689
|
+
if(maxslackid<docs[i]->slackid)
|
1690
|
+
maxslackid=docs[i]->slackid;
|
1691
|
+
}
|
1692
|
+
slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
1693
|
+
alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
1694
|
+
last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
|
1695
|
+
for(i=0;i<=maxslackid;i++) { /* init shared slacks */
|
1696
|
+
slack[i]=0;
|
1697
|
+
alphaslack[i]=0;
|
1698
|
+
last_suboptimal_at[i]=1;
|
1699
|
+
}
|
1700
|
+
|
1701
|
+
choosenum=0;
|
1702
|
+
retrain=1;
|
1703
|
+
iteration=1;
|
1704
|
+
bestmaxdiffiter=1;
|
1705
|
+
bestmaxdiff=999999999;
|
1706
|
+
terminate=0;
|
1707
|
+
|
1708
|
+
if(kernel_cache) {
|
1709
|
+
kernel_cache->time=iteration; /* for lru cache */
|
1710
|
+
kernel_cache_reset_lru(kernel_cache);
|
1711
|
+
}
|
1712
|
+
|
1713
|
+
for(i=0;i<totdoc;i++) { /* various inits */
|
1714
|
+
chosen[i]=0;
|
1715
|
+
unlabeled[i]=0;
|
1716
|
+
inconsistent[i]=0;
|
1717
|
+
ignore[i]=0;
|
1718
|
+
a_old[i]=a[i];
|
1719
|
+
}
|
1720
|
+
activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
1721
|
+
inactivenum=totdoc-activenum;
|
1722
|
+
clear_index(working2dnum);
|
1723
|
+
|
1724
|
+
/* call to init slack and alphaslack */
|
1725
|
+
compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
|
1726
|
+
slack,alphaslack);
|
1727
|
+
|
1728
|
+
/* repeat this loop until we have convergence */
|
1729
|
+
for(;retrain && (!terminate);iteration++) {
|
1730
|
+
|
1731
|
+
if(kernel_cache)
|
1732
|
+
kernel_cache->time=iteration; /* for lru cache */
|
1733
|
+
if(verbosity>=2) {
|
1734
|
+
printf(
|
1735
|
+
"Iteration %ld: ",iteration); fflush(stdout);
|
1736
|
+
}
|
1737
|
+
else if(verbosity==1) {
|
1738
|
+
printf("."); fflush(stdout);
|
1739
|
+
}
|
1740
|
+
|
1741
|
+
if(verbosity>=2) t0=get_runtime();
|
1742
|
+
if(verbosity>=3) {
|
1743
|
+
printf("\nSelecting working set... "); fflush(stdout);
|
1744
|
+
}
|
1745
|
+
|
1746
|
+
if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
|
1747
|
+
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
1748
|
+
|
1749
|
+
/* select working set according to steepest gradient */
|
1750
|
+
jointstep=0;
|
1751
|
+
eq_target=0;
|
1752
|
+
if(iteration % 101) {
|
1753
|
+
slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
|
1754
|
+
learn_parm,active2dnum,&maxsharedviol);
|
1755
|
+
if((iteration % 2)
|
1756
|
+
|| (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
|
1757
|
+
/* do a step with examples from different slack sets */
|
1758
|
+
if(verbosity >= 2) {
|
1759
|
+
printf("(i-step)"); fflush(stdout);
|
1760
|
+
}
|
1761
|
+
i=0;
|
1762
|
+
for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
|
1763
|
+
if((chosen[j]>=(learn_parm->svm_maxqpsize/
|
1764
|
+
minl(learn_parm->svm_maxqpsize,
|
1765
|
+
learn_parm->svm_newvarsinqp)))) {
|
1766
|
+
chosen[j]=0;
|
1767
|
+
choosenum--;
|
1768
|
+
}
|
1769
|
+
else {
|
1770
|
+
chosen[j]++;
|
1771
|
+
working2dnum[i++]=j;
|
1772
|
+
}
|
1773
|
+
}
|
1774
|
+
working2dnum[i]=-1;
|
1775
|
+
|
1776
|
+
already_chosen=0;
|
1777
|
+
if((minl(learn_parm->svm_newvarsinqp,
|
1778
|
+
learn_parm->svm_maxqpsize-choosenum)>=4)
|
1779
|
+
&& (kernel_parm->kernel_type != LINEAR)) {
|
1780
|
+
/* select part of the working set from cache */
|
1781
|
+
already_chosen=select_next_qp_subproblem_grad(
|
1782
|
+
label,unlabeled,a,lin,c,totdoc,
|
1783
|
+
(long)(minl(learn_parm->svm_maxqpsize-choosenum,
|
1784
|
+
learn_parm->svm_newvarsinqp)
|
1785
|
+
/2),
|
1786
|
+
learn_parm,inconsistent,active2dnum,
|
1787
|
+
working2dnum,selcrit,selexam,kernel_cache,
|
1788
|
+
(long)1,key,chosen);
|
1789
|
+
choosenum+=already_chosen;
|
1790
|
+
}
|
1791
|
+
choosenum+=select_next_qp_subproblem_grad(
|
1792
|
+
label,unlabeled,a,lin,c,totdoc,
|
1793
|
+
minl(learn_parm->svm_maxqpsize-choosenum,
|
1794
|
+
learn_parm->svm_newvarsinqp-already_chosen),
|
1795
|
+
learn_parm,inconsistent,active2dnum,
|
1796
|
+
working2dnum,selcrit,selexam,kernel_cache,
|
1797
|
+
(long)0,key,chosen);
|
1798
|
+
}
|
1799
|
+
else { /* do a step with all examples from same slack set */
|
1800
|
+
if(verbosity >= 2) {
|
1801
|
+
printf("(j-step on %ld)",slackset); fflush(stdout);
|
1802
|
+
}
|
1803
|
+
jointstep=1;
|
1804
|
+
for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
|
1805
|
+
chosen[j]=0;
|
1806
|
+
}
|
1807
|
+
working2dnum[0]=-1;
|
1808
|
+
eq_target=alphaslack[slackset];
|
1809
|
+
for(j=0;j<totdoc;j++) { /* mask all but slackset */
|
1810
|
+
/* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
|
1811
|
+
if(docs[j]->slackid != slackset)
|
1812
|
+
ignore[j]=1;
|
1813
|
+
else {
|
1814
|
+
ignore[j]=0;
|
1815
|
+
learn_parm->svm_cost[j]=learn_parm->svm_c;
|
1816
|
+
/* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
|
1817
|
+
}
|
1818
|
+
}
|
1819
|
+
learn_parm->biased_hyperplane=1;
|
1820
|
+
choosenum=select_next_qp_subproblem_grad(
|
1821
|
+
label,unlabeled,a,lin,c,totdoc,
|
1822
|
+
learn_parm->svm_maxqpsize,
|
1823
|
+
learn_parm,ignore,active2dnum,
|
1824
|
+
working2dnum,selcrit,selexam,kernel_cache,
|
1825
|
+
(long)0,key,chosen);
|
1826
|
+
learn_parm->biased_hyperplane=0;
|
1827
|
+
}
|
1828
|
+
}
|
1829
|
+
else { /* once in a while, select a somewhat random working set
|
1830
|
+
to get unlocked of infinite loops due to numerical
|
1831
|
+
inaccuracies in the core qp-solver */
|
1832
|
+
choosenum+=select_next_qp_subproblem_rand(
|
1833
|
+
label,unlabeled,a,lin,c,totdoc,
|
1834
|
+
minl(learn_parm->svm_maxqpsize-choosenum,
|
1835
|
+
learn_parm->svm_newvarsinqp),
|
1836
|
+
learn_parm,inconsistent,active2dnum,
|
1837
|
+
working2dnum,selcrit,selexam,kernel_cache,key,
|
1838
|
+
chosen,iteration);
|
1839
|
+
}
|
1840
|
+
|
1841
|
+
if(verbosity>=2) {
|
1842
|
+
printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
|
1843
|
+
}
|
1844
|
+
|
1845
|
+
if(verbosity>=2) t1=get_runtime();
|
1846
|
+
|
1847
|
+
if(kernel_cache)
|
1848
|
+
cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
|
1849
|
+
choosenum,kernel_parm);
|
1850
|
+
|
1851
|
+
if(verbosity>=2) t2=get_runtime();
|
1852
|
+
if(jointstep) learn_parm->biased_hyperplane=1;
|
1853
|
+
optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
|
1854
|
+
model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
|
1855
|
+
aicache,kernel_parm,&qp,&epsilon_crit_org);
|
1856
|
+
learn_parm->biased_hyperplane=0;
|
1857
|
+
|
1858
|
+
for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */
|
1859
|
+
alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
|
1860
|
+
for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
|
1861
|
+
constraints */
|
1862
|
+
if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
|
1863
|
+
if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
|
1864
|
+
alphaslack[docs[i]->slackid]-=a[i];
|
1865
|
+
a[i]=0;
|
1866
|
+
}
|
1867
|
+
else {
|
1868
|
+
a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
|
1869
|
+
alphaslack[docs[i]->slackid]=learn_parm->svm_c;
|
1870
|
+
}
|
1871
|
+
}
|
1872
|
+
}
|
1873
|
+
for(jj=0;(i=active2dnum[jj])>=0;jj++)
|
1874
|
+
learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
|
1875
|
+
-alphaslack[docs[i]->slackid]);
|
1876
|
+
|
1877
|
+
if(verbosity>=2) t3=get_runtime();
|
1878
|
+
update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
|
1879
|
+
totwords,kernel_parm,kernel_cache,lin,aicache,
|
1880
|
+
weights);
|
1881
|
+
compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
|
1882
|
+
slack,alphaslack);
|
1883
|
+
|
1884
|
+
if(verbosity>=2) t4=get_runtime();
|
1885
|
+
supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
|
1886
|
+
learn_parm,working2dnum,active2dnum,model);
|
1887
|
+
|
1888
|
+
if(verbosity>=2) t5=get_runtime();
|
1889
|
+
|
1890
|
+
/* The following computation of the objective function works only */
|
1891
|
+
/* relative to the active variables */
|
1892
|
+
if(verbosity>=3) {
|
1893
|
+
criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
|
1894
|
+
active2dnum);
|
1895
|
+
printf("Objective function (over active variables): %.16f\n",criterion);
|
1896
|
+
fflush(stdout);
|
1897
|
+
}
|
1898
|
+
|
1899
|
+
for(jj=0;(i=working2dnum[jj])>=0;jj++) {
|
1900
|
+
a_old[i]=a[i];
|
1901
|
+
}
|
1902
|
+
|
1903
|
+
retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
|
1904
|
+
slack,alphaslack,totdoc,learn_parm,
|
1905
|
+
maxdiff,epsilon_crit_org,&misclassified,
|
1906
|
+
active2dnum,last_suboptimal_at,
|
1907
|
+
iteration,kernel_parm);
|
1908
|
+
|
1909
|
+
if(verbosity>=2) {
|
1910
|
+
t6=get_runtime();
|
1911
|
+
timing_profile->time_select+=t1-t0;
|
1912
|
+
timing_profile->time_kernel+=t2-t1;
|
1913
|
+
timing_profile->time_opti+=t3-t2;
|
1914
|
+
timing_profile->time_update+=t4-t3;
|
1915
|
+
timing_profile->time_model+=t5-t4;
|
1916
|
+
timing_profile->time_check+=t6-t5;
|
1917
|
+
}
|
1918
|
+
|
1919
|
+
/* checking whether optimizer got stuck */
|
1920
|
+
if((*maxdiff) < bestmaxdiff) {
|
1921
|
+
bestmaxdiff=(*maxdiff);
|
1922
|
+
bestmaxdiffiter=iteration;
|
1923
|
+
}
|
1924
|
+
if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
|
1925
|
+
/* long time no progress? */
|
1926
|
+
terminate=1;
|
1927
|
+
retrain=0;
|
1928
|
+
if(verbosity>=1)
|
1929
|
+
printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
|
1930
|
+
}
|
1931
|
+
|
1932
|
+
noshrink=0;
|
1933
|
+
|
1934
|
+
if((!retrain) && (inactivenum>0)
|
1935
|
+
&& ((!learn_parm->skip_final_opt_check)
|
1936
|
+
|| (kernel_parm->kernel_type == LINEAR))) {
|
1937
|
+
if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
1938
|
+
|| (verbosity>=2)) {
|
1939
|
+
if(verbosity==1) {
|
1940
|
+
printf("\n");
|
1941
|
+
}
|
1942
|
+
printf(" Checking optimality of inactive variables...");
|
1943
|
+
fflush(stdout);
|
1944
|
+
}
|
1945
|
+
t1=get_runtime();
|
1946
|
+
reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
|
1947
|
+
totwords,iteration,learn_parm,inconsistent,
|
1948
|
+
docs,kernel_parm,kernel_cache,model,aicache,
|
1949
|
+
weights,maxdiff);
|
1950
|
+
/* Update to new active variables. */
|
1951
|
+
activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
1952
|
+
inactivenum=totdoc-activenum;
|
1953
|
+
/* check optimality, since check in reactivate does not work for
|
1954
|
+
sharedslacks */
|
1955
|
+
retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
|
1956
|
+
slack,alphaslack,totdoc,learn_parm,
|
1957
|
+
maxdiff,epsilon_crit_org,&misclassified,
|
1958
|
+
active2dnum,last_suboptimal_at,
|
1959
|
+
iteration,kernel_parm);
|
1960
|
+
|
1961
|
+
/* reset watchdog */
|
1962
|
+
bestmaxdiff=(*maxdiff);
|
1963
|
+
bestmaxdiffiter=iteration;
|
1964
|
+
/* termination criterion */
|
1965
|
+
noshrink=1;
|
1966
|
+
retrain=0;
|
1967
|
+
if((*maxdiff) > learn_parm->epsilon_crit)
|
1968
|
+
retrain=1;
|
1969
|
+
timing_profile->time_shrink+=get_runtime()-t1;
|
1970
|
+
if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
1971
|
+
|| (verbosity>=2)) {
|
1972
|
+
printf("done.\n"); fflush(stdout);
|
1973
|
+
printf(" Number of inactive variables = %ld\n",inactivenum);
|
1974
|
+
}
|
1975
|
+
}
|
1976
|
+
|
1977
|
+
if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
|
1978
|
+
learn_parm->epsilon_crit=(*maxdiff);
|
1979
|
+
if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
|
1980
|
+
learn_parm->epsilon_crit/=2.0;
|
1981
|
+
retrain=1;
|
1982
|
+
noshrink=1;
|
1983
|
+
}
|
1984
|
+
if(learn_parm->epsilon_crit<epsilon_crit_org)
|
1985
|
+
learn_parm->epsilon_crit=epsilon_crit_org;
|
1986
|
+
|
1987
|
+
if(verbosity>=2) {
|
1988
|
+
printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
|
1989
|
+
supvecnum,model->at_upper_bound,(*maxdiff));
|
1990
|
+
fflush(stdout);
|
1991
|
+
}
|
1992
|
+
if(verbosity>=3) {
|
1993
|
+
printf("\n");
|
1994
|
+
}
|
1995
|
+
|
1996
|
+
if(((iteration % 10) == 0) && (!noshrink)) {
|
1997
|
+
activenum=shrink_problem(docs,learn_parm,shrink_state,
|
1998
|
+
kernel_parm,active2dnum,
|
1999
|
+
last_suboptimal_at,iteration,totdoc,
|
2000
|
+
maxl((long)(activenum/10),
|
2001
|
+
maxl((long)(totdoc/500),100)),
|
2002
|
+
a,inconsistent);
|
2003
|
+
inactivenum=totdoc-activenum;
|
2004
|
+
if((kernel_cache)
|
2005
|
+
&& (supvecnum>kernel_cache->max_elems)
|
2006
|
+
&& ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
|
2007
|
+
kernel_cache_shrink(kernel_cache,totdoc,
|
2008
|
+
minl((kernel_cache->activenum-activenum),
|
2009
|
+
(kernel_cache->activenum-supvecnum)),
|
2010
|
+
shrink_state->active);
|
2011
|
+
}
|
2012
|
+
}
|
2013
|
+
|
2014
|
+
} /* end of loop */
|
2015
|
+
|
2016
|
+
|
2017
|
+
free(alphaslack);
|
2018
|
+
free(slack);
|
2019
|
+
free(chosen);
|
2020
|
+
free(unlabeled);
|
2021
|
+
free(inconsistent);
|
2022
|
+
free(ignore);
|
2023
|
+
free(last_suboptimal_at);
|
2024
|
+
free(key);
|
2025
|
+
free(selcrit);
|
2026
|
+
free(selexam);
|
2027
|
+
free(a_old);
|
2028
|
+
free(aicache);
|
2029
|
+
free(working2dnum);
|
2030
|
+
free(active2dnum);
|
2031
|
+
free(qp.opt_ce);
|
2032
|
+
free(qp.opt_ce0);
|
2033
|
+
free(qp.opt_g);
|
2034
|
+
free(qp.opt_g0);
|
2035
|
+
free(qp.opt_xinit);
|
2036
|
+
free(qp.opt_low);
|
2037
|
+
free(qp.opt_up);
|
2038
|
+
free(weights);
|
2039
|
+
|
2040
|
+
learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
|
2041
|
+
model->maxdiff=(*maxdiff);
|
2042
|
+
|
2043
|
+
return(iteration);
|
2044
|
+
}
|
2045
|
+
|
2046
|
+
|
2047
|
+
double compute_objective_function(double *a, double *lin, double *c,
|
2048
|
+
double eps, long int *label,
|
2049
|
+
long int *active2dnum)
|
2050
|
+
/* Return value of objective function. */
|
2051
|
+
/* Works only relative to the active variables! */
|
2052
|
+
{
|
2053
|
+
long i,ii;
|
2054
|
+
double criterion;
|
2055
|
+
/* calculate value of objective function */
|
2056
|
+
criterion=0;
|
2057
|
+
for(ii=0;active2dnum[ii]>=0;ii++) {
|
2058
|
+
i=active2dnum[ii];
|
2059
|
+
criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
|
2060
|
+
}
|
2061
|
+
return(criterion);
|
2062
|
+
}
|
2063
|
+
|
2064
|
+
void clear_index(long int *index)
|
2065
|
+
/* initializes and empties index */
|
2066
|
+
{
|
2067
|
+
index[0]=-1;
|
2068
|
+
}
|
2069
|
+
|
2070
|
+
void add_to_index(long int *index, long int elem)
|
2071
|
+
/* initializes and empties index */
|
2072
|
+
{
|
2073
|
+
register long i;
|
2074
|
+
for(i=0;index[i] != -1;i++);
|
2075
|
+
index[i]=elem;
|
2076
|
+
index[i+1]=-1;
|
2077
|
+
}
|
2078
|
+
|
2079
|
+
long compute_index(long int *binfeature, long int range, long int *index)
|
2080
|
+
/* create an inverted index of binfeature */
|
2081
|
+
{
|
2082
|
+
register long i,ii;
|
2083
|
+
|
2084
|
+
ii=0;
|
2085
|
+
for(i=0;i<range;i++) {
|
2086
|
+
if(binfeature[i]) {
|
2087
|
+
index[ii]=i;
|
2088
|
+
ii++;
|
2089
|
+
}
|
2090
|
+
}
|
2091
|
+
for(i=0;i<4;i++) {
|
2092
|
+
index[ii+i]=-1;
|
2093
|
+
}
|
2094
|
+
return(ii);
|
2095
|
+
}
|
2096
|
+
|
2097
|
+
|
2098
|
+
void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
|
2099
|
+
long int *exclude_from_eq_const, double eq_target,
|
2100
|
+
long int *chosen, long int *active2dnum, MODEL *model,
|
2101
|
+
long int totdoc, long int *working2dnum, long int varnum,
|
2102
|
+
double *a, double *lin, double *c, LEARN_PARM *learn_parm,
|
2103
|
+
CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
|
2104
|
+
double *epsilon_crit_target)
|
2105
|
+
/* Do optimization on the working set. */
|
2106
|
+
{
|
2107
|
+
long i;
|
2108
|
+
double *a_v;
|
2109
|
+
|
2110
|
+
compute_matrices_for_optimization(docs,label,unlabeled,
|
2111
|
+
exclude_from_eq_const,eq_target,chosen,
|
2112
|
+
active2dnum,working2dnum,model,a,lin,c,
|
2113
|
+
varnum,totdoc,learn_parm,aicache,
|
2114
|
+
kernel_parm,qp);
|
2115
|
+
|
2116
|
+
if(verbosity>=3) {
|
2117
|
+
printf("Running optimizer..."); fflush(stdout);
|
2118
|
+
}
|
2119
|
+
/* call the qp-subsolver */
|
2120
|
+
a_v=optimize_qp(qp,epsilon_crit_target,
|
2121
|
+
learn_parm->svm_maxqpsize,
|
2122
|
+
&(model->b), /* in case the optimizer gives us */
|
2123
|
+
/* the threshold for free. otherwise */
|
2124
|
+
/* b is calculated in calculate_model. */
|
2125
|
+
learn_parm);
|
2126
|
+
if(verbosity>=3) {
|
2127
|
+
printf("done\n");
|
2128
|
+
}
|
2129
|
+
|
2130
|
+
for(i=0;i<varnum;i++) {
|
2131
|
+
a[working2dnum[i]]=a_v[i];
|
2132
|
+
/*
|
2133
|
+
if(a_v[i]<=(0+learn_parm->epsilon_a)) {
|
2134
|
+
a[working2dnum[i]]=0;
|
2135
|
+
}
|
2136
|
+
else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
|
2137
|
+
a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
|
2138
|
+
}
|
2139
|
+
*/
|
2140
|
+
}
|
2141
|
+
}
|
2142
|
+
|
2143
|
+
void compute_matrices_for_optimization(DOC **docs, long int *label,
|
2144
|
+
long int *unlabeled, long *exclude_from_eq_const, double eq_target,
|
2145
|
+
long int *chosen, long int *active2dnum,
|
2146
|
+
long int *key, MODEL *model, double *a, double *lin, double *c,
|
2147
|
+
long int varnum, long int totdoc, LEARN_PARM *learn_parm,
|
2148
|
+
CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
|
2149
|
+
{
|
2150
|
+
register long ki,kj,i,j;
|
2151
|
+
register double kernel_temp;
|
2152
|
+
|
2153
|
+
if(verbosity>=3) {
|
2154
|
+
fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
|
2155
|
+
fflush(stdout);
|
2156
|
+
}
|
2157
|
+
|
2158
|
+
qp->opt_n=varnum;
|
2159
|
+
qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
|
2160
|
+
for(j=1;j<model->sv_num;j++) { /* start at 1 */
|
2161
|
+
if((!chosen[(model->supvec[j])->docnum])
|
2162
|
+
&& (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
|
2163
|
+
qp->opt_ce0[0]+=model->alpha[j];
|
2164
|
+
}
|
2165
|
+
}
|
2166
|
+
if(learn_parm->biased_hyperplane)
|
2167
|
+
qp->opt_m=1;
|
2168
|
+
else
|
2169
|
+
qp->opt_m=0; /* eq-constraint will be ignored */
|
2170
|
+
|
2171
|
+
/* init linear part of objective function */
|
2172
|
+
for(i=0;i<varnum;i++) {
|
2173
|
+
qp->opt_g0[i]=lin[key[i]];
|
2174
|
+
}
|
2175
|
+
|
2176
|
+
for(i=0;i<varnum;i++) {
|
2177
|
+
ki=key[i];
|
2178
|
+
|
2179
|
+
/* Compute the matrix for equality constraints */
|
2180
|
+
qp->opt_ce[i]=label[ki];
|
2181
|
+
qp->opt_low[i]=0;
|
2182
|
+
qp->opt_up[i]=learn_parm->svm_cost[ki];
|
2183
|
+
|
2184
|
+
kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
|
2185
|
+
/* compute linear part of objective function */
|
2186
|
+
qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
|
2187
|
+
/* compute quadratic part of objective function */
|
2188
|
+
qp->opt_g[varnum*i+i]=kernel_temp;
|
2189
|
+
for(j=i+1;j<varnum;j++) {
|
2190
|
+
kj=key[j];
|
2191
|
+
kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
|
2192
|
+
/* compute linear part of objective function */
|
2193
|
+
qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
|
2194
|
+
qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
|
2195
|
+
/* compute quadratic part of objective function */
|
2196
|
+
qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
|
2197
|
+
qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
|
2198
|
+
}
|
2199
|
+
|
2200
|
+
if(verbosity>=3) {
|
2201
|
+
if(i % 20 == 0) {
|
2202
|
+
fprintf(stdout,"%ld..",i); fflush(stdout);
|
2203
|
+
}
|
2204
|
+
}
|
2205
|
+
}
|
2206
|
+
|
2207
|
+
for(i=0;i<varnum;i++) {
|
2208
|
+
/* assure starting at feasible point */
|
2209
|
+
qp->opt_xinit[i]=a[key[i]];
|
2210
|
+
/* set linear part of objective function */
|
2211
|
+
qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
|
2212
|
+
}
|
2213
|
+
|
2214
|
+
if(verbosity>=3) {
|
2215
|
+
fprintf(stdout,"done\n");
|
2216
|
+
}
|
2217
|
+
}
|
2218
|
+
|
2219
|
+
long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
|
2220
|
+
double *lin, double *a, double *a_old, double *c,
|
2221
|
+
LEARN_PARM *learn_parm, long int *working2dnum,
|
2222
|
+
long int *active2dnum, MODEL *model)
|
2223
|
+
/* Compute decision function based on current values */
|
2224
|
+
/* of alpha. */
|
2225
|
+
{
|
2226
|
+
long i,ii,pos,b_calculated=0,first_low,first_high;
|
2227
|
+
double ex_c,b_temp,b_low,b_high;
|
2228
|
+
|
2229
|
+
if(verbosity>=3) {
|
2230
|
+
printf("Calculating model..."); fflush(stdout);
|
2231
|
+
}
|
2232
|
+
|
2233
|
+
if(!learn_parm->biased_hyperplane) {
|
2234
|
+
model->b=0;
|
2235
|
+
b_calculated=1;
|
2236
|
+
}
|
2237
|
+
|
2238
|
+
for(ii=0;(i=working2dnum[ii])>=0;ii++) {
|
2239
|
+
if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
|
2240
|
+
pos=model->index[i];
|
2241
|
+
model->index[i]=-1;
|
2242
|
+
(model->sv_num)--;
|
2243
|
+
model->supvec[pos]=model->supvec[model->sv_num];
|
2244
|
+
model->alpha[pos]=model->alpha[model->sv_num];
|
2245
|
+
model->index[(model->supvec[pos])->docnum]=pos;
|
2246
|
+
}
|
2247
|
+
else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
|
2248
|
+
model->supvec[model->sv_num]=docs[i];
|
2249
|
+
model->alpha[model->sv_num]=a[i]*(double)label[i];
|
2250
|
+
model->index[i]=model->sv_num;
|
2251
|
+
(model->sv_num)++;
|
2252
|
+
}
|
2253
|
+
else if(a_old[i]==a[i]) { /* nothing to do */
|
2254
|
+
}
|
2255
|
+
else { /* just update alpha */
|
2256
|
+
model->alpha[model->index[i]]=a[i]*(double)label[i];
|
2257
|
+
}
|
2258
|
+
|
2259
|
+
ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
2260
|
+
if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
|
2261
|
+
(model->at_upper_bound)--;
|
2262
|
+
}
|
2263
|
+
else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
|
2264
|
+
(model->at_upper_bound)++;
|
2265
|
+
}
|
2266
|
+
|
2267
|
+
if((!b_calculated)
|
2268
|
+
&& (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
|
2269
|
+
model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
|
2270
|
+
/* model->b=(-(double)label[i]+lin[i]); */
|
2271
|
+
b_calculated=1;
|
2272
|
+
}
|
2273
|
+
}
|
2274
|
+
|
2275
|
+
/* No alpha in the working set not at bounds, so b was not
|
2276
|
+
calculated in the usual way. The following handles this special
|
2277
|
+
case. */
|
2278
|
+
if(learn_parm->biased_hyperplane
|
2279
|
+
&& (!b_calculated)
|
2280
|
+
&& (model->sv_num-1 == model->at_upper_bound)) {
|
2281
|
+
first_low=1;
|
2282
|
+
first_high=1;
|
2283
|
+
b_low=0;
|
2284
|
+
b_high=0;
|
2285
|
+
for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
2286
|
+
ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
2287
|
+
if(a[i]<ex_c) {
|
2288
|
+
if(label[i]>0) {
|
2289
|
+
b_temp=-(learn_parm->eps-c[i]+lin[i]);
|
2290
|
+
if((b_temp>b_low) || (first_low)) {
|
2291
|
+
b_low=b_temp;
|
2292
|
+
first_low=0;
|
2293
|
+
}
|
2294
|
+
}
|
2295
|
+
else {
|
2296
|
+
b_temp=-(-learn_parm->eps-c[i]+lin[i]);
|
2297
|
+
if((b_temp<b_high) || (first_high)) {
|
2298
|
+
b_high=b_temp;
|
2299
|
+
first_high=0;
|
2300
|
+
}
|
2301
|
+
}
|
2302
|
+
}
|
2303
|
+
else {
|
2304
|
+
if(label[i]<0) {
|
2305
|
+
b_temp=-(-learn_parm->eps-c[i]+lin[i]);
|
2306
|
+
if((b_temp>b_low) || (first_low)) {
|
2307
|
+
b_low=b_temp;
|
2308
|
+
first_low=0;
|
2309
|
+
}
|
2310
|
+
}
|
2311
|
+
else {
|
2312
|
+
b_temp=-(learn_parm->eps-c[i]+lin[i]);
|
2313
|
+
if((b_temp<b_high) || (first_high)) {
|
2314
|
+
b_high=b_temp;
|
2315
|
+
first_high=0;
|
2316
|
+
}
|
2317
|
+
}
|
2318
|
+
}
|
2319
|
+
}
|
2320
|
+
if(first_high) {
|
2321
|
+
model->b=-b_low;
|
2322
|
+
}
|
2323
|
+
else if(first_low) {
|
2324
|
+
model->b=-b_high;
|
2325
|
+
}
|
2326
|
+
else {
|
2327
|
+
model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
|
2328
|
+
/* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
|
2329
|
+
}
|
2330
|
+
}
|
2331
|
+
|
2332
|
+
if(verbosity>=3) {
|
2333
|
+
printf("done\n"); fflush(stdout);
|
2334
|
+
}
|
2335
|
+
|
2336
|
+
return(model->sv_num-1); /* have to substract one, since element 0 is empty*/
|
2337
|
+
}
|
2338
|
+
|
2339
|
+
long check_optimality(MODEL *model, long int *label, long int *unlabeled,
|
2340
|
+
double *a, double *lin, double *c, long int totdoc,
|
2341
|
+
LEARN_PARM *learn_parm, double *maxdiff,
|
2342
|
+
double epsilon_crit_org, long int *misclassified,
|
2343
|
+
long int *inconsistent, long int *active2dnum,
|
2344
|
+
long int *last_suboptimal_at,
|
2345
|
+
long int iteration, KERNEL_PARM *kernel_parm)
|
2346
|
+
/* Check KT-conditions */
|
2347
|
+
{
|
2348
|
+
long i,ii,retrain;
|
2349
|
+
double dist,ex_c,target;
|
2350
|
+
|
2351
|
+
if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
|
2352
|
+
learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
|
2353
|
+
}
|
2354
|
+
else { /* be conservative */
|
2355
|
+
learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
|
2356
|
+
}
|
2357
|
+
retrain=0;
|
2358
|
+
(*maxdiff)=0;
|
2359
|
+
(*misclassified)=0;
|
2360
|
+
for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
2361
|
+
if((!inconsistent[i]) && label[i]) {
|
2362
|
+
dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
|
2363
|
+
hyperplane*/
|
2364
|
+
target=-(learn_parm->eps-(double)label[i]*c[i]);
|
2365
|
+
ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
2366
|
+
if(dist <= 0) {
|
2367
|
+
(*misclassified)++; /* does not work due to deactivation of var */
|
2368
|
+
}
|
2369
|
+
if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
2370
|
+
if((dist-target)>(*maxdiff)) /* largest violation */
|
2371
|
+
(*maxdiff)=dist-target;
|
2372
|
+
}
|
2373
|
+
else if((a[i]<ex_c) && (dist < target)) {
|
2374
|
+
if((target-dist)>(*maxdiff)) /* largest violation */
|
2375
|
+
(*maxdiff)=target-dist;
|
2376
|
+
}
|
2377
|
+
/* Count how long a variable was at lower/upper bound (and optimal).*/
|
2378
|
+
/* Variables, which were at the bound and optimal for a long */
|
2379
|
+
/* time are unlikely to become support vectors. In case our */
|
2380
|
+
/* cache is filled up, those variables are excluded to save */
|
2381
|
+
/* kernel evaluations. (See chapter 'Shrinking').*/
|
2382
|
+
if((a[i]>(learn_parm->epsilon_a))
|
2383
|
+
&& (a[i]<ex_c)) {
|
2384
|
+
last_suboptimal_at[i]=iteration; /* not at bound */
|
2385
|
+
}
|
2386
|
+
else if((a[i]<=(learn_parm->epsilon_a))
|
2387
|
+
&& (dist < (target+learn_parm->epsilon_shrink))) {
|
2388
|
+
last_suboptimal_at[i]=iteration; /* not likely optimal */
|
2389
|
+
}
|
2390
|
+
else if((a[i]>=ex_c)
|
2391
|
+
&& (dist > (target-learn_parm->epsilon_shrink))) {
|
2392
|
+
last_suboptimal_at[i]=iteration; /* not likely optimal */
|
2393
|
+
}
|
2394
|
+
}
|
2395
|
+
}
|
2396
|
+
/* termination criterion */
|
2397
|
+
if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
|
2398
|
+
retrain=1;
|
2399
|
+
}
|
2400
|
+
return(retrain);
|
2401
|
+
}
|
2402
|
+
|
2403
|
+
long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
|
2404
|
+
double *a, double *lin, double *c, double *slack,
|
2405
|
+
double *alphaslack,
|
2406
|
+
long int totdoc,
|
2407
|
+
LEARN_PARM *learn_parm, double *maxdiff,
|
2408
|
+
double epsilon_crit_org, long int *misclassified,
|
2409
|
+
long int *active2dnum,
|
2410
|
+
long int *last_suboptimal_at,
|
2411
|
+
long int iteration, KERNEL_PARM *kernel_parm)
|
2412
|
+
/* Check KT-conditions */
|
2413
|
+
{
|
2414
|
+
long i,ii,retrain;
|
2415
|
+
double dist,ex_c=0,target;
|
2416
|
+
|
2417
|
+
if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
|
2418
|
+
learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
|
2419
|
+
}
|
2420
|
+
else { /* be conservative */
|
2421
|
+
learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
|
2422
|
+
}
|
2423
|
+
|
2424
|
+
retrain=0;
|
2425
|
+
(*maxdiff)=0;
|
2426
|
+
(*misclassified)=0;
|
2427
|
+
for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
2428
|
+
/* 'distance' from hyperplane*/
|
2429
|
+
dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
|
2430
|
+
target=-(learn_parm->eps-(double)label[i]*c[i]);
|
2431
|
+
ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
|
2432
|
+
if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
2433
|
+
if((dist-target)>(*maxdiff)) { /* largest violation */
|
2434
|
+
(*maxdiff)=dist-target;
|
2435
|
+
if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
|
2436
|
+
if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
|
2437
|
+
}
|
2438
|
+
}
|
2439
|
+
if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
|
2440
|
+
if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
|
2441
|
+
(*maxdiff)=slack[docs[i]->slackid];
|
2442
|
+
if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
|
2443
|
+
if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
|
2444
|
+
}
|
2445
|
+
}
|
2446
|
+
/* Count how long a variable was at lower/upper bound (and optimal).*/
|
2447
|
+
/* Variables, which were at the bound and optimal for a long */
|
2448
|
+
/* time are unlikely to become support vectors. In case our */
|
2449
|
+
/* cache is filled up, those variables are excluded to save */
|
2450
|
+
/* kernel evaluations. (See chapter 'Shrinking').*/
|
2451
|
+
if((a[i]>(learn_parm->epsilon_a))
|
2452
|
+
&& (a[i]<ex_c)) {
|
2453
|
+
last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */
|
2454
|
+
}
|
2455
|
+
else if((a[i]<=(learn_parm->epsilon_a))
|
2456
|
+
&& (dist < (target+learn_parm->epsilon_shrink))) {
|
2457
|
+
last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
|
2458
|
+
}
|
2459
|
+
else if((a[i]>=ex_c)
|
2460
|
+
&& (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) {
|
2461
|
+
last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
|
2462
|
+
}
|
2463
|
+
}
|
2464
|
+
/* termination criterion */
|
2465
|
+
if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
|
2466
|
+
retrain=1;
|
2467
|
+
}
|
2468
|
+
return(retrain);
|
2469
|
+
}
|
2470
|
+
|
2471
|
+
void compute_shared_slacks(DOC **docs, long int *label,
|
2472
|
+
double *a, double *lin,
|
2473
|
+
double *c, long int *active2dnum,
|
2474
|
+
LEARN_PARM *learn_parm,
|
2475
|
+
double *slack, double *alphaslack)
|
2476
|
+
/* compute the value of shared slacks and the joint alphas */
|
2477
|
+
{
|
2478
|
+
long jj,i;
|
2479
|
+
double dist,target;
|
2480
|
+
|
2481
|
+
for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
|
2482
|
+
slack[docs[i]->slackid]=0.0;
|
2483
|
+
alphaslack[docs[i]->slackid]=0.0;
|
2484
|
+
}
|
2485
|
+
for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
|
2486
|
+
dist=(lin[i])*(double)label[i];
|
2487
|
+
target=-(learn_parm->eps-(double)label[i]*c[i]);
|
2488
|
+
if((target-dist) > slack[docs[i]->slackid])
|
2489
|
+
slack[docs[i]->slackid]=target-dist;
|
2490
|
+
alphaslack[docs[i]->slackid]+=a[i];
|
2491
|
+
}
|
2492
|
+
}
|
2493
|
+
|
2494
|
+
|
2495
|
+
long identify_inconsistent(double *a, long int *label,
|
2496
|
+
long int *unlabeled, long int totdoc,
|
2497
|
+
LEARN_PARM *learn_parm,
|
2498
|
+
long int *inconsistentnum, long int *inconsistent)
|
2499
|
+
{
|
2500
|
+
long i,retrain;
|
2501
|
+
|
2502
|
+
/* Throw out examples with multipliers at upper bound. This */
|
2503
|
+
/* corresponds to the -i 1 option. */
|
2504
|
+
/* ATTENTION: this is just a heuristic for finding a close */
|
2505
|
+
/* to minimum number of examples to exclude to */
|
2506
|
+
/* make the problem separable with desired margin */
|
2507
|
+
retrain=0;
|
2508
|
+
for(i=0;i<totdoc;i++) {
|
2509
|
+
if((!inconsistent[i]) && (!unlabeled[i])
|
2510
|
+
&& (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
|
2511
|
+
(*inconsistentnum)++;
|
2512
|
+
inconsistent[i]=1; /* never choose again */
|
2513
|
+
retrain=2; /* start over */
|
2514
|
+
if(verbosity>=3) {
|
2515
|
+
printf("inconsistent(%ld)..",i); fflush(stdout);
|
2516
|
+
}
|
2517
|
+
}
|
2518
|
+
}
|
2519
|
+
return(retrain);
|
2520
|
+
}
|
2521
|
+
|
2522
|
+
long identify_misclassified(double *lin, long int *label,
|
2523
|
+
long int *unlabeled, long int totdoc,
|
2524
|
+
MODEL *model, long int *inconsistentnum,
|
2525
|
+
long int *inconsistent)
|
2526
|
+
{
|
2527
|
+
long i,retrain;
|
2528
|
+
double dist;
|
2529
|
+
|
2530
|
+
/* Throw out misclassified examples. This */
|
2531
|
+
/* corresponds to the -i 2 option. */
|
2532
|
+
/* ATTENTION: this is just a heuristic for finding a close */
|
2533
|
+
/* to minimum number of examples to exclude to */
|
2534
|
+
/* make the problem separable with desired margin */
|
2535
|
+
retrain=0;
|
2536
|
+
for(i=0;i<totdoc;i++) {
|
2537
|
+
dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
|
2538
|
+
if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
|
2539
|
+
(*inconsistentnum)++;
|
2540
|
+
inconsistent[i]=1; /* never choose again */
|
2541
|
+
retrain=2; /* start over */
|
2542
|
+
if(verbosity>=3) {
|
2543
|
+
printf("inconsistent(%ld)..",i); fflush(stdout);
|
2544
|
+
}
|
2545
|
+
}
|
2546
|
+
}
|
2547
|
+
return(retrain);
|
2548
|
+
}
|
2549
|
+
|
2550
|
+
long identify_one_misclassified(double *lin, long int *label,
|
2551
|
+
long int *unlabeled,
|
2552
|
+
long int totdoc, MODEL *model,
|
2553
|
+
long int *inconsistentnum,
|
2554
|
+
long int *inconsistent)
|
2555
|
+
{
|
2556
|
+
long i,retrain,maxex=-1;
|
2557
|
+
double dist,maxdist=0;
|
2558
|
+
|
2559
|
+
/* Throw out the 'most misclassified' example. This */
|
2560
|
+
/* corresponds to the -i 3 option. */
|
2561
|
+
/* ATTENTION: this is just a heuristic for finding a close */
|
2562
|
+
/* to minimum number of examples to exclude to */
|
2563
|
+
/* make the problem separable with desired margin */
|
2564
|
+
retrain=0;
|
2565
|
+
for(i=0;i<totdoc;i++) {
|
2566
|
+
if((!inconsistent[i]) && (!unlabeled[i])) {
|
2567
|
+
dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
|
2568
|
+
if(dist<maxdist) {
|
2569
|
+
maxdist=dist;
|
2570
|
+
maxex=i;
|
2571
|
+
}
|
2572
|
+
}
|
2573
|
+
}
|
2574
|
+
if(maxex>=0) {
|
2575
|
+
(*inconsistentnum)++;
|
2576
|
+
inconsistent[maxex]=1; /* never choose again */
|
2577
|
+
retrain=2; /* start over */
|
2578
|
+
if(verbosity>=3) {
|
2579
|
+
printf("inconsistent(%ld)..",i); fflush(stdout);
|
2580
|
+
}
|
2581
|
+
}
|
2582
|
+
return(retrain);
|
2583
|
+
}
|
2584
|
+
|
2585
|
+
void update_linear_component(DOC **docs, long int *label,
|
2586
|
+
long int *active2dnum, double *a,
|
2587
|
+
double *a_old, long int *working2dnum,
|
2588
|
+
long int totdoc, long int totwords,
|
2589
|
+
KERNEL_PARM *kernel_parm,
|
2590
|
+
KERNEL_CACHE *kernel_cache,
|
2591
|
+
double *lin, CFLOAT *aicache, double *weights)
|
2592
|
+
/* keep track of the linear component */
|
2593
|
+
/* lin of the gradient etc. by updating */
|
2594
|
+
/* based on the change of the variables */
|
2595
|
+
/* in the current working set */
|
2596
|
+
{
|
2597
|
+
register long i,ii,j,jj;
|
2598
|
+
register double tec;
|
2599
|
+
SVECTOR *f;
|
2600
|
+
|
2601
|
+
if(kernel_parm->kernel_type==0) { /* special linear case */
|
2602
|
+
clear_vector_n(weights,totwords);
|
2603
|
+
for(ii=0;(i=working2dnum[ii])>=0;ii++) {
|
2604
|
+
if(a[i] != a_old[i]) {
|
2605
|
+
for(f=docs[i]->fvec;f;f=f->next)
|
2606
|
+
add_vector_ns(weights,f,
|
2607
|
+
f->factor*((a[i]-a_old[i])*(double)label[i]));
|
2608
|
+
}
|
2609
|
+
}
|
2610
|
+
for(jj=0;(j=active2dnum[jj])>=0;jj++) {
|
2611
|
+
for(f=docs[j]->fvec;f;f=f->next)
|
2612
|
+
lin[j]+=f->factor*sprod_ns(weights,f);
|
2613
|
+
}
|
2614
|
+
}
|
2615
|
+
else { /* general case */
|
2616
|
+
for(jj=0;(i=working2dnum[jj])>=0;jj++) {
|
2617
|
+
if(a[i] != a_old[i]) {
|
2618
|
+
get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
|
2619
|
+
kernel_parm);
|
2620
|
+
for(ii=0;(j=active2dnum[ii])>=0;ii++) {
|
2621
|
+
tec=aicache[j];
|
2622
|
+
lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
|
2623
|
+
}
|
2624
|
+
}
|
2625
|
+
}
|
2626
|
+
}
|
2627
|
+
}
|
2628
|
+
|
2629
|
+
|
2630
|
+
long incorporate_unlabeled_examples(MODEL *model, long int *label,
|
2631
|
+
long int *inconsistent,
|
2632
|
+
long int *unlabeled,
|
2633
|
+
double *a, double *lin,
|
2634
|
+
long int totdoc, double *selcrit,
|
2635
|
+
long int *select, long int *key,
|
2636
|
+
long int transductcycle,
|
2637
|
+
KERNEL_PARM *kernel_parm,
|
2638
|
+
LEARN_PARM *learn_parm)
|
2639
|
+
{
|
2640
|
+
long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
|
2641
|
+
long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
|
2642
|
+
double dist,model_length,posratio,negratio;
|
2643
|
+
long check_every=2;
|
2644
|
+
double loss;
|
2645
|
+
static double switchsens=0.0,switchsensorg=0.0;
|
2646
|
+
double umin,umax,sumalpha;
|
2647
|
+
long imin=0,imax=0;
|
2648
|
+
static long switchnum=0;
|
2649
|
+
|
2650
|
+
switchsens/=1.2;
|
2651
|
+
|
2652
|
+
/* assumes that lin[] is up to date -> no inactive vars */
|
2653
|
+
|
2654
|
+
orgpos=0;
|
2655
|
+
orgneg=0;
|
2656
|
+
newpos=0;
|
2657
|
+
newneg=0;
|
2658
|
+
nolabel=0;
|
2659
|
+
allunlab=0;
|
2660
|
+
for(i=0;i<totdoc;i++) {
|
2661
|
+
if(!unlabeled[i]) {
|
2662
|
+
if(label[i] > 0) {
|
2663
|
+
orgpos++;
|
2664
|
+
}
|
2665
|
+
else {
|
2666
|
+
orgneg++;
|
2667
|
+
}
|
2668
|
+
}
|
2669
|
+
else {
|
2670
|
+
allunlab++;
|
2671
|
+
if(unlabeled[i]) {
|
2672
|
+
if(label[i] > 0) {
|
2673
|
+
newpos++;
|
2674
|
+
}
|
2675
|
+
else if(label[i] < 0) {
|
2676
|
+
newneg++;
|
2677
|
+
}
|
2678
|
+
}
|
2679
|
+
}
|
2680
|
+
if(label[i]==0) {
|
2681
|
+
nolabel++;
|
2682
|
+
}
|
2683
|
+
}
|
2684
|
+
|
2685
|
+
if(learn_parm->transduction_posratio >= 0) {
|
2686
|
+
posratio=learn_parm->transduction_posratio;
|
2687
|
+
}
|
2688
|
+
else {
|
2689
|
+
posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
|
2690
|
+
} /* in training data */
|
2691
|
+
negratio=1.0-posratio;
|
2692
|
+
|
2693
|
+
learn_parm->svm_costratio=1.0; /* global */
|
2694
|
+
if(posratio>0) {
|
2695
|
+
learn_parm->svm_costratio_unlab=negratio/posratio;
|
2696
|
+
}
|
2697
|
+
else {
|
2698
|
+
learn_parm->svm_costratio_unlab=1.0;
|
2699
|
+
}
|
2700
|
+
|
2701
|
+
pos=0;
|
2702
|
+
neg=0;
|
2703
|
+
upos=0;
|
2704
|
+
uneg=0;
|
2705
|
+
for(i=0;i<totdoc;i++) {
|
2706
|
+
dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
|
2707
|
+
if(dist>0) {
|
2708
|
+
pos++;
|
2709
|
+
}
|
2710
|
+
else {
|
2711
|
+
neg++;
|
2712
|
+
}
|
2713
|
+
if(unlabeled[i]) {
|
2714
|
+
if(dist>0) {
|
2715
|
+
upos++;
|
2716
|
+
}
|
2717
|
+
else {
|
2718
|
+
uneg++;
|
2719
|
+
}
|
2720
|
+
}
|
2721
|
+
if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
|
2722
|
+
/* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
|
2723
|
+
}
|
2724
|
+
}
|
2725
|
+
if(verbosity>=2) {
|
2726
|
+
printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
|
2727
|
+
printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
|
2728
|
+
printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
|
2729
|
+
fflush(stdout);
|
2730
|
+
}
|
2731
|
+
|
2732
|
+
if(transductcycle == 0) {
|
2733
|
+
j1=0;
|
2734
|
+
j2=0;
|
2735
|
+
j4=0;
|
2736
|
+
for(i=0;i<totdoc;i++) {
|
2737
|
+
dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
|
2738
|
+
if((label[i]==0) && (unlabeled[i])) {
|
2739
|
+
selcrit[j4]=dist;
|
2740
|
+
key[j4]=i;
|
2741
|
+
j4++;
|
2742
|
+
}
|
2743
|
+
}
|
2744
|
+
unsupaddnum1=0;
|
2745
|
+
unsupaddnum2=0;
|
2746
|
+
select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
|
2747
|
+
for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
|
2748
|
+
i=key[select[k]];
|
2749
|
+
label[i]=1;
|
2750
|
+
unsupaddnum1++;
|
2751
|
+
j1++;
|
2752
|
+
}
|
2753
|
+
for(i=0;i<totdoc;i++) {
|
2754
|
+
if((label[i]==0) && (unlabeled[i])) {
|
2755
|
+
label[i]=-1;
|
2756
|
+
j2++;
|
2757
|
+
unsupaddnum2++;
|
2758
|
+
}
|
2759
|
+
}
|
2760
|
+
for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
|
2761
|
+
if(unlabeled[i]) {
|
2762
|
+
if(label[i] == 1) {
|
2763
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*
|
2764
|
+
learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
|
2765
|
+
}
|
2766
|
+
else if(label[i] == -1) {
|
2767
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*
|
2768
|
+
learn_parm->svm_unlabbound;
|
2769
|
+
}
|
2770
|
+
}
|
2771
|
+
}
|
2772
|
+
if(verbosity>=1) {
|
2773
|
+
/* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
|
2774
|
+
learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
|
2775
|
+
learn_parm->svm_unlabbound); */
|
2776
|
+
printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
|
2777
|
+
unsupaddnum1,unsupaddnum2);
|
2778
|
+
fflush(stdout);
|
2779
|
+
}
|
2780
|
+
if(verbosity >= 1)
|
2781
|
+
printf("Retraining.");
|
2782
|
+
if(verbosity >= 2) printf("\n");
|
2783
|
+
return((long)3);
|
2784
|
+
}
|
2785
|
+
if((transductcycle % check_every) == 0) {
|
2786
|
+
if(verbosity >= 1)
|
2787
|
+
printf("Retraining.");
|
2788
|
+
if(verbosity >= 2) printf("\n");
|
2789
|
+
j1=0;
|
2790
|
+
j2=0;
|
2791
|
+
unsupaddnum1=0;
|
2792
|
+
unsupaddnum2=0;
|
2793
|
+
for(i=0;i<totdoc;i++) {
|
2794
|
+
if((unlabeled[i] == 2)) {
|
2795
|
+
unlabeled[i]=1;
|
2796
|
+
label[i]=1;
|
2797
|
+
j1++;
|
2798
|
+
unsupaddnum1++;
|
2799
|
+
}
|
2800
|
+
else if((unlabeled[i] == 3)) {
|
2801
|
+
unlabeled[i]=1;
|
2802
|
+
label[i]=-1;
|
2803
|
+
j2++;
|
2804
|
+
unsupaddnum2++;
|
2805
|
+
}
|
2806
|
+
}
|
2807
|
+
for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
|
2808
|
+
if(unlabeled[i]) {
|
2809
|
+
if(label[i] == 1) {
|
2810
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*
|
2811
|
+
learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
|
2812
|
+
}
|
2813
|
+
else if(label[i] == -1) {
|
2814
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*
|
2815
|
+
learn_parm->svm_unlabbound;
|
2816
|
+
}
|
2817
|
+
}
|
2818
|
+
}
|
2819
|
+
|
2820
|
+
if(verbosity>=2) {
|
2821
|
+
/* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
|
2822
|
+
learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
|
2823
|
+
learn_parm->svm_unlabbound); */
|
2824
|
+
printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
|
2825
|
+
upos,unsupaddnum1,unsupaddnum2);
|
2826
|
+
fflush(stdout);
|
2827
|
+
}
|
2828
|
+
|
2829
|
+
if(learn_parm->svm_unlabbound == 1) {
|
2830
|
+
learn_parm->epsilon_crit=0.001; /* do the last run right */
|
2831
|
+
}
|
2832
|
+
else {
|
2833
|
+
learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
|
2834
|
+
}
|
2835
|
+
|
2836
|
+
return((long)3);
|
2837
|
+
}
|
2838
|
+
else if(((transductcycle % check_every) < check_every)) {
|
2839
|
+
model_length=0;
|
2840
|
+
sumalpha=0;
|
2841
|
+
loss=0;
|
2842
|
+
for(i=0;i<totdoc;i++) {
|
2843
|
+
model_length+=a[i]*label[i]*lin[i];
|
2844
|
+
sumalpha+=a[i];
|
2845
|
+
dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
|
2846
|
+
if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
|
2847
|
+
loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
|
2848
|
+
}
|
2849
|
+
}
|
2850
|
+
model_length=sqrt(model_length);
|
2851
|
+
if(verbosity>=2) {
|
2852
|
+
printf("Model-length = %f (%f), loss = %f, objective = %f\n",
|
2853
|
+
model_length,sumalpha,loss,loss+0.5*model_length*model_length);
|
2854
|
+
fflush(stdout);
|
2855
|
+
}
|
2856
|
+
j1=0;
|
2857
|
+
j2=0;
|
2858
|
+
j3=0;
|
2859
|
+
j4=0;
|
2860
|
+
unsupaddnum1=0;
|
2861
|
+
unsupaddnum2=0;
|
2862
|
+
umin=99999;
|
2863
|
+
umax=-99999;
|
2864
|
+
j4=1;
|
2865
|
+
while(j4) {
|
2866
|
+
umin=99999;
|
2867
|
+
umax=-99999;
|
2868
|
+
for(i=0;(i<totdoc);i++) {
|
2869
|
+
dist=(lin[i]-model->b);
|
2870
|
+
if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
|
2871
|
+
&& (dist<umin)) {
|
2872
|
+
umin=dist;
|
2873
|
+
imin=i;
|
2874
|
+
}
|
2875
|
+
if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
|
2876
|
+
&& (dist>umax)) {
|
2877
|
+
umax=dist;
|
2878
|
+
imax=i;
|
2879
|
+
}
|
2880
|
+
}
|
2881
|
+
if((umin < (umax+switchsens-1E-4))) {
|
2882
|
+
j1++;
|
2883
|
+
j2++;
|
2884
|
+
unsupaddnum1++;
|
2885
|
+
unlabeled[imin]=3;
|
2886
|
+
inconsistent[imin]=1;
|
2887
|
+
unsupaddnum2++;
|
2888
|
+
unlabeled[imax]=2;
|
2889
|
+
inconsistent[imax]=1;
|
2890
|
+
}
|
2891
|
+
else
|
2892
|
+
j4=0;
|
2893
|
+
j4=0;
|
2894
|
+
}
|
2895
|
+
for(j=0;(j<totdoc);j++) {
|
2896
|
+
if(unlabeled[j] && (!inconsistent[j])) {
|
2897
|
+
if(label[j]>0) {
|
2898
|
+
unlabeled[j]=2;
|
2899
|
+
}
|
2900
|
+
else if(label[j]<0) {
|
2901
|
+
unlabeled[j]=3;
|
2902
|
+
}
|
2903
|
+
/* inconsistent[j]=1; */
|
2904
|
+
j3++;
|
2905
|
+
}
|
2906
|
+
}
|
2907
|
+
switchnum+=unsupaddnum1+unsupaddnum2;
|
2908
|
+
|
2909
|
+
/* stop and print out current margin
|
2910
|
+
printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
|
2911
|
+
if(switchnum == 2*kernel_parm->poly_degree) {
|
2912
|
+
learn_parm->svm_unlabbound=1;
|
2913
|
+
}
|
2914
|
+
*/
|
2915
|
+
|
2916
|
+
if((!unsupaddnum1) && (!unsupaddnum2)) {
|
2917
|
+
if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
|
2918
|
+
for(j=0;(j<totdoc);j++) {
|
2919
|
+
inconsistent[j]=0;
|
2920
|
+
if(unlabeled[j]) unlabeled[j]=1;
|
2921
|
+
}
|
2922
|
+
write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
|
2923
|
+
totdoc,learn_parm);
|
2924
|
+
if(verbosity>=1)
|
2925
|
+
printf("Number of switches: %ld\n",switchnum);
|
2926
|
+
return((long)0);
|
2927
|
+
}
|
2928
|
+
switchsens=switchsensorg;
|
2929
|
+
learn_parm->svm_unlabbound*=1.5;
|
2930
|
+
if(learn_parm->svm_unlabbound>1) {
|
2931
|
+
learn_parm->svm_unlabbound=1;
|
2932
|
+
}
|
2933
|
+
model->at_upper_bound=0; /* since upper bound increased */
|
2934
|
+
if(verbosity>=1)
|
2935
|
+
printf("Increasing influence of unlabeled examples to %f%% .",
|
2936
|
+
learn_parm->svm_unlabbound*100.0);
|
2937
|
+
}
|
2938
|
+
else if(verbosity>=1) {
|
2939
|
+
printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
|
2940
|
+
upos,unsupaddnum1,unsupaddnum2);
|
2941
|
+
fflush(stdout);
|
2942
|
+
}
|
2943
|
+
|
2944
|
+
if(verbosity >= 2) printf("\n");
|
2945
|
+
|
2946
|
+
learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
|
2947
|
+
|
2948
|
+
for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
|
2949
|
+
if(unlabeled[i]) {
|
2950
|
+
if(label[i] == 1) {
|
2951
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*
|
2952
|
+
learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
|
2953
|
+
}
|
2954
|
+
else if(label[i] == -1) {
|
2955
|
+
learn_parm->svm_cost[i]=learn_parm->svm_c*
|
2956
|
+
learn_parm->svm_unlabbound;
|
2957
|
+
}
|
2958
|
+
}
|
2959
|
+
}
|
2960
|
+
|
2961
|
+
return((long)2);
|
2962
|
+
}
|
2963
|
+
|
2964
|
+
return((long)0);
|
2965
|
+
}
|
2966
|
+
|
2967
|
+
/*************************** Working set selection ***************************/
|
2968
|
+
|
2969
|
+
long select_next_qp_subproblem_grad(long int *label,
|
2970
|
+
long int *unlabeled,
|
2971
|
+
double *a, double *lin,
|
2972
|
+
double *c, long int totdoc,
|
2973
|
+
long int qp_size,
|
2974
|
+
LEARN_PARM *learn_parm,
|
2975
|
+
long int *inconsistent,
|
2976
|
+
long int *active2dnum,
|
2977
|
+
long int *working2dnum,
|
2978
|
+
double *selcrit,
|
2979
|
+
long int *select,
|
2980
|
+
KERNEL_CACHE *kernel_cache,
|
2981
|
+
long int cache_only,
|
2982
|
+
long int *key, long int *chosen)
|
2983
|
+
/* Use the feasible direction approach to select the next
|
2984
|
+
qp-subproblem (see chapter 'Selecting a good working set'). If
|
2985
|
+
'cache_only' is true, then the variables are selected only among
|
2986
|
+
those for which the kernel evaluations are cached. */
|
2987
|
+
{
|
2988
|
+
long choosenum,i,j,k,activedoc,inum,valid;
|
2989
|
+
double s;
|
2990
|
+
|
2991
|
+
for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
|
2992
|
+
choosenum=0;
|
2993
|
+
activedoc=0;
|
2994
|
+
for(i=0;(j=active2dnum[i])>=0;i++) {
|
2995
|
+
s=-label[j];
|
2996
|
+
if(kernel_cache && cache_only)
|
2997
|
+
valid=(kernel_cache->index[j]>=0);
|
2998
|
+
else
|
2999
|
+
valid=1;
|
3000
|
+
if(valid
|
3001
|
+
&& (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
|
3002
|
+
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
|
3003
|
+
&& (s>0)))
|
3004
|
+
&& (!chosen[j])
|
3005
|
+
&& (label[j])
|
3006
|
+
&& (!inconsistent[j]))
|
3007
|
+
{
|
3008
|
+
selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
|
3009
|
+
/* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
|
3010
|
+
key[activedoc]=j;
|
3011
|
+
activedoc++;
|
3012
|
+
}
|
3013
|
+
}
|
3014
|
+
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
|
3015
|
+
for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
|
3016
|
+
/* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
|
3017
|
+
i=key[select[k]];
|
3018
|
+
chosen[i]=1;
|
3019
|
+
working2dnum[inum+choosenum]=i;
|
3020
|
+
choosenum+=1;
|
3021
|
+
if(kernel_cache)
|
3022
|
+
kernel_cache_touch(kernel_cache,i); /* make sure it does not get
|
3023
|
+
kicked out of cache */
|
3024
|
+
/* } */
|
3025
|
+
}
|
3026
|
+
|
3027
|
+
activedoc=0;
|
3028
|
+
for(i=0;(j=active2dnum[i])>=0;i++) {
|
3029
|
+
s=label[j];
|
3030
|
+
if(kernel_cache && cache_only)
|
3031
|
+
valid=(kernel_cache->index[j]>=0);
|
3032
|
+
else
|
3033
|
+
valid=1;
|
3034
|
+
if(valid
|
3035
|
+
&& (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
|
3036
|
+
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
|
3037
|
+
&& (s>0)))
|
3038
|
+
&& (!chosen[j])
|
3039
|
+
&& (label[j])
|
3040
|
+
&& (!inconsistent[j]))
|
3041
|
+
{
|
3042
|
+
selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
|
3043
|
+
/* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
|
3044
|
+
key[activedoc]=j;
|
3045
|
+
activedoc++;
|
3046
|
+
}
|
3047
|
+
}
|
3048
|
+
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
|
3049
|
+
for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
|
3050
|
+
/* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
|
3051
|
+
i=key[select[k]];
|
3052
|
+
chosen[i]=1;
|
3053
|
+
working2dnum[inum+choosenum]=i;
|
3054
|
+
choosenum+=1;
|
3055
|
+
if(kernel_cache)
|
3056
|
+
kernel_cache_touch(kernel_cache,i); /* make sure it does not get
|
3057
|
+
kicked out of cache */
|
3058
|
+
/* } */
|
3059
|
+
}
|
3060
|
+
working2dnum[inum+choosenum]=-1; /* complete index */
|
3061
|
+
return(choosenum);
|
3062
|
+
}
|
3063
|
+
|
3064
|
+
long select_next_qp_subproblem_rand(long int *label,
|
3065
|
+
long int *unlabeled,
|
3066
|
+
double *a, double *lin,
|
3067
|
+
double *c, long int totdoc,
|
3068
|
+
long int qp_size,
|
3069
|
+
LEARN_PARM *learn_parm,
|
3070
|
+
long int *inconsistent,
|
3071
|
+
long int *active2dnum,
|
3072
|
+
long int *working2dnum,
|
3073
|
+
double *selcrit,
|
3074
|
+
long int *select,
|
3075
|
+
KERNEL_CACHE *kernel_cache,
|
3076
|
+
long int *key,
|
3077
|
+
long int *chosen,
|
3078
|
+
long int iteration)
|
3079
|
+
/* Use the feasible direction approach to select the next
|
3080
|
+
qp-subproblem (see section 'Selecting a good working set'). Chooses
|
3081
|
+
a feasible direction at (pseudo) random to help jump over numerical
|
3082
|
+
problem. */
|
3083
|
+
{
|
3084
|
+
long choosenum,i,j,k,activedoc,inum;
|
3085
|
+
double s;
|
3086
|
+
|
3087
|
+
for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
|
3088
|
+
choosenum=0;
|
3089
|
+
activedoc=0;
|
3090
|
+
for(i=0;(j=active2dnum[i])>=0;i++) {
|
3091
|
+
s=-label[j];
|
3092
|
+
if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
|
3093
|
+
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
|
3094
|
+
&& (s>0)))
|
3095
|
+
&& (!inconsistent[j])
|
3096
|
+
&& (label[j])
|
3097
|
+
&& (!chosen[j])) {
|
3098
|
+
selcrit[activedoc]=(j+iteration) % totdoc;
|
3099
|
+
key[activedoc]=j;
|
3100
|
+
activedoc++;
|
3101
|
+
}
|
3102
|
+
}
|
3103
|
+
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
|
3104
|
+
for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
|
3105
|
+
i=key[select[k]];
|
3106
|
+
chosen[i]=1;
|
3107
|
+
working2dnum[inum+choosenum]=i;
|
3108
|
+
choosenum+=1;
|
3109
|
+
kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
|
3110
|
+
/* out of cache */
|
3111
|
+
}
|
3112
|
+
|
3113
|
+
activedoc=0;
|
3114
|
+
for(i=0;(j=active2dnum[i])>=0;i++) {
|
3115
|
+
s=label[j];
|
3116
|
+
if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
|
3117
|
+
&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
|
3118
|
+
&& (s>0)))
|
3119
|
+
&& (!inconsistent[j])
|
3120
|
+
&& (label[j])
|
3121
|
+
&& (!chosen[j])) {
|
3122
|
+
selcrit[activedoc]=(j+iteration) % totdoc;
|
3123
|
+
key[activedoc]=j;
|
3124
|
+
activedoc++;
|
3125
|
+
}
|
3126
|
+
}
|
3127
|
+
select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
|
3128
|
+
for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
|
3129
|
+
i=key[select[k]];
|
3130
|
+
chosen[i]=1;
|
3131
|
+
working2dnum[inum+choosenum]=i;
|
3132
|
+
choosenum+=1;
|
3133
|
+
kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
|
3134
|
+
/* out of cache */
|
3135
|
+
}
|
3136
|
+
working2dnum[inum+choosenum]=-1; /* complete index */
|
3137
|
+
return(choosenum);
|
3138
|
+
}
|
3139
|
+
|
3140
|
+
long select_next_qp_slackset(DOC **docs, long int *label,
|
3141
|
+
double *a, double *lin,
|
3142
|
+
double *slack, double *alphaslack,
|
3143
|
+
double *c,
|
3144
|
+
LEARN_PARM *learn_parm,
|
3145
|
+
long int *active2dnum, double *maxviol)
|
3146
|
+
/* returns the slackset with the largest internal violation */
|
3147
|
+
{
|
3148
|
+
long i,ii,maxdiffid;
|
3149
|
+
double dist,target,maxdiff,ex_c;
|
3150
|
+
|
3151
|
+
maxdiff=0;
|
3152
|
+
maxdiffid=0;
|
3153
|
+
for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
3154
|
+
ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
|
3155
|
+
if(alphaslack[docs[i]->slackid] >= ex_c) {
|
3156
|
+
dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
|
3157
|
+
target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
|
3158
|
+
if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
3159
|
+
if((dist-target)>maxdiff) { /* largest violation */
|
3160
|
+
maxdiff=dist-target;
|
3161
|
+
maxdiffid=docs[i]->slackid;
|
3162
|
+
}
|
3163
|
+
}
|
3164
|
+
}
|
3165
|
+
}
|
3166
|
+
(*maxviol)=maxdiff;
|
3167
|
+
return(maxdiffid);
|
3168
|
+
}
|
3169
|
+
|
3170
|
+
|
3171
|
+
void select_top_n(double *selcrit, long int range, long int *select,
|
3172
|
+
long int n)
|
3173
|
+
{
|
3174
|
+
register long i,j;
|
3175
|
+
|
3176
|
+
for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
|
3177
|
+
for(j=i;j>=0;j--) {
|
3178
|
+
if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
|
3179
|
+
select[j]=select[j-1];
|
3180
|
+
}
|
3181
|
+
else {
|
3182
|
+
select[j]=i;
|
3183
|
+
j=-1;
|
3184
|
+
}
|
3185
|
+
}
|
3186
|
+
}
|
3187
|
+
if(n>0) {
|
3188
|
+
for(i=n;i<range;i++) {
|
3189
|
+
if(selcrit[i]>selcrit[select[n-1]]) {
|
3190
|
+
for(j=n-1;j>=0;j--) {
|
3191
|
+
if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
|
3192
|
+
select[j]=select[j-1];
|
3193
|
+
}
|
3194
|
+
else {
|
3195
|
+
select[j]=i;
|
3196
|
+
j=-1;
|
3197
|
+
}
|
3198
|
+
}
|
3199
|
+
}
|
3200
|
+
}
|
3201
|
+
}
|
3202
|
+
}
|
3203
|
+
|
3204
|
+
|
3205
|
+
/******************************** Shrinking *********************************/
|
3206
|
+
|
3207
|
+
void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
|
3208
|
+
long int maxhistory)
|
3209
|
+
{
|
3210
|
+
long i;
|
3211
|
+
|
3212
|
+
shrink_state->deactnum=0;
|
3213
|
+
shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
|
3214
|
+
shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
|
3215
|
+
shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
|
3216
|
+
shrink_state->maxhistory=maxhistory;
|
3217
|
+
shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
|
3218
|
+
shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
|
3219
|
+
|
3220
|
+
for(i=0;i<totdoc;i++) {
|
3221
|
+
shrink_state->active[i]=1;
|
3222
|
+
shrink_state->inactive_since[i]=0;
|
3223
|
+
shrink_state->last_a[i]=0;
|
3224
|
+
shrink_state->last_lin[i]=0;
|
3225
|
+
}
|
3226
|
+
}
|
3227
|
+
|
3228
|
+
void shrink_state_cleanup(SHRINK_STATE *shrink_state)
|
3229
|
+
{
|
3230
|
+
free(shrink_state->active);
|
3231
|
+
free(shrink_state->inactive_since);
|
3232
|
+
if(shrink_state->deactnum > 0)
|
3233
|
+
free(shrink_state->a_history[shrink_state->deactnum-1]);
|
3234
|
+
free(shrink_state->a_history);
|
3235
|
+
free(shrink_state->last_a);
|
3236
|
+
free(shrink_state->last_lin);
|
3237
|
+
}
|
3238
|
+
|
3239
|
+
long shrink_problem(DOC **docs,
|
3240
|
+
LEARN_PARM *learn_parm,
|
3241
|
+
SHRINK_STATE *shrink_state,
|
3242
|
+
KERNEL_PARM *kernel_parm,
|
3243
|
+
long int *active2dnum,
|
3244
|
+
long int *last_suboptimal_at,
|
3245
|
+
long int iteration,
|
3246
|
+
long int totdoc,
|
3247
|
+
long int minshrink,
|
3248
|
+
double *a,
|
3249
|
+
long int *inconsistent)
|
3250
|
+
/* Shrink some variables away. Do the shrinking only if at least
|
3251
|
+
minshrink variables can be removed. */
|
3252
|
+
{
|
3253
|
+
long i,ii,change,activenum,lastiter;
|
3254
|
+
double *a_old;
|
3255
|
+
|
3256
|
+
activenum=0;
|
3257
|
+
change=0;
|
3258
|
+
for(ii=0;active2dnum[ii]>=0;ii++) {
|
3259
|
+
i=active2dnum[ii];
|
3260
|
+
activenum++;
|
3261
|
+
if(learn_parm->sharedslack)
|
3262
|
+
lastiter=last_suboptimal_at[docs[i]->slackid];
|
3263
|
+
else
|
3264
|
+
lastiter=last_suboptimal_at[i];
|
3265
|
+
if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
|
3266
|
+
|| (inconsistent[i])) {
|
3267
|
+
change++;
|
3268
|
+
}
|
3269
|
+
}
|
3270
|
+
if((change>=minshrink) /* shrink only if sufficiently many candidates */
|
3271
|
+
&& (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
|
3272
|
+
/* Shrink problem by removing those variables which are */
|
3273
|
+
/* optimal at a bound for a minimum number of iterations */
|
3274
|
+
if(verbosity>=2) {
|
3275
|
+
printf(" Shrinking..."); fflush(stdout);
|
3276
|
+
}
|
3277
|
+
if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */
|
3278
|
+
a_old=(double *)my_malloc(sizeof(double)*totdoc);
|
3279
|
+
shrink_state->a_history[shrink_state->deactnum]=a_old;
|
3280
|
+
for(i=0;i<totdoc;i++) {
|
3281
|
+
a_old[i]=a[i];
|
3282
|
+
}
|
3283
|
+
}
|
3284
|
+
for(ii=0;active2dnum[ii]>=0;ii++) {
|
3285
|
+
i=active2dnum[ii];
|
3286
|
+
if(learn_parm->sharedslack)
|
3287
|
+
lastiter=last_suboptimal_at[docs[i]->slackid];
|
3288
|
+
else
|
3289
|
+
lastiter=last_suboptimal_at[i];
|
3290
|
+
if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
|
3291
|
+
|| (inconsistent[i])) {
|
3292
|
+
shrink_state->active[i]=0;
|
3293
|
+
shrink_state->inactive_since[i]=shrink_state->deactnum;
|
3294
|
+
}
|
3295
|
+
}
|
3296
|
+
activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
3297
|
+
shrink_state->deactnum++;
|
3298
|
+
if(kernel_parm->kernel_type == LINEAR) {
|
3299
|
+
shrink_state->deactnum=0;
|
3300
|
+
}
|
3301
|
+
if(verbosity>=2) {
|
3302
|
+
printf("done.\n"); fflush(stdout);
|
3303
|
+
printf(" Number of inactive variables = %ld\n",totdoc-activenum);
|
3304
|
+
}
|
3305
|
+
}
|
3306
|
+
return(activenum);
|
3307
|
+
}
|
3308
|
+
|
3309
|
+
|
3310
|
+
void reactivate_inactive_examples(long int *label,
|
3311
|
+
long int *unlabeled,
|
3312
|
+
double *a,
|
3313
|
+
SHRINK_STATE *shrink_state,
|
3314
|
+
double *lin,
|
3315
|
+
double *c,
|
3316
|
+
long int totdoc,
|
3317
|
+
long int totwords,
|
3318
|
+
long int iteration,
|
3319
|
+
LEARN_PARM *learn_parm,
|
3320
|
+
long int *inconsistent,
|
3321
|
+
DOC **docs,
|
3322
|
+
KERNEL_PARM *kernel_parm,
|
3323
|
+
KERNEL_CACHE *kernel_cache,
|
3324
|
+
MODEL *model,
|
3325
|
+
CFLOAT *aicache,
|
3326
|
+
double *weights,
|
3327
|
+
double *maxdiff)
|
3328
|
+
/* Make all variables active again which had been removed by
|
3329
|
+
shrinking. */
|
3330
|
+
/* Computes lin for those variables from scratch. */
|
3331
|
+
{
|
3332
|
+
register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
|
3333
|
+
long *changed,*inactive;
|
3334
|
+
register double kernel_val,*a_old,dist;
|
3335
|
+
double ex_c,target;
|
3336
|
+
SVECTOR *f;
|
3337
|
+
|
3338
|
+
if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
|
3339
|
+
a_old=shrink_state->last_a;
|
3340
|
+
clear_vector_n(weights,totwords);
|
3341
|
+
for(i=0;i<totdoc;i++) {
|
3342
|
+
if(a[i] != a_old[i]) {
|
3343
|
+
for(f=docs[i]->fvec;f;f=f->next)
|
3344
|
+
add_vector_ns(weights,f,
|
3345
|
+
f->factor*((a[i]-a_old[i])*(double)label[i]));
|
3346
|
+
a_old[i]=a[i];
|
3347
|
+
}
|
3348
|
+
}
|
3349
|
+
for(i=0;i<totdoc;i++) {
|
3350
|
+
if(!shrink_state->active[i]) {
|
3351
|
+
for(f=docs[i]->fvec;f;f=f->next)
|
3352
|
+
lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
|
3353
|
+
}
|
3354
|
+
shrink_state->last_lin[i]=lin[i];
|
3355
|
+
}
|
3356
|
+
}
|
3357
|
+
else {
|
3358
|
+
changed=(long *)my_malloc(sizeof(long)*totdoc);
|
3359
|
+
changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
|
3360
|
+
inactive=(long *)my_malloc(sizeof(long)*totdoc);
|
3361
|
+
inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
|
3362
|
+
for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
|
3363
|
+
if(verbosity>=2) {
|
3364
|
+
printf("%ld..",t); fflush(stdout);
|
3365
|
+
}
|
3366
|
+
a_old=shrink_state->a_history[t];
|
3367
|
+
for(i=0;i<totdoc;i++) {
|
3368
|
+
inactive[i]=((!shrink_state->active[i])
|
3369
|
+
&& (shrink_state->inactive_since[i] == t));
|
3370
|
+
changed[i]= (a[i] != a_old[i]);
|
3371
|
+
}
|
3372
|
+
compute_index(inactive,totdoc,inactive2dnum);
|
3373
|
+
compute_index(changed,totdoc,changed2dnum);
|
3374
|
+
|
3375
|
+
for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
|
3376
|
+
get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
|
3377
|
+
kernel_parm);
|
3378
|
+
for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
|
3379
|
+
kernel_val=aicache[j];
|
3380
|
+
lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
|
3381
|
+
}
|
3382
|
+
}
|
3383
|
+
}
|
3384
|
+
free(changed);
|
3385
|
+
free(changed2dnum);
|
3386
|
+
free(inactive);
|
3387
|
+
free(inactive2dnum);
|
3388
|
+
}
|
3389
|
+
(*maxdiff)=0;
|
3390
|
+
for(i=0;i<totdoc;i++) {
|
3391
|
+
shrink_state->inactive_since[i]=shrink_state->deactnum-1;
|
3392
|
+
if(!inconsistent[i]) {
|
3393
|
+
dist=(lin[i]-model->b)*(double)label[i];
|
3394
|
+
target=-(learn_parm->eps-(double)label[i]*c[i]);
|
3395
|
+
ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
3396
|
+
if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
3397
|
+
if((dist-target)>(*maxdiff)) /* largest violation */
|
3398
|
+
(*maxdiff)=dist-target;
|
3399
|
+
}
|
3400
|
+
else if((a[i]<ex_c) && (dist < target)) {
|
3401
|
+
if((target-dist)>(*maxdiff)) /* largest violation */
|
3402
|
+
(*maxdiff)=target-dist;
|
3403
|
+
}
|
3404
|
+
if((a[i]>(0+learn_parm->epsilon_a))
|
3405
|
+
&& (a[i]<ex_c)) {
|
3406
|
+
shrink_state->active[i]=1; /* not at bound */
|
3407
|
+
}
|
3408
|
+
else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
|
3409
|
+
shrink_state->active[i]=1;
|
3410
|
+
}
|
3411
|
+
else if((a[i]>=ex_c)
|
3412
|
+
&& (dist > (target-learn_parm->epsilon_shrink))) {
|
3413
|
+
shrink_state->active[i]=1;
|
3414
|
+
}
|
3415
|
+
else if(learn_parm->sharedslack) { /* make all active when sharedslack */
|
3416
|
+
shrink_state->active[i]=1;
|
3417
|
+
}
|
3418
|
+
}
|
3419
|
+
}
|
3420
|
+
if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
|
3421
|
+
for(i=0;i<totdoc;i++) {
|
3422
|
+
(shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
|
3423
|
+
}
|
3424
|
+
for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
|
3425
|
+
free(shrink_state->a_history[t]);
|
3426
|
+
shrink_state->a_history[t]=0;
|
3427
|
+
}
|
3428
|
+
}
|
3429
|
+
}
|
3430
|
+
|
3431
|
+
/****************************** Cache handling *******************************/
|
3432
|
+
|
3433
|
+
void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
|
3434
|
+
long int docnum, long int totdoc,
|
3435
|
+
long int *active2dnum, CFLOAT *buffer,
|
3436
|
+
KERNEL_PARM *kernel_parm)
|
3437
|
+
/* Get's a row of the matrix of kernel values This matrix has the
|
3438
|
+
same form as the Hessian, just that the elements are not
|
3439
|
+
multiplied by */
|
3440
|
+
/* y_i * y_j * a_i * a_j */
|
3441
|
+
/* Takes the values from the cache if available. */
|
3442
|
+
{
|
3443
|
+
register long i,j,start;
|
3444
|
+
DOC *ex;
|
3445
|
+
|
3446
|
+
ex=docs[docnum];
|
3447
|
+
|
3448
|
+
if(kernel_cache->index[docnum] != -1) { /* row is cached? */
|
3449
|
+
kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
|
3450
|
+
start=kernel_cache->activenum*kernel_cache->index[docnum];
|
3451
|
+
for(i=0;(j=active2dnum[i])>=0;i++) {
|
3452
|
+
if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
|
3453
|
+
buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
|
3454
|
+
}
|
3455
|
+
else {
|
3456
|
+
buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
|
3457
|
+
}
|
3458
|
+
}
|
3459
|
+
}
|
3460
|
+
else {
|
3461
|
+
for(i=0;(j=active2dnum[i])>=0;i++) {
|
3462
|
+
buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
|
3463
|
+
}
|
3464
|
+
}
|
3465
|
+
}
|
3466
|
+
|
3467
|
+
|
3468
|
+
void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
|
3469
|
+
long int m, KERNEL_PARM *kernel_parm)
|
3470
|
+
/* Fills cache for the row m */
|
3471
|
+
{
|
3472
|
+
register DOC *ex;
|
3473
|
+
register long j,k,l;
|
3474
|
+
register CFLOAT *cache;
|
3475
|
+
|
3476
|
+
if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/
|
3477
|
+
cache = kernel_cache_clean_and_malloc(kernel_cache,m);
|
3478
|
+
if(cache) {
|
3479
|
+
l=kernel_cache->totdoc2active[m];
|
3480
|
+
ex=docs[m];
|
3481
|
+
for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
|
3482
|
+
k=kernel_cache->active2totdoc[j];
|
3483
|
+
if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
|
3484
|
+
cache[j]=kernel_cache->buffer[kernel_cache->activenum
|
3485
|
+
*kernel_cache->index[k]+l];
|
3486
|
+
}
|
3487
|
+
else {
|
3488
|
+
cache[j]=kernel(kernel_parm,ex,docs[k]);
|
3489
|
+
}
|
3490
|
+
}
|
3491
|
+
}
|
3492
|
+
else {
|
3493
|
+
perror("Error: Kernel cache full! => increase cache size");
|
3494
|
+
}
|
3495
|
+
}
|
3496
|
+
}
|
3497
|
+
|
3498
|
+
|
3499
|
+
void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs,
|
3500
|
+
long int *key, long int varnum,
|
3501
|
+
KERNEL_PARM *kernel_parm)
|
3502
|
+
/* Fills cache for the rows in key */
|
3503
|
+
{
|
3504
|
+
register long i;
|
3505
|
+
|
3506
|
+
for(i=0;i<varnum;i++) { /* fill up kernel cache */
|
3507
|
+
cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
|
3508
|
+
}
|
3509
|
+
}
|
3510
|
+
|
3511
|
+
|
3512
|
+
void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc,
|
3513
|
+
long int numshrink, long int *after)
|
3514
|
+
/* Remove numshrink columns in the cache which correspond to
|
3515
|
+
examples marked 0 in after. */
|
3516
|
+
{
|
3517
|
+
register long i,j,jj,from=0,to=0,scount;
|
3518
|
+
long *keep;
|
3519
|
+
|
3520
|
+
if(verbosity>=2) {
|
3521
|
+
printf(" Reorganizing cache..."); fflush(stdout);
|
3522
|
+
}
|
3523
|
+
|
3524
|
+
keep=(long *)my_malloc(sizeof(long)*totdoc);
|
3525
|
+
for(j=0;j<totdoc;j++) {
|
3526
|
+
keep[j]=1;
|
3527
|
+
}
|
3528
|
+
scount=0;
|
3529
|
+
for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) {
|
3530
|
+
j=kernel_cache->active2totdoc[jj];
|
3531
|
+
if(!after[j]) {
|
3532
|
+
scount++;
|
3533
|
+
keep[j]=0;
|
3534
|
+
}
|
3535
|
+
}
|
3536
|
+
|
3537
|
+
for(i=0;i<kernel_cache->max_elems;i++) {
|
3538
|
+
for(jj=0;jj<kernel_cache->activenum;jj++) {
|
3539
|
+
j=kernel_cache->active2totdoc[jj];
|
3540
|
+
if(!keep[j]) {
|
3541
|
+
from++;
|
3542
|
+
}
|
3543
|
+
else {
|
3544
|
+
kernel_cache->buffer[to]=kernel_cache->buffer[from];
|
3545
|
+
to++;
|
3546
|
+
from++;
|
3547
|
+
}
|
3548
|
+
}
|
3549
|
+
}
|
3550
|
+
|
3551
|
+
kernel_cache->activenum=0;
|
3552
|
+
for(j=0;j<totdoc;j++) {
|
3553
|
+
if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) {
|
3554
|
+
kernel_cache->active2totdoc[kernel_cache->activenum]=j;
|
3555
|
+
kernel_cache->totdoc2active[j]=kernel_cache->activenum;
|
3556
|
+
kernel_cache->activenum++;
|
3557
|
+
}
|
3558
|
+
else {
|
3559
|
+
kernel_cache->totdoc2active[j]=-1;
|
3560
|
+
}
|
3561
|
+
}
|
3562
|
+
|
3563
|
+
kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum);
|
3564
|
+
if(kernel_cache->max_elems>totdoc) {
|
3565
|
+
kernel_cache->max_elems=totdoc;
|
3566
|
+
}
|
3567
|
+
|
3568
|
+
free(keep);
|
3569
|
+
|
3570
|
+
if(verbosity>=2) {
|
3571
|
+
printf("done.\n"); fflush(stdout);
|
3572
|
+
printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
|
3573
|
+
}
|
3574
|
+
}
|
3575
|
+
|
3576
|
+
KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize)
|
3577
|
+
{
|
3578
|
+
long i;
|
3579
|
+
KERNEL_CACHE *kernel_cache;
|
3580
|
+
|
3581
|
+
kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE));
|
3582
|
+
kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc);
|
3583
|
+
kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc);
|
3584
|
+
kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc);
|
3585
|
+
kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc);
|
3586
|
+
kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc);
|
3587
|
+
kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc);
|
3588
|
+
kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024);
|
3589
|
+
|
3590
|
+
kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024);
|
3591
|
+
|
3592
|
+
kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc);
|
3593
|
+
if(kernel_cache->max_elems>totdoc) {
|
3594
|
+
kernel_cache->max_elems=totdoc;
|
3595
|
+
}
|
3596
|
+
|
3597
|
+
if(verbosity>=2) {
|
3598
|
+
printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
|
3599
|
+
printf(" Kernel evals so far: %ld\n",kernel_cache_statistic);
|
3600
|
+
}
|
3601
|
+
|
3602
|
+
kernel_cache->elems=0; /* initialize cache */
|
3603
|
+
for(i=0;i<totdoc;i++) {
|
3604
|
+
kernel_cache->index[i]=-1;
|
3605
|
+
kernel_cache->lru[i]=0;
|
3606
|
+
}
|
3607
|
+
for(i=0;i<totdoc;i++) {
|
3608
|
+
kernel_cache->occu[i]=0;
|
3609
|
+
kernel_cache->invindex[i]=-1;
|
3610
|
+
}
|
3611
|
+
|
3612
|
+
kernel_cache->activenum=totdoc;;
|
3613
|
+
for(i=0;i<totdoc;i++) {
|
3614
|
+
kernel_cache->active2totdoc[i]=i;
|
3615
|
+
kernel_cache->totdoc2active[i]=i;
|
3616
|
+
}
|
3617
|
+
|
3618
|
+
kernel_cache->time=0;
|
3619
|
+
|
3620
|
+
return(kernel_cache);
|
3621
|
+
}
|
3622
|
+
|
3623
|
+
void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
|
3624
|
+
{
|
3625
|
+
long maxlru=0,k;
|
3626
|
+
|
3627
|
+
for(k=0;k<kernel_cache->max_elems;k++) {
|
3628
|
+
if(maxlru < kernel_cache->lru[k])
|
3629
|
+
maxlru=kernel_cache->lru[k];
|
3630
|
+
}
|
3631
|
+
for(k=0;k<kernel_cache->max_elems;k++) {
|
3632
|
+
kernel_cache->lru[k]-=maxlru;
|
3633
|
+
}
|
3634
|
+
}
|
3635
|
+
|
3636
|
+
void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
|
3637
|
+
{
|
3638
|
+
free(kernel_cache->index);
|
3639
|
+
free(kernel_cache->occu);
|
3640
|
+
free(kernel_cache->lru);
|
3641
|
+
free(kernel_cache->invindex);
|
3642
|
+
free(kernel_cache->active2totdoc);
|
3643
|
+
free(kernel_cache->totdoc2active);
|
3644
|
+
free(kernel_cache->buffer);
|
3645
|
+
free(kernel_cache);
|
3646
|
+
}
|
3647
|
+
|
3648
|
+
long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
|
3649
|
+
{
|
3650
|
+
long i;
|
3651
|
+
|
3652
|
+
if(kernel_cache_space_available(kernel_cache)) {
|
3653
|
+
for(i=0;i<kernel_cache->max_elems;i++) {
|
3654
|
+
if(!kernel_cache->occu[i]) {
|
3655
|
+
kernel_cache->occu[i]=1;
|
3656
|
+
kernel_cache->elems++;
|
3657
|
+
return(i);
|
3658
|
+
}
|
3659
|
+
}
|
3660
|
+
}
|
3661
|
+
return(-1);
|
3662
|
+
}
|
3663
|
+
|
3664
|
+
void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
|
3665
|
+
{
|
3666
|
+
kernel_cache->occu[i]=0;
|
3667
|
+
kernel_cache->elems--;
|
3668
|
+
}
|
3669
|
+
|
3670
|
+
long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
|
3671
|
+
/* remove least recently used cache element */
|
3672
|
+
{
|
3673
|
+
register long k,least_elem=-1,least_time;
|
3674
|
+
|
3675
|
+
least_time=kernel_cache->time+1;
|
3676
|
+
for(k=0;k<kernel_cache->max_elems;k++) {
|
3677
|
+
if(kernel_cache->invindex[k] != -1) {
|
3678
|
+
if(kernel_cache->lru[k]<least_time) {
|
3679
|
+
least_time=kernel_cache->lru[k];
|
3680
|
+
least_elem=k;
|
3681
|
+
}
|
3682
|
+
}
|
3683
|
+
}
|
3684
|
+
if(least_elem != -1) {
|
3685
|
+
kernel_cache_free(kernel_cache,least_elem);
|
3686
|
+
kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
|
3687
|
+
kernel_cache->invindex[least_elem]=-1;
|
3688
|
+
return(1);
|
3689
|
+
}
|
3690
|
+
return(0);
|
3691
|
+
}
|
3692
|
+
|
3693
|
+
|
3694
|
+
CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
|
3695
|
+
long int docnum)
|
3696
|
+
/* Get a free cache entry. In case cache is full, the lru element
|
3697
|
+
is removed. */
|
3698
|
+
{
|
3699
|
+
long result;
|
3700
|
+
if((result = kernel_cache_malloc(kernel_cache)) == -1) {
|
3701
|
+
if(kernel_cache_free_lru(kernel_cache)) {
|
3702
|
+
result = kernel_cache_malloc(kernel_cache);
|
3703
|
+
}
|
3704
|
+
}
|
3705
|
+
kernel_cache->index[docnum]=result;
|
3706
|
+
if(result == -1) {
|
3707
|
+
return(0);
|
3708
|
+
}
|
3709
|
+
kernel_cache->invindex[result]=docnum;
|
3710
|
+
kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
|
3711
|
+
return((CFLOAT *)((long)kernel_cache->buffer
|
3712
|
+
+(kernel_cache->activenum*sizeof(CFLOAT)*
|
3713
|
+
kernel_cache->index[docnum])));
|
3714
|
+
}
|
3715
|
+
|
3716
|
+
long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
|
3717
|
+
/* Update lru time to avoid removal from cache. */
|
3718
|
+
{
|
3719
|
+
if(kernel_cache && kernel_cache->index[docnum] != -1) {
|
3720
|
+
kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
|
3721
|
+
return(1);
|
3722
|
+
}
|
3723
|
+
return(0);
|
3724
|
+
}
|
3725
|
+
|
3726
|
+
long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
|
3727
|
+
/* Is that row cached? */
|
3728
|
+
{
|
3729
|
+
return(kernel_cache->index[docnum] != -1);
|
3730
|
+
}
|
3731
|
+
|
3732
|
+
long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
|
3733
|
+
/* Is there room for one more row? */
|
3734
|
+
{
|
3735
|
+
return(kernel_cache->elems < kernel_cache->max_elems);
|
3736
|
+
}
|
3737
|
+
|
3738
|
+
/************************** Compute estimates ******************************/
|
3739
|
+
|
3740
|
+
void compute_xa_estimates(MODEL *model, long int *label,
|
3741
|
+
long int *unlabeled, long int totdoc,
|
3742
|
+
DOC **docs, double *lin, double *a,
|
3743
|
+
KERNEL_PARM *kernel_parm,
|
3744
|
+
LEARN_PARM *learn_parm, double *error,
|
3745
|
+
double *recall, double *precision)
|
3746
|
+
/* Computes xa-estimate of error rate, recall, and precision. See
|
3747
|
+
T. Joachims, Estimating the Generalization Performance of an SVM
|
3748
|
+
Efficiently, IMCL, 2000. */
|
3749
|
+
{
|
3750
|
+
long i,looerror,looposerror,loonegerror;
|
3751
|
+
long totex,totposex;
|
3752
|
+
double xi,r_delta,r_delta_sq,sim=0;
|
3753
|
+
long *sv2dnum=NULL,*sv=NULL,svnum;
|
3754
|
+
|
3755
|
+
r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
|
3756
|
+
r_delta_sq=r_delta*r_delta;
|
3757
|
+
|
3758
|
+
looerror=0;
|
3759
|
+
looposerror=0;
|
3760
|
+
loonegerror=0;
|
3761
|
+
totex=0;
|
3762
|
+
totposex=0;
|
3763
|
+
svnum=0;
|
3764
|
+
|
3765
|
+
if(learn_parm->xa_depth > 0) {
|
3766
|
+
sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
3767
|
+
for(i=0;i<totdoc;i++)
|
3768
|
+
sv[i]=0;
|
3769
|
+
for(i=1;i<model->sv_num;i++)
|
3770
|
+
if(a[model->supvec[i]->docnum]
|
3771
|
+
< (learn_parm->svm_cost[model->supvec[i]->docnum]
|
3772
|
+
-learn_parm->epsilon_a)) {
|
3773
|
+
sv[model->supvec[i]->docnum]=1;
|
3774
|
+
svnum++;
|
3775
|
+
}
|
3776
|
+
sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
3777
|
+
clear_index(sv2dnum);
|
3778
|
+
compute_index(sv,totdoc,sv2dnum);
|
3779
|
+
}
|
3780
|
+
|
3781
|
+
for(i=0;i<totdoc;i++) {
|
3782
|
+
if(unlabeled[i]) {
|
3783
|
+
/* ignore it */
|
3784
|
+
}
|
3785
|
+
else {
|
3786
|
+
xi=1.0-((lin[i]-model->b)*(double)label[i]);
|
3787
|
+
if(xi<0) xi=0;
|
3788
|
+
if(label[i]>0) {
|
3789
|
+
totposex++;
|
3790
|
+
}
|
3791
|
+
if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
|
3792
|
+
if(learn_parm->xa_depth > 0) { /* makes assumptions */
|
3793
|
+
sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
|
3794
|
+
kernel_parm,learn_parm,
|
3795
|
+
(double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
|
3796
|
+
}
|
3797
|
+
if((learn_parm->xa_depth == 0) ||
|
3798
|
+
((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
|
3799
|
+
looerror++;
|
3800
|
+
if(label[i]>0) {
|
3801
|
+
looposerror++;
|
3802
|
+
}
|
3803
|
+
else {
|
3804
|
+
loonegerror++;
|
3805
|
+
}
|
3806
|
+
}
|
3807
|
+
}
|
3808
|
+
totex++;
|
3809
|
+
}
|
3810
|
+
}
|
3811
|
+
|
3812
|
+
(*error)=((double)looerror/(double)totex)*100.0;
|
3813
|
+
(*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
|
3814
|
+
(*precision)=(((double)totposex-(double)looposerror)
|
3815
|
+
/((double)totposex-(double)looposerror+(double)loonegerror))*100.0;
|
3816
|
+
|
3817
|
+
free(sv);
|
3818
|
+
free(sv2dnum);
|
3819
|
+
}
|
3820
|
+
|
3821
|
+
|
3822
|
+
double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
|
3823
|
+
DOC **docs, double *a,
|
3824
|
+
long int docnum,
|
3825
|
+
long int *label,
|
3826
|
+
KERNEL_PARM *kernel_parm,
|
3827
|
+
LEARN_PARM *learn_parm, double thresh)
|
3828
|
+
/* Experimental Code improving plain XiAlpha Estimates by
|
3829
|
+
computing a better bound using a greedy optimzation strategy. */
|
3830
|
+
{
|
3831
|
+
long best_depth=0;
|
3832
|
+
long i,j,k,d,skip,allskip;
|
3833
|
+
double best,best_val[101],val,init_val_sq,init_val_lin;
|
3834
|
+
long best_ex[101];
|
3835
|
+
CFLOAT *cache,*trow;
|
3836
|
+
|
3837
|
+
cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
|
3838
|
+
trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);
|
3839
|
+
|
3840
|
+
for(k=0;k<svnum;k++) {
|
3841
|
+
trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
|
3842
|
+
}
|
3843
|
+
|
3844
|
+
init_val_sq=0;
|
3845
|
+
init_val_lin=0;
|
3846
|
+
best=0;
|
3847
|
+
|
3848
|
+
for(d=0;d<learn_parm->xa_depth;d++) {
|
3849
|
+
allskip=1;
|
3850
|
+
if(d>=1) {
|
3851
|
+
init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
|
3852
|
+
for(k=0;k<d-1;k++) {
|
3853
|
+
init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
|
3854
|
+
}
|
3855
|
+
init_val_lin+=trow[best_ex[d-1]];
|
3856
|
+
}
|
3857
|
+
for(i=0;i<svnum;i++) {
|
3858
|
+
skip=0;
|
3859
|
+
if(sv2dnum[i] == docnum) skip=1;
|
3860
|
+
for(j=0;j<d;j++) {
|
3861
|
+
if(i == best_ex[j]) skip=1;
|
3862
|
+
}
|
3863
|
+
|
3864
|
+
if(!skip) {
|
3865
|
+
val=init_val_sq;
|
3866
|
+
if(kernel_parm->kernel_type == LINEAR)
|
3867
|
+
val+=docs[sv2dnum[i]]->fvec->twonorm_sq;
|
3868
|
+
else
|
3869
|
+
val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
|
3870
|
+
for(j=0;j<d;j++) {
|
3871
|
+
val+=2.0*cache[i+j*svnum];
|
3872
|
+
}
|
3873
|
+
val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
|
3874
|
+
val-=((init_val_lin+trow[i])/(d+1.0));
|
3875
|
+
|
3876
|
+
if(allskip || (val < best_val[d])) {
|
3877
|
+
best_val[d]=val;
|
3878
|
+
best_ex[d]=i;
|
3879
|
+
}
|
3880
|
+
allskip=0;
|
3881
|
+
if(val < thresh) {
|
3882
|
+
i=svnum;
|
3883
|
+
/* printf("EARLY"); */
|
3884
|
+
}
|
3885
|
+
}
|
3886
|
+
}
|
3887
|
+
if(!allskip) {
|
3888
|
+
for(k=0;k<svnum;k++) {
|
3889
|
+
cache[d*svnum+k]=kernel(kernel_parm,
|
3890
|
+
docs[sv2dnum[best_ex[d]]],
|
3891
|
+
docs[sv2dnum[k]]);
|
3892
|
+
}
|
3893
|
+
}
|
3894
|
+
if((!allskip) && ((best_val[d] < best) || (d == 0))) {
|
3895
|
+
best=best_val[d];
|
3896
|
+
best_depth=d;
|
3897
|
+
}
|
3898
|
+
if(allskip || (best < thresh)) {
|
3899
|
+
d=learn_parm->xa_depth;
|
3900
|
+
}
|
3901
|
+
}
|
3902
|
+
|
3903
|
+
free(cache);
|
3904
|
+
free(trow);
|
3905
|
+
|
3906
|
+
/* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
|
3907
|
+
return(best);
|
3908
|
+
}
|
3909
|
+
|
3910
|
+
|
3911
|
+
void estimate_transduction_quality(MODEL *model, long int *label,
|
3912
|
+
long int *unlabeled,
|
3913
|
+
long int totdoc, DOC **docs, double *lin)
|
3914
|
+
/* Loo-bound based on observation that loo-errors must have an
|
3915
|
+
equal distribution in both training and test examples, given
|
3916
|
+
that the test examples are classified correctly. Compare
|
3917
|
+
chapter "Constraints on the Transductive Hyperplane" in my
|
3918
|
+
Dissertation. */
|
3919
|
+
{
|
3920
|
+
long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
|
3921
|
+
double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
|
3922
|
+
double r_delta,r_delta_sq,xi,xisum=0,asum=0;
|
3923
|
+
|
3924
|
+
r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
|
3925
|
+
r_delta_sq=r_delta*r_delta;
|
3926
|
+
|
3927
|
+
for(j=0;j<totdoc;j++) {
|
3928
|
+
if(unlabeled[j]) {
|
3929
|
+
totulab++;
|
3930
|
+
}
|
3931
|
+
else {
|
3932
|
+
totlab++;
|
3933
|
+
if(label[j] > 0)
|
3934
|
+
totlabpos++;
|
3935
|
+
else
|
3936
|
+
totlabneg++;
|
3937
|
+
}
|
3938
|
+
}
|
3939
|
+
for(j=1;j<model->sv_num;j++) {
|
3940
|
+
i=model->supvec[j]->docnum;
|
3941
|
+
xi=1.0-((lin[i]-model->b)*(double)label[i]);
|
3942
|
+
if(xi<0) xi=0;
|
3943
|
+
|
3944
|
+
xisum+=xi;
|
3945
|
+
asum+=fabs(model->alpha[j]);
|
3946
|
+
if(unlabeled[i]) {
|
3947
|
+
ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
|
3948
|
+
}
|
3949
|
+
else {
|
3950
|
+
labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
|
3951
|
+
}
|
3952
|
+
if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
|
3953
|
+
l++;
|
3954
|
+
if(unlabeled[model->supvec[j]->docnum]) {
|
3955
|
+
ulab++;
|
3956
|
+
if(model->alpha[j] > 0)
|
3957
|
+
ulabpos++;
|
3958
|
+
else
|
3959
|
+
ulabneg++;
|
3960
|
+
}
|
3961
|
+
else {
|
3962
|
+
lab++;
|
3963
|
+
if(model->alpha[j] > 0)
|
3964
|
+
labpos++;
|
3965
|
+
else
|
3966
|
+
labneg++;
|
3967
|
+
}
|
3968
|
+
}
|
3969
|
+
}
|
3970
|
+
printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
|
3971
|
+
printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
|
3972
|
+
printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
|
3973
|
+
printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
|
3974
|
+
printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
|
3975
|
+
}
|
3976
|
+
|
3977
|
+
double estimate_margin_vcdim(MODEL *model, double w, double R,
|
3978
|
+
KERNEL_PARM *kernel_parm)
|
3979
|
+
/* optional: length of model vector in feature space */
|
3980
|
+
/* optional: radius of ball containing the data */
|
3981
|
+
{
|
3982
|
+
double h;
|
3983
|
+
|
3984
|
+
/* follows chapter 5.6.4 in [Vapnik/95] */
|
3985
|
+
|
3986
|
+
if(w<0) {
|
3987
|
+
w=model_length_s(model,kernel_parm);
|
3988
|
+
}
|
3989
|
+
if(R<0) {
|
3990
|
+
R=estimate_sphere(model,kernel_parm);
|
3991
|
+
}
|
3992
|
+
h = w*w * R*R +1;
|
3993
|
+
return(h);
|
3994
|
+
}
|
3995
|
+
|
3996
|
+
double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
|
3997
|
+
/* Approximates the radius of the ball containing */
|
3998
|
+
/* the support vectors by bounding it with the */
|
3999
|
+
{ /* length of the longest support vector. This is */
|
4000
|
+
register long j; /* pretty good for text categorization, since all */
|
4001
|
+
double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
|
4002
|
+
DOC *nulldoc; /* assumes that the center of the ball is at the */
|
4003
|
+
WORD nullword; /* origin of the space. */
|
4004
|
+
|
4005
|
+
nullword.wnum=0;
|
4006
|
+
nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
|
4007
|
+
|
4008
|
+
for(j=1;j<model->sv_num;j++) {
|
4009
|
+
xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
|
4010
|
+
-2*kernel(kernel_parm,model->supvec[j],nulldoc)
|
4011
|
+
+kernel(kernel_parm,nulldoc,nulldoc));
|
4012
|
+
if(xlen>maxxlen) {
|
4013
|
+
maxxlen=xlen;
|
4014
|
+
}
|
4015
|
+
}
|
4016
|
+
|
4017
|
+
free_example(nulldoc,1);
|
4018
|
+
return(maxxlen);
|
4019
|
+
}
|
4020
|
+
|
4021
|
+
double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
|
4022
|
+
{
|
4023
|
+
long i;
|
4024
|
+
double maxxlen,xlen;
|
4025
|
+
DOC *nulldoc; /* assumes that the center of the ball is at the */
|
4026
|
+
WORD nullword; /* origin of the space. */
|
4027
|
+
|
4028
|
+
nullword.wnum=0;
|
4029
|
+
nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
|
4030
|
+
|
4031
|
+
maxxlen=0;
|
4032
|
+
for(i=0;i<totdoc;i++) {
|
4033
|
+
xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
|
4034
|
+
-2*kernel(kernel_parm,docs[i],nulldoc)
|
4035
|
+
+kernel(kernel_parm,nulldoc,nulldoc));
|
4036
|
+
if(xlen>maxxlen) {
|
4037
|
+
maxxlen=xlen;
|
4038
|
+
}
|
4039
|
+
}
|
4040
|
+
|
4041
|
+
free_example(nulldoc,1);
|
4042
|
+
return(maxxlen);
|
4043
|
+
}
|
4044
|
+
|
4045
|
+
double estimate_r_delta_average(DOC **docs, long int totdoc,
|
4046
|
+
KERNEL_PARM *kernel_parm)
|
4047
|
+
{
|
4048
|
+
long i;
|
4049
|
+
double avgxlen;
|
4050
|
+
DOC *nulldoc; /* assumes that the center of the ball is at the */
|
4051
|
+
WORD nullword; /* origin of the space. */
|
4052
|
+
|
4053
|
+
nullword.wnum=0;
|
4054
|
+
nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
|
4055
|
+
|
4056
|
+
avgxlen=0;
|
4057
|
+
for(i=0;i<totdoc;i++) {
|
4058
|
+
avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
|
4059
|
+
-2*kernel(kernel_parm,docs[i],nulldoc)
|
4060
|
+
+kernel(kernel_parm,nulldoc,nulldoc));
|
4061
|
+
}
|
4062
|
+
|
4063
|
+
free_example(nulldoc,1);
|
4064
|
+
return(avgxlen/totdoc);
|
4065
|
+
}
|
4066
|
+
|
4067
|
+
double length_of_longest_document_vector(DOC **docs, long int totdoc,
|
4068
|
+
KERNEL_PARM *kernel_parm)
|
4069
|
+
{
|
4070
|
+
long i;
|
4071
|
+
double maxxlen,xlen;
|
4072
|
+
|
4073
|
+
maxxlen=0;
|
4074
|
+
for(i=0;i<totdoc;i++) {
|
4075
|
+
xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
|
4076
|
+
if(xlen>maxxlen) {
|
4077
|
+
maxxlen=xlen;
|
4078
|
+
}
|
4079
|
+
}
|
4080
|
+
|
4081
|
+
return(maxxlen);
|
4082
|
+
}
|
4083
|
+
|
4084
|
+
/****************************** IO-handling **********************************/
|
4085
|
+
|
4086
|
+
void write_prediction(char *predfile, MODEL *model, double *lin,
|
4087
|
+
double *a, long int *unlabeled,
|
4088
|
+
long int *label, long int totdoc,
|
4089
|
+
LEARN_PARM *learn_parm)
|
4090
|
+
{
|
4091
|
+
FILE *predfl;
|
4092
|
+
long i;
|
4093
|
+
double dist,a_max;
|
4094
|
+
|
4095
|
+
if(verbosity>=1) {
|
4096
|
+
printf("Writing prediction file..."); fflush(stdout);
|
4097
|
+
}
|
4098
|
+
if ((predfl = fopen (predfile, "w")) == NULL)
|
4099
|
+
{ perror (predfile); exit (1); }
|
4100
|
+
a_max=learn_parm->epsilon_a;
|
4101
|
+
for(i=0;i<totdoc;i++) {
|
4102
|
+
if((unlabeled[i]) && (a[i]>a_max)) {
|
4103
|
+
a_max=a[i];
|
4104
|
+
}
|
4105
|
+
}
|
4106
|
+
for(i=0;i<totdoc;i++) {
|
4107
|
+
if(unlabeled[i]) {
|
4108
|
+
if((a[i]>(learn_parm->epsilon_a))) {
|
4109
|
+
dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
|
4110
|
+
}
|
4111
|
+
else {
|
4112
|
+
dist=(lin[i]-model->b);
|
4113
|
+
}
|
4114
|
+
if(dist>0) {
|
4115
|
+
fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
|
4116
|
+
}
|
4117
|
+
else {
|
4118
|
+
fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
|
4119
|
+
}
|
4120
|
+
}
|
4121
|
+
}
|
4122
|
+
fclose(predfl);
|
4123
|
+
if(verbosity>=1) {
|
4124
|
+
printf("done\n");
|
4125
|
+
}
|
4126
|
+
}
|
4127
|
+
|
4128
|
+
void write_alphas(char *alphafile, double *a,
|
4129
|
+
long int *label, long int totdoc)
|
4130
|
+
{
|
4131
|
+
FILE *alphafl;
|
4132
|
+
long i;
|
4133
|
+
|
4134
|
+
if(verbosity>=1) {
|
4135
|
+
printf("Writing alpha file..."); fflush(stdout);
|
4136
|
+
}
|
4137
|
+
if ((alphafl = fopen (alphafile, "w")) == NULL)
|
4138
|
+
{ perror (alphafile); exit (1); }
|
4139
|
+
for(i=0;i<totdoc;i++) {
|
4140
|
+
fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
|
4141
|
+
}
|
4142
|
+
fclose(alphafl);
|
4143
|
+
if(verbosity>=1) {
|
4144
|
+
printf("done\n");
|
4145
|
+
}
|
4146
|
+
}
|
4147
|
+
|