liblinear-ruby 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +2 -2
- data/ext/liblinear_wrap.cxx +434 -398
- data/ext/linear.cpp +251 -90
- data/ext/linear.h +2 -2
- data/ext/tron.cpp +72 -33
- data/ext/tron.h +2 -1
- data/lib/liblinear/parameter.rb +1 -1
- data/lib/liblinear/version.rb +1 -1
- metadata +3 -4
data/ext/linear.cpp
CHANGED
@@ -20,8 +20,8 @@ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
|
|
20
20
|
dst = new T[n];
|
21
21
|
memcpy((void *)dst,(void *)src,sizeof(T)*n);
|
22
22
|
}
|
23
|
-
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
|
24
23
|
#define INF HUGE_VAL
|
24
|
+
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
|
25
25
|
|
26
26
|
static void print_string_stdout(const char *s)
|
27
27
|
{
|
@@ -91,6 +91,7 @@ public:
|
|
91
91
|
void Hv(double *s, double *Hs);
|
92
92
|
|
93
93
|
int get_nr_variable(void);
|
94
|
+
void get_diag_preconditioner(double *M);
|
94
95
|
|
95
96
|
private:
|
96
97
|
void Xv(double *v, double *Xv);
|
@@ -169,6 +170,27 @@ int l2r_lr_fun::get_nr_variable(void)
|
|
169
170
|
return prob->n;
|
170
171
|
}
|
171
172
|
|
173
|
+
void l2r_lr_fun::get_diag_preconditioner(double *M)
|
174
|
+
{
|
175
|
+
int i;
|
176
|
+
int l = prob->l;
|
177
|
+
int w_size=get_nr_variable();
|
178
|
+
feature_node **x = prob->x;
|
179
|
+
|
180
|
+
for (i=0; i<w_size; i++)
|
181
|
+
M[i] = 1;
|
182
|
+
|
183
|
+
for (i=0; i<l; i++)
|
184
|
+
{
|
185
|
+
feature_node *s = x[i];
|
186
|
+
while (s->index!=-1)
|
187
|
+
{
|
188
|
+
M[s->index-1] += s->value*s->value*C[i]*D[i];
|
189
|
+
s++;
|
190
|
+
}
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
172
194
|
void l2r_lr_fun::Hv(double *s, double *Hs)
|
173
195
|
{
|
174
196
|
int i;
|
@@ -225,6 +247,7 @@ public:
|
|
225
247
|
void Hv(double *s, double *Hs);
|
226
248
|
|
227
249
|
int get_nr_variable(void);
|
250
|
+
void get_diag_preconditioner(double *M);
|
228
251
|
|
229
252
|
protected:
|
230
253
|
void Xv(double *v, double *Xv);
|
@@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void)
|
|
304
327
|
return prob->n;
|
305
328
|
}
|
306
329
|
|
330
|
+
void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
|
331
|
+
{
|
332
|
+
int i;
|
333
|
+
int w_size=get_nr_variable();
|
334
|
+
feature_node **x = prob->x;
|
335
|
+
|
336
|
+
for (i=0; i<w_size; i++)
|
337
|
+
M[i] = 1;
|
338
|
+
|
339
|
+
for (i=0; i<sizeI; i++)
|
340
|
+
{
|
341
|
+
int idx = I[i];
|
342
|
+
feature_node *s = x[idx];
|
343
|
+
while (s->index!=-1)
|
344
|
+
{
|
345
|
+
M[s->index-1] += s->value*s->value*C[idx]*2;
|
346
|
+
s++;
|
347
|
+
}
|
348
|
+
}
|
349
|
+
}
|
350
|
+
|
307
351
|
void l2r_l2_svc_fun::Hv(double *s, double *Hs)
|
308
352
|
{
|
309
353
|
int i;
|
@@ -1356,7 +1400,7 @@ static void solve_l1r_l2_svc(
|
|
1356
1400
|
double Gmax_new, Gnorm1_new;
|
1357
1401
|
double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
|
1358
1402
|
double d_old, d_diff;
|
1359
|
-
double loss_old, loss_new;
|
1403
|
+
double loss_old = 0, loss_new;
|
1360
1404
|
double appxcond, cond;
|
1361
1405
|
|
1362
1406
|
int *index = new int[w_size];
|
@@ -2223,7 +2267,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
|
|
2223
2267
|
static double calc_start_C(const problem *prob, const parameter *param)
|
2224
2268
|
{
|
2225
2269
|
int i;
|
2226
|
-
double xTx,max_xTx;
|
2270
|
+
double xTx, max_xTx;
|
2227
2271
|
max_xTx = 0;
|
2228
2272
|
for(i=0; i<prob->l; i++)
|
2229
2273
|
{
|
@@ -2244,10 +2288,157 @@ static double calc_start_C(const problem *prob, const parameter *param)
|
|
2244
2288
|
min_C = 1.0 / (prob->l * max_xTx);
|
2245
2289
|
else if(param->solver_type == L2R_L2LOSS_SVC)
|
2246
2290
|
min_C = 1.0 / (2 * prob->l * max_xTx);
|
2291
|
+
else if(param->solver_type == L2R_L2LOSS_SVR)
|
2292
|
+
{
|
2293
|
+
double sum_y, loss, y_abs;
|
2294
|
+
double delta2 = 0.1;
|
2295
|
+
sum_y = 0, loss = 0;
|
2296
|
+
for(i=0; i<prob->l; i++)
|
2297
|
+
{
|
2298
|
+
y_abs = fabs(prob->y[i]);
|
2299
|
+
sum_y += y_abs;
|
2300
|
+
loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
|
2301
|
+
}
|
2302
|
+
if(loss > 0)
|
2303
|
+
min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
|
2304
|
+
else
|
2305
|
+
min_C = INF;
|
2306
|
+
}
|
2247
2307
|
|
2248
2308
|
return pow( 2, floor(log(min_C) / log(2.0)) );
|
2249
2309
|
}
|
2250
2310
|
|
2311
|
+
static double calc_max_p(const problem *prob, const parameter *param)
|
2312
|
+
{
|
2313
|
+
int i;
|
2314
|
+
double max_p = 0.0;
|
2315
|
+
for(i = 0; i < prob->l; i++)
|
2316
|
+
max_p = max(max_p, fabs(prob->y[i]));
|
2317
|
+
|
2318
|
+
return max_p;
|
2319
|
+
}
|
2320
|
+
|
2321
|
+
static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
|
2322
|
+
{
|
2323
|
+
// variables for CV
|
2324
|
+
int i;
|
2325
|
+
double *target = Malloc(double, prob->l);
|
2326
|
+
|
2327
|
+
// variables for warm start
|
2328
|
+
double ratio = 2;
|
2329
|
+
double **prev_w = Malloc(double*, nr_fold);
|
2330
|
+
for(i = 0; i < nr_fold; i++)
|
2331
|
+
prev_w[i] = NULL;
|
2332
|
+
int num_unchanged_w = 0;
|
2333
|
+
void (*default_print_string) (const char *) = liblinear_print_string;
|
2334
|
+
|
2335
|
+
if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
|
2336
|
+
*best_score = 0.0;
|
2337
|
+
else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
|
2338
|
+
*best_score = INF;
|
2339
|
+
*best_C = start_C;
|
2340
|
+
|
2341
|
+
param_tmp->C = start_C;
|
2342
|
+
while(param_tmp->C <= max_C)
|
2343
|
+
{
|
2344
|
+
//Output disabled for running CV at a particular C
|
2345
|
+
set_print_string_function(&print_null);
|
2346
|
+
|
2347
|
+
for(i=0; i<nr_fold; i++)
|
2348
|
+
{
|
2349
|
+
int j;
|
2350
|
+
int begin = fold_start[i];
|
2351
|
+
int end = fold_start[i+1];
|
2352
|
+
|
2353
|
+
param_tmp->init_sol = prev_w[i];
|
2354
|
+
struct model *submodel = train(&subprob[i],param_tmp);
|
2355
|
+
|
2356
|
+
int total_w_size;
|
2357
|
+
if(submodel->nr_class == 2)
|
2358
|
+
total_w_size = subprob[i].n;
|
2359
|
+
else
|
2360
|
+
total_w_size = subprob[i].n * submodel->nr_class;
|
2361
|
+
|
2362
|
+
if(prev_w[i] == NULL)
|
2363
|
+
{
|
2364
|
+
prev_w[i] = Malloc(double, total_w_size);
|
2365
|
+
for(j=0; j<total_w_size; j++)
|
2366
|
+
prev_w[i][j] = submodel->w[j];
|
2367
|
+
}
|
2368
|
+
else if(num_unchanged_w >= 0)
|
2369
|
+
{
|
2370
|
+
double norm_w_diff = 0;
|
2371
|
+
for(j=0; j<total_w_size; j++)
|
2372
|
+
{
|
2373
|
+
norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
|
2374
|
+
prev_w[i][j] = submodel->w[j];
|
2375
|
+
}
|
2376
|
+
norm_w_diff = sqrt(norm_w_diff);
|
2377
|
+
|
2378
|
+
if(norm_w_diff > 1e-15)
|
2379
|
+
num_unchanged_w = -1;
|
2380
|
+
}
|
2381
|
+
else
|
2382
|
+
{
|
2383
|
+
for(j=0; j<total_w_size; j++)
|
2384
|
+
prev_w[i][j] = submodel->w[j];
|
2385
|
+
}
|
2386
|
+
|
2387
|
+
for(j=begin; j<end; j++)
|
2388
|
+
target[perm[j]] = predict(submodel,prob->x[perm[j]]);
|
2389
|
+
|
2390
|
+
free_and_destroy_model(&submodel);
|
2391
|
+
}
|
2392
|
+
set_print_string_function(default_print_string);
|
2393
|
+
|
2394
|
+
if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
|
2395
|
+
{
|
2396
|
+
int total_correct = 0;
|
2397
|
+
for(i=0; i<prob->l; i++)
|
2398
|
+
if(target[i] == prob->y[i])
|
2399
|
+
++total_correct;
|
2400
|
+
double current_rate = (double)total_correct/prob->l;
|
2401
|
+
if(current_rate > *best_score)
|
2402
|
+
{
|
2403
|
+
*best_C = param_tmp->C;
|
2404
|
+
*best_score = current_rate;
|
2405
|
+
}
|
2406
|
+
|
2407
|
+
info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
|
2408
|
+
}
|
2409
|
+
else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
|
2410
|
+
{
|
2411
|
+
double total_error = 0.0;
|
2412
|
+
for(i=0; i<prob->l; i++)
|
2413
|
+
{
|
2414
|
+
double y = prob->y[i];
|
2415
|
+
double v = target[i];
|
2416
|
+
total_error += (v-y)*(v-y);
|
2417
|
+
}
|
2418
|
+
double current_error = total_error/prob->l;
|
2419
|
+
if(current_error < *best_score)
|
2420
|
+
{
|
2421
|
+
*best_C = param_tmp->C;
|
2422
|
+
*best_score = current_error;
|
2423
|
+
}
|
2424
|
+
|
2425
|
+
info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
|
2426
|
+
}
|
2427
|
+
|
2428
|
+
num_unchanged_w++;
|
2429
|
+
if(num_unchanged_w == 5)
|
2430
|
+
break;
|
2431
|
+
param_tmp->C = param_tmp->C*ratio;
|
2432
|
+
}
|
2433
|
+
|
2434
|
+
if(param_tmp->C > max_C)
|
2435
|
+
info("warning: maximum C reached.\n");
|
2436
|
+
free(target);
|
2437
|
+
for(i=0; i<nr_fold; i++)
|
2438
|
+
free(prev_w[i]);
|
2439
|
+
free(prev_w);
|
2440
|
+
}
|
2441
|
+
|
2251
2442
|
|
2252
2443
|
//
|
2253
2444
|
// Interface functions
|
@@ -2270,8 +2461,14 @@ model* train(const problem *prob, const parameter *param)
|
|
2270
2461
|
if(check_regression_model(model_))
|
2271
2462
|
{
|
2272
2463
|
model_->w = Malloc(double, w_size);
|
2273
|
-
|
2274
|
-
|
2464
|
+
|
2465
|
+
if(param->init_sol != NULL)
|
2466
|
+
for(i=0;i<w_size;i++)
|
2467
|
+
model_->w[i] = param->init_sol[i];
|
2468
|
+
else
|
2469
|
+
for(i=0;i<w_size;i++)
|
2470
|
+
model_->w[i] = 0;
|
2471
|
+
|
2275
2472
|
model_->nr_class = 2;
|
2276
2473
|
model_->label = NULL;
|
2277
2474
|
train_one(prob, param, model_->w, 0, 0);
|
@@ -2458,25 +2655,17 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
|
|
2458
2655
|
free(perm);
|
2459
2656
|
}
|
2460
2657
|
|
2461
|
-
|
2658
|
+
|
2659
|
+
void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
|
2462
2660
|
{
|
2463
|
-
//
|
2661
|
+
// prepare CV folds
|
2662
|
+
|
2464
2663
|
int i;
|
2465
2664
|
int *fold_start;
|
2466
2665
|
int l = prob->l;
|
2467
2666
|
int *perm = Malloc(int, l);
|
2468
|
-
double *target = Malloc(double, prob->l);
|
2469
2667
|
struct problem *subprob = Malloc(problem,nr_fold);
|
2470
2668
|
|
2471
|
-
// variables for warm start
|
2472
|
-
double ratio = 2;
|
2473
|
-
double **prev_w = Malloc(double*, nr_fold);
|
2474
|
-
for(i = 0; i < nr_fold; i++)
|
2475
|
-
prev_w[i] = NULL;
|
2476
|
-
int num_unchanged_w = 0;
|
2477
|
-
struct parameter param1 = *param;
|
2478
|
-
void (*default_print_string) (const char *) = liblinear_print_string;
|
2479
|
-
|
2480
2669
|
if (nr_fold > l)
|
2481
2670
|
{
|
2482
2671
|
nr_fold = l;
|
@@ -2520,93 +2709,60 @@ void find_parameter_C(const problem *prob, const parameter *param, int nr_fold,
|
|
2520
2709
|
|
2521
2710
|
}
|
2522
2711
|
|
2523
|
-
|
2524
|
-
|
2525
|
-
|
2526
|
-
param1.C = start_C;
|
2527
|
-
|
2528
|
-
while(param1.C <= max_C)
|
2712
|
+
struct parameter param_tmp = *param;
|
2713
|
+
*best_p = -1;
|
2714
|
+
if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
|
2529
2715
|
{
|
2530
|
-
|
2531
|
-
|
2716
|
+
if(start_C <= 0)
|
2717
|
+
start_C = calc_start_C(prob, &param_tmp);
|
2718
|
+
double max_C = 1024;
|
2719
|
+
start_C = min(start_C, max_C);
|
2720
|
+
double best_C_tmp, best_score_tmp;
|
2721
|
+
|
2722
|
+
find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
|
2723
|
+
|
2724
|
+
*best_C = best_C_tmp;
|
2725
|
+
*best_score = best_score_tmp;
|
2726
|
+
}
|
2727
|
+
else if(param->solver_type == L2R_L2LOSS_SVR)
|
2728
|
+
{
|
2729
|
+
double max_p = calc_max_p(prob, &param_tmp);
|
2730
|
+
int num_p_steps = 20;
|
2731
|
+
double max_C = 1048576;
|
2732
|
+
*best_score = INF;
|
2532
2733
|
|
2533
|
-
|
2734
|
+
i = num_p_steps-1;
|
2735
|
+
if(start_p > 0)
|
2736
|
+
i = min((int)(start_p/(max_p/num_p_steps)), i);
|
2737
|
+
for(; i >= 0; i--)
|
2534
2738
|
{
|
2535
|
-
|
2536
|
-
|
2537
|
-
|
2538
|
-
|
2539
|
-
param1.init_sol = prev_w[i];
|
2540
|
-
struct model *submodel = train(&subprob[i],&param1);
|
2541
|
-
|
2542
|
-
int total_w_size;
|
2543
|
-
if(submodel->nr_class == 2)
|
2544
|
-
total_w_size = subprob[i].n;
|
2545
|
-
else
|
2546
|
-
total_w_size = subprob[i].n * submodel->nr_class;
|
2547
|
-
|
2548
|
-
if(prev_w[i] == NULL)
|
2549
|
-
{
|
2550
|
-
prev_w[i] = Malloc(double, total_w_size);
|
2551
|
-
for(j=0; j<total_w_size; j++)
|
2552
|
-
prev_w[i][j] = submodel->w[j];
|
2553
|
-
}
|
2554
|
-
else if(num_unchanged_w >= 0)
|
2555
|
-
{
|
2556
|
-
double norm_w_diff = 0;
|
2557
|
-
for(j=0; j<total_w_size; j++)
|
2558
|
-
{
|
2559
|
-
norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
|
2560
|
-
prev_w[i][j] = submodel->w[j];
|
2561
|
-
}
|
2562
|
-
norm_w_diff = sqrt(norm_w_diff);
|
2563
|
-
|
2564
|
-
if(norm_w_diff > 1e-15)
|
2565
|
-
num_unchanged_w = -1;
|
2566
|
-
}
|
2739
|
+
param_tmp.p = i*max_p/num_p_steps;
|
2740
|
+
double start_C_tmp;
|
2741
|
+
if(start_C <= 0)
|
2742
|
+
start_C_tmp = calc_start_C(prob, &param_tmp);
|
2567
2743
|
else
|
2744
|
+
start_C_tmp = start_C;
|
2745
|
+
start_C_tmp = min(start_C_tmp, max_C);
|
2746
|
+
double best_C_tmp, best_score_tmp;
|
2747
|
+
|
2748
|
+
find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
|
2749
|
+
|
2750
|
+
if(best_score_tmp < *best_score)
|
2568
2751
|
{
|
2569
|
-
|
2570
|
-
|
2752
|
+
*best_p = param_tmp.p;
|
2753
|
+
*best_C = best_C_tmp;
|
2754
|
+
*best_score = best_score_tmp;
|
2571
2755
|
}
|
2572
|
-
|
2573
|
-
for(j=begin; j<end; j++)
|
2574
|
-
target[perm[j]] = predict(submodel,prob->x[perm[j]]);
|
2575
|
-
|
2576
|
-
free_and_destroy_model(&submodel);
|
2577
2756
|
}
|
2578
|
-
set_print_string_function(default_print_string);
|
2579
|
-
|
2580
|
-
int total_correct = 0;
|
2581
|
-
for(i=0; i<prob->l; i++)
|
2582
|
-
if(target[i] == prob->y[i])
|
2583
|
-
++total_correct;
|
2584
|
-
double current_rate = (double)total_correct/prob->l;
|
2585
|
-
if(current_rate > *best_rate)
|
2586
|
-
{
|
2587
|
-
*best_C = param1.C;
|
2588
|
-
*best_rate = current_rate;
|
2589
|
-
}
|
2590
|
-
|
2591
|
-
info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
|
2592
|
-
num_unchanged_w++;
|
2593
|
-
if(num_unchanged_w == 3)
|
2594
|
-
break;
|
2595
|
-
param1.C = param1.C*ratio;
|
2596
2757
|
}
|
2597
2758
|
|
2598
|
-
if(param1.C > max_C && max_C > start_C)
|
2599
|
-
info("warning: maximum C reached.\n");
|
2600
2759
|
free(fold_start);
|
2601
2760
|
free(perm);
|
2602
|
-
free(target);
|
2603
2761
|
for(i=0; i<nr_fold; i++)
|
2604
2762
|
{
|
2605
2763
|
free(subprob[i].x);
|
2606
2764
|
free(subprob[i].y);
|
2607
|
-
free(prev_w[i]);
|
2608
2765
|
}
|
2609
|
-
free(prev_w);
|
2610
2766
|
free(subprob);
|
2611
2767
|
}
|
2612
2768
|
|
@@ -2748,14 +2904,14 @@ int save_model(const char *model_file_name, const struct model *model_)
|
|
2748
2904
|
|
2749
2905
|
fprintf(fp, "nr_feature %d\n", nr_feature);
|
2750
2906
|
|
2751
|
-
fprintf(fp, "bias %.16g\n", model_->bias);
|
2907
|
+
fprintf(fp, "bias %.17g\n", model_->bias);
|
2752
2908
|
|
2753
2909
|
fprintf(fp, "w\n");
|
2754
2910
|
for(i=0; i<w_size; i++)
|
2755
2911
|
{
|
2756
2912
|
int j;
|
2757
2913
|
for(j=0; j<nr_w; j++)
|
2758
|
-
fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
|
2914
|
+
fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
|
2759
2915
|
fprintf(fp, "\n");
|
2760
2916
|
}
|
2761
2917
|
|
@@ -2802,6 +2958,11 @@ struct model *load_model(const char *model_file_name)
|
|
2802
2958
|
double bias;
|
2803
2959
|
model *model_ = Malloc(model,1);
|
2804
2960
|
parameter& param = model_->param;
|
2961
|
+
// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
|
2962
|
+
param.nr_weight = 0;
|
2963
|
+
param.weight_label = NULL;
|
2964
|
+
param.weight = NULL;
|
2965
|
+
param.init_sol = NULL;
|
2805
2966
|
|
2806
2967
|
model_->label = NULL;
|
2807
2968
|
|