liblinear-ruby 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +2 -2
- data/ext/liblinear_wrap.cxx +434 -398
- data/ext/linear.cpp +251 -90
- data/ext/linear.h +2 -2
- data/ext/tron.cpp +72 -33
- data/ext/tron.h +2 -1
- data/lib/liblinear/parameter.rb +1 -1
- data/lib/liblinear/version.rb +1 -1
- metadata +3 -4
data/ext/linear.cpp
CHANGED
@@ -20,8 +20,8 @@ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
 	dst = new T[n];
 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
 }
-#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 #define INF HUGE_VAL
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 
 static void print_string_stdout(const char *s)
 {
@@ -91,6 +91,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 private:
 	void Xv(double *v, double *Xv);
@@ -169,6 +170,27 @@ int l2r_lr_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_lr_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int l = prob->l;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<l; i++)
+	{
+		feature_node *s = x[i];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[i]*D[i];
+			s++;
+		}
+	}
+}
+
 void l2r_lr_fun::Hv(double *s, double *Hs)
 {
 	int i;
@@ -225,6 +247,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 protected:
 	void Xv(double *v, double *Xv);
@@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<sizeI; i++)
+	{
+		int idx = I[i];
+		feature_node *s = x[idx];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[idx]*2;
+			s++;
+		}
+	}
+}
+
 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
 {
 	int i;
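Note: both get_diag_preconditioner implementations above fill M with the diagonal of the Hessian of the regularized objective: 1 from the L2 regularizer, plus a weighted sum of squared feature values from the loss term. The companion rewrite of data/ext/tron.cpp (+72 -33 in the file list, not shown here) uses this diagonal to precondition the conjugate-gradient inner iterations of the trust-region Newton solver. A minimal self-contained sketch of diagonally preconditioned CG follows — an assumed illustration of the technique, not the tron.cpp code:

    // Sketch only: CG with a diagonal (Jacobi) preconditioner M, the role
    // get_diag_preconditioner() fills for the bundled solver.
    // Solves H*x = b for a symmetric positive definite H.
    #include <cmath>
    #include <cstdio>
    #include <vector>

    static void pcg(const std::vector<std::vector<double>>& H,
                    const std::vector<double>& b,
                    const std::vector<double>& M,   // approximation of diag(H)
                    std::vector<double>& x)
    {
        size_t n = b.size(), i, j;
        std::vector<double> r(b), z(n), p(n), Hp(n);
        for (i = 0; i < n; i++) z[i] = r[i] / M[i];   // z = M^{-1} r
        p = z;
        double rz = 0;
        for (i = 0; i < n; i++) rz += r[i] * z[i];
        for (int iter = 0; iter < 1000; iter++) {
            for (i = 0; i < n; i++) {                 // Hp = H * p
                Hp[i] = 0;
                for (j = 0; j < n; j++) Hp[i] += H[i][j] * p[j];
            }
            double pHp = 0, rnorm = 0;
            for (i = 0; i < n; i++) pHp += p[i] * Hp[i];
            double alpha = rz / pHp;
            for (i = 0; i < n; i++) {
                x[i] += alpha * p[i];
                r[i] -= alpha * Hp[i];
                rnorm += r[i] * r[i];
            }
            if (std::sqrt(rnorm) < 1e-12) break;
            for (i = 0; i < n; i++) z[i] = r[i] / M[i];
            double rz_new = 0;
            for (i = 0; i < n; i++) rz_new += r[i] * z[i];
            double beta = rz_new / rz;                // beta from r'z, not r'r
            rz = rz_new;
            for (i = 0; i < n; i++) p[i] = z[i] + beta * p[i];
        }
    }

    int main()
    {
        std::vector<std::vector<double>> H = {{4, 1}, {1, 3}};
        std::vector<double> b = {1, 2}, M = {4, 3}, x(2, 0.0);
        pcg(H, b, M, x);
        std::printf("x = (%g, %g)\n", x[0], x[1]);    // expect (1/11, 7/11)
        return 0;
    }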
@@ -1356,7 +1400,7 @@ static void solve_l1r_l2_svc(
 	double Gmax_new, Gnorm1_new;
 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
 	double d_old, d_diff;
-	double loss_old, loss_new;
+	double loss_old = 0, loss_new;
 	double appxcond, cond;
 
 	int *index = new int[w_size];
@@ -2223,7 +2267,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 static double calc_start_C(const problem *prob, const parameter *param)
 {
 	int i;
-	double xTx,max_xTx;
+	double xTx, max_xTx;
 	max_xTx = 0;
 	for(i=0; i<prob->l; i++)
 	{
@@ -2244,10 +2288,157 @@ static double calc_start_C(const problem *prob, const parameter *param)
 		min_C = 1.0 / (prob->l * max_xTx);
 	else if(param->solver_type == L2R_L2LOSS_SVC)
 		min_C = 1.0 / (2 * prob->l * max_xTx);
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double sum_y, loss, y_abs;
+		double delta2 = 0.1;
+		sum_y = 0, loss = 0;
+		for(i=0; i<prob->l; i++)
+		{
+			y_abs = fabs(prob->y[i]);
+			sum_y += y_abs;
+			loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
+		}
+		if(loss > 0)
+			min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
+		else
+			min_C = INF;
+	}
 
 	return pow( 2, floor(log(min_C) / log(2.0)) );
 }
 
+static double calc_max_p(const problem *prob, const parameter *param)
+{
+	int i;
+	double max_p = 0.0;
+	for(i = 0; i < prob->l; i++)
+		max_p = max(max_p, fabs(prob->y[i]));
+
+	return max_p;
+}
+
+static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
+{
+	// variables for CV
+	int i;
+	double *target = Malloc(double, prob->l);
+
+	// variables for warm start
+	double ratio = 2;
+	double **prev_w = Malloc(double*, nr_fold);
+	for(i = 0; i < nr_fold; i++)
+		prev_w[i] = NULL;
+	int num_unchanged_w = 0;
+	void (*default_print_string) (const char *) = liblinear_print_string;
+
+	if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		*best_score = 0.0;
+	else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		*best_score = INF;
+	*best_C = start_C;
+
+	param_tmp->C = start_C;
+	while(param_tmp->C <= max_C)
+	{
+		//Output disabled for running CV at a particular C
+		set_print_string_function(&print_null);
+
+		for(i=0; i<nr_fold; i++)
+		{
+			int j;
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+
+			param_tmp->init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],param_tmp);
+
+			int total_w_size;
+			if(submodel->nr_class == 2)
+				total_w_size = subprob[i].n;
+			else
+				total_w_size = subprob[i].n * submodel->nr_class;
+
+			if(prev_w[i] == NULL)
+			{
+				prev_w[i] = Malloc(double, total_w_size);
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+			else if(num_unchanged_w >= 0)
+			{
+				double norm_w_diff = 0;
+				for(j=0; j<total_w_size; j++)
+				{
+					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+					prev_w[i][j] = submodel->w[j];
+				}
+				norm_w_diff = sqrt(norm_w_diff);
+
+				if(norm_w_diff > 1e-15)
+					num_unchanged_w = -1;
+			}
+			else
+			{
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+
+			for(j=begin; j<end; j++)
+				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+			free_and_destroy_model(&submodel);
+		}
+		set_print_string_function(default_print_string);
+
+		if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		{
+			int total_correct = 0;
+			for(i=0; i<prob->l; i++)
+				if(target[i] == prob->y[i])
+					++total_correct;
+			double current_rate = (double)total_correct/prob->l;
+			if(current_rate > *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_rate;
+			}
+
+			info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
+		}
+		else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		{
+			double total_error = 0.0;
+			for(i=0; i<prob->l; i++)
+			{
+				double y = prob->y[i];
+				double v = target[i];
+				total_error += (v-y)*(v-y);
+			}
+			double current_error = total_error/prob->l;
+			if(current_error < *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_error;
+			}
+
+			info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
+		}
+
+		num_unchanged_w++;
+		if(num_unchanged_w == 5)
+			break;
+		param_tmp->C = param_tmp->C*ratio;
+	}
+
+	if(param_tmp->C > max_C)
+		info("warning: maximum C reached.\n");
+	free(target);
+	for(i=0; i<nr_fold; i++)
+		free(prev_w[i]);
+	free(prev_w);
+}
+
 
 //
 // Interface functions
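The new static helper find_parameter_C no longer builds its own cross-validation folds; it receives fold_start, perm, and subprob from its caller (the find_parameters rewrite further below), so one fold split can be reused across repeated C sweeps. For orientation, a sketch of the caller-side preparation it assumes, following the same pattern as cross_validation() in this file (a fragment; uses the file's Malloc and swap helpers, error handling omitted):

    int i, l = prob->l;
    int *perm = Malloc(int, l);
    int *fold_start = Malloc(int, nr_fold+1);
    for(i=0; i<l; i++)
        perm[i] = i;
    for(i=0; i<l; i++)
    {
        int j = i+rand()%(l-i);      /* random shuffle of instance indices */
        swap(perm[i], perm[j]);
    }
    for(i=0; i<=nr_fold; i++)
        fold_start[i] = i*l/nr_fold; /* fold i is perm[fold_start[i]..fold_start[i+1]) */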
@@ -2270,8 +2461,14 @@ model* train(const problem *prob, const parameter *param)
 	if(check_regression_model(model_))
 	{
 		model_->w = Malloc(double, w_size);
-		for(i=0; i<w_size; i++)
-			model_->w[i] = 0;
+
+		if(param->init_sol != NULL)
+			for(i=0;i<w_size;i++)
+				model_->w[i] = param->init_sol[i];
+		else
+			for(i=0;i<w_size;i++)
+				model_->w[i] = 0;
+
 		model_->nr_class = 2;
 		model_->label = NULL;
 		train_one(prob, param, model_->w, 0, 0);
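With this hunk, train() also honors param->init_sol for regression models, initializing the weight vector from a caller-supplied solution instead of zeros; the warm-started parameter search above depends on it. A hypothetical caller-side sketch (the field comes from this diff, the workflow is assumed):

    /* Warm start: reuse weights trained at a smaller C as the initial
       solution for a larger C. init_sol must hold w_size values
       (n for two-class/regression models, n*nr_class otherwise). */
    struct parameter param2 = param;
    param2.C = param.C * 2;
    param2.init_sol = prev_model->w;
    struct model *warm = train(&prob, &param2);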
@@ -2458,25 +2655,17 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
 	free(perm);
 }
 
-void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+
+void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
 {
-	//
+	// prepare CV folds
+
 	int i;
 	int *fold_start;
 	int l = prob->l;
 	int *perm = Malloc(int, l);
-	double *target = Malloc(double, prob->l);
 	struct problem *subprob = Malloc(problem,nr_fold);
 
-	// variables for warm start
-	double ratio = 2;
-	double **prev_w = Malloc(double*, nr_fold);
-	for(i = 0; i < nr_fold; i++)
-		prev_w[i] = NULL;
-	int num_unchanged_w = 0;
-	struct parameter param1 = *param;
-	void (*default_print_string) (const char *) = liblinear_print_string;
-
 	if (nr_fold > l)
 	{
 		nr_fold = l;
@@ -2520,93 +2709,60 @@ void find_parameter_C(const problem *prob, const parameter *param, int nr_fold,
 
 	}
 
-	*best_rate = 0;
-	if(start_C <= 0)
-		start_C = calc_start_C(prob,param);
-	param1.C = start_C;
-
-	while(param1.C <= max_C)
+	struct parameter param_tmp = *param;
+	*best_p = -1;
+	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
 	{
-		//Output disabled for running CV at a particular C
-		set_print_string_function(&print_null);
+		if(start_C <= 0)
+			start_C = calc_start_C(prob, &param_tmp);
+		double max_C = 1024;
+		start_C = min(start_C, max_C);
+		double best_C_tmp, best_score_tmp;
+
+		find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+		*best_C = best_C_tmp;
+		*best_score = best_score_tmp;
+	}
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double max_p = calc_max_p(prob, &param_tmp);
+		int num_p_steps = 20;
+		double max_C = 1048576;
+		*best_score = INF;
 
-		for(i=0; i<nr_fold; i++)
+		i = num_p_steps-1;
+		if(start_p > 0)
+			i = min((int)(start_p/(max_p/num_p_steps)), i);
+		for(; i >= 0; i--)
 		{
-			int j;
-			int begin = fold_start[i];
-			int end = fold_start[i+1];
-
-			param1.init_sol = prev_w[i];
-			struct model *submodel = train(&subprob[i],&param1);
-
-			int total_w_size;
-			if(submodel->nr_class == 2)
-				total_w_size = subprob[i].n;
-			else
-				total_w_size = subprob[i].n * submodel->nr_class;
-
-			if(prev_w[i] == NULL)
-			{
-				prev_w[i] = Malloc(double, total_w_size);
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
-			}
-			else if(num_unchanged_w >= 0)
-			{
-				double norm_w_diff = 0;
-				for(j=0; j<total_w_size; j++)
-				{
-					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
-					prev_w[i][j] = submodel->w[j];
-				}
-				norm_w_diff = sqrt(norm_w_diff);
-
-				if(norm_w_diff > 1e-15)
-					num_unchanged_w = -1;
-			}
+			param_tmp.p = i*max_p/num_p_steps;
+			double start_C_tmp;
+			if(start_C <= 0)
+				start_C_tmp = calc_start_C(prob, &param_tmp);
 			else
+				start_C_tmp = start_C;
+			start_C_tmp = min(start_C_tmp, max_C);
+			double best_C_tmp, best_score_tmp;
+
+			find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+			if(best_score_tmp < *best_score)
 			{
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
+				*best_p = param_tmp.p;
+				*best_C = best_C_tmp;
+				*best_score = best_score_tmp;
 			}
-
-			for(j=begin; j<end; j++)
-				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
-
-			free_and_destroy_model(&submodel);
 		}
-		set_print_string_function(default_print_string);
-
-		int total_correct = 0;
-		for(i=0; i<prob->l; i++)
-			if(target[i] == prob->y[i])
-				++total_correct;
-		double current_rate = (double)total_correct/prob->l;
-		if(current_rate > *best_rate)
-		{
-			*best_C = param1.C;
-			*best_rate = current_rate;
-		}
-
-		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
-		num_unchanged_w++;
-		if(num_unchanged_w == 3)
-			break;
-		param1.C = param1.C*ratio;
 	}
 
-	if(param1.C > max_C && max_C > start_C)
-		info("warning: maximum C reached.\n");
 	free(fold_start);
 	free(perm);
-	free(target);
 	for(i=0; i<nr_fold; i++)
 	{
 		free(subprob[i].x);
 		free(subprob[i].y);
-		free(prev_w[i]);
 	}
-	free(prev_w);
 	free(subprob);
 }
 
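Taken together, the hunks above replace the exported find_parameter_C with find_parameters, which tunes C alone for L2R_LR and L2R_L2LOSS_SVC and tunes C and p jointly for L2R_L2LOSS_SVR. A hedged usage sketch against the new signature (the prob and param setup is assumed, not shown in this diff):

    double best_C, best_p, best_score;
    /* start_C <= 0 and start_p <= 0 let the library pick the sweep itself:
       C starts from calc_start_C() and doubles up to max_C, while p steps
       down from calc_max_p() in num_p_steps increments. */
    find_parameters(&prob, &param, 5, -1, -1, &best_C, &best_p, &best_score);
    param.C = best_C;
    if(param.solver_type == L2R_L2LOSS_SVR)
        param.p = best_p;   /* best_p stays -1 for the classification solvers */
    struct model *tuned = train(&prob, &param);

For classification, best_score is the cross-validation accuracy; for SVR it is the mean squared error, matching the info() lines printed during the search.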
@@ -2748,14 +2904,14 @@ int save_model(const char *model_file_name, const struct model *model_)
 
 	fprintf(fp, "nr_feature %d\n", nr_feature);
 
-	fprintf(fp, "bias %.16g\n", model_->bias);
+	fprintf(fp, "bias %.17g\n", model_->bias);
 
 	fprintf(fp, "w\n");
 	for(i=0; i<w_size; i++)
 	{
 		int j;
 		for(j=0; j<nr_w; j++)
-			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
 		fprintf(fp, "\n");
 	}
 
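The model-file change: bias and weights are now written with %.17g instead of %.16g. Seventeen significant digits are the minimum that round-trips every IEEE-754 double, so a model saved and reloaded reproduces its weights exactly. A standalone illustration (not part of the package):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        double w = 0.1 + 0.2;      /* has no exact short decimal form */
        char b16[64], b17[64];
        sprintf(b16, "%.16g", w); /* "0.3": parses back to a different double */
        sprintf(b17, "%.17g", w); /* "0.30000000000000004": exact round-trip */
        printf("%%.16g -> %s, round-trips: %s\n", b16, strtod(b16, NULL) == w ? "yes" : "no");
        printf("%%.17g -> %s, round-trips: %s\n", b17, strtod(b17, NULL) == w ? "yes" : "no");
        return 0;
    }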
@@ -2802,6 +2958,11 @@ struct model *load_model(const char *model_file_name)
 	double bias;
 	model *model_ = Malloc(model,1);
 	parameter& param = model_->param;
+	// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
+	param.nr_weight = 0;
+	param.weight_label = NULL;
+	param.weight = NULL;
+	param.init_sol = NULL;
 
 	model_->label = NULL;
 
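The NULL assignments matter because a model obtained from load_model() shares a cleanup path with a trained one: destroy_param() frees weight_label, weight, and init_sol, and freeing NULL is harmless, while the uninitialized pointers these fields previously held could crash it. A sketch of the call sequence this protects (usage assumed, not part of this diff):

    struct model *m = load_model("linear.model");
    if(m != NULL)
    {
        /* ... predict(m, x) ... */
        destroy_param(&m->param);   /* safe now: fields are NULL, not garbage */
        free_and_destroy_model(&m);
    }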