liblinear-ruby 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,8 +20,8 @@ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
 	dst = new T[n];
 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
 }
-#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 #define INF HUGE_VAL
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 
 static void print_string_stdout(const char *s)
 {
@@ -91,6 +91,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 private:
 	void Xv(double *v, double *Xv);
@@ -169,6 +170,27 @@ int l2r_lr_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_lr_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int l = prob->l;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<l; i++)
+	{
+		feature_node *s = x[i];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[i]*D[i];
+			s++;
+		}
+	}
+}
+
 void l2r_lr_fun::Hv(double *s, double *Hs)
 {
 	int i;
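
The added method fills M with the diagonal of the generalized Hessian of the L2-regularized logistic-regression objective, M[i] = 1 + sum_j x[j][i]^2 * C[j] * D[j], where D holds each instance's second-derivative term of the logistic loss. A diagonal like this is typically consumed as a Jacobi preconditioner in the solver's conjugate-gradient inner loop; the solver side is not part of this diff, so the standalone sketch below is illustrative only, and apply_diag_preconditioner is a hypothetical name, not liblinear API:

#include <cstdio>
#include <vector>

// Jacobi (diagonal) preconditioning: solve M z = r componentwise,
// where r is the CG residual and M is the Hessian diagonal.
static void apply_diag_preconditioner(const std::vector<double> &M,
                                      const std::vector<double> &r,
                                      std::vector<double> &z)
{
	for (size_t i = 0; i < M.size(); i++)
		z[i] = r[i] / M[i]; // safe: every M[i] starts at 1
}

int main()
{
	std::vector<double> M = {1.5, 4.0, 1.0}; // preconditioner diagonal
	std::vector<double> r = {3.0, 8.0, 2.0}; // CG residual
	std::vector<double> z(3);
	apply_diag_preconditioner(M, r, z);
	printf("z = %g %g %g\n", z[0], z[1], z[2]); // prints: z = 2 2 2
	return 0;
}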
@@ -225,6 +247,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 protected:
 	void Xv(double *v, double *Xv);
@@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<sizeI; i++)
+	{
+		int idx = I[i];
+		feature_node *s = x[idx];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[idx]*2;
+			s++;
+		}
+	}
+}
+
 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
 {
 	int i;
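
The squared-hinge (L2-loss SVC) variant follows the same pattern, except that only instances in the current active set I contribute, each with weight 2*C[idx]: the squared hinge loss has second derivative 2C on margin-violating examples and 0 elsewhere, so the remaining data adds nothing to the Hessian diagonal.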
@@ -1356,7 +1400,7 @@ static void solve_l1r_l2_svc(
 	double Gmax_new, Gnorm1_new;
 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
 	double d_old, d_diff;
-	double loss_old, loss_new;
+	double loss_old = 0, loss_new;
 	double appxcond, cond;
 
 	int *index = new int[w_size];
@@ -2223,7 +2267,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 static double calc_start_C(const problem *prob, const parameter *param)
 {
 	int i;
-	double xTx,max_xTx;
+	double xTx, max_xTx;
 	max_xTx = 0;
 	for(i=0; i<prob->l; i++)
 	{
@@ -2244,10 +2288,157 @@ static double calc_start_C(const problem *prob, const parameter *param)
 		min_C = 1.0 / (prob->l * max_xTx);
 	else if(param->solver_type == L2R_L2LOSS_SVC)
 		min_C = 1.0 / (2 * prob->l * max_xTx);
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double sum_y, loss, y_abs;
+		double delta2 = 0.1;
+		sum_y = 0, loss = 0;
+		for(i=0; i<prob->l; i++)
+		{
+			y_abs = fabs(prob->y[i]);
+			sum_y += y_abs;
+			loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
+		}
+		if(loss > 0)
+			min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
+		else
+			min_C = INF;
+	}
 
 	return pow( 2, floor(log(min_C) / log(2.0)) );
 }
 
+static double calc_max_p(const problem *prob, const parameter *param)
+{
+	int i;
+	double max_p = 0.0;
+	for(i = 0; i < prob->l; i++)
+		max_p = max(max_p, fabs(prob->y[i]));
+
+	return max_p;
+}
+
+static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
+{
+	// variables for CV
+	int i;
+	double *target = Malloc(double, prob->l);
+
+	// variables for warm start
+	double ratio = 2;
+	double **prev_w = Malloc(double*, nr_fold);
+	for(i = 0; i < nr_fold; i++)
+		prev_w[i] = NULL;
+	int num_unchanged_w = 0;
+	void (*default_print_string) (const char *) = liblinear_print_string;
+
+	if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		*best_score = 0.0;
+	else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		*best_score = INF;
+	*best_C = start_C;
+
+	param_tmp->C = start_C;
+	while(param_tmp->C <= max_C)
+	{
+		//Output disabled for running CV at a particular C
+		set_print_string_function(&print_null);
+
+		for(i=0; i<nr_fold; i++)
+		{
+			int j;
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+
+			param_tmp->init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],param_tmp);
+
+			int total_w_size;
+			if(submodel->nr_class == 2)
+				total_w_size = subprob[i].n;
+			else
+				total_w_size = subprob[i].n * submodel->nr_class;
+
+			if(prev_w[i] == NULL)
+			{
+				prev_w[i] = Malloc(double, total_w_size);
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+			else if(num_unchanged_w >= 0)
+			{
+				double norm_w_diff = 0;
+				for(j=0; j<total_w_size; j++)
+				{
+					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+					prev_w[i][j] = submodel->w[j];
+				}
+				norm_w_diff = sqrt(norm_w_diff);
+
+				if(norm_w_diff > 1e-15)
+					num_unchanged_w = -1;
+			}
+			else
+			{
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+
+			for(j=begin; j<end; j++)
+				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+			free_and_destroy_model(&submodel);
+		}
+		set_print_string_function(default_print_string);
+
+		if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		{
+			int total_correct = 0;
+			for(i=0; i<prob->l; i++)
+				if(target[i] == prob->y[i])
+					++total_correct;
+			double current_rate = (double)total_correct/prob->l;
+			if(current_rate > *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_rate;
+			}
+
+			info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
+		}
+		else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		{
+			double total_error = 0.0;
+			for(i=0; i<prob->l; i++)
+			{
+				double y = prob->y[i];
+				double v = target[i];
+				total_error += (v-y)*(v-y);
+			}
+			double current_error = total_error/prob->l;
+			if(current_error < *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_error;
+			}

+
+			info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
+		}
+
+		num_unchanged_w++;
+		if(num_unchanged_w == 5)
+			break;
+		param_tmp->C = param_tmp->C*ratio;
+	}
+
+	if(param_tmp->C > max_C)
+		info("warning: maximum C reached.\n");
+	free(target);
+	for(i=0; i<nr_fold; i++)
+		free(prev_w[i]);
+	free(prev_w);
+}
+
 
 //
 // Interface functions
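
For L2-loss SVR, calc_start_C now derives the smallest C worth trying from the data itself: min_C = delta2^2 * loss / (8 * sum_y^2 * max_xTx), where loss is the epsilon-insensitive squared loss of the zero solution and sum_y = sum_i |y_i|. A standalone worked sketch of that formula (all input values below are made up for illustration):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
	double y[] = {1.0, -2.0, 3.0}; // hypothetical regression targets
	double p = 0.5;                // epsilon-insensitive tube width
	double max_xTx = 4.0;          // largest squared instance norm
	double delta2 = 0.1, sum_y = 0, loss = 0;
	for (double yi : y)
	{
		double y_abs = fabs(yi);
		sum_y += y_abs;
		loss += std::max(y_abs - p, 0.0) * std::max(y_abs - p, 0.0);
	}
	double min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
	// The search then begins at 2^floor(log2(min_C)) and doubles C each step.
	printf("min_C = %g, start_C = %g\n", min_C, pow(2, floor(log(min_C) / log(2.0))));
	return 0;
}

The new static find_parameter_C is the old warm-start CV loop factored out into a reusable helper: it doubles C, seeds each fold's training with that fold's previous solution through init_sol, and stops early once the fold solutions have been unchanged for five consecutive C values (the old public loop stopped after three).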
@@ -2270,8 +2461,14 @@ model* train(const problem *prob, const parameter *param)
 	if(check_regression_model(model_))
 	{
 		model_->w = Malloc(double, w_size);
-		for(i=0; i<w_size; i++)
-			model_->w[i] = 0;
+
+		if(param->init_sol != NULL)
+			for(i=0;i<w_size;i++)
+				model_->w[i] = param->init_sol[i];
+		else
+			for(i=0;i<w_size;i++)
+				model_->w[i] = 0;
+
 		model_->nr_class = 2;
 		model_->label = NULL;
 		train_one(prob, param, model_->w, 0, 0);
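
train() previously always zero-initialized w for regression models; it now honors param->init_sol, which is what lets the parameter search above warm-start SVR folds. A hypothetical caller sketch (compiles against liblinear's linear.h; the solver settings are illustrative, and retrain_warm is not a library function):

#include "linear.h"
#include <cstring>

// Re-train a regression problem at a new C, starting from the weights
// of a previously trained model instead of from zero.
model *retrain_warm(const problem *prob, const model *prev, double new_C)
{
	parameter param;
	memset(&param, 0, sizeof(param)); // NULLs weight arrays, zeroes nr_weight
	param.solver_type = L2R_L2LOSS_SVR;
	param.C = new_C;
	param.p = 0.1;
	param.eps = 0.001;
	param.init_sol = prev->w; // the hunk above copies this into the new model's w
	return train(prob, &param);
}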
@@ -2458,25 +2655,17 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
 	free(perm);
 }
 
-void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+
+void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
 {
-	// variables for CV
+	// prepare CV folds
+
 	int i;
 	int *fold_start;
 	int l = prob->l;
 	int *perm = Malloc(int, l);
-	double *target = Malloc(double, prob->l);
 	struct problem *subprob = Malloc(problem,nr_fold);
 
-	// variables for warm start
-	double ratio = 2;
-	double **prev_w = Malloc(double*, nr_fold);
-	for(i = 0; i < nr_fold; i++)
-		prev_w[i] = NULL;
-	int num_unchanged_w = 0;
-	struct parameter param1 = *param;
-	void (*default_print_string) (const char *) = liblinear_print_string;
-
 	if (nr_fold > l)
 	{
 		nr_fold = l;
@@ -2520,93 +2709,60 @@ void find_parameter_C(const problem *prob, const parameter *param, int nr_fold,
 
 	}
 
-	*best_rate = 0;
-	if(start_C <= 0)
-		start_C = calc_start_C(prob,param);
-	param1.C = start_C;
-
-	while(param1.C <= max_C)
+	struct parameter param_tmp = *param;
+	*best_p = -1;
+	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
 	{
-		//Output disabled for running CV at a particular C
-		set_print_string_function(&print_null);
+		if(start_C <= 0)
+			start_C = calc_start_C(prob, &param_tmp);
+		double max_C = 1024;
+		start_C = min(start_C, max_C);
+		double best_C_tmp, best_score_tmp;
+
+		find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+		*best_C = best_C_tmp;
+		*best_score = best_score_tmp;
+	}
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double max_p = calc_max_p(prob, &param_tmp);
+		int num_p_steps = 20;
+		double max_C = 1048576;
+		*best_score = INF;
 
-		for(i=0; i<nr_fold; i++)
+		i = num_p_steps-1;
+		if(start_p > 0)
+			i = min((int)(start_p/(max_p/num_p_steps)), i);
+		for(; i >= 0; i--)
 		{
-			int j;
-			int begin = fold_start[i];
-			int end = fold_start[i+1];
-
-			param1.init_sol = prev_w[i];
-			struct model *submodel = train(&subprob[i],&param1);
-
-			int total_w_size;
-			if(submodel->nr_class == 2)
-				total_w_size = subprob[i].n;
-			else
-				total_w_size = subprob[i].n * submodel->nr_class;
-
-			if(prev_w[i] == NULL)
-			{
-				prev_w[i] = Malloc(double, total_w_size);
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
-			}
-			else if(num_unchanged_w >= 0)
-			{
-				double norm_w_diff = 0;
-				for(j=0; j<total_w_size; j++)
-				{
-					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
-					prev_w[i][j] = submodel->w[j];
-				}
-				norm_w_diff = sqrt(norm_w_diff);
-
-				if(norm_w_diff > 1e-15)
-					num_unchanged_w = -1;
-			}
+			param_tmp.p = i*max_p/num_p_steps;
+			double start_C_tmp;
+			if(start_C <= 0)
+				start_C_tmp = calc_start_C(prob, &param_tmp);
 			else
+				start_C_tmp = start_C;
+			start_C_tmp = min(start_C_tmp, max_C);
+			double best_C_tmp, best_score_tmp;
+
+			find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+			if(best_score_tmp < *best_score)
 			{
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
+				*best_p = param_tmp.p;
+				*best_C = best_C_tmp;
+				*best_score = best_score_tmp;
 			}
-
-			for(j=begin; j<end; j++)
-				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
-
-			free_and_destroy_model(&submodel);
 		}
-		set_print_string_function(default_print_string);
-
-		int total_correct = 0;
-		for(i=0; i<prob->l; i++)
-			if(target[i] == prob->y[i])
-				++total_correct;
-		double current_rate = (double)total_correct/prob->l;
-		if(current_rate > *best_rate)
-		{
-			*best_C = param1.C;
-			*best_rate = current_rate;
-		}
-
-		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
-		num_unchanged_w++;
-		if(num_unchanged_w == 3)
-			break;
-		param1.C = param1.C*ratio;
 	}
 
-	if(param1.C > max_C && max_C > start_C)
-		info("warning: maximum C reached.\n");
 	free(fold_start);
 	free(perm);
-	free(target);
 	for(i=0; i<nr_fold; i++)
 	{
 		free(subprob[i].x);
 		free(subprob[i].y);
-		free(prev_w[i]);
 	}
-	free(prev_w);
 	free(subprob);
 }
 
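
The public entry point find_parameter_C is replaced by find_parameters, which searches C for classification and jointly searches (C, p) for L2-loss SVR, stepping p down from max_i |y_i| in 20 steps; best_score is CV accuracy for classification and mean squared error for SVR. Passing start_C <= 0 and start_p <= 0 lets the library pick the starting points. A hypothetical usage sketch (assumes linear.h and a populated problem; tune and nr_fold = 5 are illustrative):

#include "linear.h"

void tune(const problem *prob, parameter *param)
{
	double best_C, best_p, best_score;
	// start_C <= 0 and start_p <= 0 ask the library to choose starting values
	find_parameters(prob, param, 5 /*nr_fold*/, -1 /*start_C*/, -1 /*start_p*/,
	                &best_C, &best_p, &best_score);
	param->C = best_C;
	if (param->solver_type == L2R_L2LOSS_SVR)
		param->p = best_p; // only meaningful for SVR; otherwise best_p is -1
}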
@@ -2748,14 +2904,14 @@ int save_model(const char *model_file_name, const struct model *model_)
 
 	fprintf(fp, "nr_feature %d\n", nr_feature);
 
-	fprintf(fp, "bias %.16g\n", model_->bias);
+	fprintf(fp, "bias %.17g\n", model_->bias);
 
 	fprintf(fp, "w\n");
 	for(i=0; i<w_size; i++)
 	{
 		int j;
 		for(j=0; j<nr_w; j++)
-			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
 		fprintf(fp, "\n");
 	}
 
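
Model files now store the bias and weights with %.17g instead of %.16g: 17 significant digits (DBL_DECIMAL_DIG) are the minimum that guarantees every IEEE-754 double survives a text round trip, so a saved model reloads bit-exactly. A small standalone demonstration (not from the diff):

#include <cstdio>
#include <cstdlib>

int main()
{
	double x = 0.1 + 0.2; // 0.30000000000000004...
	char buf[64];
	snprintf(buf, sizeof buf, "%.16g", x);            // prints "0.3"
	printf("16 digits round-trips: %d\n", strtod(buf, NULL) == x); // 0 here
	snprintf(buf, sizeof buf, "%.17g", x);            // prints "0.30000000000000004"
	printf("17 digits round-trips: %d\n", strtod(buf, NULL) == x); // always 1
	return 0;
}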
@@ -2802,6 +2958,11 @@ struct model *load_model(const char *model_file_name)
 	double bias;
 	model *model_ = Malloc(model,1);
 	parameter& param = model_->param;
+	// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
+	param.nr_weight = 0;
+	param.weight_label = NULL;
+	param.weight = NULL;
+	param.init_sol = NULL;
 
 	model_->label = NULL;
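
Without this initialization, the training-only fields of a loaded model's param hold whatever bytes Malloc returned, so any cleanup path that frees them would pass indeterminate pointers to free(). A hypothetical load-then-cleanup sequence this hunk makes safe (assumes linear.h; destroy_param is the library's parameter cleanup helper, which frees the weight arrays and init_sol):

#include "linear.h"

void load_predict_cleanup(const char *path, const feature_node *x)
{
	model *m = load_model(path);
	double label = predict(m, x);
	(void)label; // use the prediction as needed
	destroy_param(&m->param);   // safe now: the pointers are NULL, not garbage
	free_and_destroy_model(&m);
}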