liblinear-ruby 1.0.2 → 1.0.3
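The hunks below are against the bundled LIBLINEAR C++ solver source (linear.cpp in the upstream distribution) and appear to track an upstream LIBLINEAR update: a diagonal preconditioner for the trust-region Newton solver, warm-startable regression training, a joint C/p parameter search (find_parameters), and full round-trip precision when saving models.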

@@ -20,8 +20,8 @@ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
 	dst = new T[n];
 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
 }
-#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 #define INF HUGE_VAL
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 
 static void print_string_stdout(const char *s)
 {
@@ -91,6 +91,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 private:
 	void Xv(double *v, double *Xv);
@@ -169,6 +170,27 @@ int l2r_lr_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_lr_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int l = prob->l;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<l; i++)
+	{
+		feature_node *s = x[i];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[i]*D[i];
+			s++;
+		}
+	}
+}
+
 void l2r_lr_fun::Hv(double *s, double *Hs)
 {
 	int i;
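The get_diag_preconditioner additions (this one for logistic loss; the l2r_l2_svc_fun variant further down) expose the diagonal of the Hessian H = I + X^T diag(C_i*D_i) X, which is exactly what the loop above accumulates into M. A diagonal like this is typically consumed as a Jacobi preconditioner inside conjugate gradient, dividing each residual component by M[i] so that features with very different column norms converge at similar rates. A minimal sketch of that pattern, assuming a Hessian-vector product like the Hv these classes already provide; the helper below is illustrative, not code from this diff:

    // Sketch: Jacobi-preconditioned CG for H*d = -g (hypothetical helper).
    // Hv computes the Hessian-vector product; M holds the Hessian diagonal
    // as produced by get_diag_preconditioner.
    #include <cmath>

    static void pcg_sketch(int n, const double *g, const double *M,
                           void Hv(const double *, double *),
                           double *d, double eps, int max_iter)
    {
        double *r = new double[n], *z = new double[n];
        double *p = new double[n], *Hp = new double[n];
        double rz = 0;
        for (int i = 0; i < n; i++)
        {
            d[i] = 0;
            r[i] = -g[i];       // residual with initial guess d = 0
            z[i] = r[i] / M[i]; // preconditioned residual
            p[i] = z[i];
            rz += r[i] * z[i];
        }
        for (int iter = 0; iter < max_iter; iter++)
        {
            Hv(p, Hp);
            double pHp = 0;
            for (int i = 0; i < n; i++)
                pHp += p[i] * Hp[i];
            double alpha = rz / pHp, rnorm = 0;
            for (int i = 0; i < n; i++)
            {
                d[i] += alpha * p[i];
                r[i] -= alpha * Hp[i];
                rnorm += r[i] * r[i];
            }
            if (sqrt(rnorm) < eps)
                break;
            double rz_new = 0;
            for (int i = 0; i < n; i++)
            {
                z[i] = r[i] / M[i];
                rz_new += r[i] * z[i];
            }
            double beta = rz_new / rz;
            rz = rz_new;
            for (int i = 0; i < n; i++)
                p[i] = z[i] + beta * p[i];
        }
        delete[] r; delete[] z; delete[] p; delete[] Hp;
    }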
@@ -225,6 +247,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 protected:
 	void Xv(double *v, double *Xv);
@@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<sizeI; i++)
+	{
+		int idx = I[i];
+		feature_node *s = x[idx];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[idx]*2;
+			s++;
+		}
+	}
+}
+
 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
 {
 	int i;
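In this L2-loss SVC version the accumulation runs only over the active set I (instances with nonzero loss) and weights each instance by 2*C[idx], matching the generalized Hessian of the squared hinge loss, I + 2 * X_I^T diag(C) X_I.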
@@ -1356,7 +1400,7 @@ static void solve_l1r_l2_svc(
 	double Gmax_new, Gnorm1_new;
 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
 	double d_old, d_diff;
-	double loss_old, loss_new;
+	double loss_old = 0, loss_new;
 	double appxcond, cond;
 
 	int *index = new int[w_size];
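Giving loss_old a definite initial value is presumably a warning fix: in the line search it is only assigned on some branches, so the compiler cannot prove it is initialized before use and may emit a maybe-uninitialized warning.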
@@ -2223,7 +2267,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 static double calc_start_C(const problem *prob, const parameter *param)
 {
 	int i;
-	double xTx,max_xTx;
+	double xTx, max_xTx;
 	max_xTx = 0;
 	for(i=0; i<prob->l; i++)
 	{
@@ -2244,10 +2288,157 @@ static double calc_start_C(const problem *prob, const parameter *param)
 		min_C = 1.0 / (prob->l * max_xTx);
 	else if(param->solver_type == L2R_L2LOSS_SVC)
 		min_C = 1.0 / (2 * prob->l * max_xTx);
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double sum_y, loss, y_abs;
+		double delta2 = 0.1;
+		sum_y = 0, loss = 0;
+		for(i=0; i<prob->l; i++)
+		{
+			y_abs = fabs(prob->y[i]);
+			sum_y += y_abs;
+			loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
+		}
+		if(loss > 0)
+			min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
+		else
+			min_C = INF;
+	}
 
 	return pow( 2, floor(log(min_C) / log(2.0)) );
 }
 
+static double calc_max_p(const problem *prob, const parameter *param)
+{
+	int i;
+	double max_p = 0.0;
+	for(i = 0; i < prob->l; i++)
+		max_p = max(max_p, fabs(prob->y[i]));
+
+	return max_p;
+}
+
+static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
+{
+	// variables for CV
+	int i;
+	double *target = Malloc(double, prob->l);
+
+	// variables for warm start
+	double ratio = 2;
+	double **prev_w = Malloc(double*, nr_fold);
+	for(i = 0; i < nr_fold; i++)
+		prev_w[i] = NULL;
+	int num_unchanged_w = 0;
+	void (*default_print_string) (const char *) = liblinear_print_string;
+
+	if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		*best_score = 0.0;
+	else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		*best_score = INF;
+	*best_C = start_C;
+
+	param_tmp->C = start_C;
+	while(param_tmp->C <= max_C)
+	{
+		//Output disabled for running CV at a particular C
+		set_print_string_function(&print_null);
+
+		for(i=0; i<nr_fold; i++)
+		{
+			int j;
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+
+			param_tmp->init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],param_tmp);
+
+			int total_w_size;
+			if(submodel->nr_class == 2)
+				total_w_size = subprob[i].n;
+			else
+				total_w_size = subprob[i].n * submodel->nr_class;
+
+			if(prev_w[i] == NULL)
+			{
+				prev_w[i] = Malloc(double, total_w_size);
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+			else if(num_unchanged_w >= 0)
+			{
+				double norm_w_diff = 0;
+				for(j=0; j<total_w_size; j++)
+				{
+					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+					prev_w[i][j] = submodel->w[j];
+				}
+				norm_w_diff = sqrt(norm_w_diff);
+
+				if(norm_w_diff > 1e-15)
+					num_unchanged_w = -1;
+			}
+			else
+			{
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+
+			for(j=begin; j<end; j++)
+				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+			free_and_destroy_model(&submodel);
+		}
+		set_print_string_function(default_print_string);
+
+		if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		{
+			int total_correct = 0;
+			for(i=0; i<prob->l; i++)
+				if(target[i] == prob->y[i])
+					++total_correct;
+			double current_rate = (double)total_correct/prob->l;
+			if(current_rate > *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_rate;
+			}
+
+			info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
+		}
+		else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		{
+			double total_error = 0.0;
+			for(i=0; i<prob->l; i++)
+			{
+				double y = prob->y[i];
+				double v = target[i];
+				total_error += (v-y)*(v-y);
+			}
+			double current_error = total_error/prob->l;
+			if(current_error < *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_error;
+			}
+
+			info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
+		}
+
+		num_unchanged_w++;
+		if(num_unchanged_w == 5)
+			break;
+		param_tmp->C = param_tmp->C*ratio;
+	}
+
+	if(param_tmp->C > max_C)
+		info("warning: maximum C reached.\n");
+	free(target);
+	for(i=0; i<nr_fold; i++)
+		free(prev_w[i]);
+	free(prev_w);
+}
+
 
 //
 // Interface functions
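The C search is now the static helper find_parameter_C, which takes precomputed CV folds so the new find_parameters entry point (further down) can reuse the same folds while sweeping the SVR parameter p. Two behavioral details are visible in this hunk: the warm-start early stop now fires after 5 consecutive rounds in which every fold's solution is unchanged (the code it replaces, removed below, used 3), and for L2R_L2LOSS_SVR the starting C is derived from the epsilon-insensitive loss at w = 0, degenerating to INF (no search needed) when that loss is already zero.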
@@ -2270,8 +2461,14 @@ model* train(const problem *prob, const parameter *param)
 	if(check_regression_model(model_))
 	{
 		model_->w = Malloc(double, w_size);
-		for(i=0; i<w_size; i++)
-			model_->w[i] = 0;
+
+		if(param->init_sol != NULL)
+			for(i=0;i<w_size;i++)
+				model_->w[i] = param->init_sol[i];
+		else
+			for(i=0;i<w_size;i++)
+				model_->w[i] = 0;
+
 		model_->nr_class = 2;
 		model_->label = NULL;
 		train_one(prob, param, model_->w, 0, 0);
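train previously zero-initialized regression weights unconditionally; honoring param->init_sol here is what allows the warm-started C search above to cover L2R_L2LOSS_SVR as well as the classification solvers.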
@@ -2458,25 +2655,17 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
 	free(perm);
 }
 
-void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+
+void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
 {
-	// variables for CV
+	// prepare CV folds
+
 	int i;
 	int *fold_start;
 	int l = prob->l;
 	int *perm = Malloc(int, l);
-	double *target = Malloc(double, prob->l);
 	struct problem *subprob = Malloc(problem,nr_fold);
 
-	// variables for warm start
-	double ratio = 2;
-	double **prev_w = Malloc(double*, nr_fold);
-	for(i = 0; i < nr_fold; i++)
-		prev_w[i] = NULL;
-	int num_unchanged_w = 0;
-	struct parameter param1 = *param;
-	void (*default_print_string) (const char *) = liblinear_print_string;
-
 	if (nr_fold > l)
 	{
 		nr_fold = l;
@@ -2520,93 +2709,60 @@ void find_parameter_C(const problem *prob, const parameter *param, int nr_fold,
 
 	}
 
-	*best_rate = 0;
-	if(start_C <= 0)
-		start_C = calc_start_C(prob,param);
-	param1.C = start_C;
-
-	while(param1.C <= max_C)
+	struct parameter param_tmp = *param;
+	*best_p = -1;
+	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
 	{
-		//Output disabled for running CV at a particular C
-		set_print_string_function(&print_null);
+		if(start_C <= 0)
+			start_C = calc_start_C(prob, &param_tmp);
+		double max_C = 1024;
+		start_C = min(start_C, max_C);
+		double best_C_tmp, best_score_tmp;
+
+		find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+		*best_C = best_C_tmp;
+		*best_score = best_score_tmp;
+	}
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double max_p = calc_max_p(prob, &param_tmp);
+		int num_p_steps = 20;
+		double max_C = 1048576;
+		*best_score = INF;
 
-		for(i=0; i<nr_fold; i++)
+		i = num_p_steps-1;
+		if(start_p > 0)
+			i = min((int)(start_p/(max_p/num_p_steps)), i);
+		for(; i >= 0; i--)
 		{
-			int j;
-			int begin = fold_start[i];
-			int end = fold_start[i+1];
-
-			param1.init_sol = prev_w[i];
-			struct model *submodel = train(&subprob[i],&param1);
-
-			int total_w_size;
-			if(submodel->nr_class == 2)
-				total_w_size = subprob[i].n;
-			else
-				total_w_size = subprob[i].n * submodel->nr_class;
-
-			if(prev_w[i] == NULL)
-			{
-				prev_w[i] = Malloc(double, total_w_size);
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
-			}
-			else if(num_unchanged_w >= 0)
-			{
-				double norm_w_diff = 0;
-				for(j=0; j<total_w_size; j++)
-				{
-					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
-					prev_w[i][j] = submodel->w[j];
-				}
-				norm_w_diff = sqrt(norm_w_diff);
-
-				if(norm_w_diff > 1e-15)
-					num_unchanged_w = -1;
-			}
+			param_tmp.p = i*max_p/num_p_steps;
+			double start_C_tmp;
+			if(start_C <= 0)
+				start_C_tmp = calc_start_C(prob, &param_tmp);
 			else
+				start_C_tmp = start_C;
+			start_C_tmp = min(start_C_tmp, max_C);
+			double best_C_tmp, best_score_tmp;
+
+			find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+			if(best_score_tmp < *best_score)
 			{
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
+				*best_p = param_tmp.p;
+				*best_C = best_C_tmp;
+				*best_score = best_score_tmp;
 			}
-
-			for(j=begin; j<end; j++)
-				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
-
-			free_and_destroy_model(&submodel);
 		}
-		set_print_string_function(default_print_string);
-
-		int total_correct = 0;
-		for(i=0; i<prob->l; i++)
-			if(target[i] == prob->y[i])
-				++total_correct;
-		double current_rate = (double)total_correct/prob->l;
-		if(current_rate > *best_rate)
-		{
-			*best_C = param1.C;
-			*best_rate = current_rate;
-		}
-
-		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
-		num_unchanged_w++;
-		if(num_unchanged_w == 3)
-			break;
-		param1.C = param1.C*ratio;
 	}
 
-	if(param1.C > max_C && max_C > start_C)
-		info("warning: maximum C reached.\n");
 	free(fold_start);
 	free(perm);
-	free(target);
 	for(i=0; i<nr_fold; i++)
 	{
 		free(subprob[i].x);
 		free(subprob[i].y);
-		free(prev_w[i]);
 	}
-	free(prev_w);
 	free(subprob);
 }
 
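Taken together, find_parameters replaces the old find_parameter_C in the public interface: for L2R_LR and L2R_L2LOSS_SVC it searches C only (best_p is left at -1), while for L2R_L2LOSS_SVR it sweeps p from large to small and reruns the warm-started C search at each step, keeping the (C, p) pair with the lowest cross-validation mean squared error. A hedged usage sketch, assuming prob and param were prepared in the usual way; the wrapper itself is illustrative, not part of this diff:

    #include <cstdio>
    #include "linear.h"

    // Pass start_C <= 0 and start_p <= 0 to let the library choose its own
    // starting points (calc_start_C, and the full sweep over p).
    static void tune(const struct problem *prob, const struct parameter *param)
    {
        double best_C, best_p, best_score;
        find_parameters(prob, param, 5 /* nr_fold */, -1, -1,
                        &best_C, &best_p, &best_score);
        if (param->solver_type == L2R_L2LOSS_SVR)
            printf("best C=%g p=%g (CV mean squared error %g)\n",
                   best_C, best_p, best_score);
        else
            printf("best C=%g (CV accuracy %g%%)\n",
                   best_C, 100.0 * best_score);
    }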
@@ -2748,14 +2904,14 @@ int save_model(const char *model_file_name, const struct model *model_)
 
 	fprintf(fp, "nr_feature %d\n", nr_feature);
 
-	fprintf(fp, "bias %.16g\n", model_->bias);
+	fprintf(fp, "bias %.17g\n", model_->bias);
 
 	fprintf(fp, "w\n");
 	for(i=0; i<w_size; i++)
 	{
 		int j;
 		for(j=0; j<nr_w; j++)
-			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
 		fprintf(fp, "\n");
 	}
 
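The move from %.16g to %.17g matters for round-tripping: 17 significant digits (DBL_DECIMAL_DIG in C11) are the minimum that guarantee an IEEE-754 double printed as text parses back to the identical value, so a model saved and reloaded keeps bit-identical weights. A quick standalone check, not part of this diff:

    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        double w = 0.1 + 0.2; // 0.30000000000000004..., not exactly 0.3
        char buf[64];
        snprintf(buf, sizeof buf, "%.17g", w); // with %.16g this prints "0.3"
        double back = strtod(buf, NULL);
        printf("%s round-trips: %s\n", buf, back == w ? "yes" : "no");
        return 0;
    }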
@@ -2802,6 +2958,11 @@ struct model *load_model(const char *model_file_name)
 	double bias;
 	model *model_ = Malloc(model,1);
 	parameter& param = model_->param;
+	// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
+	param.nr_weight = 0;
+	param.weight_label = NULL;
+	param.weight = NULL;
+	param.init_sol = NULL;
 
 	model_->label = NULL;
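Zeroing these training-only fields means any cleanup path that frees weight_label, weight, or init_sol is safe to run on a model obtained from load_model rather than train; previously those pointers were left uninitialized garbage from Malloc.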