liblinear-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80)
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +46 -0
  6. data/Rakefile +1 -0
  7. data/ext/Makefile +237 -0
  8. data/ext/blas.h +25 -0
  9. data/ext/blasp.h +430 -0
  10. data/ext/daxpy.c +49 -0
  11. data/ext/ddot.c +50 -0
  12. data/ext/dnrm2.c +62 -0
  13. data/ext/dscal.c +44 -0
  14. data/ext/extconf.rb +12 -0
  15. data/ext/liblinear_wrap.cxx +4646 -0
  16. data/ext/linear.cpp +2811 -0
  17. data/ext/linear.h +74 -0
  18. data/ext/linear.rb +357 -0
  19. data/ext/tron.cpp +235 -0
  20. data/ext/tron.h +34 -0
  21. data/lib/liblinear.rb +89 -0
  22. data/lib/liblinear/error.rb +4 -0
  23. data/lib/liblinear/model.rb +66 -0
  24. data/lib/liblinear/parameter.rb +42 -0
  25. data/lib/liblinear/problem.rb +55 -0
  26. data/lib/liblinear/version.rb +3 -0
  27. data/liblinear-1.93/COPYRIGHT +31 -0
  28. data/liblinear-1.93/Makefile +37 -0
  29. data/liblinear-1.93/Makefile.win +30 -0
  30. data/liblinear-1.93/README +531 -0
  31. data/liblinear-1.93/blas/Makefile +22 -0
  32. data/liblinear-1.93/blas/blas.a +0 -0
  33. data/liblinear-1.93/blas/blas.h +25 -0
  34. data/liblinear-1.93/blas/blasp.h +430 -0
  35. data/liblinear-1.93/blas/daxpy.c +49 -0
  36. data/liblinear-1.93/blas/daxpy.o +0 -0
  37. data/liblinear-1.93/blas/ddot.c +50 -0
  38. data/liblinear-1.93/blas/ddot.o +0 -0
  39. data/liblinear-1.93/blas/dnrm2.c +62 -0
  40. data/liblinear-1.93/blas/dnrm2.o +0 -0
  41. data/liblinear-1.93/blas/dscal.c +44 -0
  42. data/liblinear-1.93/blas/dscal.o +0 -0
  43. data/liblinear-1.93/heart_scale +270 -0
  44. data/liblinear-1.93/linear.cpp +2811 -0
  45. data/liblinear-1.93/linear.def +18 -0
  46. data/liblinear-1.93/linear.h +74 -0
  47. data/liblinear-1.93/linear.o +0 -0
  48. data/liblinear-1.93/matlab/Makefile +58 -0
  49. data/liblinear-1.93/matlab/README +197 -0
  50. data/liblinear-1.93/matlab/libsvmread.c +212 -0
  51. data/liblinear-1.93/matlab/libsvmwrite.c +106 -0
  52. data/liblinear-1.93/matlab/linear_model_matlab.c +176 -0
  53. data/liblinear-1.93/matlab/linear_model_matlab.h +2 -0
  54. data/liblinear-1.93/matlab/make.m +21 -0
  55. data/liblinear-1.93/matlab/predict.c +331 -0
  56. data/liblinear-1.93/matlab/train.c +418 -0
  57. data/liblinear-1.93/predict +0 -0
  58. data/liblinear-1.93/predict.c +245 -0
  59. data/liblinear-1.93/python/Makefile +4 -0
  60. data/liblinear-1.93/python/README +343 -0
  61. data/liblinear-1.93/python/liblinear.py +277 -0
  62. data/liblinear-1.93/python/liblinearutil.py +250 -0
  63. data/liblinear-1.93/ruby/liblinear.i +41 -0
  64. data/liblinear-1.93/ruby/liblinear_wrap.cxx +4646 -0
  65. data/liblinear-1.93/ruby/linear.h +74 -0
  66. data/liblinear-1.93/ruby/linear.o +0 -0
  67. data/liblinear-1.93/train +0 -0
  68. data/liblinear-1.93/train.c +399 -0
  69. data/liblinear-1.93/tron.cpp +235 -0
  70. data/liblinear-1.93/tron.h +34 -0
  71. data/liblinear-1.93/tron.o +0 -0
  72. data/liblinear-1.93/windows/liblinear.dll +0 -0
  73. data/liblinear-1.93/windows/libsvmread.mexw64 +0 -0
  74. data/liblinear-1.93/windows/libsvmwrite.mexw64 +0 -0
  75. data/liblinear-1.93/windows/predict.exe +0 -0
  76. data/liblinear-1.93/windows/predict.mexw64 +0 -0
  77. data/liblinear-1.93/windows/train.exe +0 -0
  78. data/liblinear-1.93/windows/train.mexw64 +0 -0
  79. data/liblinear-ruby.gemspec +24 -0
  80. metadata +152 -0
@@ -0,0 +1,74 @@
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H

#ifdef __cplusplus
extern "C" {
#endif

/* One (index, value) term of a sparse feature vector.  A vector is an
 * array of these entries terminated by an entry with index == -1. */
struct feature_node
{
	int index;
	double value;
};

/* A training set: l instances over n features. */
struct problem
{
	int l, n;                 /* number of instances / number of features */
	double *y;                /* target value (class label or regression value) per instance */
	struct feature_node **x;  /* one sparse vector per instance */
	double bias;              /* < 0 if no bias term */
};

/* solver_type: classification solvers 0-7, regression solvers 11-13. */
enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */

/* Training parameters. */
struct parameter
{
	int solver_type;          /* one of the enum values above */

	/* these are for training only */
	double eps;               /* stopping criteria */
	double C;                 /* regularization cost */
	int nr_weight;            /* number of per-class weight overrides */
	int *weight_label;        /* labels the overrides apply to */
	double* weight;           /* multiplicative factors on C per label */
	double p;                 /* epsilon-insensitive width for SVR */
};

/* A trained model. */
struct model
{
	struct parameter param;
	int nr_class;             /* number of classes */
	int nr_feature;           /* number of features */
	double *w;                /* weight vector(s) */
	int *label;               /* label of each class */
	double bias;
};

struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);

double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);

void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);

const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));

#ifdef __cplusplus
}
#endif

#endif /* _LIBLINEAR_H */
Binary file
Binary file
@@ -0,0 +1,399 @@
1
+ #include <stdio.h>
2
+ #include <math.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <ctype.h>
6
+ #include <errno.h>
7
+ #include "linear.h"
8
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
9
+ #define INF HUGE_VAL
10
+
11
/* No-op print callback: installed via set_print_string_function by the
 * "-q" option so liblinear's progress output is discarded. */
void print_null(const char *s)
{
	(void) s; /* parameter intentionally unused; keeps -Wunused-parameter quiet */
}
12
+
13
/* Print the command-line usage text for every option, then terminate
 * with a nonzero status.  Called on any argument-parsing failure. */
void exit_with_help()
{
	static const char *usage =
	"Usage: train [options] training_set_file [model_file]\n"
	"options:\n"
	"-s type : set type of solver (default 1)\n"
	"  for multi-class classification\n"
	"	 0 -- L2-regularized logistic regression (primal)\n"
	"	 1 -- L2-regularized L2-loss support vector classification (dual)\n"
	"	 2 -- L2-regularized L2-loss support vector classification (primal)\n"
	"	 3 -- L2-regularized L1-loss support vector classification (dual)\n"
	"	 4 -- support vector classification by Crammer and Singer\n"
	"	 5 -- L1-regularized L2-loss support vector classification\n"
	"	 6 -- L1-regularized logistic regression\n"
	"	 7 -- L2-regularized logistic regression (dual)\n"
	"  for regression\n"
	"	11 -- L2-regularized L2-loss support vector regression (primal)\n"
	"	12 -- L2-regularized L2-loss support vector regression (dual)\n"
	"	13 -- L2-regularized L1-loss support vector regression (dual)\n"
	"-c cost : set the parameter C (default 1)\n"
	"-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
	"-e epsilon : set tolerance of termination criterion\n"
	"	-s 0 and 2\n"
	"		|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
	"		where f is the primal function and pos/neg are # of\n"
	"		positive/negative data (default 0.01)\n"
	"	-s 11\n"
	"		|f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n"
	"	-s 1, 3, 4, and 7\n"
	"		Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
	"	-s 5 and 6\n"
	"		|f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
	"		where f is the primal function (default 0.01)\n"
	"	-s 12 and 13\n"
	"		|f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
	"		where f is the dual function (default 0.1)\n"
	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
	"-v n: n-fold cross validation mode\n"
	"-q : quiet mode (no outputs)\n";

	fputs(usage, stdout);
	exit(1);
}
56
+
57
/* Report a malformed line of the training file (1-based line number)
 * on stderr and abort the program. */
void exit_input_error(int line_num)
{
	fprintf(stderr, "Wrong input format at line %d\n", line_num);
	exit(1);
}
62
+
63
/* Shared line buffer used by readline()/read_problem(); grown on demand.
 * read_problem() allocates it before the first readline() call. */
static char *line = NULL;
static int max_line_len;

/* Read one full line (including its '\n') from `input` into the global
 * `line` buffer, doubling the buffer until the whole line fits.
 * Returns `line`, or NULL at end of file.
 * Fix: the original assigned `line = realloc(line, ...)` unchecked, so an
 * allocation failure lost the old buffer and then dereferenced NULL. */
static char* readline(FILE *input)
{
	int len;

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	while(strrchr(line,'\n') == NULL)
	{
		char *bigger;
		max_line_len *= 2;
		bigger = (char *) realloc(line,max_line_len);
		if(bigger == NULL)
		{
			fprintf(stderr,"out of memory\n");
			exit(1);
		}
		line = bigger;
		len = (int) strlen(line);
		/* append the rest of the line after what we already have */
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
83
+
84
+ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
85
+ void read_problem(const char *filename);
86
+ void do_cross_validation();
87
+
88
+ struct feature_node *x_space;
89
+ struct parameter param;
90
+ struct problem prob;
91
+ struct model* model_;
92
+ int flag_cross_validation;
93
+ int nr_fold;
94
+ double bias;
95
+
96
+ int main(int argc, char **argv)
97
+ {
98
+ char input_file_name[1024];
99
+ char model_file_name[1024];
100
+ const char *error_msg;
101
+
102
+ parse_command_line(argc, argv, input_file_name, model_file_name);
103
+ read_problem(input_file_name);
104
+ error_msg = check_parameter(&prob,&param);
105
+
106
+ if(error_msg)
107
+ {
108
+ fprintf(stderr,"ERROR: %s\n",error_msg);
109
+ exit(1);
110
+ }
111
+
112
+ if(flag_cross_validation)
113
+ {
114
+ do_cross_validation();
115
+ }
116
+ else
117
+ {
118
+ model_=train(&prob, &param);
119
+ if(save_model(model_file_name, model_))
120
+ {
121
+ fprintf(stderr,"can't save model to file %s\n",model_file_name);
122
+ exit(1);
123
+ }
124
+ free_and_destroy_model(&model_);
125
+ }
126
+ destroy_param(&param);
127
+ free(prob.y);
128
+ free(prob.x);
129
+ free(x_space);
130
+ free(line);
131
+
132
+ return 0;
133
+ }
134
+
135
+ void do_cross_validation()
136
+ {
137
+ int i;
138
+ int total_correct = 0;
139
+ double total_error = 0;
140
+ double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
141
+ double *target = Malloc(double, prob.l);
142
+
143
+ cross_validation(&prob,&param,nr_fold,target);
144
+ if(param.solver_type == L2R_L2LOSS_SVR ||
145
+ param.solver_type == L2R_L1LOSS_SVR_DUAL ||
146
+ param.solver_type == L2R_L2LOSS_SVR_DUAL)
147
+ {
148
+ for(i=0;i<prob.l;i++)
149
+ {
150
+ double y = prob.y[i];
151
+ double v = target[i];
152
+ total_error += (v-y)*(v-y);
153
+ sumv += v;
154
+ sumy += y;
155
+ sumvv += v*v;
156
+ sumyy += y*y;
157
+ sumvy += v*y;
158
+ }
159
+ printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
160
+ printf("Cross Validation Squared correlation coefficient = %g\n",
161
+ ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
162
+ ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
163
+ );
164
+ }
165
+ else
166
+ {
167
+ for(i=0;i<prob.l;i++)
168
+ if(target[i] == prob.y[i])
169
+ ++total_correct;
170
+ printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
171
+ }
172
+
173
+ free(target);
174
+ }
175
+
176
+ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
177
+ {
178
+ int i;
179
+ void (*print_func)(const char*) = NULL; // default printing to stdout
180
+
181
+ // default values
182
+ param.solver_type = L2R_L2LOSS_SVC_DUAL;
183
+ param.C = 1;
184
+ param.eps = INF; // see setting below
185
+ param.p = 0.1;
186
+ param.nr_weight = 0;
187
+ param.weight_label = NULL;
188
+ param.weight = NULL;
189
+ flag_cross_validation = 0;
190
+ bias = -1;
191
+
192
+ // parse options
193
+ for(i=1;i<argc;i++)
194
+ {
195
+ if(argv[i][0] != '-') break;
196
+ if(++i>=argc)
197
+ exit_with_help();
198
+ switch(argv[i-1][1])
199
+ {
200
+ case 's':
201
+ param.solver_type = atoi(argv[i]);
202
+ break;
203
+
204
+ case 'c':
205
+ param.C = atof(argv[i]);
206
+ break;
207
+
208
+ case 'p':
209
+ param.p = atof(argv[i]);
210
+ break;
211
+
212
+ case 'e':
213
+ param.eps = atof(argv[i]);
214
+ break;
215
+
216
+ case 'B':
217
+ bias = atof(argv[i]);
218
+ break;
219
+
220
+ case 'w':
221
+ ++param.nr_weight;
222
+ param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight);
223
+ param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight);
224
+ param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
225
+ param.weight[param.nr_weight-1] = atof(argv[i]);
226
+ break;
227
+
228
+ case 'v':
229
+ flag_cross_validation = 1;
230
+ nr_fold = atoi(argv[i]);
231
+ if(nr_fold < 2)
232
+ {
233
+ fprintf(stderr,"n-fold cross validation: n must >= 2\n");
234
+ exit_with_help();
235
+ }
236
+ break;
237
+
238
+ case 'q':
239
+ print_func = &print_null;
240
+ i--;
241
+ break;
242
+
243
+ default:
244
+ fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
245
+ exit_with_help();
246
+ break;
247
+ }
248
+ }
249
+
250
+ set_print_string_function(print_func);
251
+
252
+ // determine filenames
253
+ if(i>=argc)
254
+ exit_with_help();
255
+
256
+ strcpy(input_file_name, argv[i]);
257
+
258
+ if(i<argc-1)
259
+ strcpy(model_file_name,argv[i+1]);
260
+ else
261
+ {
262
+ char *p = strrchr(argv[i],'/');
263
+ if(p==NULL)
264
+ p = argv[i];
265
+ else
266
+ ++p;
267
+ sprintf(model_file_name,"%s.model",p);
268
+ }
269
+
270
+ if(param.eps == INF)
271
+ {
272
+ switch(param.solver_type)
273
+ {
274
+ case L2R_LR:
275
+ case L2R_L2LOSS_SVC:
276
+ param.eps = 0.01;
277
+ break;
278
+ case L2R_L2LOSS_SVR:
279
+ param.eps = 0.001;
280
+ break;
281
+ case L2R_L2LOSS_SVC_DUAL:
282
+ case L2R_L1LOSS_SVC_DUAL:
283
+ case MCSVM_CS:
284
+ case L2R_LR_DUAL:
285
+ param.eps = 0.1;
286
+ break;
287
+ case L1R_L2LOSS_SVC:
288
+ case L1R_LR:
289
+ param.eps = 0.01;
290
+ break;
291
+ case L2R_L1LOSS_SVR_DUAL:
292
+ case L2R_L2LOSS_SVR_DUAL:
293
+ param.eps = 0.1;
294
+ break;
295
+ }
296
+ }
297
+ }
298
+
299
// read in a problem (in libsvm format)
//
// Two passes over `filename`:
//   pass 1 counts instances (prob.l) and feature entries (elements) so the
//   exact amount of memory can be allocated up front;
//   pass 2 parses "label idx:val idx:val ..." lines into prob.y / x_space.
// Each instance occupies a contiguous slice of x_space terminated by an
// entry with index == -1 (plus one extra slot for the bias term when
// prob.bias >= 0).  Exits via exit_input_error() on any malformed line.
void read_problem(const char *filename)
{
	int max_index, inst_max_index, i;
	long int elements, j;
	FILE *fp = fopen(filename,"r");
	char *endptr;
	char *idx, *val, *label;

	if(fp == NULL)
	{
		fprintf(stderr,"can't open input file %s\n",filename);
		exit(1);
	}

	prob.l = 0;
	elements = 0;
	max_line_len = 1024;
	line = Malloc(char,max_line_len);
	// pass 1: count lines and whitespace-separated tokens after the label
	while(readline(fp)!=NULL)
	{
		char *p = strtok(line," \t"); // label

		// features
		while(1)
		{
			p = strtok(NULL," \t");
			if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
				break;
			elements++;
		}
		elements++; // for bias term
		prob.l++;
	}
	rewind(fp);

	prob.bias=bias;

	// one x_space slot per counted token, plus one -1 sentinel per instance
	prob.y = Malloc(double,prob.l);
	prob.x = Malloc(struct feature_node *,prob.l);
	x_space = Malloc(struct feature_node,elements+prob.l);

	// pass 2: parse each line; j is the running write cursor into x_space
	max_index = 0;
	j=0;
	for(i=0;i<prob.l;i++)
	{
		inst_max_index = 0; // strtol gives 0 if wrong format
		readline(fp);
		prob.x[i] = &x_space[j];
		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(i+1);

		prob.y[i] = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(i+1);

		// parse "index:value" pairs; strtok state continues from the label
		while(1)
		{
			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;

			errno = 0;
			// indices must be strictly increasing within an instance
			x_space[j].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index)
				exit_input_error(i+1);
			else
				inst_max_index = x_space[j].index;

			errno = 0;
			x_space[j].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(i+1);

			++j;
		}

		if(inst_max_index > max_index)
			max_index = inst_max_index;

		// reserve the bias slot (its index is back-patched below once the
		// total feature count is known)
		if(prob.bias >= 0)
			x_space[j++].value = prob.bias;

		x_space[j++].index = -1; // end-of-instance sentinel
	}

	if(prob.bias >= 0)
	{
		// the bias term becomes feature number prob.n; patch the index of
		// each instance's bias slot (the entry just before its sentinel)
		prob.n=max_index+1;
		for(i=1;i<prob.l;i++)
			(prob.x[i]-2)->index = prob.n;
		x_space[j-2].index = prob.n; // last instance's bias slot
	}
	else
		prob.n=max_index;

	fclose(fp);
}