liblinear-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +46 -0
  6. data/Rakefile +1 -0
  7. data/ext/Makefile +237 -0
  8. data/ext/blas.h +25 -0
  9. data/ext/blasp.h +430 -0
  10. data/ext/daxpy.c +49 -0
  11. data/ext/ddot.c +50 -0
  12. data/ext/dnrm2.c +62 -0
  13. data/ext/dscal.c +44 -0
  14. data/ext/extconf.rb +12 -0
  15. data/ext/liblinear_wrap.cxx +4646 -0
  16. data/ext/linear.cpp +2811 -0
  17. data/ext/linear.h +74 -0
  18. data/ext/linear.rb +357 -0
  19. data/ext/tron.cpp +235 -0
  20. data/ext/tron.h +34 -0
  21. data/lib/liblinear.rb +89 -0
  22. data/lib/liblinear/error.rb +4 -0
  23. data/lib/liblinear/model.rb +66 -0
  24. data/lib/liblinear/parameter.rb +42 -0
  25. data/lib/liblinear/problem.rb +55 -0
  26. data/lib/liblinear/version.rb +3 -0
  27. data/liblinear-1.93/COPYRIGHT +31 -0
  28. data/liblinear-1.93/Makefile +37 -0
  29. data/liblinear-1.93/Makefile.win +30 -0
  30. data/liblinear-1.93/README +531 -0
  31. data/liblinear-1.93/blas/Makefile +22 -0
  32. data/liblinear-1.93/blas/blas.a +0 -0
  33. data/liblinear-1.93/blas/blas.h +25 -0
  34. data/liblinear-1.93/blas/blasp.h +430 -0
  35. data/liblinear-1.93/blas/daxpy.c +49 -0
  36. data/liblinear-1.93/blas/daxpy.o +0 -0
  37. data/liblinear-1.93/blas/ddot.c +50 -0
  38. data/liblinear-1.93/blas/ddot.o +0 -0
  39. data/liblinear-1.93/blas/dnrm2.c +62 -0
  40. data/liblinear-1.93/blas/dnrm2.o +0 -0
  41. data/liblinear-1.93/blas/dscal.c +44 -0
  42. data/liblinear-1.93/blas/dscal.o +0 -0
  43. data/liblinear-1.93/heart_scale +270 -0
  44. data/liblinear-1.93/linear.cpp +2811 -0
  45. data/liblinear-1.93/linear.def +18 -0
  46. data/liblinear-1.93/linear.h +74 -0
  47. data/liblinear-1.93/linear.o +0 -0
  48. data/liblinear-1.93/matlab/Makefile +58 -0
  49. data/liblinear-1.93/matlab/README +197 -0
  50. data/liblinear-1.93/matlab/libsvmread.c +212 -0
  51. data/liblinear-1.93/matlab/libsvmwrite.c +106 -0
  52. data/liblinear-1.93/matlab/linear_model_matlab.c +176 -0
  53. data/liblinear-1.93/matlab/linear_model_matlab.h +2 -0
  54. data/liblinear-1.93/matlab/make.m +21 -0
  55. data/liblinear-1.93/matlab/predict.c +331 -0
  56. data/liblinear-1.93/matlab/train.c +418 -0
  57. data/liblinear-1.93/predict +0 -0
  58. data/liblinear-1.93/predict.c +245 -0
  59. data/liblinear-1.93/python/Makefile +4 -0
  60. data/liblinear-1.93/python/README +343 -0
  61. data/liblinear-1.93/python/liblinear.py +277 -0
  62. data/liblinear-1.93/python/liblinearutil.py +250 -0
  63. data/liblinear-1.93/ruby/liblinear.i +41 -0
  64. data/liblinear-1.93/ruby/liblinear_wrap.cxx +4646 -0
  65. data/liblinear-1.93/ruby/linear.h +74 -0
  66. data/liblinear-1.93/ruby/linear.o +0 -0
  67. data/liblinear-1.93/train +0 -0
  68. data/liblinear-1.93/train.c +399 -0
  69. data/liblinear-1.93/tron.cpp +235 -0
  70. data/liblinear-1.93/tron.h +34 -0
  71. data/liblinear-1.93/tron.o +0 -0
  72. data/liblinear-1.93/windows/liblinear.dll +0 -0
  73. data/liblinear-1.93/windows/libsvmread.mexw64 +0 -0
  74. data/liblinear-1.93/windows/libsvmwrite.mexw64 +0 -0
  75. data/liblinear-1.93/windows/predict.exe +0 -0
  76. data/liblinear-1.93/windows/predict.mexw64 +0 -0
  77. data/liblinear-1.93/windows/train.exe +0 -0
  78. data/liblinear-1.93/windows/train.mexw64 +0 -0
  79. data/liblinear-ruby.gemspec +24 -0
  80. metadata +152 -0
linear.h
@@ -0,0 +1,74 @@
+ #ifndef _LIBLINEAR_H
+ #define _LIBLINEAR_H
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ struct feature_node
+ {
+     int index;
+     double value;
+ };
+
+ struct problem
+ {
+     int l, n;
+     double *y;
+     struct feature_node **x;
+     double bias;            /* < 0 if no bias term */
+ };
+
+ enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */
+
+ struct parameter
+ {
+     int solver_type;
+
+     /* these are for training only */
+     double eps;             /* stopping criteria */
+     double C;
+     int nr_weight;
+     int *weight_label;
+     double* weight;
+     double p;
+ };
+
+ struct model
+ {
+     struct parameter param;
+     int nr_class;           /* number of classes */
+     int nr_feature;
+     double *w;
+     int *label;             /* label of each class */
+     double bias;
+ };
+
+ struct model* train(const struct problem *prob, const struct parameter *param);
+ void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+
+ double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
+ double predict(const struct model *model_, const struct feature_node *x);
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
+
+ int save_model(const char *model_file_name, const struct model *model_);
+ struct model *load_model(const char *model_file_name);
+
+ int get_nr_feature(const struct model *model_);
+ int get_nr_class(const struct model *model_);
+ void get_labels(const struct model *model_, int* label);
+
+ void free_model_content(struct model *model_ptr);
+ void free_and_destroy_model(struct model **model_ptr_ptr);
+ void destroy_param(struct parameter *param);
+
+ const char *check_parameter(const struct problem *prob, const struct parameter *param);
+ int check_probability_model(const struct model *model);
+ void set_print_string_function(void (*print_func) (const char*));
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif /* _LIBLINEAR_H */
+
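The header above is the complete public C API that the gem wraps. The example that follows is not part of the package; it is a minimal sketch, assuming it is compiled and linked together with the bundled sources (linear.cpp, tron.cpp, and the blas/ files), of how a problem is built from feature_node arrays, trained, and queried through that API. The file name example.c in the comment is hypothetical.

/*
 * Example (not part of the gem): minimal use of the API declared in linear.h.
 * Assumes compilation alongside the bundled sources, e.g.
 *   g++ -o example example.c linear.cpp tron.cpp blas/daxpy.c blas/ddot.c blas/dnrm2.c blas/dscal.c
 */
#include <stdio.h>
#include "linear.h"

int main(void)
{
    /* Two training instances with two features each; every sparse row is
       terminated by a feature_node whose index is -1, as in read_problem(). */
    struct feature_node x0[] = {{1, 1.0}, {2, 0.0}, {-1, 0.0}};
    struct feature_node x1[] = {{1, 0.0}, {2, 1.0}, {-1, 0.0}};
    struct feature_node *x[] = {x0, x1};
    double y[] = {+1, -1};

    struct problem prob = {2, 2, y, x, -1};      /* l, n, y, x, bias < 0 = no bias term */

    struct parameter param = {0};
    param.solver_type = L2R_L2LOSS_SVC_DUAL;     /* the train.c default, -s 1 */
    param.C = 1;
    param.eps = 0.1;

    const char *err = check_parameter(&prob, &param);
    if (err) {
        fprintf(stderr, "ERROR: %s\n", err);
        return 1;
    }

    struct model *m = train(&prob, &param);

    /* classify one new sparse instance */
    struct feature_node query[] = {{1, 0.9}, {2, 0.1}, {-1, 0.0}};
    printf("predicted label: %g\n", predict(m, query));

    free_and_destroy_model(&m);
    destroy_param(&param);
    return 0;
}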
data/liblinear-1.93/train.c
@@ -0,0 +1,399 @@
+ #include <stdio.h>
+ #include <math.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <ctype.h>
+ #include <errno.h>
+ #include "linear.h"
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ #define INF HUGE_VAL
+
+ void print_null(const char *s) {}
+
+ void exit_with_help()
+ {
+     printf(
+     "Usage: train [options] training_set_file [model_file]\n"
+     "options:\n"
+     "-s type : set type of solver (default 1)\n"
+     " for multi-class classification\n"
+     " 0 -- L2-regularized logistic regression (primal)\n"
+     " 1 -- L2-regularized L2-loss support vector classification (dual)\n"
+     " 2 -- L2-regularized L2-loss support vector classification (primal)\n"
+     " 3 -- L2-regularized L1-loss support vector classification (dual)\n"
+     " 4 -- support vector classification by Crammer and Singer\n"
+     " 5 -- L1-regularized L2-loss support vector classification\n"
+     " 6 -- L1-regularized logistic regression\n"
+     " 7 -- L2-regularized logistic regression (dual)\n"
+     " for regression\n"
+     " 11 -- L2-regularized L2-loss support vector regression (primal)\n"
+     " 12 -- L2-regularized L2-loss support vector regression (dual)\n"
+     " 13 -- L2-regularized L1-loss support vector regression (dual)\n"
+     "-c cost : set the parameter C (default 1)\n"
+     "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
+     "-e epsilon : set tolerance of termination criterion\n"
+     " -s 0 and 2\n"
+     " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
+     " where f is the primal function and pos/neg are # of\n"
+     " positive/negative data (default 0.01)\n"
+     " -s 11\n"
+     " |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n"
+     " -s 1, 3, 4, and 7\n"
+     " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
+     " -s 5 and 6\n"
+     " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
+     " where f is the primal function (default 0.01)\n"
+     " -s 12 and 13\n"
+     " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
+     " where f is the dual function (default 0.1)\n"
+     "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
+     "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
+     "-v n: n-fold cross validation mode\n"
+     "-q : quiet mode (no outputs)\n"
+     );
+     exit(1);
+ }
+
+ void exit_input_error(int line_num)
+ {
+     fprintf(stderr,"Wrong input format at line %d\n", line_num);
+     exit(1);
+ }
+
+ static char *line = NULL;
+ static int max_line_len;
+
+ static char* readline(FILE *input)
+ {
+     int len;
+
+     if(fgets(line,max_line_len,input) == NULL)
+         return NULL;
+
+     while(strrchr(line,'\n') == NULL)
+     {
+         max_line_len *= 2;
+         line = (char *) realloc(line,max_line_len);
+         len = (int) strlen(line);
+         if(fgets(line+len,max_line_len-len,input) == NULL)
+             break;
+     }
+     return line;
+ }
+
+ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
+ void read_problem(const char *filename);
+ void do_cross_validation();
+
+ struct feature_node *x_space;
+ struct parameter param;
+ struct problem prob;
+ struct model* model_;
+ int flag_cross_validation;
+ int nr_fold;
+ double bias;
+
+ int main(int argc, char **argv)
+ {
+     char input_file_name[1024];
+     char model_file_name[1024];
+     const char *error_msg;
+
+     parse_command_line(argc, argv, input_file_name, model_file_name);
+     read_problem(input_file_name);
+     error_msg = check_parameter(&prob,&param);
+
+     if(error_msg)
+     {
+         fprintf(stderr,"ERROR: %s\n",error_msg);
+         exit(1);
+     }
+
+     if(flag_cross_validation)
+     {
+         do_cross_validation();
+     }
+     else
+     {
+         model_=train(&prob, &param);
+         if(save_model(model_file_name, model_))
+         {
+             fprintf(stderr,"can't save model to file %s\n",model_file_name);
+             exit(1);
+         }
+         free_and_destroy_model(&model_);
+     }
+     destroy_param(&param);
+     free(prob.y);
+     free(prob.x);
+     free(x_space);
+     free(line);
+
+     return 0;
+ }
+
+ void do_cross_validation()
+ {
+     int i;
+     int total_correct = 0;
+     double total_error = 0;
+     double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
+     double *target = Malloc(double, prob.l);
+
+     cross_validation(&prob,&param,nr_fold,target);
+     if(param.solver_type == L2R_L2LOSS_SVR ||
+        param.solver_type == L2R_L1LOSS_SVR_DUAL ||
+        param.solver_type == L2R_L2LOSS_SVR_DUAL)
+     {
+         for(i=0;i<prob.l;i++)
+         {
+             double y = prob.y[i];
+             double v = target[i];
+             total_error += (v-y)*(v-y);
+             sumv += v;
+             sumy += y;
+             sumvv += v*v;
+             sumyy += y*y;
+             sumvy += v*y;
+         }
+         printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+         printf("Cross Validation Squared correlation coefficient = %g\n",
+             ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
+             ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
+             );
+     }
+     else
+     {
+         for(i=0;i<prob.l;i++)
+             if(target[i] == prob.y[i])
+                 ++total_correct;
+         printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+     }
+
+     free(target);
+ }
+
+ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
+ {
+     int i;
+     void (*print_func)(const char*) = NULL; // default printing to stdout
+
+     // default values
+     param.solver_type = L2R_L2LOSS_SVC_DUAL;
+     param.C = 1;
+     param.eps = INF; // see setting below
+     param.p = 0.1;
+     param.nr_weight = 0;
+     param.weight_label = NULL;
+     param.weight = NULL;
+     flag_cross_validation = 0;
+     bias = -1;
+
+     // parse options
+     for(i=1;i<argc;i++)
+     {
+         if(argv[i][0] != '-') break;
+         if(++i>=argc)
+             exit_with_help();
+         switch(argv[i-1][1])
+         {
+             case 's':
+                 param.solver_type = atoi(argv[i]);
+                 break;
+
+             case 'c':
+                 param.C = atof(argv[i]);
+                 break;
+
+             case 'p':
+                 param.p = atof(argv[i]);
+                 break;
+
+             case 'e':
+                 param.eps = atof(argv[i]);
+                 break;
+
+             case 'B':
+                 bias = atof(argv[i]);
+                 break;
+
+             case 'w':
+                 ++param.nr_weight;
+                 param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight);
+                 param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight);
+                 param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
+                 param.weight[param.nr_weight-1] = atof(argv[i]);
+                 break;
+
+             case 'v':
+                 flag_cross_validation = 1;
+                 nr_fold = atoi(argv[i]);
+                 if(nr_fold < 2)
+                 {
+                     fprintf(stderr,"n-fold cross validation: n must >= 2\n");
+                     exit_with_help();
+                 }
+                 break;
+
+             case 'q':
+                 print_func = &print_null;
+                 i--;
+                 break;
+
+             default:
+                 fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
+                 exit_with_help();
+                 break;
+         }
+     }
+
+     set_print_string_function(print_func);
+
+     // determine filenames
+     if(i>=argc)
+         exit_with_help();
+
+     strcpy(input_file_name, argv[i]);
+
+     if(i<argc-1)
+         strcpy(model_file_name,argv[i+1]);
+     else
+     {
+         char *p = strrchr(argv[i],'/');
+         if(p==NULL)
+             p = argv[i];
+         else
+             ++p;
+         sprintf(model_file_name,"%s.model",p);
+     }
+
+     if(param.eps == INF)
+     {
+         switch(param.solver_type)
+         {
+             case L2R_LR:
+             case L2R_L2LOSS_SVC:
+                 param.eps = 0.01;
+                 break;
+             case L2R_L2LOSS_SVR:
+                 param.eps = 0.001;
+                 break;
+             case L2R_L2LOSS_SVC_DUAL:
+             case L2R_L1LOSS_SVC_DUAL:
+             case MCSVM_CS:
+             case L2R_LR_DUAL:
+                 param.eps = 0.1;
+                 break;
+             case L1R_L2LOSS_SVC:
+             case L1R_LR:
+                 param.eps = 0.01;
+                 break;
+             case L2R_L1LOSS_SVR_DUAL:
+             case L2R_L2LOSS_SVR_DUAL:
+                 param.eps = 0.1;
+                 break;
+         }
+     }
+ }
+
+ // read in a problem (in libsvm format)
+ void read_problem(const char *filename)
+ {
+     int max_index, inst_max_index, i;
+     long int elements, j;
+     FILE *fp = fopen(filename,"r");
+     char *endptr;
+     char *idx, *val, *label;
+
+     if(fp == NULL)
+     {
+         fprintf(stderr,"can't open input file %s\n",filename);
+         exit(1);
+     }
+
+     prob.l = 0;
+     elements = 0;
+     max_line_len = 1024;
+     line = Malloc(char,max_line_len);
+     while(readline(fp)!=NULL)
+     {
+         char *p = strtok(line," \t"); // label
+
+         // features
+         while(1)
+         {
+             p = strtok(NULL," \t");
+             if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
+                 break;
+             elements++;
+         }
+         elements++; // for bias term
+         prob.l++;
+     }
+     rewind(fp);
+
+     prob.bias=bias;
+
+     prob.y = Malloc(double,prob.l);
+     prob.x = Malloc(struct feature_node *,prob.l);
+     x_space = Malloc(struct feature_node,elements+prob.l);
+
+     max_index = 0;
+     j=0;
+     for(i=0;i<prob.l;i++)
+     {
+         inst_max_index = 0; // strtol gives 0 if wrong format
+         readline(fp);
+         prob.x[i] = &x_space[j];
+         label = strtok(line," \t\n");
+         if(label == NULL) // empty line
+             exit_input_error(i+1);
+
+         prob.y[i] = strtod(label,&endptr);
+         if(endptr == label || *endptr != '\0')
+             exit_input_error(i+1);
+
+         while(1)
+         {
+             idx = strtok(NULL,":");
+             val = strtok(NULL," \t");
+
+             if(val == NULL)
+                 break;
+
+             errno = 0;
+             x_space[j].index = (int) strtol(idx,&endptr,10);
+             if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index)
+                 exit_input_error(i+1);
+             else
+                 inst_max_index = x_space[j].index;
+
+             errno = 0;
+             x_space[j].value = strtod(val,&endptr);
+             if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
+                 exit_input_error(i+1);
+
+             ++j;
+         }
+
+         if(inst_max_index > max_index)
+             max_index = inst_max_index;
+
+         if(prob.bias >= 0)
+             x_space[j++].value = prob.bias;
+
+         x_space[j++].index = -1;
+     }
+
+     if(prob.bias >= 0)
+     {
+         prob.n=max_index+1;
+         for(i=1;i<prob.l;i++)
+             (prob.x[i]-2)->index = prob.n;
+         x_space[j-2].index = prob.n;
+     }
+     else
+         prob.n=max_index;
+
+     fclose(fp);
+ }
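train.c above writes its result with save_model(); by the naming rule in parse_command_line(), running the bundled binary as ./train heart_scale on the heart_scale data set shipped with the package produces a file named heart_scale.model. The sketch below is not part of the gem; it assumes such a model file exists and is compiled and linked like the earlier example. It shows the complementary path through the API: loading a saved model and classifying one sparse instance, with probability estimates only when the solver supports them.

/* Example (not part of the gem): load a model saved by the train program
 * above and classify one instance. Assumes "heart_scale.model" exists,
 * e.g. produced by running ./train heart_scale. */
#include <stdio.h>
#include <stdlib.h>
#include "linear.h"

int main(void)
{
    struct model *m = load_model("heart_scale.model");
    if (m == NULL) {
        fprintf(stderr, "can't load model file heart_scale.model\n");
        return 1;
    }

    /* One sparse instance, equivalent to the libsvm-format pairs "1:0.7 3:1",
       terminated by index -1 just as read_problem() stores rows in x_space. */
    struct feature_node x[] = {{1, 0.7}, {3, 1.0}, {-1, 0.0}};

    printf("predicted label: %g\n", predict(m, x));

    /* Probability estimates are only defined for the logistic regression
       solvers (-s 0, 6, 7); check_probability_model() guards that case. */
    if (check_probability_model(m)) {
        int nr_class = get_nr_class(m);
        double *prob_estimates = (double *) malloc(nr_class * sizeof(double));
        predict_probability(m, x, prob_estimates);
        printf("probability of the first label: %g\n", prob_estimates[0]);
        free(prob_estimates);
    }

    free_and_destroy_model(&m);
    return 0;
}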