ruby_linear 0.1.0
- data/AUTHORS +3 -0
- data/COPYING +24 -0
- data/README.markdown +109 -0
- data/Rakefile +15 -0
- data/ext/blas.h +25 -0
- data/ext/blasp.h +430 -0
- data/ext/daxpy.c +49 -0
- data/ext/ddot.c +50 -0
- data/ext/dnrm2.c +62 -0
- data/ext/dscal.c +44 -0
- data/ext/extconf.rb +4 -0
- data/ext/linear.cpp +2385 -0
- data/ext/linear.h +77 -0
- data/ext/rubylinear.cpp +639 -0
- data/ext/tron.cpp +235 -0
- data/ext/tron.h +34 -0
- data/lib/ruby_linear.rb +11 -0
- data/spec/fixtures/dna.dat +187 -0
- data/spec/fixtures/dna.out +1186 -0
- data/spec/fixtures/dna.scale.t +1186 -0
- data/spec/fixtures/dna.scale.txt +2000 -0
- data/spec/integration_spec.rb +54 -0
- data/spec/model_spec.rb +57 -0
- data/spec/problem_spec.rb +83 -0
- data/spec/spec_helper.rb +10 -0
- metadata +71 -0
data/ext/linear.h
ADDED
@@ -0,0 +1,77 @@
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H

#ifdef __cplusplus
extern "C" {
#endif

struct feature_node
{
  int index;
  double value;
};

struct problem
{
  int l, n;
  int *y;
  struct feature_node **x;
  double bias;            /* < 0 if no bias term */

  /* rubylinear addition: the x[i] are pointers into this base (which is allocated in one go) */
  int offset;
  struct feature_node *base;
};

enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */

struct parameter
{
  int solver_type;

  /* these are for training only */
  double eps;             /* stopping criteria */
  double C;
  int nr_weight;
  int *weight_label;
  double* weight;
};

struct model
{
  struct parameter param;
  int nr_class;           /* number of classes */
  int nr_feature;
  double *w;
  int *label;             /* label of each class */
  double bias;
};

struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target);

int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
int predict(const struct model *model_, const struct feature_node *x);
int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);

void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);

const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));

#ifdef __cplusplus
}
#endif

#endif /* _LIBLINEAR_H */
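The header above is the stock liblinear C API plus the `offset`/`base` fields flagged in the comment. As a point of reference for how those declarations fit together, here is a minimal driver sketch, assuming it is compiled and linked against the bundled `ext/` sources; the two toy samples, the solver choice, and the `main` harness are illustrative only and are not part of the gem.

// Hypothetical driver (not part of the gem): train on two toy samples, then predict.
#include <cstdio>
#include "linear.h"

int main() {
  // Each sample is an array of feature_node terminated by a node with index == -1.
  feature_node row0[] = { {1, 1.0}, {2, 0.0}, {-1, 0.0} };
  feature_node row1[] = { {1, 0.0}, {2, 1.0}, {-1, 0.0} };
  feature_node *rows[] = { row0, row1 };
  int labels[] = { 1, 2 };

  problem prob = {};        // zero-initialise, including the rubylinear offset/base fields
  prob.l = 2;               // number of training samples
  prob.n = 2;               // number of features
  prob.y = labels;
  prob.x = rows;
  prob.bias = -1;           // < 0: no bias term

  parameter param = {};
  param.solver_type = L2R_L2LOSS_SVC_DUAL;
  param.eps = 0.1;          // stopping tolerance
  param.C = 1;              // cost parameter

  if (const char *err = check_parameter(&prob, &param)) {
    std::fprintf(stderr, "bad parameters: %s\n", err);
    return 1;
  }

  model *m = train(&prob, &param);
  std::printf("predicted label for row0: %d\n", predict(m, row0));
  free_and_destroy_model(&m);
  return 0;
}

The binding in rubylinear.cpp below wraps these same calls: it builds sentinel-terminated `feature_node` rows, fills a `parameter` from a Ruby options hash, and hands both to `train`.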
data/ext/rubylinear.cpp
ADDED
@@ -0,0 +1,639 @@
#include "linear.h"
#include "tron.h"
#include "ruby.h"
#include <errno.h>
#include <ctype.h>
#ifdef __cplusplus
extern "C" {
#endif

VALUE mRubyLinear;
VALUE cProblem;
VALUE cModel;

static void model_free(void *p){
  struct model * m = (struct model *)p;
  free_and_destroy_model(&m);
}

static VALUE model_load_file(VALUE klass, VALUE path){
  path = rb_str_to_str(path);
  struct model * model = load_model(rb_string_value_cstr(&path));
  VALUE tdata = Data_Wrap_Struct(klass, 0, model_free, model);
  return tdata;
}

static VALUE model_write_file(VALUE self, VALUE path){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  path = rb_str_to_str(path);
  save_model(rb_string_value_cstr(&path), model);
  return self;
}

static VALUE model_new(VALUE klass, VALUE r_problem, VALUE parameters){
  struct model *model = NULL;
  struct problem *problem;
  Data_Get_Struct(r_problem, struct problem, problem);
  struct parameter param;

  if(!problem->base){
    rb_raise(rb_eArgError, "problem has been disposed");
    return Qnil;
  }

  rb_funcall(mRubyLinear, rb_intern("validate_options"), 1, parameters);
  VALUE v;

  if(!NIL_P(v = rb_hash_aref(parameters, ID2SYM(rb_intern("eps"))))){
    param.eps = RFLOAT_VALUE(rb_to_float(v));
  }else{
    param.eps = 0.01;
  }

  if(!NIL_P(v = rb_hash_aref(parameters, ID2SYM(rb_intern("c"))))){
    param.C = RFLOAT_VALUE(rb_to_float(v));
  }else{
    param.C = 1;
  }

  v = rb_hash_aref(parameters, ID2SYM(rb_intern("solver")));
  param.solver_type = FIX2INT(v);

  if(!NIL_P(v = rb_hash_aref(parameters, ID2SYM(rb_intern("weights"))))){
    Check_Type(v, T_HASH);
    param.nr_weight = RHASH_SIZE(v);
    param.weight = (double*)calloc(param.nr_weight, sizeof(double));
    param.weight_label = (int*)calloc(param.nr_weight, sizeof(int));

    VALUE weights_as_array = rb_funcall(v, rb_intern("to_a"), 0);

    for(long i=0; i < RARRAY_LEN(weights_as_array); i++){
      VALUE pair = RARRAY_PTR(weights_as_array)[i];
      VALUE label = RARRAY_PTR(pair)[0];
      VALUE weight = RARRAY_PTR(pair)[1];

      param.weight[i] = RFLOAT_VALUE(rb_to_float(weight));
      param.weight_label[i] = FIX2INT(label);
    }
  }else{
    param.nr_weight = 0;
    param.weight = NULL;
    param.weight_label = NULL;
  }

  const char *error_string = check_parameter(problem, &param);
  if(error_string){
    rb_raise(rb_eArgError, "%s", error_string);
    destroy_param(&param);
    return Qnil;
  }
  model = train(problem, &param);
  VALUE tdata = Data_Wrap_Struct(klass, 0, model_free, model);
  destroy_param(&param);
  return tdata;
}

static VALUE model_feature_count(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return INT2FIX(model->nr_feature);
}

static VALUE model_class_count(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return INT2FIX(model->nr_class);
}

static VALUE model_class_bias(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return rb_float_new(model->bias);
}

static VALUE model_destroy(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  free_model_content(model);
  model->w = NULL;
  model->label = NULL;
  return Qnil;
}

static VALUE model_destroyed(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return model->w ? Qfalse : Qtrue;
}

struct feature_node * convert_ruby_sample_to_feature_node(struct model * model, VALUE data){
  Check_Type(data, T_HASH);
  VALUE pairs = rb_funcall(data, rb_intern("to_a"), 0);
  int node_count = RARRAY_LEN(pairs) + (model->bias > 0 ? 2 : 1);
  struct feature_node * nodes = (struct feature_node *)calloc(node_count, sizeof(struct feature_node));

  int position = 0;
  for(int i=0; i < RARRAY_LEN(pairs); i++, position++){
    VALUE pair = RARRAY_PTR(pairs)[i];
    VALUE key = RARRAY_PTR(pair)[0];
    VALUE weight = RARRAY_PTR(pair)[1];

    nodes[i].index = FIX2INT(key);
    nodes[i].value = RFLOAT_VALUE(rb_to_float(weight));
  }
  if(model->bias > 0){
    nodes[position].index = model->nr_feature+1;
    nodes[position].value = model->bias;
    position++;
  }
  /* sentinel value */
  nodes[position].index = -1;
  nodes[position].value = -1;
  return nodes;
}

static VALUE model_predict_values(VALUE self, VALUE data){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  if(!model->w){
    rb_raise(rb_eArgError, "model has been destroyed");
    return Qnil;
  }
  struct feature_node * nodes = convert_ruby_sample_to_feature_node(model, data);

  double *values = (double*)calloc(sizeof(double), model->nr_class);
  int label = predict_values(model, nodes, values);

  VALUE label_to_value_hash = rb_hash_new();
  for(int i = 0; i < model->nr_class; i++){
    int label = model->label[i];
    double value = values[i];
    rb_hash_aset(label_to_value_hash, INT2FIX(label), rb_float_new(value));
  }
  free(values);

  VALUE result = rb_ary_new();
  rb_ary_push(result, INT2FIX(label));
  rb_ary_push(result, label_to_value_hash);
  return result;
}

static VALUE model_predict(VALUE self, VALUE data){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  if(!model->w){
    rb_raise(rb_eArgError, "model has been destroyed");
    return Qnil;
  }
  struct feature_node * nodes = convert_ruby_sample_to_feature_node(model, data);
  int result = predict(model, nodes);
  free(nodes);
  return INT2FIX(result);
}

static VALUE model_inspect(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  return rb_sprintf("#<RubyLinear::Model:%p classes:%d features:%d bias:%f>", (void*)self, model->nr_class, model->nr_feature, model->bias);
}

static VALUE model_labels(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  VALUE result = rb_ary_new();
  for(int i=0; i < model->nr_class; i++){
    rb_ary_store(result, i, INT2FIX(model->label[i]));
  }
  return result;
}

static VALUE model_solver(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return INT2FIX(model->param.solver_type);
}

static VALUE model_weights(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  int n;
  if(model->bias >= 0){
    n = model->nr_feature+1;
  }
  else{
    n = model->nr_feature;
  }
  int w_size = n;
  int nr_w;
  if(model->nr_class == 2 && model->param.solver_type != MCSVM_CS){
    nr_w = 1;
  }
  else{
    nr_w = model->nr_class;
  }

  int weight_count = w_size*nr_w;

  VALUE result = rb_ary_new();
  for(int i=0; i < weight_count; i++){
    rb_ary_store(result, i, rb_float_new(model->w[i]));
  }
  return result;
}

static void problem_free(void *p) {
  struct problem * pr = (struct problem*)p;

  free(pr->y);
  free(pr->base);
  free(pr);
}

void exit_input_error(int line_num)
{
  rb_raise(rb_eArgError, "Wrong input format at line %d\n", line_num);
}

static char *line = NULL;
static int max_line_len;

static char* readline(FILE *input)
{
  int len;

  if(fgets(line, max_line_len, input) == NULL)
    return NULL;

  while(strrchr(line, '\n') == NULL)
  {
    max_line_len *= 2;
    line = (char *) realloc(line, max_line_len);
    len = (int) strlen(line);
    if(fgets(line+len, max_line_len-len, input) == NULL)
      break;
  }
  return line;
}

static VALUE problem_load_file(VALUE klass, VALUE path, VALUE bias){
  path = rb_str_to_str(path);
  /* lifted from train.c */
  int max_index, inst_max_index, i;
  long int elements, j;
  FILE *fp = fopen(rb_string_value_cstr(&path), "r");
  char *endptr;
  char *idx, *val, *label;

  if(fp == NULL)
  {
    rb_sys_fail("can't open input file");
    return Qnil;
  }

  struct problem *prob = (struct problem*) calloc(1, sizeof(struct problem));
  VALUE tdata = Data_Wrap_Struct(klass, 0, problem_free, prob);
  prob->bias = RFLOAT_VALUE(rb_to_float(bias));
  prob->l = 0;
  elements = 0;
  max_line_len = 1024;
  line = (char*)calloc(sizeof(char), max_line_len);
  while(readline(fp) != NULL)
  {
    char *p = strtok(line, " \t"); // label

    // features
    while(1)
    {
      p = strtok(NULL, " \t");
      if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
        break;
      elements++;
    }
    elements++; // for bias term
    prob->l++;
  }
  rewind(fp);

  prob->y = (int*)calloc(sizeof(int), prob->l);
  prob->x = (struct feature_node **)calloc(sizeof(struct feature_node *), prob->l);
  prob->base = (struct feature_node *)calloc(sizeof(struct feature_node), elements + prob->l);

  max_index = 0;
  j = 0;
  for(i=0; i<prob->l; i++)
  {
    inst_max_index = 0; // strtol gives 0 if wrong format
    readline(fp);
    prob->x[i] = &prob->base[j];
    label = strtok(line, " \t\n");
    if(label == NULL){ // empty line
      exit_input_error(i+1);
      fclose(fp);
      return Qnil;
    }
    prob->y[i] = (int) strtol(label, &endptr, 10);
    if(endptr == label || *endptr != '\0'){
      exit_input_error(i+1);
      fclose(fp);
      return Qnil;
    }
    while(1)
    {
      idx = strtok(NULL, ":");
      val = strtok(NULL, " \t");

      if(val == NULL)
        break;

      errno = 0;
      prob->base[j].index = (int) strtol(idx, &endptr, 10);
      if(endptr == idx || errno != 0 || *endptr != '\0' || prob->base[j].index <= inst_max_index){
        exit_input_error(i+1);
        fclose(fp);
        return Qnil;
      }
      else
        inst_max_index = prob->base[j].index;

      errno = 0;
      prob->base[j].value = strtod(val, &endptr);
      if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))){
        exit_input_error(i+1);
        fclose(fp);
        return Qnil;
      }

      ++j;
    }

    if(inst_max_index > max_index)
      max_index = inst_max_index;

    if(prob->bias >= 0)
      prob->base[j++].value = prob->bias;

    prob->base[j++].index = -1;
  }

  if(prob->bias >= 0)
  {
    prob->n = max_index+1;
    for(i=1; i<prob->l; i++)
      (prob->x[i]-2)->index = prob->n;
    prob->base[j-2].index = prob->n;
  }
  else
    prob->n = max_index;

  fclose(fp);
  return tdata;
}

static VALUE problem_new(VALUE klass, VALUE labels, VALUE samples, VALUE bias, VALUE attr_count){
  VALUE argv[4] = {labels, samples, bias, attr_count};

  struct problem *ptr = (struct problem *)calloc(sizeof(struct problem), 1);
  VALUE tdata = Data_Wrap_Struct(klass, 0, problem_free, ptr);
  rb_obj_call_init(tdata, 4, argv);
  return tdata;
}

static VALUE problem_labels(VALUE self){
  if(RTEST(rb_funcall(self, rb_intern("destroyed?"), 0))){
    rb_raise(rb_eArgError, "problem has been destroyed");
    return Qnil;
  }

  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  VALUE result = rb_ary_new();

  for(int i=0; i < problem->l; i++){
    rb_ary_push(result, INT2FIX(problem->y[i]));
  }
  return result;
}

static VALUE problem_feature_vector(VALUE self, VALUE r_index){
  if(RTEST(rb_funcall(self, rb_intern("destroyed?"), 0))){
    rb_raise(rb_eArgError, "problem has been destroyed");
    return Qnil;
  }

  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  int index = FIX2INT(r_index);
  if(index >= problem->l){
    rb_raise(rb_eArgError, "index out of bounds");
    return Qnil;
  }
  VALUE result = rb_ary_new();

  for(struct feature_node *current = problem->x[index]; current->index != -1; current++){
    VALUE pair = rb_ary_new();
    rb_ary_push(pair, INT2FIX(current->index));
    rb_ary_push(pair, rb_float_new(current->value));
    rb_ary_push(result, pair);
  }
  return result;
}

static VALUE problem_destroy(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  free(problem->base);
  problem->base = NULL;
  return self;
}

static VALUE problem_destroyed(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return problem->base ? Qfalse : Qtrue;
}

static VALUE problem_inspect(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  return rb_sprintf("#<RubyLinear::Problem:%p samples:%d features:%d bias:%f>", (void*)self, problem->l, problem->n, problem->bias);
}

static VALUE problem_l(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return INT2FIX(problem->l);
}

static VALUE problem_n(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return INT2FIX(problem->n);
}

static VALUE problem_bias(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return rb_float_new(problem->bias);
}

static void addSample(struct problem * problem, int label, double weight){
  if(label > problem->n){
    rb_raise(rb_eArgError, "tried to add sample %d, %f, inconsistent with max feature of %d", label, weight, problem->n);
  }
  problem->base[problem->offset].index = label;
  problem->base[problem->offset].value = weight;
  problem->offset++;
}

static VALUE addSampleIterator(VALUE yielded_object, VALUE context, int argc, VALUE argv[]){
  struct problem *problem;
  Data_Get_Struct(context, struct problem, problem);
  VALUE key = RARRAY_PTR(yielded_object)[0];
  VALUE value = RARRAY_PTR(yielded_object)[1];

  int label = FIX2INT(key);
  double weight = RFLOAT_VALUE(rb_to_float(value));
  addSample(problem, label, weight);
  return Qnil;
}

static VALUE problem_init(VALUE self, VALUE labels, VALUE samples, VALUE bias, VALUE r_attr_count){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  labels = rb_check_array_type(labels);
  samples = rb_check_array_type(samples);
  problem->bias = RFLOAT_VALUE(rb_to_float(bias));

  problem->n = FIX2INT(r_attr_count);
  if(problem->bias > 0){
    problem->n += 1;
  }

  if(RARRAY_LEN(labels) != RARRAY_LEN(samples)){
    rb_raise(rb_eArgError, "samples and labels were of different length (%lu, %lu)", RARRAY_LEN(labels), RARRAY_LEN(samples));
    return Qnil;
  }
  problem->l = RARRAY_LEN(samples);
  problem->y = (int*)calloc(sizeof(int), problem->l);

  /* copy the y values and calculate how many samples to allocate */
  int required_feature_nodes = 0;
  int extra_samples = problem->bias > 0 ? 2 : 1; /* always 1 (the sentinel element), possibly +1 for bias */
  for(int i=0; i<problem->l; i++){
    VALUE hash = RARRAY_PTR(samples)[i];
    problem->y[i] = FIX2INT(RARRAY_PTR(labels)[i]);
    required_feature_nodes += RHASH_SIZE(hash) + extra_samples;
  }

  problem->offset = 0;
  problem->base = (struct feature_node *)calloc(sizeof(struct feature_node), required_feature_nodes);
  problem->x = (struct feature_node **)calloc(sizeof(struct feature_node*), problem->l);

  /* copy the samples */
  ID each = rb_intern("each");
  for(int i=0; i < problem->l; i++){
    VALUE hash = RARRAY_PTR(samples)[i];
    problem->x[i] = problem->base + problem->offset;
    rb_block_call(hash, each, 0, NULL, RUBY_METHOD_FUNC(addSampleIterator), self);
    if(problem->bias > 0){
      addSample(problem, problem->n, problem->bias);
    }
    addSample(problem, -1, -1);
  }
  if(problem->offset != required_feature_nodes){
    printf("allocated %d feature_nodes but used %d\n", required_feature_nodes, problem->offset);
  }
  return self;
}

extern int info_on;

static VALUE info_on_get(VALUE self) {
  return info_on ? Qtrue : Qfalse;
}

static VALUE info_on_set(VALUE self, VALUE new_value){
  info_on = RTEST(new_value) ? 1 : 0;
  return new_value;
}

void Init_rubylinear_native() {
  mRubyLinear = rb_define_module("RubyLinear");

  rb_define_const(mRubyLinear, "L2R_LR", INT2FIX(L2R_LR));
  rb_define_const(mRubyLinear, "L2R_L2LOSS_SVC_DUAL", INT2FIX(L2R_L2LOSS_SVC_DUAL));
  rb_define_const(mRubyLinear, "L2R_L2LOSS_SVC", INT2FIX(L2R_L2LOSS_SVC));
  rb_define_const(mRubyLinear, "L2R_L1LOSS_SVC_DUAL", INT2FIX(L2R_L1LOSS_SVC_DUAL));
  rb_define_const(mRubyLinear, "MCSVM_CS", INT2FIX(MCSVM_CS));
  rb_define_const(mRubyLinear, "L1R_L2LOSS_SVC", INT2FIX(L1R_L2LOSS_SVC));
  rb_define_const(mRubyLinear, "L1R_LR", INT2FIX(L1R_LR));
  rb_define_const(mRubyLinear, "L2R_LR_DUAL", INT2FIX(L2R_LR_DUAL));

  rb_define_singleton_method(mRubyLinear, "info_on", RUBY_METHOD_FUNC(info_on_get), 0);
  rb_define_singleton_method(mRubyLinear, "info_on=", RUBY_METHOD_FUNC(info_on_set), 1);

  cProblem = rb_define_class_under(mRubyLinear, "Problem", rb_cObject);
  rb_define_singleton_method(cProblem, "new", RUBY_METHOD_FUNC(problem_new), 4);
  rb_define_singleton_method(cProblem, "load_file", RUBY_METHOD_FUNC(problem_load_file), 2);
  rb_define_method(cProblem, "initialize", RUBY_METHOD_FUNC(problem_init), 4);
  rb_define_method(cProblem, "l", RUBY_METHOD_FUNC(problem_l), 0);
  rb_define_method(cProblem, "n", RUBY_METHOD_FUNC(problem_n), 0);
  rb_define_method(cProblem, "bias", RUBY_METHOD_FUNC(problem_bias), 0);
  rb_define_method(cProblem, "feature_vector", RUBY_METHOD_FUNC(problem_feature_vector), 1);
  rb_define_method(cProblem, "labels", RUBY_METHOD_FUNC(problem_labels), 0);
  rb_define_method(cProblem, "destroy!", RUBY_METHOD_FUNC(problem_destroy), 0);
  rb_define_method(cProblem, "destroyed?", RUBY_METHOD_FUNC(problem_destroyed), 0);
  rb_define_method(cProblem, "inspect", RUBY_METHOD_FUNC(problem_inspect), 0);

  cModel = rb_define_class_under(mRubyLinear, "Model", rb_cObject);
  rb_define_singleton_method(cModel, "load_file", RUBY_METHOD_FUNC(model_load_file), 1);
  rb_define_singleton_method(cModel, "new", RUBY_METHOD_FUNC(model_new), 2);
  rb_define_method(cModel, "save", RUBY_METHOD_FUNC(model_write_file), 1);
  rb_define_method(cModel, "predict", RUBY_METHOD_FUNC(model_predict), 1);
  rb_define_method(cModel, "predict_values", RUBY_METHOD_FUNC(model_predict_values), 1);
  rb_define_method(cModel, "destroy!", RUBY_METHOD_FUNC(model_destroy), 0);
  rb_define_method(cModel, "destroyed?", RUBY_METHOD_FUNC(model_destroyed), 0);
  rb_define_method(cModel, "inspect", RUBY_METHOD_FUNC(model_inspect), 0);
  rb_define_method(cModel, "labels", RUBY_METHOD_FUNC(model_labels), 0);
  rb_define_method(cModel, "solver", RUBY_METHOD_FUNC(model_solver), 0);
  rb_define_method(cModel, "weights", RUBY_METHOD_FUNC(model_weights), 0);

  rb_define_method(cModel, "feature_count", RUBY_METHOD_FUNC(model_feature_count), 0);
  rb_define_method(cModel, "class_count", RUBY_METHOD_FUNC(model_class_count), 0);
  rb_define_method(cModel, "bias", RUBY_METHOD_FUNC(model_class_bias), 0);
}

#ifdef __cplusplus
} /* extern "C" */
#endif
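A detail worth noting in `problem_init`/`addSample` above: every sample's nodes are written into the single `base` allocation, each `x[i]` points at an offset into it, a bias node (index `n`, value `bias`) is appended when the bias is positive, and a sentinel node with index -1 closes each row. The sketch below reproduces that packing scheme detached from the Ruby C API so the layout can be read on its own; `pack_problem` and its dense `counts`/`indices`/`values` arguments are invented for illustration and are not part of the gem.

#include <cstdlib>
#include "linear.h"

// Build a problem whose rows all live in one contiguous allocation,
// mirroring the base/offset scheme used by problem_init above.
// counts[i] is the number of (index, value) pairs in sample i;
// indices/values hold all pairs back to back.
problem *pack_problem(int l, int n, const int *labels,
                      const int *counts, const int *indices,
                      const double *values, double bias) {
  problem *prob = (problem *)calloc(1, sizeof(problem));
  prob->l = l;
  prob->n = bias > 0 ? n + 1 : n;   // the bias term occupies one extra feature slot
  prob->bias = bias;
  prob->y = (int *)calloc(l, sizeof(int));
  prob->x = (feature_node **)calloc(l, sizeof(feature_node *));

  int total = 0;                    // feature_nodes needed: pairs + sentinel (+ bias) per row
  int extra = bias > 0 ? 2 : 1;
  for (int i = 0; i < l; i++) total += counts[i] + extra;
  prob->base = (feature_node *)calloc(total, sizeof(feature_node));

  int k = 0;                        // read position in indices/values
  prob->offset = 0;                 // write position in base, as in addSample
  for (int i = 0; i < l; i++) {
    prob->y[i] = labels[i];
    prob->x[i] = &prob->base[prob->offset];
    for (int j = 0; j < counts[i]; j++, k++) {
      prob->base[prob->offset].index = indices[k];
      prob->base[prob->offset].value = values[k];
      prob->offset++;
    }
    if (bias > 0) {                 // bias node at feature index n
      prob->base[prob->offset].index = prob->n;
      prob->base[prob->offset].value = bias;
      prob->offset++;
    }
    prob->base[prob->offset].index = -1;   // sentinel terminating the row
    prob->base[prob->offset].value = -1;
    prob->offset++;
  }
  return prob;
}

Keeping everything in one allocation is what lets `problem_free` release the nodes with a single `free(pr->base)` and lets `destroy!` mark the object dead by simply nulling `base`.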