ruby_linear 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +3 -0
- data/COPYING +24 -0
- data/README.markdown +109 -0
- data/Rakefile +15 -0
- data/ext/blas.h +25 -0
- data/ext/blasp.h +430 -0
- data/ext/daxpy.c +49 -0
- data/ext/ddot.c +50 -0
- data/ext/dnrm2.c +62 -0
- data/ext/dscal.c +44 -0
- data/ext/extconf.rb +4 -0
- data/ext/linear.cpp +2385 -0
- data/ext/linear.h +77 -0
- data/ext/rubylinear.cpp +639 -0
- data/ext/tron.cpp +235 -0
- data/ext/tron.h +34 -0
- data/lib/ruby_linear.rb +11 -0
- data/spec/fixtures/dna.dat +187 -0
- data/spec/fixtures/dna.out +1186 -0
- data/spec/fixtures/dna.scale.t +1186 -0
- data/spec/fixtures/dna.scale.txt +2000 -0
- data/spec/integration_spec.rb +54 -0
- data/spec/model_spec.rb +57 -0
- data/spec/problem_spec.rb +83 -0
- data/spec/spec_helper.rb +10 -0
- metadata +71 -0
data/ext/linear.h
ADDED
@@ -0,0 +1,77 @@
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H

#ifdef __cplusplus
extern "C" {
#endif

struct feature_node
{
    int index;
    double value;
};

struct problem
{
    int l, n;
    int *y;
    struct feature_node **x;
    double bias;            /* < 0 if no bias term */

    /* rubylinear addition: the x[i] are pointers into this base (which is allocated in one go) */
    int offset;
    struct feature_node *base;
};

enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */

struct parameter
{
    int solver_type;

    /* these are for training only */
    double eps;             /* stopping criteria */
    double C;
    int nr_weight;
    int *weight_label;
    double* weight;
};

struct model
{
    struct parameter param;
    int nr_class;           /* number of classes */
    int nr_feature;
    double *w;
    int *label;             /* label of each class */
    double bias;
};

struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target);

int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
int predict(const struct model *model_, const struct feature_node *x);
int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);

void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);

const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));

#ifdef __cplusplus
}
#endif

#endif /* _LIBLINEAR_H */
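Note: the solver_type enum above is what the extension below exposes to Ruby as integer constants. A sketch of the expected mapping, which simply follows the enum order (values 0 through 7):

  RubyLinear::L2R_LR               # => 0
  RubyLinear::L2R_L2LOSS_SVC_DUAL  # => 1
  RubyLinear::L2R_L2LOSS_SVC       # => 2
  RubyLinear::L2R_L1LOSS_SVC_DUAL  # => 3
  RubyLinear::MCSVM_CS             # => 4
  RubyLinear::L1R_L2LOSS_SVC       # => 5
  RubyLinear::L1R_LR               # => 6
  RubyLinear::L2R_LR_DUAL          # => 7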
data/ext/rubylinear.cpp
ADDED
@@ -0,0 +1,639 @@
#include "linear.h"
#include "tron.h"
#include "ruby.h"
#include <errno.h>
#include <ctype.h>
#ifdef __cplusplus
extern "C" {
#endif

VALUE mRubyLinear;
VALUE cProblem;
VALUE cModel;

static void model_free(void *p){
  struct model * m = (struct model *)p;
  free_and_destroy_model(&m);
}

static VALUE model_load_file(VALUE klass, VALUE path){
  path = rb_str_to_str(path);
  struct model * model = load_model(rb_string_value_cstr(&path));
  VALUE tdata = Data_Wrap_Struct(klass, 0, model_free, model);
  return tdata;
}

static VALUE model_write_file(VALUE self,VALUE path){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  path = rb_str_to_str(path);
  save_model(rb_string_value_cstr(&path), model);
  return self;
}

static VALUE model_new(VALUE klass, VALUE r_problem, VALUE parameters){

  struct model *model = NULL;
  struct problem *problem;
  Data_Get_Struct(r_problem, struct problem, problem);
  struct parameter param;

  if(!problem->base){
    rb_raise(rb_eArgError, "problem has been disposed");
    return Qnil;
  }

  rb_funcall(mRubyLinear, rb_intern("validate_options"), 1, parameters);
  VALUE v;

  if(!NIL_P(v = rb_hash_aref(parameters, ID2SYM(rb_intern("eps"))))){
    param.eps = RFLOAT_VALUE(rb_to_float(v));
  }else{
    param.eps = 0.01;
  }

  if(!NIL_P(v = rb_hash_aref(parameters, ID2SYM(rb_intern("c"))))){
    param.C = RFLOAT_VALUE(rb_to_float(v));
  }else{
    param.C = 1;
  }

  v = rb_hash_aref(parameters, ID2SYM(rb_intern("solver")));
  param.solver_type = FIX2INT(v);

  if(!NIL_P(v = rb_hash_aref(parameters, ID2SYM(rb_intern("weights"))))){
    Check_Type(v, T_HASH);
    param.nr_weight = RHASH_SIZE(v);
    param.weight = (double*)calloc(param.nr_weight,sizeof(double));
    param.weight_label = (int*)calloc(param.nr_weight,sizeof(int));

    VALUE weights_as_array = rb_funcall(v, rb_intern("to_a"),0);

    for(long i=0; i < RARRAY_LEN(weights_as_array); i++){
      VALUE pair = RARRAY_PTR(weights_as_array)[i];
      VALUE label = RARRAY_PTR(pair)[0];
      VALUE weight = RARRAY_PTR(pair)[1];

      param.weight[i] = RFLOAT_VALUE(rb_to_float(weight));
      param.weight_label[i] = FIX2INT(label);
    }

  }else{
    param.nr_weight = 0;
    param.weight = NULL;
    param.weight_label = NULL;
  }

  const char *error_string = check_parameter(problem, &param);
  if(error_string){
    rb_raise(rb_eArgError, "%s", error_string);
    destroy_param(&param);
    return Qnil;
  }
  model = train(problem, &param);
  VALUE tdata = Data_Wrap_Struct(klass, 0, model_free, model);
  destroy_param(&param);
  return tdata;
}
static VALUE model_feature_count(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return INT2FIX(model->nr_feature);
}

static VALUE model_class_count(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return INT2FIX(model->nr_class);
}

static VALUE model_class_bias(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return rb_float_new(model->bias);
}

static VALUE model_destroy(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  free_model_content(model);
  model->w = NULL;
  model->label = NULL;
  return Qnil;
}

static VALUE model_destroyed(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return model->w ? Qfalse : Qtrue;
}

struct feature_node * convert_ruby_sample_to_feature_node(struct model * model, VALUE data){
  Check_Type(data, T_HASH);
  VALUE pairs = rb_funcall(data,rb_intern("to_a"),0);
  int node_count = RARRAY_LEN(pairs) + (model->bias > 0 ? 2 : 1);
  struct feature_node * nodes = (struct feature_node *)calloc(node_count, sizeof(struct feature_node));

  int position = 0;
  for(int i=0; i < RARRAY_LEN(pairs); i++, position++){
    VALUE pair = RARRAY_PTR(pairs)[i];
    VALUE key = RARRAY_PTR(pair)[0];
    VALUE weight = RARRAY_PTR(pair)[1];

    nodes[i].index = FIX2INT(key);
    nodes[i].value = RFLOAT_VALUE(rb_to_float(weight));
  }
  if(model->bias > 0){
    nodes[position].index = model->nr_feature+1;
    nodes[position].value = model->bias;
    position++;
  }
  /*sentinel value*/
  nodes[position].index = -1;
  nodes[position].value = -1;
  return nodes;
}

static VALUE model_predict_values(VALUE self, VALUE data){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  if(!model->w){
    rb_raise(rb_eArgError, "model has been destroyed");
    return Qnil;
  }
  struct feature_node * nodes = convert_ruby_sample_to_feature_node(model, data);

  double *values = (double*)calloc(sizeof(double), model->nr_class);
  int label=predict_values(model, nodes, values);

  VALUE label_to_value_hash = rb_hash_new();
  for(int i = 0; i < model->nr_class; i++){
    int label = model->label[i];
    double value = values[i];
    rb_hash_aset(label_to_value_hash, INT2FIX(label), rb_float_new(value));
  }
  free(values);

  VALUE result = rb_ary_new();
  rb_ary_push(result, INT2FIX(label));
  rb_ary_push(result, label_to_value_hash);
  return result;
}

static VALUE model_predict(VALUE self, VALUE data){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  if(!model->w){
    rb_raise(rb_eArgError, "model has been destroyed");
    return Qnil;
  }
  struct feature_node * nodes = convert_ruby_sample_to_feature_node(model, data);
  int result = predict(model, nodes);
  free(nodes);
  return INT2FIX(result);
}

static VALUE model_inspect(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  return rb_sprintf("#<RubyLinear::Model:%p classes:%d features:%d bias:%f>",(void*)self,model->nr_class, model->nr_feature,model->bias);
}

static VALUE model_labels(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  VALUE result = rb_ary_new();
  for(int i=0; i < model->nr_class; i++){
    rb_ary_store(result, i, INT2FIX(model->label[i]));
  }
  return result;
}

static VALUE model_solver(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);
  return INT2FIX(model->param.solver_type);
}

static VALUE model_weights(VALUE self){
  struct model *model;
  Data_Get_Struct(self, struct model, model);

  int n;
  if(model->bias>=0){
    n=model->nr_feature+1;
  }
  else{
    n=model->nr_feature;
  }
  int w_size = n;
  int nr_w;
  if(model->nr_class==2 && model->param.solver_type != MCSVM_CS){
    nr_w = 1;
  }
  else{
    nr_w = model->nr_class;
  }

  int weight_count = w_size*nr_w;

  VALUE result = rb_ary_new();
  for(int i=0; i < weight_count; i++){
    rb_ary_store(result, i, rb_float_new(model->w[i]));
  }
  return result;
}

static void problem_free(void *p) {
  struct problem * pr = (struct problem*)p;

  free(pr->y);
  free(pr->base);
  free(pr);
}

void exit_input_error(int line_num)
{
  rb_raise(rb_eArgError, "Wrong input format at line %d\n", line_num);
}

static char *line = NULL;
static int max_line_len;

static char* readline(FILE *input)
{
  int len;

  if(fgets(line,max_line_len,input) == NULL)
    return NULL;

  while(strrchr(line,'\n') == NULL)
  {
    max_line_len *= 2;
    line = (char *) realloc(line,max_line_len);
    len = (int) strlen(line);
    if(fgets(line+len,max_line_len-len,input) == NULL)
      break;
  }
  return line;
}

static VALUE problem_load_file(VALUE klass, VALUE path, VALUE bias){
  path = rb_str_to_str(path);
  /* lifted from train.c*/
  int max_index, inst_max_index, i;
  long int elements, j;
  FILE *fp = fopen(rb_string_value_cstr(&path),"r");
  char *endptr;
  char *idx, *val, *label;

  if(fp == NULL)
  {
    rb_sys_fail("can't open input file");
    return Qnil;
  }

  struct problem *prob = (struct problem*) calloc(1, sizeof(struct problem));
  VALUE tdata = Data_Wrap_Struct(klass, 0, problem_free, prob);
  prob->bias = RFLOAT_VALUE(rb_to_float(bias));
  prob->l = 0;
  elements = 0;
  max_line_len = 1024;
  line = (char*)calloc(sizeof(char),max_line_len);
  while(readline(fp)!=NULL)
  {
    char *p = strtok(line," \t"); // label

    // features
    while(1)
    {
      p = strtok(NULL," \t");
      if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
        break;
      elements++;
    }
    elements++; // for bias term
    prob->l++;
  }
  rewind(fp);

  prob->y = (int*)calloc(sizeof(int),prob->l);
  prob->x = (struct feature_node **)calloc(sizeof(struct feature_node *),prob->l);
  prob->base = (struct feature_node *)calloc(sizeof(struct feature_node),elements + prob->l);

  max_index = 0;
  j=0;
  for(i=0;i<prob->l;i++)
  {
    inst_max_index = 0; // strtol gives 0 if wrong format
    readline(fp);
    prob->x[i] = &prob->base[j];
    label = strtok(line," \t\n");
    if(label == NULL){ // empty line
      exit_input_error(i+1);
      fclose(fp);
      return Qnil;
    }
    prob->y[i] = (int) strtol(label,&endptr,10);
    if(endptr == label || *endptr != '\0'){
      exit_input_error(i+1);
      fclose(fp);
      return Qnil;
    }
    while(1)
    {
      idx = strtok(NULL,":");
      val = strtok(NULL," \t");

      if(val == NULL)
        break;

      errno = 0;
      prob->base[j].index = (int) strtol(idx,&endptr,10);
      if(endptr == idx || errno != 0 || *endptr != '\0' || prob->base[j].index <= inst_max_index){
        exit_input_error(i+1);
        fclose(fp);
        return Qnil;
      }
      else
        inst_max_index = prob->base[j].index;

      errno = 0;
      prob->base[j].value = strtod(val,&endptr);
      if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))){
        exit_input_error(i+1);
        fclose(fp);
        return Qnil;
      }

      ++j;
    }

    if(inst_max_index > max_index)
      max_index = inst_max_index;

    if(prob->bias >= 0)
      prob->base[j++].value = prob->bias;

    prob->base[j++].index = -1;
  }

  if(prob->bias >= 0)
  {
    prob->n=max_index+1;
    for(i=1;i<prob->l;i++)
      (prob->x[i]-2)->index = prob->n;
    prob->base[j-2].index = prob->n;
  }
  else
    prob->n=max_index;

  fclose(fp);
  return tdata;
}

static VALUE problem_new(VALUE klass, VALUE labels, VALUE samples, VALUE bias, VALUE attr_count){
  VALUE argv[4] = {labels, samples,bias,attr_count};

  struct problem *ptr = (struct problem *)calloc(sizeof(struct problem),1);
  VALUE tdata = Data_Wrap_Struct(klass, 0, problem_free, ptr);
  rb_obj_call_init(tdata, 4, argv);
  return tdata;
}

static VALUE problem_labels(VALUE self){
  if(RTEST(rb_funcall(self, rb_intern("destroyed?"),0))){
    rb_raise(rb_eArgError, "problem has been destroyed");
    return Qnil;
  }

  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  VALUE result = rb_ary_new();

  for( int i=0; i< problem -> l; i++){
    rb_ary_push(result, INT2FIX(problem->y[i]));
  }
  return result;
}

static VALUE problem_feature_vector(VALUE self, VALUE r_index){
  if(RTEST(rb_funcall(self, rb_intern("destroyed?"),0))){
    rb_raise(rb_eArgError, "problem has been destroyed");
    return Qnil;
  }

  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  int index = FIX2INT(r_index);
  if(index >= problem->l){
    rb_raise(rb_eArgError, "index out of bounds");
    return Qnil;
  }
  VALUE result = rb_ary_new();

  for( struct feature_node *current = problem->x[index];current->index != -1; current++){
    VALUE pair = rb_ary_new();
    rb_ary_push(pair, INT2FIX(current->index));
    rb_ary_push(pair, rb_float_new(current->value));
    rb_ary_push(result, pair);
  }
  return result;
}

static VALUE problem_destroy(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  free(problem->base);
  problem->base = NULL;
  return self;
}

static VALUE problem_destroyed(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return problem->base ? Qfalse : Qtrue;
}

static VALUE problem_inspect(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  return rb_sprintf("#<RubyLinear::Problem:%p samples:%d features:%d bias:%f>",(void*)self,problem->l, problem->n,problem->bias);
}
static VALUE problem_l(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return INT2FIX(problem->l);
}

static VALUE problem_n(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return INT2FIX(problem->n);
}

static VALUE problem_bias(VALUE self){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);
  return rb_float_new(problem->bias);
}

static void addSample(struct problem * problem, int label, double weight){
  if(label > problem->n){
    rb_raise(rb_eArgError, "tried to add sample %d, %f, inconsistent with max feature of %d", label, weight, problem->n);
  }
  problem->base[problem->offset].index = label;
  problem->base[problem->offset].value = weight;
  problem->offset++;
}
static VALUE addSampleIterator(VALUE yielded_object, VALUE context, int argc, VALUE argv[]){
  struct problem *problem;
  Data_Get_Struct(context, struct problem, problem);
  VALUE key = RARRAY_PTR(yielded_object)[0];
  VALUE value = RARRAY_PTR(yielded_object)[1];

  int label = FIX2INT(key);
  double weight = RFLOAT_VALUE(rb_to_float(value));
  addSample(problem, label, weight);
  return Qnil;
}

static VALUE problem_init(VALUE self, VALUE labels, VALUE samples, VALUE bias, VALUE r_attr_count){
  struct problem *problem;
  Data_Get_Struct(self, struct problem, problem);

  labels = rb_check_array_type(labels);
  samples = rb_check_array_type(samples);
  problem->bias = RFLOAT_VALUE(rb_to_float(bias));

  problem->n = FIX2INT(r_attr_count);
  if(problem->bias > 0){
    problem->n += 1;
  }

  if(RARRAY_LEN(labels) != RARRAY_LEN(samples)){
    rb_raise(rb_eArgError, "samples and labels were of different length (%lu, %lu)", RARRAY_LEN(labels), RARRAY_LEN(samples));
    return Qnil;
  }
  problem->l = RARRAY_LEN(samples);
  problem->y = (int*)calloc(sizeof(int), problem->l);

  /* copy the y values and calculate how many samples to allocate*/
  int required_feature_nodes = 0;
  int extra_samples = problem->bias > 0 ? 2 : 1; /*always 1 (the sentinel element, and possibly +1 for bias)*/
  for(int i=0; i<problem->l; i++){
    VALUE hash = RARRAY_PTR(samples)[i];
    problem->y[i] = FIX2INT(RARRAY_PTR(labels)[i]);
    required_feature_nodes += RHASH_SIZE(hash) + extra_samples;
  }

  problem->offset = 0;
  problem->base = (struct feature_node *)calloc(sizeof(struct feature_node), required_feature_nodes);
  problem->x = (struct feature_node **)calloc(sizeof(struct feature_node*), problem->l);
  /* copy the samples */

  ID each = rb_intern("each");
  for(int i=0; i< problem->l; i++){
    VALUE hash = RARRAY_PTR(samples)[i];
    problem->x[i] = problem->base + problem->offset;
    rb_block_call(hash, each,0,NULL, RUBY_METHOD_FUNC(addSampleIterator),self);
    if(problem->bias>0){
      addSample(problem,problem->n,problem->bias);
    }
    addSample(problem,-1,-1);
  }
  if(problem->offset != required_feature_nodes){
    printf("allocated %d feature_nodes but used %d\n", required_feature_nodes, problem->offset);
  }
  return self;
}

extern int info_on;

static VALUE info_on_get(VALUE self) {
  return info_on ? Qtrue:Qfalse;
}

static VALUE info_on_set(VALUE self, VALUE new_value){
  info_on = RTEST(new_value) ? 1 : 0;
  return new_value;
}

void Init_rubylinear_native() {
  mRubyLinear = rb_define_module("RubyLinear");

  rb_define_const(mRubyLinear, "L2R_LR", INT2FIX(L2R_LR));
  rb_define_const(mRubyLinear, "L2R_L2LOSS_SVC_DUAL", INT2FIX(L2R_L2LOSS_SVC_DUAL));
  rb_define_const(mRubyLinear, "L2R_L2LOSS_SVC", INT2FIX(L2R_L2LOSS_SVC));
  rb_define_const(mRubyLinear, "L2R_L1LOSS_SVC_DUAL", INT2FIX(L2R_L1LOSS_SVC_DUAL));
  rb_define_const(mRubyLinear, "MCSVM_CS", INT2FIX(MCSVM_CS));
  rb_define_const(mRubyLinear, "L1R_L2LOSS_SVC", INT2FIX(L1R_L2LOSS_SVC));
  rb_define_const(mRubyLinear, "L1R_LR", INT2FIX(L1R_LR));
  rb_define_const(mRubyLinear, "L2R_LR_DUAL", INT2FIX(L2R_LR_DUAL));

  rb_define_singleton_method(mRubyLinear, "info_on", RUBY_METHOD_FUNC(info_on_get), 0);
  rb_define_singleton_method(mRubyLinear, "info_on=", RUBY_METHOD_FUNC(info_on_set), 1);

  cProblem = rb_define_class_under(mRubyLinear, "Problem", rb_cObject);
  rb_define_singleton_method(cProblem, "new", RUBY_METHOD_FUNC(problem_new), 4);
  rb_define_singleton_method(cProblem, "load_file", RUBY_METHOD_FUNC(problem_load_file),2);
  rb_define_method(cProblem, "initialize", RUBY_METHOD_FUNC(problem_init), 4);
  rb_define_method(cProblem, "l", RUBY_METHOD_FUNC(problem_l), 0);
  rb_define_method(cProblem, "n", RUBY_METHOD_FUNC(problem_n), 0);
  rb_define_method(cProblem, "bias", RUBY_METHOD_FUNC(problem_bias), 0);
  rb_define_method(cProblem, "feature_vector", RUBY_METHOD_FUNC(problem_feature_vector), 1);
  rb_define_method(cProblem, "labels", RUBY_METHOD_FUNC(problem_labels), 0);
  rb_define_method(cProblem, "destroy!", RUBY_METHOD_FUNC(problem_destroy), 0);
  rb_define_method(cProblem, "destroyed?", RUBY_METHOD_FUNC(problem_destroyed), 0);
  rb_define_method(cProblem, "inspect", RUBY_METHOD_FUNC(problem_inspect), 0);

  cModel = rb_define_class_under(mRubyLinear, "Model", rb_cObject);
  rb_define_singleton_method(cModel, "load_file", RUBY_METHOD_FUNC(model_load_file), 1);
  rb_define_singleton_method(cModel, "new", RUBY_METHOD_FUNC(model_new), 2);
  rb_define_method(cModel, "save", RUBY_METHOD_FUNC(model_write_file), 1);
  rb_define_method(cModel, "predict", RUBY_METHOD_FUNC(model_predict), 1);
  rb_define_method(cModel, "predict_values", RUBY_METHOD_FUNC(model_predict_values), 1);
  rb_define_method(cModel, "destroy!", RUBY_METHOD_FUNC(model_destroy), 0);
  rb_define_method(cModel, "destroyed?", RUBY_METHOD_FUNC(model_destroyed), 0);
  rb_define_method(cModel, "inspect", RUBY_METHOD_FUNC(model_inspect), 0);
  rb_define_method(cModel, "labels", RUBY_METHOD_FUNC(model_labels), 0);
  rb_define_method(cModel, "solver", RUBY_METHOD_FUNC(model_solver), 0);
  rb_define_method(cModel, "weights", RUBY_METHOD_FUNC(model_weights), 0);

  rb_define_method(cModel, "feature_count", RUBY_METHOD_FUNC(model_feature_count), 0);
  rb_define_method(cModel, "class_count", RUBY_METHOD_FUNC(model_class_count), 0);
  rb_define_method(cModel, "bias", RUBY_METHOD_FUNC(model_class_bias), 0);

}

#ifdef __cplusplus
} /* extern "C" */
#endif
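Taken together, Init_rubylinear_native defines the gem's public Ruby API. A minimal usage sketch inferred from those method definitions (the labels, samples, and option values below are illustrative only, not taken from the gem's documentation):

  require 'ruby_linear'

  # Each sample is a sparse hash of feature index => value; labels align by position.
  labels  = [1, 2, 1]
  samples = [{ 1 => 1.0, 3 => 0.5 }, { 2 => 1.0 }, { 1 => 0.7, 4 => 1.0 }]
  bias        = -1.0   # < 0 means no bias term, as noted in linear.h
  max_feature = 4

  problem = RubyLinear::Problem.new(labels, samples, bias, max_feature)
  model   = RubyLinear::Model.new(problem, solver: RubyLinear::L2R_L2LOSS_SVC_DUAL,
                                           c: 1.0, eps: 0.01)

  model.predict(1 => 0.9, 4 => 0.2)                          # => predicted label (Integer)
  label, scores = model.predict_values(1 => 0.9, 4 => 0.2)   # scores: label => decision value

  model.save("model.txt")
  reloaded = RubyLinear::Model.load_file("model.txt")

  problem.destroy!   # frees the contiguous feature_node storage held by the problem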