numo-liblinear 1.2.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +2 -3
  5. data/ext/numo/liblinear/extconf.rb +8 -14
  6. data/ext/numo/liblinear/liblinearext.cpp +215 -0
  7. data/ext/numo/liblinear/liblinearext.hpp +636 -0
  8. data/ext/numo/liblinear/src/COPYRIGHT +31 -0
  9. data/ext/numo/liblinear/{liblinear → src}/blas/blas.h +0 -0
  10. data/ext/numo/liblinear/{liblinear → src}/blas/blasp.h +0 -0
  11. data/ext/numo/liblinear/{liblinear → src}/blas/daxpy.c +0 -0
  12. data/ext/numo/liblinear/{liblinear → src}/blas/ddot.c +0 -0
  13. data/ext/numo/liblinear/{liblinear → src}/blas/dnrm2.c +0 -0
  14. data/ext/numo/liblinear/{liblinear → src}/blas/dscal.c +0 -0
  15. data/ext/numo/liblinear/{liblinear → src}/linear.cpp +152 -95
  16. data/ext/numo/liblinear/{liblinear → src}/linear.h +5 -5
  17. data/ext/numo/liblinear/{liblinear → src}/newton.cpp +20 -14
  18. data/ext/numo/liblinear/{liblinear → src}/newton.h +0 -0
  19. data/lib/numo/liblinear/version.rb +1 -1
  20. metadata +19 -38
  21. data/.github/workflows/build.yml +0 -28
  22. data/.gitignore +0 -20
  23. data/.gitmodules +0 -3
  24. data/.rspec +0 -3
  25. data/CODE_OF_CONDUCT.md +0 -74
  26. data/Gemfile +0 -11
  27. data/Rakefile +0 -15
  28. data/Steepfile +0 -20
  29. data/ext/numo/liblinear/converter.c +0 -133
  30. data/ext/numo/liblinear/converter.h +0 -18
  31. data/ext/numo/liblinear/liblinearext.c +0 -576
  32. data/ext/numo/liblinear/liblinearext.h +0 -17
  33. data/ext/numo/liblinear/model.c +0 -48
  34. data/ext/numo/liblinear/model.h +0 -15
  35. data/ext/numo/liblinear/parameter.c +0 -105
  36. data/ext/numo/liblinear/parameter.h +0 -15
  37. data/ext/numo/liblinear/problem.c +0 -92
  38. data/ext/numo/liblinear/problem.h +0 -12
  39. data/ext/numo/liblinear/solver_type.c +0 -36
  40. data/ext/numo/liblinear/solver_type.h +0 -9
  41. data/numo-liblinear.gemspec +0 -49
  42. data/sig/patch.rbs +0 -8
@@ -0,0 +1,636 @@
1
+ /**
2
+ * Copyright (c) 2019-2022 Atsushi Tatsuma
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * * Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * * Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * * Neither the name of the copyright holder nor the names of its
16
+ * contributors may be used to endorse or promote products derived from
17
+ * this software without specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #ifndef LIBLINEAREXT_HPP
32
+ #define LIBLINEAREXT_HPP 1
33
+
34
+ #include <cmath>
35
+ #include <cstring>
36
+
37
+ #include <ruby.h>
38
+
39
+ #include <numo/narray.h>
40
+ #include <numo/template.h>
41
+
42
+ #include <linear.h>
43
+
44
+ typedef struct model LibLinearModel;
45
+ typedef struct feature_node LibLinearNode;
46
+ typedef struct parameter LibLinearParameter;
47
+ typedef struct problem LibLinearProblem;
48
+
49
/** No-op print callback: accepts a message and discards it. */
void printNull(const char* s) {
  (void)s;
}
50
+
51
+ /** CONVERTERS */
52
+ VALUE convertVectorXiToNArray(const int* const arr, const int size) {
53
+ size_t shape[1] = {(size_t)size};
54
+ VALUE vec_val = rb_narray_new(numo_cInt32, 1, shape);
55
+ int32_t* vec_ptr = (int32_t*)na_get_pointer_for_write(vec_val);
56
+ for (int i = 0; i < size; i++) vec_ptr[i] = (int32_t)arr[i];
57
+ return vec_val;
58
+ }
59
+
60
+ int* convertNArrayToVectorXi(VALUE vec_val) {
61
+ if (NIL_P(vec_val)) return NULL;
62
+
63
+ narray_t* vec_nary;
64
+ GetNArray(vec_val, vec_nary);
65
+ const size_t n_elements = NA_SHAPE(vec_nary)[0];
66
+
67
+ int* arr = ALLOC_N(int, n_elements);
68
+ const int32_t* const vec_pt = (int32_t*)na_get_pointer_for_read(vec_val);
69
+ for (size_t i = 0; i < n_elements; i++) arr[i] = (int)vec_pt[i];
70
+
71
+ RB_GC_GUARD(vec_val);
72
+
73
+ return arr;
74
+ }
75
+
76
+ VALUE convertVectorXdToNArray(const double* const arr, const int size) {
77
+ size_t shape[1] = {(size_t)size};
78
+ VALUE vec_val = rb_narray_new(numo_cDFloat, 1, shape);
79
+ double* vec_ptr = (double*)na_get_pointer_for_write(vec_val);
80
+ memcpy(vec_ptr, arr, size * sizeof(double));
81
+ return vec_val;
82
+ }
83
+
84
+ double* convertNArrayToVectorXd(VALUE vec_val) {
85
+ if (NIL_P(vec_val)) return NULL;
86
+
87
+ narray_t* vec_nary;
88
+ GetNArray(vec_val, vec_nary);
89
+ const size_t n_elements = NA_SHAPE(vec_nary)[0];
90
+ double* arr = ALLOC_N(double, n_elements);
91
+ const double* const vec_ptr = (double*)na_get_pointer_for_read(vec_val);
92
+ memcpy(arr, vec_ptr, n_elements * sizeof(double));
93
+
94
+ RB_GC_GUARD(vec_val);
95
+
96
+ return arr;
97
+ }
98
+
99
+ VALUE convertMatrixXdToNArray(const double* const* mat, const int n_rows, const int n_cols) {
100
+ size_t shape[2] = {(size_t)n_rows, (size_t)n_cols};
101
+ VALUE mat_val = rb_narray_new(numo_cDFloat, 2, shape);
102
+ double* mat_ptr = (double*)na_get_pointer_for_write(mat_val);
103
+ for (int i = 0; i < n_rows; i++) memcpy(&mat_ptr[i * n_cols], mat[i], n_cols * sizeof(double));
104
+ return mat_val;
105
+ }
106
+
107
+ double** convertNArrayToMatrixXd(VALUE mat_val) {
108
+ if (NIL_P(mat_val)) return NULL;
109
+
110
+ narray_t* mat_nary;
111
+ GetNArray(mat_val, mat_nary);
112
+ const size_t n_rows = NA_SHAPE(mat_nary)[0];
113
+ const size_t n_cols = NA_SHAPE(mat_nary)[1];
114
+ const double* const mat_ptr = (double*)na_get_pointer_for_read(mat_val);
115
+ double** mat = ALLOC_N(double*, n_rows);
116
+ for (size_t i = 0; i < n_rows; i++) {
117
+ mat[i] = ALLOC_N(double, n_cols);
118
+ memcpy(mat[i], &mat_ptr[i * n_cols], n_cols * sizeof(double));
119
+ }
120
+
121
+ RB_GC_GUARD(mat_val);
122
+
123
+ return mat;
124
+ }
125
+
126
+ LibLinearNode* convertVectorXdToLibLinearNode(const double* const arr, const int size) {
127
+ int n_nonzero_elements = 0;
128
+ for (int i = 0; i < size; i++) {
129
+ if (arr[i] != 0.0) n_nonzero_elements++;
130
+ }
131
+
132
+ LibLinearNode* node = ALLOC_N(LibLinearNode, n_nonzero_elements + 1);
133
+ for (int i = 0, j = 0; i < size; i++) {
134
+ if (arr[i] != 0.0) {
135
+ node[j].index = i + 1;
136
+ node[j].value = arr[i];
137
+ j++;
138
+ }
139
+ }
140
+ node[n_nonzero_elements].index = -1;
141
+ node[n_nonzero_elements].value = 0.0;
142
+
143
+ return node;
144
+ }
145
+
146
+ LibLinearModel* convertHashToLibLinearModel(VALUE model_hash) {
147
+ LibLinearModel* model = ALLOC(LibLinearModel);
148
+ VALUE el;
149
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("nr_class")));
150
+ model->nr_class = !NIL_P(el) ? NUM2INT(el) : 0;
151
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("nr_feature")));
152
+ model->nr_feature = !NIL_P(el) ? NUM2INT(el) : 0;
153
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("w")));
154
+ model->w = convertNArrayToVectorXd(el);
155
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("label")));
156
+ model->label = convertNArrayToVectorXi(el);
157
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("bias")));
158
+ model->bias = NUM2DBL(el);
159
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("rho")));
160
+ model->rho = NUM2DBL(el);
161
+ return model;
162
+ }
163
+
164
+ VALUE convertLibLinearModelToHash(const LibLinearModel* const model) {
165
+ const int n_cols = model->nr_class > 2 ? model->nr_class : 1;
166
+ const int n_rows = model->nr_feature;
167
+ VALUE model_hash = rb_hash_new();
168
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("nr_class")), INT2NUM(model->nr_class));
169
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("nr_feature")), INT2NUM(model->nr_feature));
170
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("w")), model->w ? convertVectorXdToNArray(model->w, n_rows * n_cols) : Qnil);
171
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("label")),
172
+ model->label ? convertVectorXiToNArray(model->label, model->nr_class) : Qnil);
173
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("bias")), DBL2NUM(model->bias));
174
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("rho")), DBL2NUM(model->rho));
175
+ return model_hash;
176
+ }
177
+
178
+ LibLinearParameter* convertHashToLibLinearParameter(VALUE param_hash) {
179
+ LibLinearParameter* param = ALLOC(LibLinearParameter);
180
+ VALUE el;
181
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("solver_type")));
182
+ param->solver_type = !NIL_P(el) ? NUM2INT(el) : L2R_L2LOSS_SVC_DUAL;
183
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("eps")));
184
+ if (!NIL_P(el)) {
185
+ param->eps = NUM2DBL(el);
186
+ } else {
187
+ switch (param->solver_type) {
188
+ case L2R_LR:
189
+ case L2R_L2LOSS_SVC:
190
+ param->eps = 0.01;
191
+ break;
192
+ case L2R_L2LOSS_SVR:
193
+ param->eps = 0.0001;
194
+ break;
195
+ case L2R_L2LOSS_SVC_DUAL:
196
+ case L2R_L1LOSS_SVC_DUAL:
197
+ case MCSVM_CS:
198
+ case L2R_LR_DUAL:
199
+ param->eps = 0.1;
200
+ break;
201
+ case L1R_L2LOSS_SVC:
202
+ case L1R_LR:
203
+ param->eps = 0.01;
204
+ break;
205
+ case L2R_L1LOSS_SVR_DUAL:
206
+ case L2R_L2LOSS_SVR_DUAL:
207
+ param->eps = 0.1;
208
+ break;
209
+ case ONECLASS_SVM:
210
+ param->eps = 0.01;
211
+ break;
212
+ }
213
+ }
214
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("C")));
215
+ param->C = !NIL_P(el) ? NUM2DBL(el) : 1;
216
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("nr_weight")));
217
+ param->nr_weight = !NIL_P(el) ? NUM2INT(el) : 0;
218
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("weight_label")));
219
+ param->weight_label = NULL;
220
+ if (!NIL_P(el)) {
221
+ param->weight_label = ALLOC_N(int, param->nr_weight);
222
+ memcpy(param->weight_label, (int32_t*)na_get_pointer_for_read(el), param->nr_weight * sizeof(int32_t));
223
+ }
224
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("weight")));
225
+ param->weight = NULL;
226
+ if (!NIL_P(el)) {
227
+ param->weight = ALLOC_N(double, param->nr_weight);
228
+ memcpy(param->weight, (double*)na_get_pointer_for_read(el), param->nr_weight * sizeof(double));
229
+ }
230
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("p")));
231
+ param->p = !NIL_P(el) ? NUM2DBL(el) : 0.1;
232
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("nu")));
233
+ param->nu = !NIL_P(el) ? NUM2DBL(el) : 0.5;
234
+ el = rb_hash_aref(param_hash, ID2SYM(rb_intern("init_sol")));
235
+ param->init_sol = !NIL_P(el) ? convertNArrayToVectorXd(el) : NULL;
236
+ param->regularize_bias = 1;
237
+ return param;
238
+ }
239
+
240
+ VALUE convertLibLinearParameterToHash(const LibLinearParameter* const param) {
241
+ VALUE param_hash = rb_hash_new();
242
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("solver_type")), INT2NUM(param->solver_type));
243
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("eps")), DBL2NUM(param->eps));
244
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("C")), DBL2NUM(param->C));
245
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("nr_weight")), INT2NUM(param->nr_weight));
246
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("weight_label")),
247
+ param->weight_label ? convertVectorXiToNArray(param->weight_label, param->nr_weight) : Qnil);
248
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("weight")),
249
+ param->weight ? convertVectorXdToNArray(param->weight, param->nr_weight) : Qnil);
250
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("p")), DBL2NUM(param->p));
251
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("nu")), DBL2NUM(param->nu));
252
+ rb_hash_aset(param_hash, ID2SYM(rb_intern("init_sol")), Qnil);
253
+ return param_hash;
254
+ }
255
+
256
+ LibLinearProblem* convertDatasetToLibLinearProblem(VALUE x_val, VALUE y_val) {
257
+ narray_t* x_nary;
258
+ GetNArray(x_val, x_nary);
259
+ const int n_samples = (int)NA_SHAPE(x_nary)[0];
260
+ const int n_features = (int)NA_SHAPE(x_nary)[1];
261
+ const double* const x_ptr = (double*)na_get_pointer_for_read(x_val);
262
+ const double* const y_ptr = (double*)na_get_pointer_for_read(y_val);
263
+
264
+ LibLinearProblem* problem = ALLOC(LibLinearProblem);
265
+ problem->bias = -1;
266
+ problem->n = n_features;
267
+ problem->l = n_samples;
268
+ problem->x = ALLOC_N(LibLinearNode*, n_samples);
269
+ problem->y = ALLOC_N(double, n_samples);
270
+
271
+ int last_feature_id = 0;
272
+ bool is_padded = false;
273
+ for (int i = 0; i < n_samples; i++) {
274
+ int n_nonzero_features = 0;
275
+ for (int j = 0; j < n_features; j++) {
276
+ if (x_ptr[i * n_features + j] != 0.0) {
277
+ n_nonzero_features++;
278
+ last_feature_id = j + 1;
279
+ }
280
+ }
281
+ if (!is_padded && last_feature_id == n_features) is_padded = true;
282
+ if (is_padded) {
283
+ problem->x[i] = ALLOC_N(struct feature_node, n_nonzero_features + 1);
284
+ } else {
285
+ problem->x[i] = ALLOC_N(struct feature_node, n_nonzero_features + 2);
286
+ }
287
+ for (int j = 0, k = 0; j < n_features; j++) {
288
+ if (x_ptr[i * n_features + j] != 0.0) {
289
+ problem->x[i][k].index = j + 1;
290
+ problem->x[i][k].value = x_ptr[i * n_features + j];
291
+ k++;
292
+ }
293
+ }
294
+ if (is_padded) {
295
+ problem->x[i][n_nonzero_features].index = -1;
296
+ problem->x[i][n_nonzero_features].value = 0.0;
297
+ } else {
298
+ problem->x[i][n_nonzero_features].index = n_features;
299
+ problem->x[i][n_nonzero_features].value = 0.0;
300
+ problem->x[i][n_nonzero_features + 1].index = -1;
301
+ problem->x[i][n_nonzero_features + 1].value = 0.0;
302
+ }
303
+ problem->y[i] = y_ptr[i];
304
+ }
305
+
306
+ RB_GC_GUARD(x_val);
307
+ RB_GC_GUARD(y_val);
308
+
309
+ return problem;
310
+ }
311
+
312
+ /** UTILITIES */
313
+ bool isSingleOutputModel(LibLinearModel* model) { return (model->nr_class == 2 && model->param.solver_type != MCSVM_CS); }
314
+
315
+ bool isProbabilisticModel(LibLinearModel* model) {
316
+ return (model->param.solver_type == L2R_LR || model->param.solver_type == L1R_LR || model->param.solver_type == L2R_LR_DUAL);
317
+ }
318
+
319
+ void deleteLibLinearModel(LibLinearModel* model) {
320
+ if (model) {
321
+ xfree(model->w);
322
+ model->w = NULL;
323
+ xfree(model->label);
324
+ model->label = NULL;
325
+ xfree(model);
326
+ model = NULL;
327
+ }
328
+ }
329
+
330
+ void deleteLibLinearParameter(LibLinearParameter* param) {
331
+ if (param) {
332
+ if (param->weight_label) {
333
+ xfree(param->weight_label);
334
+ param->weight_label = NULL;
335
+ }
336
+ if (param->weight) {
337
+ xfree(param->weight);
338
+ param->weight = NULL;
339
+ }
340
+ if (param->init_sol) {
341
+ xfree(param->init_sol);
342
+ param->init_sol = NULL;
343
+ }
344
+ xfree(param);
345
+ param = NULL;
346
+ }
347
+ }
348
+
349
+ void deleteLibLinearProblem(LibLinearProblem* problem) {
350
+ if (problem) {
351
+ if (problem->x) {
352
+ for (int i = 0; i < problem->l; i++) {
353
+ if (problem->x[i]) {
354
+ xfree(problem->x[i]);
355
+ problem->x[i] = NULL;
356
+ }
357
+ }
358
+ xfree(problem->x);
359
+ problem->x = NULL;
360
+ }
361
+ if (problem->y) {
362
+ xfree(problem->y);
363
+ problem->y = NULL;
364
+ }
365
+ xfree(problem);
366
+ problem = NULL;
367
+ }
368
+ }
369
+
370
+ /** MODULE FUNCTIONS */
371
+ static VALUE numo_liblinear_train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash) {
372
+ if (CLASS_OF(x_val) != numo_cDFloat) x_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, x_val);
373
+ if (CLASS_OF(y_val) != numo_cDFloat) y_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, y_val);
374
+ if (!RTEST(nary_check_contiguous(x_val))) x_val = nary_dup(x_val);
375
+ if (!RTEST(nary_check_contiguous(y_val))) y_val = nary_dup(y_val);
376
+
377
+ narray_t* x_nary;
378
+ narray_t* y_nary;
379
+ GetNArray(x_val, x_nary);
380
+ GetNArray(y_val, y_nary);
381
+ if (NA_NDIM(x_nary) != 2) {
382
+ rb_raise(rb_eArgError, "Expect samples to be 2-D array.");
383
+ return Qnil;
384
+ }
385
+ if (NA_NDIM(y_nary) != 1) {
386
+ rb_raise(rb_eArgError, "Expect label or target values to be 1-D arrray.");
387
+ return Qnil;
388
+ }
389
+ if (NA_SHAPE(x_nary)[0] != NA_SHAPE(y_nary)[0]) {
390
+ rb_raise(rb_eArgError, "Expect to have the same number of samples for samples and labels.");
391
+ return Qnil;
392
+ }
393
+
394
+ VALUE random_seed = rb_hash_aref(param_hash, ID2SYM(rb_intern("random_seed")));
395
+ if (!NIL_P(random_seed)) srand(NUM2UINT(random_seed));
396
+
397
+ LibLinearParameter* param = convertHashToLibLinearParameter(param_hash);
398
+ LibLinearProblem* problem = convertDatasetToLibLinearProblem(x_val, y_val);
399
+
400
+ const char* err_msg = check_parameter(problem, param);
401
+ if (err_msg) {
402
+ deleteLibLinearProblem(problem);
403
+ deleteLibLinearParameter(param);
404
+ rb_raise(rb_eArgError, "Invalid LIBLINEAR parameter is given: %s", err_msg);
405
+ return Qnil;
406
+ }
407
+
408
+ VALUE verbose = rb_hash_aref(param_hash, ID2SYM(rb_intern("verbose")));
409
+ if (!RTEST(verbose)) set_print_string_function(printNull);
410
+
411
+ LibLinearModel* model = train(problem, param);
412
+ VALUE model_hash = convertLibLinearModelToHash(model);
413
+ free_and_destroy_model(&model);
414
+
415
+ deleteLibLinearProblem(problem);
416
+ deleteLibLinearParameter(param);
417
+
418
+ RB_GC_GUARD(x_val);
419
+ RB_GC_GUARD(y_val);
420
+
421
+ return model_hash;
422
+ }
423
+
424
+ static VALUE numo_liblinear_cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, VALUE nr_folds) {
425
+ if (CLASS_OF(x_val) != numo_cDFloat) x_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, x_val);
426
+ if (CLASS_OF(y_val) != numo_cDFloat) y_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, y_val);
427
+ if (!RTEST(nary_check_contiguous(x_val))) x_val = nary_dup(x_val);
428
+ if (!RTEST(nary_check_contiguous(y_val))) y_val = nary_dup(y_val);
429
+
430
+ narray_t* x_nary;
431
+ narray_t* y_nary;
432
+ GetNArray(x_val, x_nary);
433
+ GetNArray(y_val, y_nary);
434
+ if (NA_NDIM(x_nary) != 2) {
435
+ rb_raise(rb_eArgError, "Expect samples to be 2-D array.");
436
+ return Qnil;
437
+ }
438
+ if (NA_NDIM(y_nary) != 1) {
439
+ rb_raise(rb_eArgError, "Expect label or target values to be 1-D arrray.");
440
+ return Qnil;
441
+ }
442
+ if (NA_SHAPE(x_nary)[0] != NA_SHAPE(y_nary)[0]) {
443
+ rb_raise(rb_eArgError, "Expect to have the same number of samples for samples and labels.");
444
+ return Qnil;
445
+ }
446
+
447
+ VALUE random_seed = rb_hash_aref(param_hash, ID2SYM(rb_intern("random_seed")));
448
+ if (!NIL_P(random_seed)) srand(NUM2UINT(random_seed));
449
+
450
+ LibLinearParameter* param = convertHashToLibLinearParameter(param_hash);
451
+ LibLinearProblem* problem = convertDatasetToLibLinearProblem(x_val, y_val);
452
+
453
+ const char* err_msg = check_parameter(problem, param);
454
+ if (err_msg) {
455
+ deleteLibLinearProblem(problem);
456
+ deleteLibLinearParameter(param);
457
+ rb_raise(rb_eArgError, "Invalid LIBLINEAR parameter is given: %s", err_msg);
458
+ return Qnil;
459
+ }
460
+
461
+ size_t t_shape[1] = {(size_t)(problem->l)};
462
+ VALUE t_val = rb_narray_new(numo_cDFloat, 1, t_shape);
463
+ double* t_ptr = (double*)na_get_pointer_for_write(t_val);
464
+
465
+ VALUE verbose = rb_hash_aref(param_hash, ID2SYM(rb_intern("verbose")));
466
+ if (!RTEST(verbose)) set_print_string_function(printNull);
467
+
468
+ const int n_folds = NUM2INT(nr_folds);
469
+ cross_validation(problem, param, n_folds, t_ptr);
470
+
471
+ deleteLibLinearProblem(problem);
472
+ deleteLibLinearParameter(param);
473
+
474
+ RB_GC_GUARD(x_val);
475
+ RB_GC_GUARD(y_val);
476
+
477
+ return t_val;
478
+ }
479
+
480
+ static VALUE numo_liblinear_predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash) {
481
+ if (CLASS_OF(x_val) != numo_cDFloat) x_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, x_val);
482
+ if (!RTEST(nary_check_contiguous(x_val))) x_val = nary_dup(x_val);
483
+
484
+ narray_t* x_nary;
485
+ GetNArray(x_val, x_nary);
486
+ if (NA_NDIM(x_nary) != 2) {
487
+ rb_raise(rb_eArgError, "Expect samples to be 2-D array.");
488
+ return Qnil;
489
+ }
490
+
491
+ LibLinearParameter* param = convertHashToLibLinearParameter(param_hash);
492
+ LibLinearModel* model = convertHashToLibLinearModel(model_hash);
493
+ model->param = *param;
494
+
495
+ const int n_samples = (int)NA_SHAPE(x_nary)[0];
496
+ const int n_features = (int)NA_SHAPE(x_nary)[1];
497
+ size_t y_shape[1] = {(size_t)n_samples};
498
+ VALUE y_val = rb_narray_new(numo_cDFloat, 1, y_shape);
499
+ double* y_ptr = (double*)na_get_pointer_for_write(y_val);
500
+ const double* const x_ptr = (double*)na_get_pointer_for_read(x_val);
501
+ for (int i = 0; i < n_samples; i++) {
502
+ LibLinearNode* x_nodes = convertVectorXdToLibLinearNode(&x_ptr[i * n_features], n_features);
503
+ y_ptr[i] = predict(model, x_nodes);
504
+ xfree(x_nodes);
505
+ }
506
+
507
+ deleteLibLinearModel(model);
508
+ deleteLibLinearParameter(param);
509
+
510
+ RB_GC_GUARD(x_val);
511
+
512
+ return y_val;
513
+ }
514
+
515
+ static VALUE numo_liblinear_decision_function(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash) {
516
+ if (CLASS_OF(x_val) != numo_cDFloat) x_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, x_val);
517
+ if (!RTEST(nary_check_contiguous(x_val))) x_val = nary_dup(x_val);
518
+
519
+ narray_t* x_nary;
520
+ GetNArray(x_val, x_nary);
521
+ if (NA_NDIM(x_nary) != 2) {
522
+ rb_raise(rb_eArgError, "Expect samples to be 2-D array.");
523
+ return Qnil;
524
+ }
525
+
526
+ LibLinearParameter* param = convertHashToLibLinearParameter(param_hash);
527
+ LibLinearModel* model = convertHashToLibLinearModel(model_hash);
528
+ model->param = *param;
529
+
530
+ const int n_samples = (int)NA_SHAPE(x_nary)[0];
531
+ const int n_features = (int)NA_SHAPE(x_nary)[1];
532
+ const int y_cols = isSingleOutputModel(model) ? 1 : model->nr_class;
533
+ size_t y_shape[2] = {(size_t)n_samples, (size_t)y_cols};
534
+ const int n_dims = isSingleOutputModel(model) ? 1 : 2;
535
+ VALUE y_val = rb_narray_new(numo_cDFloat, n_dims, y_shape);
536
+ const double* const x_ptr = (double*)na_get_pointer_for_read(x_val);
537
+ double* y_ptr = (double*)na_get_pointer_for_write(y_val);
538
+
539
+ for (int i = 0; i < n_samples; i++) {
540
+ LibLinearNode* x_nodes = convertVectorXdToLibLinearNode(&x_ptr[i * n_features], n_features);
541
+ predict_values(model, x_nodes, &y_ptr[i * y_cols]);
542
+ xfree(x_nodes);
543
+ }
544
+
545
+ deleteLibLinearModel(model);
546
+ deleteLibLinearParameter(param);
547
+
548
+ RB_GC_GUARD(x_val);
549
+
550
+ return y_val;
551
+ }
552
+
553
+ static VALUE numo_liblinear_predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash) {
554
+ narray_t* x_nary;
555
+ GetNArray(x_val, x_nary);
556
+ if (NA_NDIM(x_nary) != 2) {
557
+ rb_raise(rb_eArgError, "Expect samples to be 2-D array.");
558
+ return Qnil;
559
+ }
560
+
561
+ LibLinearParameter* param = convertHashToLibLinearParameter(param_hash);
562
+ LibLinearModel* model = convertHashToLibLinearModel(model_hash);
563
+ model->param = *param;
564
+
565
+ if (!isProbabilisticModel(model)) {
566
+ deleteLibLinearModel(model);
567
+ deleteLibLinearParameter(param);
568
+ return Qnil;
569
+ }
570
+
571
+ if (CLASS_OF(x_val) != numo_cDFloat) x_val = rb_funcall(numo_cDFloat, rb_intern("cast"), 1, x_val);
572
+ if (!RTEST(nary_check_contiguous(x_val))) x_val = nary_dup(x_val);
573
+
574
+ const int n_samples = (int)NA_SHAPE(x_nary)[0];
575
+ const int n_features = (int)NA_SHAPE(x_nary)[1];
576
+ size_t y_shape[2] = {(size_t)n_samples, (size_t)(model->nr_class)};
577
+ VALUE y_val = rb_narray_new(numo_cDFloat, 2, y_shape);
578
+ const double* const x_ptr = (double*)na_get_pointer_for_read(x_val);
579
+ double* y_ptr = (double*)na_get_pointer_for_write(y_val);
580
+ for (int i = 0; i < n_samples; i++) {
581
+ LibLinearNode* x_nodes = convertVectorXdToLibLinearNode(&x_ptr[i * n_features], n_features);
582
+ predict_probability(model, x_nodes, &y_ptr[i * model->nr_class]);
583
+ xfree(x_nodes);
584
+ }
585
+
586
+ deleteLibLinearModel(model);
587
+ deleteLibLinearParameter(param);
588
+
589
+ RB_GC_GUARD(x_val);
590
+
591
+ return y_val;
592
+ }
593
+
594
+ static VALUE numo_liblinear_load_model(VALUE self, VALUE filename) {
595
+ const char* const filename_ = StringValuePtr(filename);
596
+ LibLinearModel* model = load_model(filename_);
597
+ if (model == NULL) {
598
+ rb_raise(rb_eIOError, "Failed to load file '%s'", filename_);
599
+ return Qnil;
600
+ }
601
+
602
+ VALUE param_hash = convertLibLinearParameterToHash(&(model->param));
603
+ VALUE model_hash = convertLibLinearModelToHash(model);
604
+ free_and_destroy_model(&model);
605
+
606
+ VALUE res = rb_ary_new2(2);
607
+ rb_ary_store(res, 0, param_hash);
608
+ rb_ary_store(res, 1, model_hash);
609
+
610
+ RB_GC_GUARD(filename);
611
+
612
+ return res;
613
+ }
614
+
615
+ static VALUE numo_liblinear_save_model(VALUE self, VALUE filename, VALUE param_hash, VALUE model_hash) {
616
+ LibLinearParameter* param = convertHashToLibLinearParameter(param_hash);
617
+ LibLinearModel* model = convertHashToLibLinearModel(model_hash);
618
+ model->param = *param;
619
+
620
+ const char* const filename_ = StringValuePtr(filename);
621
+ const int res = save_model(filename_, model);
622
+
623
+ deleteLibLinearModel(model);
624
+ deleteLibLinearParameter(param);
625
+
626
+ if (res < 0) {
627
+ rb_raise(rb_eIOError, "Failed to save file '%s'", filename_);
628
+ return Qfalse;
629
+ }
630
+
631
+ RB_GC_GUARD(filename);
632
+
633
+ return Qtrue;
634
+ }
635
+
636
+ #endif /* LIBLINEAREXT_HPP */
@@ -0,0 +1,31 @@
1
+
2
+ Copyright (c) 2007-2022 The LIBLINEAR Project.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions
7
+ are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright
10
+ notice, this list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright
13
+ notice, this list of conditions and the following disclaimer in the
14
+ documentation and/or other materials provided with the distribution.
15
+
16
+ 3. Neither name of copyright holders nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes