rumale 0.23.1 → 0.23.2

Sign up to get free protection for your applications and to get access to all the features.
data/ext/rumale/tree.c DELETED
@@ -1,547 +0,0 @@
1
- #include "tree.h"
2
-
3
- RUBY_EXTERN VALUE mRumale;
4
-
5
- double* alloc_dbl_array(const long n_dimensions) {
6
- double* arr = ALLOC_N(double, n_dimensions);
7
- memset(arr, 0, n_dimensions * sizeof(double));
8
- return arr;
9
- }
10
-
11
/**
 * Compute the Gini impurity of a class histogram.
 *
 * @param histogram  Per-class counts; the first n_classes entries are read.
 * @param n_elements Total count used to normalise the histogram.
 * @param n_classes  Number of histogram bins.
 * @return 1 - sum_i (histogram[i] / n_elements)^2, or 0.0 when n_elements is not positive.
 */
double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
  long i;
  double ratio;
  double sum_sq = 0.0;

  /* An empty node has no class mixture; without this guard the 0/0 division below yields NaN. */
  if (n_elements <= 0) {
    return 0.0;
  }

  for (i = 0; i < n_classes; i++) {
    ratio = histogram[i] / n_elements;
    sum_sq += ratio * ratio;
  }

  return 1.0 - sum_sq;
}
23
-
24
- double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
25
- long i;
26
- double el;
27
- double entropy = 0.0;
28
-
29
- for (i = 0; i < n_classes; i++) {
30
- el = histogram[i] / n_elements;
31
- entropy += el * log(el + 1.0);
32
- }
33
-
34
- return -entropy;
35
- }
36
-
37
- VALUE
38
- calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
39
- long i;
40
- VALUE mean_vec = rb_ary_new2(n_dimensions);
41
-
42
- for (i = 0; i < n_dimensions; i++) {
43
- rb_ary_store(mean_vec, i, DBL2NUM(sum_vec[i] / n_elements));
44
- }
45
-
46
- return mean_vec;
47
- }
48
-
49
- double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
50
- long i;
51
- const long n_dimensions = RARRAY_LEN(vec_a);
52
- double sum = 0.0;
53
- double diff;
54
-
55
- for (i = 0; i < n_dimensions; i++) {
56
- diff = NUM2DBL(rb_ary_entry(vec_a, i)) - NUM2DBL(rb_ary_entry(vec_b, i));
57
- sum += fabs(diff);
58
- }
59
-
60
- return sum / n_dimensions;
61
- }
62
-
63
- double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
64
- long i;
65
- const long n_dimensions = RARRAY_LEN(vec_a);
66
- double sum = 0.0;
67
- double diff;
68
-
69
- for (i = 0; i < n_dimensions; i++) {
70
- diff = NUM2DBL(rb_ary_entry(vec_a, i)) - NUM2DBL(rb_ary_entry(vec_b, i));
71
- sum += diff * diff;
72
- }
73
-
74
- return sum / n_dimensions;
75
- }
76
-
77
- double calc_mae(VALUE target_vecs, VALUE mean_vec) {
78
- long i;
79
- const long n_elements = RARRAY_LEN(target_vecs);
80
- double sum = 0.0;
81
-
82
- for (i = 0; i < n_elements; i++) {
83
- sum += calc_vec_mae(rb_ary_entry(target_vecs, i), mean_vec);
84
- }
85
-
86
- return sum / n_elements;
87
- }
88
-
89
- double calc_mse(VALUE target_vecs, VALUE mean_vec) {
90
- long i;
91
- const long n_elements = RARRAY_LEN(target_vecs);
92
- double sum = 0.0;
93
-
94
- for (i = 0; i < n_elements; i++) {
95
- sum += calc_vec_mse(rb_ary_entry(target_vecs, i), mean_vec);
96
- }
97
-
98
- return sum / n_elements;
99
- }
100
-
101
/**
 * Dispatch to the classification impurity selected by name.
 *
 * @param criterion  "entropy" selects calc_entropy; any other string selects calc_gini_coef.
 * @param histogram  Per-class counts.
 * @param n_elements Total count used to normalise the histogram.
 * @param n_classes  Number of histogram bins.
 * @return Impurity computed by the selected function.
 */
double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
  const int use_entropy = (strcmp(criterion, "entropy") == 0);

  return use_entropy ? calc_entropy(histogram, n_elements, n_classes)
                     : calc_gini_coef(histogram, n_elements, n_classes);
}
107
-
108
- double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
109
- const long n_elements = RARRAY_LEN(target_vecs);
110
- const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
111
- VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
112
-
113
- if (strcmp(criterion, "mae") == 0) {
114
- return calc_mae(target_vecs, mean_vec);
115
- }
116
- return calc_mse(target_vecs, mean_vec);
117
- }
118
-
119
- void add_sum_vec(double* sum_vec, VALUE target) {
120
- long i;
121
- const long n_dimensions = RARRAY_LEN(target);
122
-
123
- for (i = 0; i < n_dimensions; i++) {
124
- sum_vec[i] += NUM2DBL(rb_ary_entry(target, i));
125
- }
126
- }
127
-
128
- void sub_sum_vec(double* sum_vec, VALUE target) {
129
- long i;
130
- const long n_dimensions = RARRAY_LEN(target);
131
-
132
- for (i = 0; i < n_dimensions; i++) {
133
- sum_vec[i] -= NUM2DBL(rb_ary_entry(target, i));
134
- }
135
- }
136
-
137
- /**
138
- * @!visibility private
139
- */
140
- typedef struct {
141
- char* criterion;
142
- long n_classes;
143
- double impurity;
144
- } split_opts_cls;
145
- /**
146
- * @!visibility private
147
- */
148
- static void iter_find_split_params_cls(na_loop_t const* lp) {
149
- const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
150
- const double* f = (double*)NDL_PTR(lp, 1);
151
- const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
152
- const long n_elements = NDL_SHAPE(lp, 0)[0];
153
- const char* criterion = ((split_opts_cls*)lp->opt_ptr)->criterion;
154
- const long n_classes = ((split_opts_cls*)lp->opt_ptr)->n_classes;
155
- const double w_impurity = ((split_opts_cls*)lp->opt_ptr)->impurity;
156
- double* params = (double*)NDL_PTR(lp, 3);
157
- long i;
158
- long curr_pos = 0;
159
- long next_pos = 0;
160
- long n_l_elements = 0;
161
- long n_r_elements = n_elements;
162
- double curr_el = f[o[0]];
163
- double last_el = f[o[n_elements - 1]];
164
- double next_el;
165
- double l_impurity;
166
- double r_impurity;
167
- double gain;
168
- double* l_histogram = alloc_dbl_array(n_classes);
169
- double* r_histogram = alloc_dbl_array(n_classes);
170
-
171
- /* Initialize optimal parameters. */
172
- params[0] = 0.0; /* left impurity */
173
- params[1] = w_impurity; /* right impurity */
174
- params[2] = curr_el; /* threshold */
175
- params[3] = 0.0; /* gain */
176
-
177
- /* Initialize child node variables. */
178
- for (i = 0; i < n_elements; i++) {
179
- r_histogram[y[o[i]]] += 1.0;
180
- }
181
-
182
- /* Find optimal parameters. */
183
- while (curr_pos < n_elements && curr_el != last_el) {
184
- next_el = f[o[next_pos]];
185
- while (next_pos < n_elements && next_el == curr_el) {
186
- l_histogram[y[o[next_pos]]] += 1;
187
- n_l_elements++;
188
- r_histogram[y[o[next_pos]]] -= 1;
189
- n_r_elements--;
190
- next_pos++;
191
- next_el = f[o[next_pos]];
192
- }
193
- /* Calculate gain of new split. */
194
- l_impurity = calc_impurity_cls(criterion, l_histogram, n_l_elements, n_classes);
195
- r_impurity = calc_impurity_cls(criterion, r_histogram, n_r_elements, n_classes);
196
- gain = w_impurity - (n_l_elements * l_impurity + n_r_elements * r_impurity) / n_elements;
197
- /* Update optimal parameters. */
198
- if (gain > params[3]) {
199
- params[0] = l_impurity;
200
- params[1] = r_impurity;
201
- params[2] = 0.5 * (curr_el + next_el);
202
- params[3] = gain;
203
- }
204
- if (next_pos == n_elements)
205
- break;
206
- curr_pos = next_pos;
207
- curr_el = f[o[curr_pos]];
208
- }
209
-
210
- xfree(l_histogram);
211
- xfree(r_histogram);
212
- }
213
- /**
214
- * @!visibility private
215
- * Find for split point with maximum information gain.
216
- *
217
- * @overload find_split_params(criterion, impurity, order, features, labels, n_classes) -> Array<Float>
218
- *
219
- * @param criterion [String] The function to evaluate spliting point. Supported criteria are 'gini' and 'entropy'.
220
- * @param impurity [Float] The impurity of whole dataset.
221
- * @param order [Numo::Int32] (shape: [n_elements]) The element indices sorted according to feature values.
222
- * @param features [Numo::DFloat] (shape: [n_elements]) The feature values.
223
- * @param labels [Numo::Int32] (shape: [n_elements]) The labels.
224
- * @param n_classes [Integer] The number of classes.
225
- * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
226
- */
227
- static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
228
- VALUE n_classes) {
229
- ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
230
- size_t out_shape[1] = {4};
231
- ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
232
- ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
233
- split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
234
- VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
235
- VALUE results = rb_ary_new2(4);
236
- double* params_ptr = (double*)na_get_pointer_for_read(params);
237
- rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
238
- rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
239
- rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
240
- rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
241
- RB_GC_GUARD(params);
242
- RB_GC_GUARD(criterion);
243
- return results;
244
- }
245
-
246
- /**
247
- * @!visibility private
248
- */
249
- typedef struct {
250
- char* criterion;
251
- double impurity;
252
- } split_opts_reg;
253
- /**
254
- * @!visibility private
255
- */
256
- static void iter_find_split_params_reg(na_loop_t const* lp) {
257
- const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
258
- const double* f = (double*)NDL_PTR(lp, 1);
259
- const double* y = (double*)NDL_PTR(lp, 2);
260
- const long n_elements = NDL_SHAPE(lp, 0)[0];
261
- const long n_outputs = NDL_SHAPE(lp, 2)[1];
262
- const char* criterion = ((split_opts_reg*)lp->opt_ptr)->criterion;
263
- const double w_impurity = ((split_opts_reg*)lp->opt_ptr)->impurity;
264
- double* params = (double*)NDL_PTR(lp, 3);
265
- long i, j;
266
- long curr_pos = 0;
267
- long next_pos = 0;
268
- long n_l_elements = 0;
269
- long n_r_elements = n_elements;
270
- double curr_el = f[o[0]];
271
- double last_el = f[o[n_elements - 1]];
272
- double next_el;
273
- double l_impurity;
274
- double r_impurity;
275
- double gain;
276
- double* l_sum_vec = alloc_dbl_array(n_outputs);
277
- double* r_sum_vec = alloc_dbl_array(n_outputs);
278
- double target_var;
279
- VALUE l_target_vecs = rb_ary_new();
280
- VALUE r_target_vecs = rb_ary_new();
281
- VALUE target;
282
-
283
- /* Initialize optimal parameters. */
284
- params[0] = 0.0; /* left impurity */
285
- params[1] = w_impurity; /* right impurity */
286
- params[2] = curr_el; /* threshold */
287
- params[3] = 0.0; /* gain */
288
-
289
- /* Initialize child node variables. */
290
- for (i = 0; i < n_elements; i++) {
291
- target = rb_ary_new2(n_outputs);
292
- for (j = 0; j < n_outputs; j++) {
293
- target_var = y[o[i] * n_outputs + j];
294
- rb_ary_store(target, j, DBL2NUM(target_var));
295
- r_sum_vec[j] += target_var;
296
- }
297
- rb_ary_push(r_target_vecs, target);
298
- }
299
-
300
- /* Find optimal parameters. */
301
- while (curr_pos < n_elements && curr_el != last_el) {
302
- next_el = f[o[next_pos]];
303
- while (next_pos < n_elements && next_el == curr_el) {
304
- target = rb_ary_shift(r_target_vecs);
305
- n_r_elements--;
306
- sub_sum_vec(r_sum_vec, target);
307
- rb_ary_push(l_target_vecs, target);
308
- n_l_elements++;
309
- add_sum_vec(l_sum_vec, target);
310
- next_pos++;
311
- next_el = f[o[next_pos]];
312
- }
313
- /* Calculate gain of new split. */
314
- l_impurity = calc_impurity_reg(criterion, l_target_vecs, l_sum_vec);
315
- r_impurity = calc_impurity_reg(criterion, r_target_vecs, r_sum_vec);
316
- gain = w_impurity - (n_l_elements * l_impurity + n_r_elements * r_impurity) / n_elements;
317
- /* Update optimal parameters. */
318
- if (gain > params[3]) {
319
- params[0] = l_impurity;
320
- params[1] = r_impurity;
321
- params[2] = 0.5 * (curr_el + next_el);
322
- params[3] = gain;
323
- }
324
- if (next_pos == n_elements)
325
- break;
326
- curr_pos = next_pos;
327
- curr_el = f[o[curr_pos]];
328
- }
329
-
330
- xfree(l_sum_vec);
331
- xfree(r_sum_vec);
332
- }
333
- /**
334
- * @!visibility private
335
- * Find for split point with maximum information gain.
336
- *
337
- * @overload find_split_params(criterion, impurity, order, features, targets) -> Array<Float>
338
- *
339
- * @param criterion [String] The function to evaluate spliting point. Supported criteria are 'mae' and 'mse'.
340
- * @param impurity [Float] The impurity of whole dataset.
341
- * @param order [Numo::Int32] (shape: [n_samples]) The element indices sorted according to feature values in ascending order.
342
- * @param features [Numo::DFloat] (shape: [n_samples]) The feature values.
343
- * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
344
- * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
345
- */
346
- static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
347
- ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
348
- size_t out_shape[1] = {4};
349
- ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
350
- ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
351
- split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
352
- VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
353
- VALUE results = rb_ary_new2(4);
354
- double* params_ptr = (double*)na_get_pointer_for_read(params);
355
- rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
356
- rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
357
- rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
358
- rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
359
- RB_GC_GUARD(params);
360
- RB_GC_GUARD(criterion);
361
- return results;
362
- }
363
-
364
- /**
365
- * @!visibility private
366
- */
367
- static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
368
- const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
369
- const double* f = (double*)NDL_PTR(lp, 1);
370
- const double* g = (double*)NDL_PTR(lp, 2);
371
- const double* h = (double*)NDL_PTR(lp, 3);
372
- const double s_grad = ((double*)lp->opt_ptr)[0];
373
- const double s_hess = ((double*)lp->opt_ptr)[1];
374
- const double reg_lambda = ((double*)lp->opt_ptr)[2];
375
- const long n_elements = NDL_SHAPE(lp, 0)[0];
376
- double* params = (double*)NDL_PTR(lp, 4);
377
- long curr_pos = 0;
378
- long next_pos = 0;
379
- double curr_el = f[o[0]];
380
- double last_el = f[o[n_elements - 1]];
381
- double next_el;
382
- double l_grad = 0.0;
383
- double l_hess = 0.0;
384
- double r_grad;
385
- double r_hess;
386
- double threshold = curr_el;
387
- double gain_max = 0.0;
388
- double gain;
389
-
390
- /* Find optimal parameters. */
391
- while (curr_pos < n_elements && curr_el != last_el) {
392
- next_el = f[o[next_pos]];
393
- while (next_pos < n_elements && next_el == curr_el) {
394
- l_grad += g[o[next_pos]];
395
- l_hess += h[o[next_pos]];
396
- next_pos++;
397
- next_el = f[o[next_pos]];
398
- }
399
- /* Calculate gain of new split. */
400
- r_grad = s_grad - l_grad;
401
- r_hess = s_hess - l_hess;
402
- gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
403
- (s_grad * s_grad) / (s_hess + reg_lambda);
404
- /* Update optimal parameters. */
405
- if (gain > gain_max) {
406
- threshold = 0.5 * (curr_el + next_el);
407
- gain_max = gain;
408
- }
409
- if (next_pos == n_elements) {
410
- break;
411
- }
412
- curr_pos = next_pos;
413
- curr_el = f[o[curr_pos]];
414
- }
415
-
416
- params[0] = threshold;
417
- params[1] = gain_max;
418
- }
419
-
420
- /**
421
- * @!visibility private
422
- * Find for split point with maximum information gain.
423
- *
424
- * @overload find_split_params(order, features, gradients, hessians, sum_gradient, sum_hessian, reg_lambda) -> Array<Float>
425
- * @param order [Numo::Int32] (shape: [n_elements]) The element indices sorted according to feature values.
426
- * @param features [Numo::DFloat] (shape: [n_elements]) The feature values.
427
- * @param gradients [Numo::DFloat] (shape: [n_elements]) The gradient values.
428
- * @param hessians [Numo::DFloat] (shape: [n_elements]) The hessian values.
429
- * @param sum_gradient [Float] The sum of gradient values.
430
- * @param sum_hessian [Float] The sum of hessian values.
431
- * @param reg_lambda [Float] The L2 regularization term on weight.
432
- * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
433
- */
434
- static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
435
- VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
436
- ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
437
- size_t out_shape[1] = {2};
438
- ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
439
- ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
440
- double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
441
- VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
442
- VALUE results = rb_ary_new2(2);
443
- double* params_ptr = (double*)na_get_pointer_for_read(params);
444
- rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
445
- rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
446
- RB_GC_GUARD(params);
447
- return results;
448
- }
449
-
450
- /**
451
- * @!visibility private
452
- * Calculate impurity based on criterion.
453
- *
454
- * @overload node_impurity(criterion, y, n_classes) -> Float
455
- *
456
- * @param criterion [String] The function to calculate impurity. Supported criteria are 'gini' and 'entropy'.
457
- * @param y_nary [Numo::Int32] (shape: [n_samples]) The labels.
458
- * @param n_elements_ [Integer] The number of elements.
459
- * @param n_classes_ [Integer] The number of classes.
460
- * @return [Float] impurity
461
- */
462
- static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
463
- long i;
464
- const long n_classes = NUM2LONG(n_classes_);
465
- const long n_elements = NUM2LONG(n_elements_);
466
- const int32_t* y = (int32_t*)na_get_pointer_for_read(y_nary);
467
- double* histogram = alloc_dbl_array(n_classes);
468
- VALUE ret;
469
-
470
- for (i = 0; i < n_elements; i++) {
471
- histogram[y[i]] += 1;
472
- }
473
-
474
- ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
475
-
476
- xfree(histogram);
477
-
478
- RB_GC_GUARD(y_nary);
479
- RB_GC_GUARD(criterion);
480
-
481
- return ret;
482
- }
483
-
484
- /**
485
- * @!visibility private
486
- * Calculate impurity based on criterion.
487
- *
488
- * @overload node_impurity(criterion, y) -> Float
489
- *
490
- * @param criterion [String] The function to calculate impurity. Supported criteria are 'mae' and 'mse'.
491
- * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The taget values.
492
- * @return [Float] impurity
493
- */
494
- static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
495
- long i;
496
- const long n_elements = RARRAY_LEN(y);
497
- const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
498
- double* sum_vec = alloc_dbl_array(n_outputs);
499
- VALUE target_vecs = rb_ary_new();
500
- VALUE target;
501
- VALUE ret;
502
-
503
- for (i = 0; i < n_elements; i++) {
504
- target = rb_ary_entry(y, i);
505
- add_sum_vec(sum_vec, target);
506
- rb_ary_push(target_vecs, target);
507
- }
508
-
509
- ret = DBL2NUM(calc_impurity_reg(StringValuePtr(criterion), target_vecs, sum_vec));
510
-
511
- xfree(sum_vec);
512
-
513
- RB_GC_GUARD(criterion);
514
-
515
- return ret;
516
- }
517
-
518
- void init_tree_module() {
519
- VALUE mTree = rb_define_module_under(mRumale, "Tree");
520
- /**
521
- * Document-module: Rumale::Tree::ExtDecisionTreeClassifier
522
- * @!visibility private
523
- * The mixin module consisting of extension method for DecisionTreeClassifier class.
524
- * This module is used internally.
525
- */
526
- VALUE mExtDTreeCls = rb_define_module_under(mTree, "ExtDecisionTreeClassifier");
527
- /**
528
- * Document-module: Rumale::Tree::ExtDecisionTreeRegressor
529
- * @!visibility private
530
- * The mixin module consisting of extension method for DecisionTreeRegressor class.
531
- * This module is used internally.
532
- */
533
- VALUE mExtDTreeReg = rb_define_module_under(mTree, "ExtDecisionTreeRegressor");
534
- /**
535
- * Document-module: Rumale::Tree::ExtGradientTreeRegressor
536
- * @!visibility private
537
- * The mixin module consisting of extension method for GradientTreeRegressor class.
538
- * This module is used internally.
539
- */
540
- VALUE mExtGTreeReg = rb_define_module_under(mTree, "ExtGradientTreeRegressor");
541
-
542
- rb_define_private_method(mExtDTreeCls, "find_split_params", find_split_params_cls, 6);
543
- rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 5);
544
- rb_define_private_method(mExtGTreeReg, "find_split_params", find_split_params_grad_reg, 7);
545
- rb_define_private_method(mExtDTreeCls, "node_impurity", node_impurity_cls, 4);
546
- rb_define_private_method(mExtDTreeReg, "node_impurity", node_impurity_reg, 2);
547
- }
data/ext/rumale/tree.h DELETED
@@ -1,14 +0,0 @@
1
- #ifndef RUMALE_TREE_H
2
- #define RUMALE_TREE_H 1
3
-
4
- #include <math.h>
5
- #include <string.h>
6
-
7
- #include <ruby.h>
8
-
9
- #include <numo/narray.h>
10
- #include <numo/template.h>
11
-
12
- void init_tree_module();
13
-
14
- #endif /* RUMALE_TREE_H */
data/rumale.gemspec DELETED
@@ -1,49 +0,0 @@
1
# Put lib/ on the load path so the version constant can be required from the source tree.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rumale/version'

Gem::Specification.new do |spec|
  spec.name = 'rumale'
  spec.version = Rumale::VERSION
  spec.authors = ['yoshoku']
  spec.email = ['yoshoku@outlook.com']

  spec.summary = <<~MSG
    Rumale is a machine learning library in Ruby.
    Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  MSG
  spec.description = <<~MSG
    Rumale is a machine learning library in Ruby.
    Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
    Rumale supports Support Vector Machine,
    Logistic Regression, Ridge, Lasso,
    Multi-layer Perceptron,
    Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
    K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
    Multidimensional Scaling, t-SNE,
    Fisher Discriminant Analysis, Neighbourhood Component Analysis,
    Principal Component Analysis, Non-negative Matrix Factorization,
    and many other algorithms.
  MSG
  spec.homepage = 'https://github.com/yoshoku/rumale'
  spec.license = 'BSD-2-Clause'

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']
  # Native extension built at install time.
  spec.extensions = ['ext/rumale/extconf.rb']

  spec.metadata = {
    'homepage_uri' => 'https://github.com/yoshoku/rumale',
    'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md',
    'source_code_uri' => 'https://github.com/yoshoku/rumale',
    'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
    'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
  }

  spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
  spec.add_runtime_dependency 'lbfgsb', '>=0.3.0'
end