rumale 0.23.0 → 0.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.github/workflows/coverage.yml +2 -0
- data/CHANGELOG.md +6 -0
- data/README.md +1 -1
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +52 -84
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +1 -1
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/version.rb +1 -1
- metadata +9 -9
- data/.travis.yml +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
|
4
|
+
data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
|
7
|
+
data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
|
data/.clang-format
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
---
|
2
|
+
Language: Cpp
|
3
|
+
# BasedOnStyle: LLVM
|
4
|
+
AccessModifierOffset: -2
|
5
|
+
AlignAfterOpenBracket: Align
|
6
|
+
AlignConsecutiveMacros: false
|
7
|
+
AlignConsecutiveAssignments: false
|
8
|
+
AlignConsecutiveBitFields: false
|
9
|
+
AlignConsecutiveDeclarations: false
|
10
|
+
AlignEscapedNewlines: Right
|
11
|
+
AlignOperands: Align
|
12
|
+
AlignTrailingComments: true
|
13
|
+
AllowAllArgumentsOnNextLine: true
|
14
|
+
AllowAllConstructorInitializersOnNextLine: true
|
15
|
+
AllowAllParametersOfDeclarationOnNextLine: true
|
16
|
+
AllowShortEnumsOnASingleLine: true
|
17
|
+
AllowShortBlocksOnASingleLine: Never
|
18
|
+
AllowShortCaseLabelsOnASingleLine: false
|
19
|
+
AllowShortFunctionsOnASingleLine: All
|
20
|
+
AllowShortLambdasOnASingleLine: All
|
21
|
+
AllowShortIfStatementsOnASingleLine: Never
|
22
|
+
AllowShortLoopsOnASingleLine: false
|
23
|
+
AlwaysBreakAfterDefinitionReturnType: None
|
24
|
+
AlwaysBreakAfterReturnType: None
|
25
|
+
AlwaysBreakBeforeMultilineStrings: false
|
26
|
+
AlwaysBreakTemplateDeclarations: MultiLine
|
27
|
+
BinPackArguments: true
|
28
|
+
BinPackParameters: true
|
29
|
+
BraceWrapping:
|
30
|
+
AfterCaseLabel: false
|
31
|
+
AfterClass: false
|
32
|
+
AfterControlStatement: Never
|
33
|
+
AfterEnum: false
|
34
|
+
AfterFunction: false
|
35
|
+
AfterNamespace: false
|
36
|
+
AfterObjCDeclaration: false
|
37
|
+
AfterStruct: false
|
38
|
+
AfterUnion: false
|
39
|
+
AfterExternBlock: false
|
40
|
+
BeforeCatch: false
|
41
|
+
BeforeElse: false
|
42
|
+
BeforeLambdaBody: false
|
43
|
+
BeforeWhile: false
|
44
|
+
IndentBraces: false
|
45
|
+
SplitEmptyFunction: true
|
46
|
+
SplitEmptyRecord: true
|
47
|
+
SplitEmptyNamespace: true
|
48
|
+
BreakBeforeBinaryOperators: None
|
49
|
+
BreakBeforeBraces: Attach
|
50
|
+
BreakBeforeInheritanceComma: false
|
51
|
+
BreakInheritanceList: BeforeColon
|
52
|
+
BreakBeforeTernaryOperators: true
|
53
|
+
BreakConstructorInitializersBeforeComma: false
|
54
|
+
BreakConstructorInitializers: BeforeColon
|
55
|
+
BreakAfterJavaFieldAnnotations: false
|
56
|
+
BreakStringLiterals: true
|
57
|
+
ColumnLimit: 128
|
58
|
+
CommentPragmas: '^ IWYU pragma:'
|
59
|
+
CompactNamespaces: false
|
60
|
+
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
61
|
+
ConstructorInitializerIndentWidth: 4
|
62
|
+
ContinuationIndentWidth: 4
|
63
|
+
Cpp11BracedListStyle: true
|
64
|
+
DeriveLineEnding: true
|
65
|
+
DerivePointerAlignment: false
|
66
|
+
DisableFormat: false
|
67
|
+
ExperimentalAutoDetectBinPacking: false
|
68
|
+
FixNamespaceComments: true
|
69
|
+
ForEachMacros:
|
70
|
+
- foreach
|
71
|
+
- Q_FOREACH
|
72
|
+
- BOOST_FOREACH
|
73
|
+
IncludeBlocks: Preserve
|
74
|
+
IncludeCategories:
|
75
|
+
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
76
|
+
Priority: 2
|
77
|
+
SortPriority: 0
|
78
|
+
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
79
|
+
Priority: 3
|
80
|
+
SortPriority: 0
|
81
|
+
- Regex: '.*'
|
82
|
+
Priority: 1
|
83
|
+
SortPriority: 0
|
84
|
+
IncludeIsMainRegex: '(Test)?$'
|
85
|
+
IncludeIsMainSourceRegex: ''
|
86
|
+
IndentCaseLabels: false
|
87
|
+
IndentCaseBlocks: false
|
88
|
+
IndentGotoLabels: true
|
89
|
+
IndentPPDirectives: None
|
90
|
+
IndentExternBlock: AfterExternBlock
|
91
|
+
IndentWidth: 2
|
92
|
+
IndentWrappedFunctionNames: false
|
93
|
+
InsertTrailingCommas: None
|
94
|
+
JavaScriptQuotes: Leave
|
95
|
+
JavaScriptWrapImports: true
|
96
|
+
KeepEmptyLinesAtTheStartOfBlocks: true
|
97
|
+
MacroBlockBegin: ''
|
98
|
+
MacroBlockEnd: ''
|
99
|
+
MaxEmptyLinesToKeep: 1
|
100
|
+
NamespaceIndentation: None
|
101
|
+
ObjCBinPackProtocolList: Auto
|
102
|
+
ObjCBlockIndentWidth: 2
|
103
|
+
ObjCBreakBeforeNestedBlockParam: true
|
104
|
+
ObjCSpaceAfterProperty: false
|
105
|
+
ObjCSpaceBeforeProtocolList: true
|
106
|
+
PenaltyBreakAssignment: 2
|
107
|
+
PenaltyBreakBeforeFirstCallParameter: 19
|
108
|
+
PenaltyBreakComment: 300
|
109
|
+
PenaltyBreakFirstLessLess: 120
|
110
|
+
PenaltyBreakString: 1000
|
111
|
+
PenaltyBreakTemplateDeclaration: 10
|
112
|
+
PenaltyExcessCharacter: 1000000
|
113
|
+
PenaltyReturnTypeOnItsOwnLine: 60
|
114
|
+
PointerAlignment: Left
|
115
|
+
ReflowComments: true
|
116
|
+
SortIncludes: true
|
117
|
+
SortUsingDeclarations: true
|
118
|
+
SpaceAfterCStyleCast: false
|
119
|
+
SpaceAfterLogicalNot: false
|
120
|
+
SpaceAfterTemplateKeyword: true
|
121
|
+
SpaceBeforeAssignmentOperators: true
|
122
|
+
SpaceBeforeCpp11BracedList: false
|
123
|
+
SpaceBeforeCtorInitializerColon: true
|
124
|
+
SpaceBeforeInheritanceColon: true
|
125
|
+
SpaceBeforeParens: ControlStatements
|
126
|
+
SpaceBeforeRangeBasedForLoopColon: true
|
127
|
+
SpaceInEmptyBlock: false
|
128
|
+
SpaceInEmptyParentheses: false
|
129
|
+
SpacesBeforeTrailingComments: 1
|
130
|
+
SpacesInAngles: false
|
131
|
+
SpacesInConditionalStatement: false
|
132
|
+
SpacesInContainerLiterals: true
|
133
|
+
SpacesInCStyleCastParentheses: false
|
134
|
+
SpacesInParentheses: false
|
135
|
+
SpacesInSquareBrackets: false
|
136
|
+
SpaceBeforeSquareBrackets: false
|
137
|
+
Standard: Latest
|
138
|
+
StatementMacros:
|
139
|
+
- Q_UNUSED
|
140
|
+
- QT_REQUIRE_VERSION
|
141
|
+
TabWidth: 8
|
142
|
+
UseCRLF: false
|
143
|
+
UseTab: Never
|
144
|
+
WhitespaceSensitiveMacros:
|
145
|
+
- STRINGIZE
|
146
|
+
- PP_STRINGIZE
|
147
|
+
- BOOST_PP_STRINGIZE
|
148
|
+
...
|
149
|
+
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# 0.23.1
|
2
|
+
- Fix all estimators to return inference results in a contiguous narray.
|
3
|
+
- Fix to use until statement instead of recursive call on apply methods of tree estimators.
|
4
|
+
- Rename native extension files.
|
5
|
+
- Introduce clang-format for native extension codes.
|
6
|
+
|
1
7
|
# 0.23.0
|
2
8
|
## Breaking change
|
3
9
|
- Change automatically selected solver from sgd to lbfgs in
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions
|
5
|
+
[![Build Status](https://github.com/yoshoku/rumale/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
|
6
6
|
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
|
8
8
|
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
|
data/Rakefile
CHANGED
data/ext/rumale/extconf.rb
CHANGED
data/ext/rumale/tree.c
CHANGED
@@ -2,17 +2,13 @@
|
|
2
2
|
|
3
3
|
RUBY_EXTERN VALUE mRumale;
|
4
4
|
|
5
|
-
double*
|
6
|
-
alloc_dbl_array(const long n_dimensions)
|
7
|
-
{
|
5
|
+
double* alloc_dbl_array(const long n_dimensions) {
|
8
6
|
double* arr = ALLOC_N(double, n_dimensions);
|
9
7
|
memset(arr, 0, n_dimensions * sizeof(double));
|
10
8
|
return arr;
|
11
9
|
}
|
12
10
|
|
13
|
-
double
|
14
|
-
calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
|
15
|
-
{
|
11
|
+
double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
|
16
12
|
long i;
|
17
13
|
double el;
|
18
14
|
double gini = 0.0;
|
@@ -25,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
|
|
25
21
|
return 1.0 - gini;
|
26
22
|
}
|
27
23
|
|
28
|
-
double
|
29
|
-
calc_entropy(double* histogram, const long n_elements, const long n_classes)
|
30
|
-
{
|
24
|
+
double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
|
31
25
|
long i;
|
32
26
|
double el;
|
33
27
|
double entropy = 0.0;
|
@@ -41,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
|
|
41
35
|
}
|
42
36
|
|
43
37
|
VALUE
|
44
|
-
calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
|
45
|
-
{
|
38
|
+
calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
|
46
39
|
long i;
|
47
40
|
VALUE mean_vec = rb_ary_new2(n_dimensions);
|
48
41
|
|
@@ -53,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
|
|
53
46
|
return mean_vec;
|
54
47
|
}
|
55
48
|
|
56
|
-
double
|
57
|
-
calc_vec_mae(VALUE vec_a, VALUE vec_b)
|
58
|
-
{
|
49
|
+
double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
|
59
50
|
long i;
|
60
51
|
const long n_dimensions = RARRAY_LEN(vec_a);
|
61
52
|
double sum = 0.0;
|
@@ -69,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
|
|
69
60
|
return sum / n_dimensions;
|
70
61
|
}
|
71
62
|
|
72
|
-
double
|
73
|
-
calc_vec_mse(VALUE vec_a, VALUE vec_b)
|
74
|
-
{
|
63
|
+
double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
|
75
64
|
long i;
|
76
65
|
const long n_dimensions = RARRAY_LEN(vec_a);
|
77
66
|
double sum = 0.0;
|
@@ -85,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
|
|
85
74
|
return sum / n_dimensions;
|
86
75
|
}
|
87
76
|
|
88
|
-
double
|
89
|
-
calc_mae(VALUE target_vecs, VALUE mean_vec)
|
90
|
-
{
|
77
|
+
double calc_mae(VALUE target_vecs, VALUE mean_vec) {
|
91
78
|
long i;
|
92
79
|
const long n_elements = RARRAY_LEN(target_vecs);
|
93
80
|
double sum = 0.0;
|
@@ -99,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
|
|
99
86
|
return sum / n_elements;
|
100
87
|
}
|
101
88
|
|
102
|
-
double
|
103
|
-
calc_mse(VALUE target_vecs, VALUE mean_vec)
|
104
|
-
{
|
89
|
+
double calc_mse(VALUE target_vecs, VALUE mean_vec) {
|
105
90
|
long i;
|
106
91
|
const long n_elements = RARRAY_LEN(target_vecs);
|
107
92
|
double sum = 0.0;
|
@@ -113,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
|
|
113
98
|
return sum / n_elements;
|
114
99
|
}
|
115
100
|
|
116
|
-
double
|
117
|
-
calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
|
118
|
-
{
|
101
|
+
double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
|
119
102
|
if (strcmp(criterion, "entropy") == 0) {
|
120
103
|
return calc_entropy(histogram, n_elements, n_classes);
|
121
104
|
}
|
122
105
|
return calc_gini_coef(histogram, n_elements, n_classes);
|
123
106
|
}
|
124
107
|
|
125
|
-
double
|
126
|
-
calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
|
127
|
-
{
|
108
|
+
double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
|
128
109
|
const long n_elements = RARRAY_LEN(target_vecs);
|
129
110
|
const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
|
130
111
|
VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
|
@@ -135,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
|
|
135
116
|
return calc_mse(target_vecs, mean_vec);
|
136
117
|
}
|
137
118
|
|
138
|
-
void
|
139
|
-
add_sum_vec(double* sum_vec, VALUE target)
|
140
|
-
{
|
119
|
+
void add_sum_vec(double* sum_vec, VALUE target) {
|
141
120
|
long i;
|
142
121
|
const long n_dimensions = RARRAY_LEN(target);
|
143
122
|
|
@@ -146,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
|
|
146
125
|
}
|
147
126
|
}
|
148
127
|
|
149
|
-
void
|
150
|
-
sub_sum_vec(double* sum_vec, VALUE target)
|
151
|
-
{
|
128
|
+
void sub_sum_vec(double* sum_vec, VALUE target) {
|
152
129
|
long i;
|
153
130
|
const long n_dimensions = RARRAY_LEN(target);
|
154
131
|
|
@@ -168,9 +145,7 @@ typedef struct {
|
|
168
145
|
/**
|
169
146
|
* @!visibility private
|
170
147
|
*/
|
171
|
-
static void
|
172
|
-
iter_find_split_params_cls(na_loop_t const* lp)
|
173
|
-
{
|
148
|
+
static void iter_find_split_params_cls(na_loop_t const* lp) {
|
174
149
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
175
150
|
const double* f = (double*)NDL_PTR(lp, 1);
|
176
151
|
const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
|
@@ -200,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
200
175
|
params[3] = 0.0; /* gain */
|
201
176
|
|
202
177
|
/* Initialize child node variables. */
|
203
|
-
for (i = 0; i < n_elements; i++) {
|
178
|
+
for (i = 0; i < n_elements; i++) {
|
179
|
+
r_histogram[y[o[i]]] += 1.0;
|
180
|
+
}
|
204
181
|
|
205
182
|
/* Find optimal parameters. */
|
206
183
|
while (curr_pos < n_elements && curr_el != last_el) {
|
@@ -224,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
224
201
|
params[2] = 0.5 * (curr_el + next_el);
|
225
202
|
params[3] = gain;
|
226
203
|
}
|
227
|
-
if (next_pos == n_elements)
|
204
|
+
if (next_pos == n_elements)
|
205
|
+
break;
|
228
206
|
curr_pos = next_pos;
|
229
207
|
curr_el = f[o[curr_pos]];
|
230
208
|
}
|
@@ -246,14 +224,13 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
246
224
|
* @param n_classes [Integer] The number of classes.
|
247
225
|
* @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
|
248
226
|
*/
|
249
|
-
static VALUE
|
250
|
-
|
251
|
-
{
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
|
227
|
+
static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
|
228
|
+
VALUE n_classes) {
|
229
|
+
ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
|
230
|
+
size_t out_shape[1] = {4};
|
231
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
232
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
|
233
|
+
split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
|
257
234
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
|
258
235
|
VALUE results = rb_ary_new2(4);
|
259
236
|
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
@@ -276,9 +253,7 @@ typedef struct {
|
|
276
253
|
/**
|
277
254
|
* @!visibility private
|
278
255
|
*/
|
279
|
-
static void
|
280
|
-
iter_find_split_params_reg(na_loop_t const* lp)
|
281
|
-
{
|
256
|
+
static void iter_find_split_params_reg(na_loop_t const* lp) {
|
282
257
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
283
258
|
const double* f = (double*)NDL_PTR(lp, 1);
|
284
259
|
const double* y = (double*)NDL_PTR(lp, 2);
|
@@ -346,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
|
|
346
321
|
params[2] = 0.5 * (curr_el + next_el);
|
347
322
|
params[3] = gain;
|
348
323
|
}
|
349
|
-
if (next_pos == n_elements)
|
324
|
+
if (next_pos == n_elements)
|
325
|
+
break;
|
350
326
|
curr_pos = next_pos;
|
351
327
|
curr_el = f[o[curr_pos]];
|
352
328
|
}
|
@@ -367,14 +343,12 @@ iter_find_split_params_reg(na_loop_t const* lp)
|
|
367
343
|
* @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
|
368
344
|
* @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
|
369
345
|
*/
|
370
|
-
static VALUE
|
371
|
-
|
372
|
-
{
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
|
377
|
-
split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
|
346
|
+
static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
|
347
|
+
ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
|
348
|
+
size_t out_shape[1] = {4};
|
349
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
350
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
|
351
|
+
split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
|
378
352
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
|
379
353
|
VALUE results = rb_ary_new2(4);
|
380
354
|
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
@@ -390,9 +364,7 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
|
|
390
364
|
/**
|
391
365
|
* @!visibility private
|
392
366
|
*/
|
393
|
-
static void
|
394
|
-
iter_find_split_params_grad_reg(na_loop_t const* lp)
|
395
|
-
{
|
367
|
+
static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
|
396
368
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
397
369
|
const double* f = (double*)NDL_PTR(lp, 1);
|
398
370
|
const double* g = (double*)NDL_PTR(lp, 2);
|
@@ -427,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
|
|
427
399
|
/* Calculate gain of new split. */
|
428
400
|
r_grad = s_grad - l_grad;
|
429
401
|
r_hess = s_hess - l_hess;
|
430
|
-
gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
|
431
|
-
(r_grad * r_grad) / (r_hess + reg_lambda) -
|
402
|
+
gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
|
432
403
|
(s_grad * s_grad) / (s_hess + reg_lambda);
|
433
404
|
/* Update optimal parameters. */
|
434
405
|
if (gain > gain_max) {
|
435
406
|
threshold = 0.5 * (curr_el + next_el);
|
436
407
|
gain_max = gain;
|
437
408
|
}
|
438
|
-
if (next_pos == n_elements)
|
409
|
+
if (next_pos == n_elements) {
|
410
|
+
break;
|
411
|
+
}
|
439
412
|
curr_pos = next_pos;
|
440
413
|
curr_el = f[o[curr_pos]];
|
441
414
|
}
|
@@ -458,15 +431,13 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
|
|
458
431
|
* @param reg_lambda [Float] The L2 regularization term on weight.
|
459
432
|
* @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
|
460
433
|
*/
|
461
|
-
static VALUE
|
462
|
-
|
463
|
-
|
464
|
-
{
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
|
469
|
-
double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
|
434
|
+
static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
|
435
|
+
VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
|
436
|
+
ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
|
437
|
+
size_t out_shape[1] = {2};
|
438
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
439
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
|
440
|
+
double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
|
470
441
|
VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
|
471
442
|
VALUE results = rb_ary_new2(2);
|
472
443
|
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
@@ -488,9 +459,7 @@ find_split_params_grad_reg
|
|
488
459
|
* @param n_classes_ [Integer] The number of classes.
|
489
460
|
* @return [Float] impurity
|
490
461
|
*/
|
491
|
-
static VALUE
|
492
|
-
node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
|
493
|
-
{
|
462
|
+
static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
|
494
463
|
long i;
|
495
464
|
const long n_classes = NUM2LONG(n_classes_);
|
496
465
|
const long n_elements = NUM2LONG(n_elements_);
|
@@ -498,7 +467,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
498
467
|
double* histogram = alloc_dbl_array(n_classes);
|
499
468
|
VALUE ret;
|
500
469
|
|
501
|
-
for (i = 0; i < n_elements; i++) {
|
470
|
+
for (i = 0; i < n_elements; i++) {
|
471
|
+
histogram[y[i]] += 1;
|
472
|
+
}
|
502
473
|
|
503
474
|
ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
|
504
475
|
|
@@ -520,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
520
491
|
* @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
|
521
492
|
* @return [Float] impurity
|
522
493
|
*/
|
523
|
-
static VALUE
|
524
|
-
node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
525
|
-
{
|
494
|
+
static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
|
526
495
|
long i;
|
527
496
|
const long n_elements = RARRAY_LEN(y);
|
528
497
|
const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
|
@@ -546,8 +515,7 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
|
546
515
|
return ret;
|
547
516
|
}
|
548
517
|
|
549
|
-
void init_tree_module()
|
550
|
-
{
|
518
|
+
void init_tree_module() {
|
551
519
|
VALUE mTree = rb_define_module_under(mRumale, "Tree");
|
552
520
|
/**
|
553
521
|
* Document-module: Rumale::Tree::ExtDecisionTreeClassifier
|
data/ext/rumale/tree.h
CHANGED
data/lib/rumale.rb
CHANGED
@@ -81,7 +81,7 @@ module Rumale
|
|
81
81
|
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
82
82
|
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
83
83
|
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
84
|
-
@mixing = Numo::Linalg.pinv(@components)
|
84
|
+
@mixing = Numo::Linalg.pinv(@components).dup
|
85
85
|
if @params[:n_components] == 1
|
86
86
|
@components = @components.flatten.dup
|
87
87
|
@mixing = @mixing.flatten.dup
|
@@ -161,7 +161,7 @@ module Rumale
|
|
161
161
|
|
162
162
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
163
163
|
|
164
|
-
return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
|
164
|
+
return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
|
165
165
|
|
166
166
|
n_samples, = x.shape
|
167
167
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
@@ -182,7 +182,7 @@ module Rumale
|
|
182
182
|
else
|
183
183
|
@estimators.map { |tree| tree.apply(x) }
|
184
184
|
end
|
185
|
-
Numo::Int32[*leaf_ids].transpose
|
185
|
+
Numo::Int32[*leaf_ids].transpose.dup
|
186
186
|
end
|
187
187
|
|
188
188
|
private
|
@@ -159,7 +159,7 @@ module Rumale
|
|
159
159
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
160
160
|
def apply(x)
|
161
161
|
x = check_convert_sample_array(x)
|
162
|
-
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
162
|
+
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
|
163
163
|
end
|
164
164
|
|
165
165
|
private
|
@@ -136,7 +136,7 @@ module Rumale
|
|
136
136
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
137
137
|
def apply(x)
|
138
138
|
x = check_convert_sample_array(x)
|
139
|
-
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
139
|
+
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
|
140
140
|
end
|
141
141
|
|
142
142
|
private
|
@@ -73,7 +73,7 @@ module Rumale
|
|
73
73
|
|
74
74
|
# random sampling.
|
75
75
|
@component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
|
76
|
-
@components = x[@component_indices, true]
|
76
|
+
@components = x[@component_indices, true].dup
|
77
77
|
|
78
78
|
# calculate normalizing factor.
|
79
79
|
kernel_mat = kernel_mat(@components)
|
@@ -152,7 +152,7 @@ module Rumale
|
|
152
152
|
|
153
153
|
if @classes.size > 2
|
154
154
|
probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
|
155
|
-
return (probs.transpose / probs.sum(axis: 1)).transpose
|
155
|
+
return (probs.transpose / probs.sum(axis: 1)).transpose.dup
|
156
156
|
end
|
157
157
|
|
158
158
|
n_samples, = x.shape
|
@@ -75,7 +75,7 @@ module Rumale
|
|
75
75
|
(Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
|
76
76
|
(Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
|
77
77
|
end
|
78
|
-
Numo::DFloat[*log_likelihoods].transpose
|
78
|
+
Numo::DFloat[*log_likelihoods].transpose.dup
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
@@ -67,7 +67,7 @@ module Rumale
|
|
67
67
|
log_likelihoods = Array.new(n_classes) do |l|
|
68
68
|
Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
|
69
69
|
end
|
70
|
-
Numo::DFloat[*log_likelihoods].transpose
|
70
|
+
Numo::DFloat[*log_likelihoods].transpose.dup
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/tree/node'
|
5
|
+
require 'rumale/rumaleext'
|
5
6
|
|
6
7
|
module Rumale
|
7
8
|
# This module consists of the classes that implement tree models.
|
@@ -44,21 +45,25 @@ module Rumale
|
|
44
45
|
# @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
|
45
46
|
def apply(x)
|
46
47
|
x = check_convert_sample_array(x)
|
47
|
-
Numo::Int32[*(Array.new(x.shape[0]) { |n|
|
48
|
+
Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
|
48
49
|
end
|
49
50
|
|
50
51
|
private
|
51
52
|
|
52
|
-
def
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
53
|
+
def partial_apply(tree, sample)
|
54
|
+
node = tree
|
55
|
+
until node.leaf
|
56
|
+
# :nocov:
|
57
|
+
node = if node.right.nil?
|
58
|
+
node.left
|
59
|
+
elsif node.left.nil?
|
60
|
+
node.right
|
61
|
+
# :nocov:
|
62
|
+
else
|
63
|
+
sample[node.feature_id] <= node.threshold ? node.left : node.right
|
64
|
+
end
|
61
65
|
end
|
66
|
+
node.leaf_id
|
62
67
|
end
|
63
68
|
|
64
69
|
def build_tree(x, y)
|
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'rumale/rumale'
|
4
3
|
require 'rumale/tree/base_decision_tree'
|
5
4
|
require 'rumale/base/classifier'
|
6
5
|
|
@@ -101,21 +100,25 @@ module Rumale
|
|
101
100
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
102
101
|
def predict_proba(x)
|
103
102
|
x = check_convert_sample_array(x)
|
104
|
-
Numo::DFloat[*(Array.new(x.shape[0]) { |n|
|
103
|
+
Numo::DFloat[*(Array.new(x.shape[0]) { |n| partial_predict_proba(@tree, x[n, true]) })]
|
105
104
|
end
|
106
105
|
|
107
106
|
private
|
108
107
|
|
109
|
-
def
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
108
|
+
def partial_predict_proba(tree, sample)
|
109
|
+
node = tree
|
110
|
+
until node.leaf
|
111
|
+
# :nocov:
|
112
|
+
node = if node.right.nil?
|
113
|
+
node.left
|
114
|
+
elsif node.left.nil?
|
115
|
+
node.right
|
116
|
+
# :nocov:
|
117
|
+
else
|
118
|
+
sample[node.feature_id] <= node.threshold ? node.left : node.right
|
119
|
+
end
|
118
120
|
end
|
121
|
+
node.probs
|
119
122
|
end
|
120
123
|
|
121
124
|
def stop_growing?(y)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'rumale/rumale'
|
4
3
|
require 'rumale/base/base_estimator'
|
5
4
|
require 'rumale/base/regressor'
|
5
|
+
require 'rumale/rumaleext'
|
6
6
|
require 'rumale/tree/node'
|
7
7
|
|
8
8
|
module Rumale
|
@@ -114,21 +114,25 @@ module Rumale
|
|
114
114
|
# @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
|
115
115
|
def apply(x)
|
116
116
|
x = check_convert_sample_array(x)
|
117
|
-
Numo::Int32[*(Array.new(x.shape[0]) { |n|
|
117
|
+
Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
|
118
118
|
end
|
119
119
|
|
120
120
|
private
|
121
121
|
|
122
|
-
def
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
122
|
+
def partial_apply(tree, sample)
|
123
|
+
node = tree
|
124
|
+
until node.leaf
|
125
|
+
# :nocov:
|
126
|
+
node = if node.right.nil?
|
127
|
+
node.left
|
128
|
+
elsif node.left.nil?
|
129
|
+
node.right
|
130
|
+
# :nocov:
|
131
|
+
else
|
132
|
+
sample[node.feature_id] <= node.threshold ? node.left : node.right
|
133
|
+
end
|
131
134
|
end
|
135
|
+
node.leaf_id
|
132
136
|
end
|
133
137
|
|
134
138
|
def build_tree(x, y, g, h)
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.23.0
|
4
|
+
version: 0.23.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -57,13 +57,13 @@ extensions:
|
|
57
57
|
- ext/rumale/extconf.rb
|
58
58
|
extra_rdoc_files: []
|
59
59
|
files:
|
60
|
+
- ".clang-format"
|
60
61
|
- ".coveralls.yml"
|
61
62
|
- ".github/workflows/build.yml"
|
62
63
|
- ".github/workflows/coverage.yml"
|
63
64
|
- ".gitignore"
|
64
65
|
- ".rspec"
|
65
66
|
- ".rubocop.yml"
|
66
|
-
- ".travis.yml"
|
67
67
|
- CHANGELOG.md
|
68
68
|
- CODE_OF_CONDUCT.md
|
69
69
|
- Gemfile
|
@@ -71,8 +71,8 @@ files:
|
|
71
71
|
- README.md
|
72
72
|
- Rakefile
|
73
73
|
- ext/rumale/extconf.rb
|
74
|
-
- ext/rumale/rumale.c
|
75
|
-
- ext/rumale/rumale.h
|
74
|
+
- ext/rumale/rumaleext.c
|
75
|
+
- ext/rumale/rumaleext.h
|
76
76
|
- ext/rumale/tree.c
|
77
77
|
- ext/rumale/tree.h
|
78
78
|
- lib/rumale.rb
|
@@ -218,7 +218,7 @@ metadata:
|
|
218
218
|
source_code_uri: https://github.com/yoshoku/rumale
|
219
219
|
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
220
220
|
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
221
|
-
post_install_message:
|
221
|
+
post_install_message:
|
222
222
|
rdoc_options: []
|
223
223
|
require_paths:
|
224
224
|
- lib
|
@@ -233,8 +233,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
233
233
|
- !ruby/object:Gem::Version
|
234
234
|
version: '0'
|
235
235
|
requirements: []
|
236
|
-
rubygems_version: 3.
|
237
|
-
signing_key:
|
236
|
+
rubygems_version: 3.1.6
|
237
|
+
signing_key:
|
238
238
|
specification_version: 4
|
239
239
|
summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
|
240
240
|
algorithms with interfaces similar to Scikit-Learn in Python.
|