rumale 0.23.0 → 0.23.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 0f66b61c7ae0d76fdccc2b95c8f1ae3ea181c1622024a0dd93a15dea17e9a632
- data.tar.gz: 1bf6934f79110b4bc59528bc6d656250f6e3fcb7834c9bf262e53f42fae78b69
+ metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
+ data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
  SHA512:
- metadata.gz: da5c9c6463d3fbefc2d48628b053379cfc7e71195780dcbceddefadd0211f15de509911a96e722d464640e9da1107be8298354f0df464ba63ca7e385632adcc8
- data.tar.gz: a54bce60f8d9c0f65a4ea2899d900b7fd7069181d5e76461dc48820314c603c3069323d96ceb8851d57910802b2c8f361a2c1c5f7345159736622cd23145fc87
+ metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
+ data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
data/.clang-format ADDED
@@ -0,0 +1,149 @@
+ ---
+ Language: Cpp
+ # BasedOnStyle: LLVM
+ AccessModifierOffset: -2
+ AlignAfterOpenBracket: Align
+ AlignConsecutiveMacros: false
+ AlignConsecutiveAssignments: false
+ AlignConsecutiveBitFields: false
+ AlignConsecutiveDeclarations: false
+ AlignEscapedNewlines: Right
+ AlignOperands: Align
+ AlignTrailingComments: true
+ AllowAllArgumentsOnNextLine: true
+ AllowAllConstructorInitializersOnNextLine: true
+ AllowAllParametersOfDeclarationOnNextLine: true
+ AllowShortEnumsOnASingleLine: true
+ AllowShortBlocksOnASingleLine: Never
+ AllowShortCaseLabelsOnASingleLine: false
+ AllowShortFunctionsOnASingleLine: All
+ AllowShortLambdasOnASingleLine: All
+ AllowShortIfStatementsOnASingleLine: Never
+ AllowShortLoopsOnASingleLine: false
+ AlwaysBreakAfterDefinitionReturnType: None
+ AlwaysBreakAfterReturnType: None
+ AlwaysBreakBeforeMultilineStrings: false
+ AlwaysBreakTemplateDeclarations: MultiLine
+ BinPackArguments: true
+ BinPackParameters: true
+ BraceWrapping:
+ AfterCaseLabel: false
+ AfterClass: false
+ AfterControlStatement: Never
+ AfterEnum: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: false
+ BeforeElse: false
+ BeforeLambdaBody: false
+ BeforeWhile: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+ BreakBeforeBinaryOperators: None
+ BreakBeforeBraces: Attach
+ BreakBeforeInheritanceComma: false
+ BreakInheritanceList: BeforeColon
+ BreakBeforeTernaryOperators: true
+ BreakConstructorInitializersBeforeComma: false
+ BreakConstructorInitializers: BeforeColon
+ BreakAfterJavaFieldAnnotations: false
+ BreakStringLiterals: true
+ ColumnLimit: 128
+ CommentPragmas: '^ IWYU pragma:'
+ CompactNamespaces: false
+ ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ ConstructorInitializerIndentWidth: 4
+ ContinuationIndentWidth: 4
+ Cpp11BracedListStyle: true
+ DeriveLineEnding: true
+ DerivePointerAlignment: false
+ DisableFormat: false
+ ExperimentalAutoDetectBinPacking: false
+ FixNamespaceComments: true
+ ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+ IncludeBlocks: Preserve
+ IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ SortPriority: 0
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+ Priority: 3
+ SortPriority: 0
+ - Regex: '.*'
+ Priority: 1
+ SortPriority: 0
+ IncludeIsMainRegex: '(Test)?$'
+ IncludeIsMainSourceRegex: ''
+ IndentCaseLabels: false
+ IndentCaseBlocks: false
+ IndentGotoLabels: true
+ IndentPPDirectives: None
+ IndentExternBlock: AfterExternBlock
+ IndentWidth: 2
+ IndentWrappedFunctionNames: false
+ InsertTrailingCommas: None
+ JavaScriptQuotes: Leave
+ JavaScriptWrapImports: true
+ KeepEmptyLinesAtTheStartOfBlocks: true
+ MacroBlockBegin: ''
+ MacroBlockEnd: ''
+ MaxEmptyLinesToKeep: 1
+ NamespaceIndentation: None
+ ObjCBinPackProtocolList: Auto
+ ObjCBlockIndentWidth: 2
+ ObjCBreakBeforeNestedBlockParam: true
+ ObjCSpaceAfterProperty: false
+ ObjCSpaceBeforeProtocolList: true
+ PenaltyBreakAssignment: 2
+ PenaltyBreakBeforeFirstCallParameter: 19
+ PenaltyBreakComment: 300
+ PenaltyBreakFirstLessLess: 120
+ PenaltyBreakString: 1000
+ PenaltyBreakTemplateDeclaration: 10
+ PenaltyExcessCharacter: 1000000
+ PenaltyReturnTypeOnItsOwnLine: 60
+ PointerAlignment: Left
+ ReflowComments: true
+ SortIncludes: true
+ SortUsingDeclarations: true
+ SpaceAfterCStyleCast: false
+ SpaceAfterLogicalNot: false
+ SpaceAfterTemplateKeyword: true
+ SpaceBeforeAssignmentOperators: true
+ SpaceBeforeCpp11BracedList: false
+ SpaceBeforeCtorInitializerColon: true
+ SpaceBeforeInheritanceColon: true
+ SpaceBeforeParens: ControlStatements
+ SpaceBeforeRangeBasedForLoopColon: true
+ SpaceInEmptyBlock: false
+ SpaceInEmptyParentheses: false
+ SpacesBeforeTrailingComments: 1
+ SpacesInAngles: false
+ SpacesInConditionalStatement: false
+ SpacesInContainerLiterals: true
+ SpacesInCStyleCastParentheses: false
+ SpacesInParentheses: false
+ SpacesInSquareBrackets: false
+ SpaceBeforeSquareBrackets: false
+ Standard: Latest
+ StatementMacros:
+ - Q_UNUSED
+ - QT_REQUIRE_VERSION
+ TabWidth: 8
+ UseCRLF: false
+ UseTab: Never
+ WhitespaceSensitiveMacros:
+ - STRINGIZE
+ - PP_STRINGIZE
+ - BOOST_PP_STRINGIZE
+ ...
+
data/.github/workflows/build.yml CHANGED
@@ -18,6 +18,8 @@ jobs:
  with:
  ruby-version: '2.7'
  - name: Build and test with Rake
+ env:
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
  run: |
  gem install bundler
  bundle install
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ # 0.23.1
+ - Fix all estimators to return inference results in a contiguous narray.
+ - Fix to use until statement instead of recursive call on apply methods of tree estimators.
+ - Rename native extension files.
+ - Introduce clang-format for native extension codes.
+
  # 0.23.0
  ## Breaking change
  - Change automalically selected solver from sgd to lbfgs in
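The "contiguous narray" entry above corresponds to the `.dup` calls added throughout the diff below: Numo's `transpose` and index views share strided memory with the source array, and `.dup` materializes them into independent, contiguous copies. A minimal illustrative sketch, not taken from the gem, assuming numo-narray's `Numo::NArray#contiguous?` predicate:

```ruby
# Sketch of the behavior behind the 0.23.1 "contiguous narray" fix.
# transpose returns a strided view over the original memory; dup copies
# it into a fresh, contiguous narray, which is what estimators now return.
require 'numo/narray'

a = Numo::DFloat.new(2, 3).seq

view = a.transpose      # strided view sharing a's memory
copy = a.transpose.dup  # independent, contiguous copy

p view.contiguous?      # => false
p copy.contiguous?      # => true
```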
data/README.md CHANGED
@@ -2,7 +2,7 @@
 
  ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
 
- [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
+ [![Build Status](https://github.com/yoshoku/rumale/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
data/Rakefile CHANGED
@@ -7,7 +7,8 @@ require 'rake/extensiontask'
 
  task :build => :compile
 
- Rake::ExtensionTask.new('rumale') do |ext|
+ Rake::ExtensionTask.new('rumaleext') do |ext|
+ ext.ext_dir = 'ext/rumale'
  ext.lib_dir = 'lib/rumale'
  end
 
data/ext/rumale/extconf.rb CHANGED
@@ -28,4 +28,4 @@ if RUBY_PLATFORM =~ /mswin|cygwin|mingw/
  end
  end
 
- create_makefile('rumale/rumale')
+ create_makefile('rumale/rumaleext')
data/ext/rumale/rumale.c → data/ext/rumale/rumaleext.c RENAMED
@@ -1,9 +1,8 @@
- #include "rumale.h"
+ #include "rumaleext.h"
 
  VALUE mRumale;
 
- void Init_rumale(void)
- {
+ void Init_rumaleext(void) {
  mRumale = rb_define_module("Rumale");
 
  init_tree_module();
data/ext/rumale/rumale.h → data/ext/rumale/rumaleext.h RENAMED
@@ -5,4 +5,4 @@
 
  #include "tree.h"
 
- #endif /* RUMALE_H */
+ #endif /* RUMALEEXT_H */
data/ext/rumale/tree.c CHANGED
@@ -2,17 +2,13 @@
 
  RUBY_EXTERN VALUE mRumale;
 
- double*
- alloc_dbl_array(const long n_dimensions)
- {
+ double* alloc_dbl_array(const long n_dimensions) {
  double* arr = ALLOC_N(double, n_dimensions);
  memset(arr, 0, n_dimensions * sizeof(double));
  return arr;
  }
 
- double
- calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
- {
+ double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
  long i;
  double el;
  double gini = 0.0;
@@ -25,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
  return 1.0 - gini;
  }
 
- double
- calc_entropy(double* histogram, const long n_elements, const long n_classes)
- {
+ double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
  long i;
  double el;
  double entropy = 0.0;
@@ -41,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
  }
 
  VALUE
- calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
- {
+ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
  long i;
  VALUE mean_vec = rb_ary_new2(n_dimensions);
 
@@ -53,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
  return mean_vec;
  }
 
- double
- calc_vec_mae(VALUE vec_a, VALUE vec_b)
- {
+ double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
  long i;
  const long n_dimensions = RARRAY_LEN(vec_a);
  double sum = 0.0;
@@ -69,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
  return sum / n_dimensions;
  }
 
- double
- calc_vec_mse(VALUE vec_a, VALUE vec_b)
- {
+ double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
  long i;
  const long n_dimensions = RARRAY_LEN(vec_a);
  double sum = 0.0;
@@ -85,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
  return sum / n_dimensions;
  }
 
- double
- calc_mae(VALUE target_vecs, VALUE mean_vec)
- {
+ double calc_mae(VALUE target_vecs, VALUE mean_vec) {
  long i;
  const long n_elements = RARRAY_LEN(target_vecs);
  double sum = 0.0;
@@ -99,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
  return sum / n_elements;
  }
 
- double
- calc_mse(VALUE target_vecs, VALUE mean_vec)
- {
+ double calc_mse(VALUE target_vecs, VALUE mean_vec) {
  long i;
  const long n_elements = RARRAY_LEN(target_vecs);
  double sum = 0.0;
@@ -113,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
  return sum / n_elements;
  }
 
- double
- calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
- {
+ double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
  if (strcmp(criterion, "entropy") == 0) {
  return calc_entropy(histogram, n_elements, n_classes);
  }
  return calc_gini_coef(histogram, n_elements, n_classes);
  }
 
- double
- calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
- {
+ double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
  const long n_elements = RARRAY_LEN(target_vecs);
  const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
  VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
@@ -135,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
  return calc_mse(target_vecs, mean_vec);
  }
 
- void
- add_sum_vec(double* sum_vec, VALUE target)
- {
+ void add_sum_vec(double* sum_vec, VALUE target) {
  long i;
  const long n_dimensions = RARRAY_LEN(target);
 
@@ -146,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
  }
  }
 
- void
- sub_sum_vec(double* sum_vec, VALUE target)
- {
+ void sub_sum_vec(double* sum_vec, VALUE target) {
  long i;
  const long n_dimensions = RARRAY_LEN(target);
 
@@ -168,9 +145,7 @@ typedef struct {
  /**
  * @!visibility private
  */
- static void
- iter_find_split_params_cls(na_loop_t const* lp)
- {
+ static void iter_find_split_params_cls(na_loop_t const* lp) {
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
  const double* f = (double*)NDL_PTR(lp, 1);
  const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
@@ -200,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
  params[3] = 0.0; /* gain */
 
  /* Initialize child node variables. */
- for (i = 0; i < n_elements; i++) { r_histogram[y[o[i]]] += 1.0; }
+ for (i = 0; i < n_elements; i++) {
+ r_histogram[y[o[i]]] += 1.0;
+ }
 
  /* Find optimal parameters. */
  while (curr_pos < n_elements && curr_el != last_el) {
@@ -224,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
  params[2] = 0.5 * (curr_el + next_el);
  params[3] = gain;
  }
- if (next_pos == n_elements) break;
+ if (next_pos == n_elements)
+ break;
  curr_pos = next_pos;
  curr_el = f[o[curr_pos]];
  }
@@ -246,14 +224,13 @@ iter_find_split_params_cls(na_loop_t const* lp)
  * @param n_classes [Integer] The number of classes.
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
  */
- static VALUE
- find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels, VALUE n_classes)
- {
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1} };
- size_t out_shape[1] = { 4 };
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout };
- split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
+ static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
+ VALUE n_classes) {
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
+ size_t out_shape[1] = {4};
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
+ split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
  VALUE results = rb_ary_new2(4);
  double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -276,9 +253,7 @@ typedef struct {
  /**
  * @!visibility private
  */
- static void
- iter_find_split_params_reg(na_loop_t const* lp)
- {
+ static void iter_find_split_params_reg(na_loop_t const* lp) {
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
  const double* f = (double*)NDL_PTR(lp, 1);
  const double* y = (double*)NDL_PTR(lp, 2);
@@ -346,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
  params[2] = 0.5 * (curr_el + next_el);
  params[3] = gain;
  }
- if (next_pos == n_elements) break;
+ if (next_pos == n_elements)
+ break;
  curr_pos = next_pos;
  curr_el = f[o[curr_pos]];
  }
@@ -367,14 +343,12 @@ iter_find_split_params_reg(na_loop_t const* lp)
  * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
  */
- static VALUE
- find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets)
- {
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2} };
- size_t out_shape[1] = { 4 };
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
- split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
+ static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
+ size_t out_shape[1] = {4};
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
+ split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
  VALUE results = rb_ary_new2(4);
  double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -390,9 +364,7 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
  /**
  * @!visibility private
  */
- static void
- iter_find_split_params_grad_reg(na_loop_t const* lp)
- {
+ static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
  const double* f = (double*)NDL_PTR(lp, 1);
  const double* g = (double*)NDL_PTR(lp, 2);
@@ -427,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
  /* Calculate gain of new split. */
  r_grad = s_grad - l_grad;
  r_hess = s_hess - l_hess;
- gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
- (r_grad * r_grad) / (r_hess + reg_lambda) -
+ gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
  (s_grad * s_grad) / (s_hess + reg_lambda);
  /* Update optimal parameters. */
  if (gain > gain_max) {
  threshold = 0.5 * (curr_el + next_el);
  gain_max = gain;
  }
- if (next_pos == n_elements) break;
+ if (next_pos == n_elements) {
+ break;
+ }
  curr_pos = next_pos;
  curr_el = f[o[curr_pos]];
  }
@@ -458,15 +431,13 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
  * @param reg_lambda [Float] The L2 regularization term on weight.
  * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
  */
- static VALUE
- find_split_params_grad_reg
- (VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians, VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda)
- {
- ndfunc_arg_in_t ain[4] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1} };
- size_t out_shape[1] = { 2 };
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
- double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
+ static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
+ VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
+ ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
+ size_t out_shape[1] = {2};
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
+ double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
  VALUE results = rb_ary_new2(2);
  double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -488,9 +459,7 @@ find_split_params_grad_reg
  * @param n_classes_ [Integer] The number of classes.
  * @return [Float] impurity
  */
- static VALUE
- node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
- {
+ static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
  long i;
  const long n_classes = NUM2LONG(n_classes_);
  const long n_elements = NUM2LONG(n_elements_);
@@ -498,7 +467,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
  double* histogram = alloc_dbl_array(n_classes);
  VALUE ret;
 
- for (i = 0; i < n_elements; i++) { histogram[y[i]] += 1; }
+ for (i = 0; i < n_elements; i++) {
+ histogram[y[i]] += 1;
+ }
 
  ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
 
@@ -520,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
  * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The taget values.
  * @return [Float] impurity
  */
- static VALUE
- node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
- {
+ static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
  long i;
  const long n_elements = RARRAY_LEN(y);
  const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
@@ -546,8 +515,7 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
  return ret;
  }
 
- void init_tree_module()
- {
+ void init_tree_module() {
  VALUE mTree = rb_define_module_under(mRumale, "Tree");
  /**
  * Document-module: Rumale::Tree::ExtDecisionTreeClassifier
data/ext/rumale/tree.h CHANGED
@@ -3,7 +3,9 @@
 
  #include <math.h>
  #include <string.h>
+
  #include <ruby.h>
+
  #include <numo/narray.h>
  #include <numo/template.h>
 
data/lib/rumale.rb CHANGED
@@ -2,7 +2,7 @@
 
  require 'numo/narray'
 
- require 'rumale/rumale'
+ require 'rumale/rumaleext'
 
  require 'rumale/version'
  require 'rumale/validation'
@@ -81,7 +81,7 @@ module Rumale
  wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
  unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
  @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
- @mixing = Numo::Linalg.pinv(@components)
+ @mixing = Numo::Linalg.pinv(@components).dup
  if @params[:n_components] == 1
  @components = @components.flatten.dup
  @mixing = @mixing.flatten.dup
@@ -161,7 +161,7 @@ module Rumale
 
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
 
- return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+ return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
 
  n_samples, = x.shape
  probs = Numo::DFloat.zeros(n_samples, 2)
@@ -182,7 +182,7 @@ module Rumale
  else
  @estimators.map { |tree| tree.apply(x) }
  end
- Numo::Int32[*leaf_ids].transpose
+ Numo::Int32[*leaf_ids].transpose.dup
  end
 
  private
@@ -144,7 +144,7 @@ module Rumale
  else
  @estimators.map { |tree| tree.apply(x) }
  end
- Numo::Int32[*leaf_ids].transpose
+ Numo::Int32[*leaf_ids].transpose.dup
  end
 
  private
@@ -159,7 +159,7 @@ module Rumale
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
  def apply(x)
  x = check_convert_sample_array(x)
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
  end
 
  private
@@ -136,7 +136,7 @@ module Rumale
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
  def apply(x)
  x = check_convert_sample_array(x)
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
  end
 
  private
@@ -73,7 +73,7 @@ module Rumale
 
  # random sampling.
  @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
- @components = x[@component_indices, true]
+ @components = x[@component_indices, true].dup
 
  # calculate normalizing factor.
  kernel_mat = kernel_mat(@components)
@@ -152,7 +152,7 @@ module Rumale
 
  if @classes.size > 2
  probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
- return (probs.transpose / probs.sum(axis: 1)).transpose
+ return (probs.transpose / probs.sum(axis: 1)).transpose.dup
  end
 
  n_samples, = x.shape
@@ -75,7 +75,7 @@ module Rumale
  (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
  (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
  end
- Numo::DFloat[*log_likelihoods].transpose
+ Numo::DFloat[*log_likelihoods].transpose.dup
  end
  end
  end
@@ -62,7 +62,7 @@ module Rumale
  Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
  ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
  end
- Numo::DFloat[*log_likelihoods].transpose
+ Numo::DFloat[*log_likelihoods].transpose.dup
  end
  end
  end
@@ -67,7 +67,7 @@ module Rumale
  log_likelihoods = Array.new(n_classes) do |l|
  Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
  end
- Numo::DFloat[*log_likelihoods].transpose
+ Numo::DFloat[*log_likelihoods].transpose.dup
  end
  end
  end
@@ -2,6 +2,7 @@
 
  require 'rumale/base/base_estimator'
  require 'rumale/tree/node'
+ require 'rumale/rumaleext'
 
  module Rumale
  # This module consists of the classes that implement tree models.
@@ -44,21 +45,25 @@ module Rumale
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
  def apply(x)
  x = check_convert_sample_array(x)
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
  end
 
  private
 
- def apply_at_node(node, sample)
- return node.leaf_id if node.leaf
- return apply_at_node(node.left, sample) if node.right.nil?
- return apply_at_node(node.right, sample) if node.left.nil?
-
- if sample[node.feature_id] <= node.threshold
- apply_at_node(node.left, sample)
- else
- apply_at_node(node.right, sample)
+ def partial_apply(tree, sample)
+ node = tree
+ until node.leaf
+ # :nocov:
+ node = if node.right.nil?
+ node.left
+ elsif node.left.nil?
+ node.right
+ # :nocov:
+ else
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
+ end
  end
+ node.leaf_id
  end
 
  def build_tree(x, y)
@@ -1,6 +1,5 @@
  # frozen_string_literal: true
 
- require 'rumale/rumale'
  require 'rumale/tree/base_decision_tree'
  require 'rumale/base/classifier'
 
@@ -101,21 +100,25 @@ module Rumale
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
  x = check_convert_sample_array(x)
- Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
+ Numo::DFloat[*(Array.new(x.shape[0]) { |n| partial_predict_proba(@tree, x[n, true]) })]
  end
 
  private
 
- def predict_proba_at_node(node, sample)
- return node.probs if node.leaf
- return predict_proba_at_node(node.left, sample) if node.right.nil?
- return predict_proba_at_node(node.right, sample) if node.left.nil?
-
- if sample[node.feature_id] <= node.threshold
- predict_proba_at_node(node.left, sample)
- else
- predict_proba_at_node(node.right, sample)
+ def partial_predict_proba(tree, sample)
+ node = tree
+ until node.leaf
+ # :nocov:
+ node = if node.right.nil?
+ node.left
+ elsif node.left.nil?
+ node.right
+ # :nocov:
+ else
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
+ end
  end
+ node.probs
  end
 
  def stop_growing?(y)
@@ -1,6 +1,5 @@
  # frozen_string_literal: true
 
- require 'rumale/rumale'
  require 'rumale/tree/base_decision_tree'
  require 'rumale/base/regressor'
 
@@ -1,8 +1,8 @@
  # frozen_string_literal: true
 
- require 'rumale/rumale'
  require 'rumale/base/base_estimator'
  require 'rumale/base/regressor'
+ require 'rumale/rumaleext'
  require 'rumale/tree/node'
 
  module Rumale
@@ -114,21 +114,25 @@ module Rumale
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
  def apply(x)
  x = check_convert_sample_array(x)
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
  end
 
  private
 
- def apply_at_node(node, sample)
- return node.leaf_id if node.leaf
- return apply_at_node(node.left, sample) if node.right.nil?
- return apply_at_node(node.right, sample) if node.left.nil?
-
- if sample[node.feature_id] <= node.threshold
- apply_at_node(node.left, sample)
- else
- apply_at_node(node.right, sample)
+ def partial_apply(tree, sample)
+ node = tree
+ until node.leaf
+ # :nocov:
+ node = if node.right.nil?
+ node.left
+ elsif node.left.nil?
+ node.right
+ # :nocov:
+ else
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
+ end
  end
+ node.leaf_id
  end
 
  def build_tree(x, y, g, h)
@@ -3,5 +3,5 @@
  # Rumale is a machine learning library in Ruby.
  module Rumale
  # The version of Rumale you are using.
- VERSION = '0.23.0'
+ VERSION = '0.23.1'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rumale
  version: !ruby/object:Gem::Version
- version: 0.23.0
+ version: 0.23.1
  platform: ruby
  authors:
  - yoshoku
- autorequire: 
+ autorequire:
  bindir: exe
  cert_chain: []
- date: 2021-04-04 00:00:00.000000000 Z
+ date: 2021-06-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray
@@ -57,13 +57,13 @@ extensions:
  - ext/rumale/extconf.rb
  extra_rdoc_files: []
  files:
+ - ".clang-format"
  - ".coveralls.yml"
  - ".github/workflows/build.yml"
  - ".github/workflows/coverage.yml"
  - ".gitignore"
  - ".rspec"
  - ".rubocop.yml"
- - ".travis.yml"
  - CHANGELOG.md
  - CODE_OF_CONDUCT.md
  - Gemfile
@@ -71,8 +71,8 @@ files:
  - README.md
  - Rakefile
  - ext/rumale/extconf.rb
- - ext/rumale/rumale.c
- - ext/rumale/rumale.h
+ - ext/rumale/rumaleext.c
+ - ext/rumale/rumaleext.h
  - ext/rumale/tree.c
  - ext/rumale/tree.h
  - lib/rumale.rb
@@ -218,7 +218,7 @@ metadata:
  source_code_uri: https://github.com/yoshoku/rumale
  documentation_uri: https://yoshoku.github.io/rumale/doc/
  bug_tracker_uri: https://github.com/yoshoku/rumale/issues
- post_install_message: 
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -233,8 +233,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.2.7
- signing_key: 
+ rubygems_version: 3.1.6
+ signing_key:
  specification_version: 4
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
  algorithms with interfaces similar to Scikit-Learn in Python.
data/.travis.yml DELETED
@@ -1,17 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - '2.4'
6
- - '2.5'
7
- - '2.6'
8
- - '2.7'
9
-
10
- addons:
11
- apt:
12
- packages:
13
- - libopenblas-dev
14
- - liblapacke-dev
15
-
16
- before_install:
17
- - gem install bundler -v 2.1.4