rumale 0.23.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f66b61c7ae0d76fdccc2b95c8f1ae3ea181c1622024a0dd93a15dea17e9a632
4
- data.tar.gz: 1bf6934f79110b4bc59528bc6d656250f6e3fcb7834c9bf262e53f42fae78b69
3
+ metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
4
+ data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
5
5
  SHA512:
6
- metadata.gz: da5c9c6463d3fbefc2d48628b053379cfc7e71195780dcbceddefadd0211f15de509911a96e722d464640e9da1107be8298354f0df464ba63ca7e385632adcc8
7
- data.tar.gz: a54bce60f8d9c0f65a4ea2899d900b7fd7069181d5e76461dc48820314c603c3069323d96ceb8851d57910802b2c8f361a2c1c5f7345159736622cd23145fc87
6
+ metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
7
+ data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
data/.clang-format ADDED
@@ -0,0 +1,149 @@
1
+ ---
2
+ Language: Cpp
3
+ # BasedOnStyle: LLVM
4
+ AccessModifierOffset: -2
5
+ AlignAfterOpenBracket: Align
6
+ AlignConsecutiveMacros: false
7
+ AlignConsecutiveAssignments: false
8
+ AlignConsecutiveBitFields: false
9
+ AlignConsecutiveDeclarations: false
10
+ AlignEscapedNewlines: Right
11
+ AlignOperands: Align
12
+ AlignTrailingComments: true
13
+ AllowAllArgumentsOnNextLine: true
14
+ AllowAllConstructorInitializersOnNextLine: true
15
+ AllowAllParametersOfDeclarationOnNextLine: true
16
+ AllowShortEnumsOnASingleLine: true
17
+ AllowShortBlocksOnASingleLine: Never
18
+ AllowShortCaseLabelsOnASingleLine: false
19
+ AllowShortFunctionsOnASingleLine: All
20
+ AllowShortLambdasOnASingleLine: All
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLoopsOnASingleLine: false
23
+ AlwaysBreakAfterDefinitionReturnType: None
24
+ AlwaysBreakAfterReturnType: None
25
+ AlwaysBreakBeforeMultilineStrings: false
26
+ AlwaysBreakTemplateDeclarations: MultiLine
27
+ BinPackArguments: true
28
+ BinPackParameters: true
29
+ BraceWrapping:
30
+ AfterCaseLabel: false
31
+ AfterClass: false
32
+ AfterControlStatement: Never
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: true
46
+ SplitEmptyRecord: true
47
+ SplitEmptyNamespace: true
48
+ BreakBeforeBinaryOperators: None
49
+ BreakBeforeBraces: Attach
50
+ BreakBeforeInheritanceComma: false
51
+ BreakInheritanceList: BeforeColon
52
+ BreakBeforeTernaryOperators: true
53
+ BreakConstructorInitializersBeforeComma: false
54
+ BreakConstructorInitializers: BeforeColon
55
+ BreakAfterJavaFieldAnnotations: false
56
+ BreakStringLiterals: true
57
+ ColumnLimit: 128
58
+ CommentPragmas: '^ IWYU pragma:'
59
+ CompactNamespaces: false
60
+ ConstructorInitializerAllOnOneLineOrOnePerLine: false
61
+ ConstructorInitializerIndentWidth: 4
62
+ ContinuationIndentWidth: 4
63
+ Cpp11BracedListStyle: true
64
+ DeriveLineEnding: true
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ ExperimentalAutoDetectBinPacking: false
68
+ FixNamespaceComments: true
69
+ ForEachMacros:
70
+ - foreach
71
+ - Q_FOREACH
72
+ - BOOST_FOREACH
73
+ IncludeBlocks: Preserve
74
+ IncludeCategories:
75
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
76
+ Priority: 2
77
+ SortPriority: 0
78
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
79
+ Priority: 3
80
+ SortPriority: 0
81
+ - Regex: '.*'
82
+ Priority: 1
83
+ SortPriority: 0
84
+ IncludeIsMainRegex: '(Test)?$'
85
+ IncludeIsMainSourceRegex: ''
86
+ IndentCaseLabels: false
87
+ IndentCaseBlocks: false
88
+ IndentGotoLabels: true
89
+ IndentPPDirectives: None
90
+ IndentExternBlock: AfterExternBlock
91
+ IndentWidth: 2
92
+ IndentWrappedFunctionNames: false
93
+ InsertTrailingCommas: None
94
+ JavaScriptQuotes: Leave
95
+ JavaScriptWrapImports: true
96
+ KeepEmptyLinesAtTheStartOfBlocks: true
97
+ MacroBlockBegin: ''
98
+ MacroBlockEnd: ''
99
+ MaxEmptyLinesToKeep: 1
100
+ NamespaceIndentation: None
101
+ ObjCBinPackProtocolList: Auto
102
+ ObjCBlockIndentWidth: 2
103
+ ObjCBreakBeforeNestedBlockParam: true
104
+ ObjCSpaceAfterProperty: false
105
+ ObjCSpaceBeforeProtocolList: true
106
+ PenaltyBreakAssignment: 2
107
+ PenaltyBreakBeforeFirstCallParameter: 19
108
+ PenaltyBreakComment: 300
109
+ PenaltyBreakFirstLessLess: 120
110
+ PenaltyBreakString: 1000
111
+ PenaltyBreakTemplateDeclaration: 10
112
+ PenaltyExcessCharacter: 1000000
113
+ PenaltyReturnTypeOnItsOwnLine: 60
114
+ PointerAlignment: Left
115
+ ReflowComments: true
116
+ SortIncludes: true
117
+ SortUsingDeclarations: true
118
+ SpaceAfterCStyleCast: false
119
+ SpaceAfterLogicalNot: false
120
+ SpaceAfterTemplateKeyword: true
121
+ SpaceBeforeAssignmentOperators: true
122
+ SpaceBeforeCpp11BracedList: false
123
+ SpaceBeforeCtorInitializerColon: true
124
+ SpaceBeforeInheritanceColon: true
125
+ SpaceBeforeParens: ControlStatements
126
+ SpaceBeforeRangeBasedForLoopColon: true
127
+ SpaceInEmptyBlock: false
128
+ SpaceInEmptyParentheses: false
129
+ SpacesBeforeTrailingComments: 1
130
+ SpacesInAngles: false
131
+ SpacesInConditionalStatement: false
132
+ SpacesInContainerLiterals: true
133
+ SpacesInCStyleCastParentheses: false
134
+ SpacesInParentheses: false
135
+ SpacesInSquareBrackets: false
136
+ SpaceBeforeSquareBrackets: false
137
+ Standard: Latest
138
+ StatementMacros:
139
+ - Q_UNUSED
140
+ - QT_REQUIRE_VERSION
141
+ TabWidth: 8
142
+ UseCRLF: false
143
+ UseTab: Never
144
+ WhitespaceSensitiveMacros:
145
+ - STRINGIZE
146
+ - PP_STRINGIZE
147
+ - BOOST_PP_STRINGIZE
148
+ ...
149
+
@@ -18,6 +18,8 @@ jobs:
18
18
  with:
19
19
  ruby-version: '2.7'
20
20
  - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
21
23
  run: |
22
24
  gem install bundler
23
25
  bundle install
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.23.1
2
+ - Fix all estimators to return inference results in a contiguous narray.
3
+ - Fix to use until statement instead of recursive call on apply methods of tree estimators.
4
+ - Rename native extension files.
5
+ - Introduce clang-format for native extension codes.
6
+
1
7
  # 0.23.0
2
8
  ## Breaking change
3
9
  - Change automatically selected solver from sgd to lbfgs in
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
4
4
 
5
- [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
5
+ [![Build Status](https://github.com/yoshoku/rumale/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
data/Rakefile CHANGED
@@ -7,7 +7,8 @@ require 'rake/extensiontask'
7
7
 
8
8
  task :build => :compile
9
9
 
10
- Rake::ExtensionTask.new('rumale') do |ext|
10
+ Rake::ExtensionTask.new('rumaleext') do |ext|
11
+ ext.ext_dir = 'ext/rumale'
11
12
  ext.lib_dir = 'lib/rumale'
12
13
  end
13
14
 
@@ -28,4 +28,4 @@ if RUBY_PLATFORM =~ /mswin|cygwin|mingw/
28
28
  end
29
29
  end
30
30
 
31
- create_makefile('rumale/rumale')
31
+ create_makefile('rumale/rumaleext')
@@ -1,9 +1,8 @@
1
- #include "rumale.h"
1
+ #include "rumaleext.h"
2
2
 
3
3
  VALUE mRumale;
4
4
 
5
- void Init_rumale(void)
6
- {
5
+ void Init_rumaleext(void) {
7
6
  mRumale = rb_define_module("Rumale");
8
7
 
9
8
  init_tree_module();
@@ -5,4 +5,4 @@
5
5
 
6
6
  #include "tree.h"
7
7
 
8
- #endif /* RUMALE_H */
8
+ #endif /* RUMALEEXT_H */
data/ext/rumale/tree.c CHANGED
@@ -2,17 +2,13 @@
2
2
 
3
3
  RUBY_EXTERN VALUE mRumale;
4
4
 
5
- double*
6
- alloc_dbl_array(const long n_dimensions)
7
- {
5
+ double* alloc_dbl_array(const long n_dimensions) {
8
6
  double* arr = ALLOC_N(double, n_dimensions);
9
7
  memset(arr, 0, n_dimensions * sizeof(double));
10
8
  return arr;
11
9
  }
12
10
 
13
- double
14
- calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
15
- {
11
+ double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
16
12
  long i;
17
13
  double el;
18
14
  double gini = 0.0;
@@ -25,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
25
21
  return 1.0 - gini;
26
22
  }
27
23
 
28
- double
29
- calc_entropy(double* histogram, const long n_elements, const long n_classes)
30
- {
24
+ double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
31
25
  long i;
32
26
  double el;
33
27
  double entropy = 0.0;
@@ -41,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
41
35
  }
42
36
 
43
37
  VALUE
44
- calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
45
- {
38
+ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
46
39
  long i;
47
40
  VALUE mean_vec = rb_ary_new2(n_dimensions);
48
41
 
@@ -53,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
53
46
  return mean_vec;
54
47
  }
55
48
 
56
- double
57
- calc_vec_mae(VALUE vec_a, VALUE vec_b)
58
- {
49
+ double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
59
50
  long i;
60
51
  const long n_dimensions = RARRAY_LEN(vec_a);
61
52
  double sum = 0.0;
@@ -69,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
69
60
  return sum / n_dimensions;
70
61
  }
71
62
 
72
- double
73
- calc_vec_mse(VALUE vec_a, VALUE vec_b)
74
- {
63
+ double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
75
64
  long i;
76
65
  const long n_dimensions = RARRAY_LEN(vec_a);
77
66
  double sum = 0.0;
@@ -85,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
85
74
  return sum / n_dimensions;
86
75
  }
87
76
 
88
- double
89
- calc_mae(VALUE target_vecs, VALUE mean_vec)
90
- {
77
+ double calc_mae(VALUE target_vecs, VALUE mean_vec) {
91
78
  long i;
92
79
  const long n_elements = RARRAY_LEN(target_vecs);
93
80
  double sum = 0.0;
@@ -99,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
99
86
  return sum / n_elements;
100
87
  }
101
88
 
102
- double
103
- calc_mse(VALUE target_vecs, VALUE mean_vec)
104
- {
89
+ double calc_mse(VALUE target_vecs, VALUE mean_vec) {
105
90
  long i;
106
91
  const long n_elements = RARRAY_LEN(target_vecs);
107
92
  double sum = 0.0;
@@ -113,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
113
98
  return sum / n_elements;
114
99
  }
115
100
 
116
- double
117
- calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
118
- {
101
+ double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
119
102
  if (strcmp(criterion, "entropy") == 0) {
120
103
  return calc_entropy(histogram, n_elements, n_classes);
121
104
  }
122
105
  return calc_gini_coef(histogram, n_elements, n_classes);
123
106
  }
124
107
 
125
- double
126
- calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
127
- {
108
+ double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
128
109
  const long n_elements = RARRAY_LEN(target_vecs);
129
110
  const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
130
111
  VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
@@ -135,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
135
116
  return calc_mse(target_vecs, mean_vec);
136
117
  }
137
118
 
138
- void
139
- add_sum_vec(double* sum_vec, VALUE target)
140
- {
119
+ void add_sum_vec(double* sum_vec, VALUE target) {
141
120
  long i;
142
121
  const long n_dimensions = RARRAY_LEN(target);
143
122
 
@@ -146,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
146
125
  }
147
126
  }
148
127
 
149
- void
150
- sub_sum_vec(double* sum_vec, VALUE target)
151
- {
128
+ void sub_sum_vec(double* sum_vec, VALUE target) {
152
129
  long i;
153
130
  const long n_dimensions = RARRAY_LEN(target);
154
131
 
@@ -168,9 +145,7 @@ typedef struct {
168
145
  /**
169
146
  * @!visibility private
170
147
  */
171
- static void
172
- iter_find_split_params_cls(na_loop_t const* lp)
173
- {
148
+ static void iter_find_split_params_cls(na_loop_t const* lp) {
174
149
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
175
150
  const double* f = (double*)NDL_PTR(lp, 1);
176
151
  const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
@@ -200,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
200
175
  params[3] = 0.0; /* gain */
201
176
 
202
177
  /* Initialize child node variables. */
203
- for (i = 0; i < n_elements; i++) { r_histogram[y[o[i]]] += 1.0; }
178
+ for (i = 0; i < n_elements; i++) {
179
+ r_histogram[y[o[i]]] += 1.0;
180
+ }
204
181
 
205
182
  /* Find optimal parameters. */
206
183
  while (curr_pos < n_elements && curr_el != last_el) {
@@ -224,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
224
201
  params[2] = 0.5 * (curr_el + next_el);
225
202
  params[3] = gain;
226
203
  }
227
- if (next_pos == n_elements) break;
204
+ if (next_pos == n_elements)
205
+ break;
228
206
  curr_pos = next_pos;
229
207
  curr_el = f[o[curr_pos]];
230
208
  }
@@ -246,14 +224,13 @@ iter_find_split_params_cls(na_loop_t const* lp)
246
224
  * @param n_classes [Integer] The number of classes.
247
225
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
248
226
  */
249
- static VALUE
250
- find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels, VALUE n_classes)
251
- {
252
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1} };
253
- size_t out_shape[1] = { 4 };
254
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
255
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout };
256
- split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
227
+ static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
228
+ VALUE n_classes) {
229
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
230
+ size_t out_shape[1] = {4};
231
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
232
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
233
+ split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
257
234
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
258
235
  VALUE results = rb_ary_new2(4);
259
236
  double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -276,9 +253,7 @@ typedef struct {
276
253
  /**
277
254
  * @!visibility private
278
255
  */
279
- static void
280
- iter_find_split_params_reg(na_loop_t const* lp)
281
- {
256
+ static void iter_find_split_params_reg(na_loop_t const* lp) {
282
257
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
283
258
  const double* f = (double*)NDL_PTR(lp, 1);
284
259
  const double* y = (double*)NDL_PTR(lp, 2);
@@ -346,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
346
321
  params[2] = 0.5 * (curr_el + next_el);
347
322
  params[3] = gain;
348
323
  }
349
- if (next_pos == n_elements) break;
324
+ if (next_pos == n_elements)
325
+ break;
350
326
  curr_pos = next_pos;
351
327
  curr_el = f[o[curr_pos]];
352
328
  }
@@ -367,14 +343,12 @@ iter_find_split_params_reg(na_loop_t const* lp)
367
343
  * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
368
344
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
369
345
  */
370
- static VALUE
371
- find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets)
372
- {
373
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2} };
374
- size_t out_shape[1] = { 4 };
375
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
376
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
377
- split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
346
+ static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
347
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
348
+ size_t out_shape[1] = {4};
349
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
350
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
351
+ split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
378
352
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
379
353
  VALUE results = rb_ary_new2(4);
380
354
  double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -390,9 +364,7 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
390
364
  /**
391
365
  * @!visibility private
392
366
  */
393
- static void
394
- iter_find_split_params_grad_reg(na_loop_t const* lp)
395
- {
367
+ static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
396
368
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
397
369
  const double* f = (double*)NDL_PTR(lp, 1);
398
370
  const double* g = (double*)NDL_PTR(lp, 2);
@@ -427,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
427
399
  /* Calculate gain of new split. */
428
400
  r_grad = s_grad - l_grad;
429
401
  r_hess = s_hess - l_hess;
430
- gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
431
- (r_grad * r_grad) / (r_hess + reg_lambda) -
402
+ gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
432
403
  (s_grad * s_grad) / (s_hess + reg_lambda);
433
404
  /* Update optimal parameters. */
434
405
  if (gain > gain_max) {
435
406
  threshold = 0.5 * (curr_el + next_el);
436
407
  gain_max = gain;
437
408
  }
438
- if (next_pos == n_elements) break;
409
+ if (next_pos == n_elements) {
410
+ break;
411
+ }
439
412
  curr_pos = next_pos;
440
413
  curr_el = f[o[curr_pos]];
441
414
  }
@@ -458,15 +431,13 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
458
431
  * @param reg_lambda [Float] The L2 regularization term on weight.
459
432
  * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
460
433
  */
461
- static VALUE
462
- find_split_params_grad_reg
463
- (VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians, VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda)
464
- {
465
- ndfunc_arg_in_t ain[4] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1} };
466
- size_t out_shape[1] = { 2 };
467
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
468
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
469
- double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
434
+ static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
435
+ VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
436
+ ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
437
+ size_t out_shape[1] = {2};
438
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
439
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
440
+ double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
470
441
  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
471
442
  VALUE results = rb_ary_new2(2);
472
443
  double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -488,9 +459,7 @@ find_split_params_grad_reg
488
459
  * @param n_classes_ [Integer] The number of classes.
489
460
  * @return [Float] impurity
490
461
  */
491
- static VALUE
492
- node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
493
- {
462
+ static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
494
463
  long i;
495
464
  const long n_classes = NUM2LONG(n_classes_);
496
465
  const long n_elements = NUM2LONG(n_elements_);
@@ -498,7 +467,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
498
467
  double* histogram = alloc_dbl_array(n_classes);
499
468
  VALUE ret;
500
469
 
501
- for (i = 0; i < n_elements; i++) { histogram[y[i]] += 1; }
470
+ for (i = 0; i < n_elements; i++) {
471
+ histogram[y[i]] += 1;
472
+ }
502
473
 
503
474
  ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
504
475
 
@@ -520,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
520
491
  * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
521
492
  * @return [Float] impurity
522
493
  */
523
- static VALUE
524
- node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
525
- {
494
+ static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
526
495
  long i;
527
496
  const long n_elements = RARRAY_LEN(y);
528
497
  const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
@@ -546,8 +515,7 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
546
515
  return ret;
547
516
  }
548
517
 
549
- void init_tree_module()
550
- {
518
+ void init_tree_module() {
551
519
  VALUE mTree = rb_define_module_under(mRumale, "Tree");
552
520
  /**
553
521
  * Document-module: Rumale::Tree::ExtDecisionTreeClassifier
data/ext/rumale/tree.h CHANGED
@@ -3,7 +3,9 @@
3
3
 
4
4
  #include <math.h>
5
5
  #include <string.h>
6
+
6
7
  #include <ruby.h>
8
+
7
9
  #include <numo/narray.h>
8
10
  #include <numo/template.h>
9
11
 
data/lib/rumale.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'numo/narray'
4
4
 
5
- require 'rumale/rumale'
5
+ require 'rumale/rumaleext'
6
6
 
7
7
  require 'rumale/version'
8
8
  require 'rumale/validation'
@@ -81,7 +81,7 @@ module Rumale
81
81
  wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
82
82
  unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
83
83
  @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
84
- @mixing = Numo::Linalg.pinv(@components)
84
+ @mixing = Numo::Linalg.pinv(@components).dup
85
85
  if @params[:n_components] == 1
86
86
  @components = @components.flatten.dup
87
87
  @mixing = @mixing.flatten.dup
@@ -161,7 +161,7 @@ module Rumale
161
161
 
162
162
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
163
163
 
164
- return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
164
+ return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
165
165
 
166
166
  n_samples, = x.shape
167
167
  probs = Numo::DFloat.zeros(n_samples, 2)
@@ -182,7 +182,7 @@ module Rumale
182
182
  else
183
183
  @estimators.map { |tree| tree.apply(x) }
184
184
  end
185
- Numo::Int32[*leaf_ids].transpose
185
+ Numo::Int32[*leaf_ids].transpose.dup
186
186
  end
187
187
 
188
188
  private
@@ -144,7 +144,7 @@ module Rumale
144
144
  else
145
145
  @estimators.map { |tree| tree.apply(x) }
146
146
  end
147
- Numo::Int32[*leaf_ids].transpose
147
+ Numo::Int32[*leaf_ids].transpose.dup
148
148
  end
149
149
 
150
150
  private
@@ -159,7 +159,7 @@ module Rumale
159
159
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
160
160
  def apply(x)
161
161
  x = check_convert_sample_array(x)
162
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
162
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
163
163
  end
164
164
 
165
165
  private
@@ -136,7 +136,7 @@ module Rumale
136
136
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
137
137
  def apply(x)
138
138
  x = check_convert_sample_array(x)
139
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
139
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
140
140
  end
141
141
 
142
142
  private
@@ -73,7 +73,7 @@ module Rumale
73
73
 
74
74
  # random sampling.
75
75
  @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
76
- @components = x[@component_indices, true]
76
+ @components = x[@component_indices, true].dup
77
77
 
78
78
  # calculate normalizing factor.
79
79
  kernel_mat = kernel_mat(@components)
@@ -152,7 +152,7 @@ module Rumale
152
152
 
153
153
  if @classes.size > 2
154
154
  probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
155
- return (probs.transpose / probs.sum(axis: 1)).transpose
155
+ return (probs.transpose / probs.sum(axis: 1)).transpose.dup
156
156
  end
157
157
 
158
158
  n_samples, = x.shape
@@ -75,7 +75,7 @@ module Rumale
75
75
  (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
76
76
  (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
77
77
  end
78
- Numo::DFloat[*log_likelihoods].transpose
78
+ Numo::DFloat[*log_likelihoods].transpose.dup
79
79
  end
80
80
  end
81
81
  end
@@ -62,7 +62,7 @@ module Rumale
62
62
  Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
63
63
  ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
64
64
  end
65
- Numo::DFloat[*log_likelihoods].transpose
65
+ Numo::DFloat[*log_likelihoods].transpose.dup
66
66
  end
67
67
  end
68
68
  end
@@ -67,7 +67,7 @@ module Rumale
67
67
  log_likelihoods = Array.new(n_classes) do |l|
68
68
  Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
69
69
  end
70
- Numo::DFloat[*log_likelihoods].transpose
70
+ Numo::DFloat[*log_likelihoods].transpose.dup
71
71
  end
72
72
  end
73
73
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/tree/node'
5
+ require 'rumale/rumaleext'
5
6
 
6
7
  module Rumale
7
8
  # This module consists of the classes that implement tree models.
@@ -44,21 +45,25 @@ module Rumale
44
45
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
45
46
  def apply(x)
46
47
  x = check_convert_sample_array(x)
47
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
48
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
48
49
  end
49
50
 
50
51
  private
51
52
 
52
- def apply_at_node(node, sample)
53
- return node.leaf_id if node.leaf
54
- return apply_at_node(node.left, sample) if node.right.nil?
55
- return apply_at_node(node.right, sample) if node.left.nil?
56
-
57
- if sample[node.feature_id] <= node.threshold
58
- apply_at_node(node.left, sample)
59
- else
60
- apply_at_node(node.right, sample)
53
+ def partial_apply(tree, sample)
54
+ node = tree
55
+ until node.leaf
56
+ # :nocov:
57
+ node = if node.right.nil?
58
+ node.left
59
+ elsif node.left.nil?
60
+ node.right
61
+ # :nocov:
62
+ else
63
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
64
+ end
61
65
  end
66
+ node.leaf_id
62
67
  end
63
68
 
64
69
  def build_tree(x, y)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/tree/base_decision_tree'
5
4
  require 'rumale/base/classifier'
6
5
 
@@ -101,21 +100,25 @@ module Rumale
101
100
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
102
101
  def predict_proba(x)
103
102
  x = check_convert_sample_array(x)
104
- Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
103
+ Numo::DFloat[*(Array.new(x.shape[0]) { |n| partial_predict_proba(@tree, x[n, true]) })]
105
104
  end
106
105
 
107
106
  private
108
107
 
109
- def predict_proba_at_node(node, sample)
110
- return node.probs if node.leaf
111
- return predict_proba_at_node(node.left, sample) if node.right.nil?
112
- return predict_proba_at_node(node.right, sample) if node.left.nil?
113
-
114
- if sample[node.feature_id] <= node.threshold
115
- predict_proba_at_node(node.left, sample)
116
- else
117
- predict_proba_at_node(node.right, sample)
108
+ def partial_predict_proba(tree, sample)
109
+ node = tree
110
+ until node.leaf
111
+ # :nocov:
112
+ node = if node.right.nil?
113
+ node.left
114
+ elsif node.left.nil?
115
+ node.right
116
+ # :nocov:
117
+ else
118
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
119
+ end
118
120
  end
121
+ node.probs
119
122
  end
120
123
 
121
124
  def stop_growing?(y)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/tree/base_decision_tree'
5
4
  require 'rumale/base/regressor'
6
5
 
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/base/base_estimator'
5
4
  require 'rumale/base/regressor'
5
+ require 'rumale/rumaleext'
6
6
  require 'rumale/tree/node'
7
7
 
8
8
  module Rumale
@@ -114,21 +114,25 @@ module Rumale
114
114
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
115
115
  def apply(x)
116
116
  x = check_convert_sample_array(x)
117
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
117
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
118
118
  end
119
119
 
120
120
  private
121
121
 
122
- def apply_at_node(node, sample)
123
- return node.leaf_id if node.leaf
124
- return apply_at_node(node.left, sample) if node.right.nil?
125
- return apply_at_node(node.right, sample) if node.left.nil?
126
-
127
- if sample[node.feature_id] <= node.threshold
128
- apply_at_node(node.left, sample)
129
- else
130
- apply_at_node(node.right, sample)
122
+ def partial_apply(tree, sample)
123
+ node = tree
124
+ until node.leaf
125
+ # :nocov:
126
+ node = if node.right.nil?
127
+ node.left
128
+ elsif node.left.nil?
129
+ node.right
130
+ # :nocov:
131
+ else
132
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
133
+ end
131
134
  end
135
+ node.leaf_id
132
136
  end
133
137
 
134
138
  def build_tree(x, y, g, h)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.23.0'
6
+ VERSION = '0.23.1'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.0
4
+ version: 0.23.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-04 00:00:00.000000000 Z
11
+ date: 2021-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -57,13 +57,13 @@ extensions:
57
57
  - ext/rumale/extconf.rb
58
58
  extra_rdoc_files: []
59
59
  files:
60
+ - ".clang-format"
60
61
  - ".coveralls.yml"
61
62
  - ".github/workflows/build.yml"
62
63
  - ".github/workflows/coverage.yml"
63
64
  - ".gitignore"
64
65
  - ".rspec"
65
66
  - ".rubocop.yml"
66
- - ".travis.yml"
67
67
  - CHANGELOG.md
68
68
  - CODE_OF_CONDUCT.md
69
69
  - Gemfile
@@ -71,8 +71,8 @@ files:
71
71
  - README.md
72
72
  - Rakefile
73
73
  - ext/rumale/extconf.rb
74
- - ext/rumale/rumale.c
75
- - ext/rumale/rumale.h
74
+ - ext/rumale/rumaleext.c
75
+ - ext/rumale/rumaleext.h
76
76
  - ext/rumale/tree.c
77
77
  - ext/rumale/tree.h
78
78
  - lib/rumale.rb
@@ -218,7 +218,7 @@ metadata:
218
218
  source_code_uri: https://github.com/yoshoku/rumale
219
219
  documentation_uri: https://yoshoku.github.io/rumale/doc/
220
220
  bug_tracker_uri: https://github.com/yoshoku/rumale/issues
221
- post_install_message:
221
+ post_install_message:
222
222
  rdoc_options: []
223
223
  require_paths:
224
224
  - lib
@@ -233,8 +233,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
233
233
  - !ruby/object:Gem::Version
234
234
  version: '0'
235
235
  requirements: []
236
- rubygems_version: 3.2.7
237
- signing_key:
236
+ rubygems_version: 3.1.6
237
+ signing_key:
238
238
  specification_version: 4
239
239
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
240
240
  algorithms with interfaces similar to Scikit-Learn in Python.
data/.travis.yml DELETED
@@ -1,17 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - '2.4'
6
- - '2.5'
7
- - '2.6'
8
- - '2.7'
9
-
10
- addons:
11
- apt:
12
- packages:
13
- - libopenblas-dev
14
- - liblapacke-dev
15
-
16
- before_install:
17
- - gem install bundler -v 2.1.4