rumale 0.22.2 → 0.23.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 703a6895f4218ca45c5d5ae5e86559b077cf1be213d4939eb1e9ab94eac4621d
4
- data.tar.gz: 5862466e565d1e6030c35494b5028ae980a47d373e90050c62266055fcecd374
3
+ metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
4
+ data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
5
5
  SHA512:
6
- metadata.gz: 988d55c681a102e0c65b9133c6aeafc049e33755955f959d6e6046f5601dd192af881424355a2b373ed2e7a5a16b74236698aef5372e09584b10fe28d1b7bc21
7
- data.tar.gz: adc58efa3b46d9fc1a87ddb2a4df32472507d61f21a3a0eb07026068cc5e41af166fb0a0f8ae23f1b23aec649b22835a50edbed79d35255e8cc231b82b31eb8c
6
+ metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
7
+ data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
data/.clang-format ADDED
@@ -0,0 +1,149 @@
1
+ ---
2
+ Language: Cpp
3
+ # BasedOnStyle: LLVM
4
+ AccessModifierOffset: -2
5
+ AlignAfterOpenBracket: Align
6
+ AlignConsecutiveMacros: false
7
+ AlignConsecutiveAssignments: false
8
+ AlignConsecutiveBitFields: false
9
+ AlignConsecutiveDeclarations: false
10
+ AlignEscapedNewlines: Right
11
+ AlignOperands: Align
12
+ AlignTrailingComments: true
13
+ AllowAllArgumentsOnNextLine: true
14
+ AllowAllConstructorInitializersOnNextLine: true
15
+ AllowAllParametersOfDeclarationOnNextLine: true
16
+ AllowShortEnumsOnASingleLine: true
17
+ AllowShortBlocksOnASingleLine: Never
18
+ AllowShortCaseLabelsOnASingleLine: false
19
+ AllowShortFunctionsOnASingleLine: All
20
+ AllowShortLambdasOnASingleLine: All
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLoopsOnASingleLine: false
23
+ AlwaysBreakAfterDefinitionReturnType: None
24
+ AlwaysBreakAfterReturnType: None
25
+ AlwaysBreakBeforeMultilineStrings: false
26
+ AlwaysBreakTemplateDeclarations: MultiLine
27
+ BinPackArguments: true
28
+ BinPackParameters: true
29
+ BraceWrapping:
30
+ AfterCaseLabel: false
31
+ AfterClass: false
32
+ AfterControlStatement: Never
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: true
46
+ SplitEmptyRecord: true
47
+ SplitEmptyNamespace: true
48
+ BreakBeforeBinaryOperators: None
49
+ BreakBeforeBraces: Attach
50
+ BreakBeforeInheritanceComma: false
51
+ BreakInheritanceList: BeforeColon
52
+ BreakBeforeTernaryOperators: true
53
+ BreakConstructorInitializersBeforeComma: false
54
+ BreakConstructorInitializers: BeforeColon
55
+ BreakAfterJavaFieldAnnotations: false
56
+ BreakStringLiterals: true
57
+ ColumnLimit: 128
58
+ CommentPragmas: '^ IWYU pragma:'
59
+ CompactNamespaces: false
60
+ ConstructorInitializerAllOnOneLineOrOnePerLine: false
61
+ ConstructorInitializerIndentWidth: 4
62
+ ContinuationIndentWidth: 4
63
+ Cpp11BracedListStyle: true
64
+ DeriveLineEnding: true
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ ExperimentalAutoDetectBinPacking: false
68
+ FixNamespaceComments: true
69
+ ForEachMacros:
70
+ - foreach
71
+ - Q_FOREACH
72
+ - BOOST_FOREACH
73
+ IncludeBlocks: Preserve
74
+ IncludeCategories:
75
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
76
+ Priority: 2
77
+ SortPriority: 0
78
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
79
+ Priority: 3
80
+ SortPriority: 0
81
+ - Regex: '.*'
82
+ Priority: 1
83
+ SortPriority: 0
84
+ IncludeIsMainRegex: '(Test)?$'
85
+ IncludeIsMainSourceRegex: ''
86
+ IndentCaseLabels: false
87
+ IndentCaseBlocks: false
88
+ IndentGotoLabels: true
89
+ IndentPPDirectives: None
90
+ IndentExternBlock: AfterExternBlock
91
+ IndentWidth: 2
92
+ IndentWrappedFunctionNames: false
93
+ InsertTrailingCommas: None
94
+ JavaScriptQuotes: Leave
95
+ JavaScriptWrapImports: true
96
+ KeepEmptyLinesAtTheStartOfBlocks: true
97
+ MacroBlockBegin: ''
98
+ MacroBlockEnd: ''
99
+ MaxEmptyLinesToKeep: 1
100
+ NamespaceIndentation: None
101
+ ObjCBinPackProtocolList: Auto
102
+ ObjCBlockIndentWidth: 2
103
+ ObjCBreakBeforeNestedBlockParam: true
104
+ ObjCSpaceAfterProperty: false
105
+ ObjCSpaceBeforeProtocolList: true
106
+ PenaltyBreakAssignment: 2
107
+ PenaltyBreakBeforeFirstCallParameter: 19
108
+ PenaltyBreakComment: 300
109
+ PenaltyBreakFirstLessLess: 120
110
+ PenaltyBreakString: 1000
111
+ PenaltyBreakTemplateDeclaration: 10
112
+ PenaltyExcessCharacter: 1000000
113
+ PenaltyReturnTypeOnItsOwnLine: 60
114
+ PointerAlignment: Left
115
+ ReflowComments: true
116
+ SortIncludes: true
117
+ SortUsingDeclarations: true
118
+ SpaceAfterCStyleCast: false
119
+ SpaceAfterLogicalNot: false
120
+ SpaceAfterTemplateKeyword: true
121
+ SpaceBeforeAssignmentOperators: true
122
+ SpaceBeforeCpp11BracedList: false
123
+ SpaceBeforeCtorInitializerColon: true
124
+ SpaceBeforeInheritanceColon: true
125
+ SpaceBeforeParens: ControlStatements
126
+ SpaceBeforeRangeBasedForLoopColon: true
127
+ SpaceInEmptyBlock: false
128
+ SpaceInEmptyParentheses: false
129
+ SpacesBeforeTrailingComments: 1
130
+ SpacesInAngles: false
131
+ SpacesInConditionalStatement: false
132
+ SpacesInContainerLiterals: true
133
+ SpacesInCStyleCastParentheses: false
134
+ SpacesInParentheses: false
135
+ SpacesInSquareBrackets: false
136
+ SpaceBeforeSquareBrackets: false
137
+ Standard: Latest
138
+ StatementMacros:
139
+ - Q_UNUSED
140
+ - QT_REQUIRE_VERSION
141
+ TabWidth: 8
142
+ UseCRLF: false
143
+ UseTab: Never
144
+ WhitespaceSensitiveMacros:
145
+ - STRINGIZE
146
+ - PP_STRINGIZE
147
+ - BOOST_PP_STRINGIZE
148
+ ...
149
+
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ service_name: github-ci
@@ -6,8 +6,9 @@ jobs:
6
6
  build:
7
7
  runs-on: ubuntu-latest
8
8
  strategy:
9
+ fail-fast: false
9
10
  matrix:
10
- ruby: [ '2.5', '2.6', '2.7' ]
11
+ ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
12
  steps:
12
13
  - uses: actions/checkout@v2
13
14
  - name: Install BLAS and LAPACK
@@ -17,7 +18,9 @@ jobs:
17
18
  with:
18
19
  ruby-version: ${{ matrix.ruby }}
19
20
  - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
20
23
  run: |
21
- gem install bundler
24
+ gem install --no-document bundler
22
25
  bundle install --jobs 4 --retry 3
23
26
  bundle exec rake
@@ -0,0 +1,30 @@
1
+ name: coverage
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ coverage:
11
+ runs-on: ubuntu-20.04
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Install BLAS and LAPACK
15
+ run: sudo apt-get install -y libopenblas-dev liblapacke-dev
16
+ - name: Set up Ruby 2.7
17
+ uses: actions/setup-ruby@v1
18
+ with:
19
+ ruby-version: '2.7'
20
+ - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
23
+ run: |
24
+ gem install bundler
25
+ bundle install
26
+ bundle exec rake
27
+ - name: Coveralls GitHub Action
28
+ uses: coverallsapp/github-action@v1.1.2
29
+ with:
30
+ github-token: ${{ secrets.GITHUB_TOKEN }}
data/.gitignore CHANGED
@@ -16,6 +16,7 @@
16
16
  tags
17
17
  .DS_Store
18
18
  .ruby-version
19
+ iterate.dat
19
20
  /spec/dump_dbl.t
20
21
  /spec/dump_int.t
21
22
  /spec/dump_mult_dbl.t
data/CHANGELOG.md CHANGED
@@ -1,3 +1,41 @@
1
+ # 0.23.1
2
+ - Fix all estimators to return inference results in a contiguous narray.
3
+ - Fix to use until statement instead of recursive call on apply methods of tree estimators.
4
+ - Rename native extension files.
5
+ - Introduce clang-format for native extension codes.
6
+
7
+ # 0.23.0
8
+ ## Breaking change
9
+ - Change automatically selected solver from sgd to lbfgs in
10
+ [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html) and
11
+ [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html).
12
+ - When 'auto' is given to the solver parameter, these estimators select the 'svd' solver if Numo::Linalg is loaded.
13
+ Otherwise, they select the 'lbfgs' solver.
14
+
15
+ # 0.22.5
16
+ - Add transformer class for calculating kernel matrix.
17
+ - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
18
+ - Add classifier class based on Ridge regression.
19
+ - [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
20
+ - Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
21
+ - Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
22
+
23
+ # 0.22.4
24
+ - Add classifier and regressor classes for voting ensemble method.
25
+ - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
26
+ - [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
27
+ - Refactor some codes.
28
+ - Fix some typos on API documentation.
29
+
30
+ # 0.22.3
31
+ - Add regressor class for non-negative least square method.
32
+ - [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
33
+ - Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
34
+ - In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
35
+ the solver parameter. If you use the 'sgd' solver, you need to specify it explicitly.
36
+ - Add GC guard to native extension codes.
37
+ - Update API documentation.
38
+
1
39
  # 0.22.2
2
40
  - Add classifier and regressor classes for stacking method.
3
41
  - [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
data/Gemfile CHANGED
@@ -6,11 +6,12 @@ gemspec
6
6
  gem 'mmh3', '>= 1.0'
7
7
  gem 'numo-linalg', '>= 0.1.4'
8
8
  gem 'parallel', '>= 1.17.0'
9
- gem 'rake', '~> 12.0'
9
+ gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'rspec', '~> 3.0'
12
12
  gem 'rubocop', '~> 1.0'
13
13
  gem 'rubocop-performance', '~> 1.8'
14
14
  gem 'rubocop-rake', '~> 0.5'
15
15
  gem 'rubocop-rspec', '~> 2.0'
16
- gem 'simplecov', '~> 0.19'
16
+ gem 'simplecov', '~> 0.21'
17
+ gem 'simplecov-lcov', '~> 0.8'
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017-2020 Atsushi Tatsuma
1
+ Copyright (c) 2017-2021 Atsushi Tatsuma
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -2,7 +2,8 @@
2
2
 
3
3
  ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
4
4
 
5
- [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
5
+ [![Build Status](https://github.com/yoshoku/rumale/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
6
+ [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
6
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
7
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
8
9
  [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
@@ -176,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
176
177
 
177
178
  Install OpenBLAS library.
178
179
 
179
- Mac:
180
+ macOS:
180
181
 
181
182
  ```bash
182
183
  $ brew install openblas
@@ -185,12 +186,13 @@ $ brew install openblas
185
186
  Ubuntu:
186
187
 
187
188
  ```bash
188
- $ sudo apt-get install gcc gfortran
189
- $ wget https://github.com/xianyi/OpenBLAS/archive/v0.3.5.tar.gz
190
- $ tar xzf v0.3.5.tar.gz
191
- $ cd OpenBLAS-0.3.5
192
- $ make USE_OPENMP=1
193
- $ sudo make PREFIX=/usr/local install
189
+ $ sudo apt-get install libopenblas-dev liblapacke-dev
190
+ ```
191
+
192
+ Windows (MSYS2):
193
+
194
+ ```bash
195
+ $ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
194
196
  ```
195
197
 
196
198
  Install Numo::Linalg gem.
@@ -206,6 +208,37 @@ require 'numo/linalg/autoloader'
206
208
  require 'rumale'
207
209
  ```
208
210
 
211
+ ### Numo::OpenBLAS
212
+ [Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
213
+ and uses that as a background library for Numo::Linalg.
214
+
215
+ Install compilers for building OpenBLAS.
216
+
217
+ macOS:
218
+
219
+ ```bash
220
+ $ brew install gcc gfortran make
221
+ ```
222
+
223
+ Ubuntu:
224
+
225
+ ```bash
226
+ $ sudo apt-get install gcc gfortran make
227
+ ```
228
+
229
+ Install Numo::OpenBLAS gem.
230
+
231
+ ```bash
232
+ $ gem install numo-openblas
233
+ ```
234
+
235
+ Load Numo::OpenBLAS gem instead of Numo::Linalg.
236
+
237
+ ```ruby
238
+ require 'numo/openblas'
239
+ require 'rumale'
240
+ ```
241
+
209
242
  ### Parallel
210
243
  Several estimators in Rumale support parallel processing.
211
244
  Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
@@ -227,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
227
260
  estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
228
261
  ```
229
262
 
263
+ ## Related Projects
264
+ - [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
265
+ - [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
266
+
230
267
  ## Novelties
231
268
 
232
269
  * [Rumale SHOP](https://suzuri.jp/yoshoku)
data/Rakefile CHANGED
@@ -7,7 +7,8 @@ require 'rake/extensiontask'
7
7
 
8
8
  task :build => :compile
9
9
 
10
- Rake::ExtensionTask.new('rumale') do |ext|
10
+ Rake::ExtensionTask.new('rumaleext') do |ext|
11
+ ext.ext_dir = 'ext/rumale'
11
12
  ext.lib_dir = 'lib/rumale'
12
13
  end
13
14
 
@@ -28,4 +28,4 @@ if RUBY_PLATFORM =~ /mswin|cygwin|mingw/
28
28
  end
29
29
  end
30
30
 
31
- create_makefile('rumale/rumale')
31
+ create_makefile('rumale/rumaleext')
@@ -1,9 +1,8 @@
1
- #include "rumale.h"
1
+ #include "rumaleext.h"
2
2
 
3
3
  VALUE mRumale;
4
4
 
5
- void Init_rumale(void)
6
- {
5
+ void Init_rumaleext(void) {
7
6
  mRumale = rb_define_module("Rumale");
8
7
 
9
8
  init_tree_module();
@@ -5,4 +5,4 @@
5
5
 
6
6
  #include "tree.h"
7
7
 
8
- #endif /* RUMALE_H */
8
+ #endif /* RUMALEEXT_H */
data/ext/rumale/tree.c CHANGED
@@ -2,18 +2,13 @@
2
2
 
3
3
  RUBY_EXTERN VALUE mRumale;
4
4
 
5
- double*
6
- alloc_dbl_array(const long n_dimensions)
7
- {
8
- long i;
5
+ double* alloc_dbl_array(const long n_dimensions) {
9
6
  double* arr = ALLOC_N(double, n_dimensions);
10
- for (i = 0; i < n_dimensions; i++) { arr[i] = 0.0; }
7
+ memset(arr, 0, n_dimensions * sizeof(double));
11
8
  return arr;
12
9
  }
13
10
 
14
- double
15
- calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
16
- {
11
+ double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
17
12
  long i;
18
13
  double el;
19
14
  double gini = 0.0;
@@ -26,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
26
21
  return 1.0 - gini;
27
22
  }
28
23
 
29
- double
30
- calc_entropy(double* histogram, const long n_elements, const long n_classes)
31
- {
24
+ double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
32
25
  long i;
33
26
  double el;
34
27
  double entropy = 0.0;
@@ -42,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
42
35
  }
43
36
 
44
37
  VALUE
45
- calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
46
- {
38
+ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
47
39
  long i;
48
40
  VALUE mean_vec = rb_ary_new2(n_dimensions);
49
41
 
@@ -54,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
54
46
  return mean_vec;
55
47
  }
56
48
 
57
- double
58
- calc_vec_mae(VALUE vec_a, VALUE vec_b)
59
- {
49
+ double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
60
50
  long i;
61
51
  const long n_dimensions = RARRAY_LEN(vec_a);
62
52
  double sum = 0.0;
@@ -70,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
70
60
  return sum / n_dimensions;
71
61
  }
72
62
 
73
- double
74
- calc_vec_mse(VALUE vec_a, VALUE vec_b)
75
- {
63
+ double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
76
64
  long i;
77
65
  const long n_dimensions = RARRAY_LEN(vec_a);
78
66
  double sum = 0.0;
@@ -86,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
86
74
  return sum / n_dimensions;
87
75
  }
88
76
 
89
- double
90
- calc_mae(VALUE target_vecs, VALUE mean_vec)
91
- {
77
+ double calc_mae(VALUE target_vecs, VALUE mean_vec) {
92
78
  long i;
93
79
  const long n_elements = RARRAY_LEN(target_vecs);
94
80
  double sum = 0.0;
@@ -100,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
100
86
  return sum / n_elements;
101
87
  }
102
88
 
103
- double
104
- calc_mse(VALUE target_vecs, VALUE mean_vec)
105
- {
89
+ double calc_mse(VALUE target_vecs, VALUE mean_vec) {
106
90
  long i;
107
91
  const long n_elements = RARRAY_LEN(target_vecs);
108
92
  double sum = 0.0;
@@ -114,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
114
98
  return sum / n_elements;
115
99
  }
116
100
 
117
- double
118
- calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
119
- {
101
+ double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
120
102
  if (strcmp(criterion, "entropy") == 0) {
121
103
  return calc_entropy(histogram, n_elements, n_classes);
122
104
  }
123
105
  return calc_gini_coef(histogram, n_elements, n_classes);
124
106
  }
125
107
 
126
- double
127
- calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
128
- {
108
+ double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
129
109
  const long n_elements = RARRAY_LEN(target_vecs);
130
110
  const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
131
111
  VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
@@ -136,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
136
116
  return calc_mse(target_vecs, mean_vec);
137
117
  }
138
118
 
139
- void
140
- add_sum_vec(double* sum_vec, VALUE target)
141
- {
119
+ void add_sum_vec(double* sum_vec, VALUE target) {
142
120
  long i;
143
121
  const long n_dimensions = RARRAY_LEN(target);
144
122
 
@@ -147,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
147
125
  }
148
126
  }
149
127
 
150
- void
151
- sub_sum_vec(double* sum_vec, VALUE target)
152
- {
128
+ void sub_sum_vec(double* sum_vec, VALUE target) {
153
129
  long i;
154
130
  const long n_dimensions = RARRAY_LEN(target);
155
131
 
@@ -169,9 +145,7 @@ typedef struct {
169
145
  /**
170
146
  * @!visibility private
171
147
  */
172
- static void
173
- iter_find_split_params_cls(na_loop_t const* lp)
174
- {
148
+ static void iter_find_split_params_cls(na_loop_t const* lp) {
175
149
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
176
150
  const double* f = (double*)NDL_PTR(lp, 1);
177
151
  const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
@@ -201,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
201
175
  params[3] = 0.0; /* gain */
202
176
 
203
177
  /* Initialize child node variables. */
204
- for (i = 0; i < n_elements; i++) { r_histogram[y[o[i]]] += 1.0; }
178
+ for (i = 0; i < n_elements; i++) {
179
+ r_histogram[y[o[i]]] += 1.0;
180
+ }
205
181
 
206
182
  /* Find optimal parameters. */
207
183
  while (curr_pos < n_elements && curr_el != last_el) {
@@ -225,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
225
201
  params[2] = 0.5 * (curr_el + next_el);
226
202
  params[3] = gain;
227
203
  }
228
- if (next_pos == n_elements) break;
204
+ if (next_pos == n_elements)
205
+ break;
229
206
  curr_pos = next_pos;
230
207
  curr_el = f[o[curr_pos]];
231
208
  }
@@ -247,20 +224,22 @@ iter_find_split_params_cls(na_loop_t const* lp)
247
224
  * @param n_classes [Integer] The number of classes.
248
225
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
249
226
  */
250
- static VALUE
251
- find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels, VALUE n_classes)
252
- {
253
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1} };
254
- size_t out_shape[1] = { 4 };
255
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
256
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout };
257
- split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
227
+ static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
228
+ VALUE n_classes) {
229
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
230
+ size_t out_shape[1] = {4};
231
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
232
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
233
+ split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
258
234
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
259
235
  VALUE results = rb_ary_new2(4);
260
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
261
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
262
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
263
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
236
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
237
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
238
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
239
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
240
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
241
+ RB_GC_GUARD(params);
242
+ RB_GC_GUARD(criterion);
264
243
  return results;
265
244
  }
266
245
 
@@ -274,9 +253,7 @@ typedef struct {
274
253
  /**
275
254
  * @!visibility private
276
255
  */
277
- static void
278
- iter_find_split_params_reg(na_loop_t const* lp)
279
- {
256
+ static void iter_find_split_params_reg(na_loop_t const* lp) {
280
257
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
281
258
  const double* f = (double*)NDL_PTR(lp, 1);
282
259
  const double* y = (double*)NDL_PTR(lp, 2);
@@ -344,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
344
321
  params[2] = 0.5 * (curr_el + next_el);
345
322
  params[3] = gain;
346
323
  }
347
- if (next_pos == n_elements) break;
324
+ if (next_pos == n_elements)
325
+ break;
348
326
  curr_pos = next_pos;
349
327
  curr_el = f[o[curr_pos]];
350
328
  }
@@ -365,29 +343,28 @@ iter_find_split_params_reg(na_loop_t const* lp)
365
343
  * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
366
344
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
367
345
  */
368
- static VALUE
369
- find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets)
370
- {
371
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2} };
372
- size_t out_shape[1] = { 4 };
373
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
374
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
375
- split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
346
+ static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
347
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
348
+ size_t out_shape[1] = {4};
349
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
350
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
351
+ split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
376
352
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
377
353
  VALUE results = rb_ary_new2(4);
378
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
379
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
380
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
381
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
354
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
355
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
356
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
357
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
358
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
359
+ RB_GC_GUARD(params);
360
+ RB_GC_GUARD(criterion);
382
361
  return results;
383
362
  }
384
363
 
385
364
  /**
386
365
  * @!visibility private
387
366
  */
388
- static void
389
- iter_find_split_params_grad_reg(na_loop_t const* lp)
390
- {
367
+ static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
391
368
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
392
369
  const double* f = (double*)NDL_PTR(lp, 1);
393
370
  const double* g = (double*)NDL_PTR(lp, 2);
@@ -422,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
422
399
  /* Calculate gain of new split. */
423
400
  r_grad = s_grad - l_grad;
424
401
  r_hess = s_hess - l_hess;
425
- gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
426
- (r_grad * r_grad) / (r_hess + reg_lambda) -
402
+ gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
427
403
  (s_grad * s_grad) / (s_hess + reg_lambda);
428
404
  /* Update optimal parameters. */
429
405
  if (gain > gain_max) {
430
406
  threshold = 0.5 * (curr_el + next_el);
431
407
  gain_max = gain;
432
408
  }
433
- if (next_pos == n_elements) break;
409
+ if (next_pos == n_elements) {
410
+ break;
411
+ }
434
412
  curr_pos = next_pos;
435
413
  curr_el = f[o[curr_pos]];
436
414
  }
@@ -453,19 +431,19 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
453
431
  * @param reg_lambda [Float] The L2 regularization term on weight.
454
432
  * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
455
433
  */
456
- static VALUE
457
- find_split_params_grad_reg
458
- (VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians, VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda)
459
- {
460
- ndfunc_arg_in_t ain[4] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1} };
461
- size_t out_shape[1] = { 2 };
462
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
463
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
464
- double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
434
+ static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
435
+ VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
436
+ ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
437
+ size_t out_shape[1] = {2};
438
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
439
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
440
+ double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
465
441
  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
466
442
  VALUE results = rb_ary_new2(2);
467
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
468
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
443
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
444
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
445
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
446
+ RB_GC_GUARD(params);
469
447
  return results;
470
448
  }
471
449
 
@@ -481,9 +459,7 @@ find_split_params_grad_reg
481
459
  * @param n_classes_ [Integer] The number of classes.
482
460
  * @return [Float] impurity
483
461
  */
484
- static VALUE
485
- node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
486
- {
462
+ static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
487
463
  long i;
488
464
  const long n_classes = NUM2LONG(n_classes_);
489
465
  const long n_elements = NUM2LONG(n_elements_);
@@ -491,12 +467,17 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
491
467
  double* histogram = alloc_dbl_array(n_classes);
492
468
  VALUE ret;
493
469
 
494
- for (i = 0; i < n_elements; i++) { histogram[y[i]] += 1; }
470
+ for (i = 0; i < n_elements; i++) {
471
+ histogram[y[i]] += 1;
472
+ }
495
473
 
496
474
  ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
497
475
 
498
476
  xfree(histogram);
499
477
 
478
+ RB_GC_GUARD(y_nary);
479
+ RB_GC_GUARD(criterion);
480
+
500
481
  return ret;
501
482
  }
502
483
 
@@ -510,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
510
491
  * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
511
492
  * @return [Float] impurity
512
493
  */
513
- static VALUE
514
- node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
515
- {
494
+ static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
516
495
  long i;
517
496
  const long n_elements = RARRAY_LEN(y);
518
497
  const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
@@ -531,11 +510,12 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
531
510
 
532
511
  xfree(sum_vec);
533
512
 
513
+ RB_GC_GUARD(criterion);
514
+
534
515
  return ret;
535
516
  }
536
517
 
537
- void init_tree_module()
538
- {
518
+ void init_tree_module() {
539
519
  VALUE mTree = rb_define_module_under(mRumale, "Tree");
540
520
  /**
541
521
  * Document-module: Rumale::Tree::ExtDecisionTreeClassifier