rumale 0.22.2 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 703a6895f4218ca45c5d5ae5e86559b077cf1be213d4939eb1e9ab94eac4621d
4
- data.tar.gz: 5862466e565d1e6030c35494b5028ae980a47d373e90050c62266055fcecd374
3
+ metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
4
+ data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
5
5
  SHA512:
6
- metadata.gz: 988d55c681a102e0c65b9133c6aeafc049e33755955f959d6e6046f5601dd192af881424355a2b373ed2e7a5a16b74236698aef5372e09584b10fe28d1b7bc21
7
- data.tar.gz: adc58efa3b46d9fc1a87ddb2a4df32472507d61f21a3a0eb07026068cc5e41af166fb0a0f8ae23f1b23aec649b22835a50edbed79d35255e8cc231b82b31eb8c
6
+ metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
7
+ data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
data/.clang-format ADDED
@@ -0,0 +1,149 @@
1
+ ---
2
+ Language: Cpp
3
+ # BasedOnStyle: LLVM
4
+ AccessModifierOffset: -2
5
+ AlignAfterOpenBracket: Align
6
+ AlignConsecutiveMacros: false
7
+ AlignConsecutiveAssignments: false
8
+ AlignConsecutiveBitFields: false
9
+ AlignConsecutiveDeclarations: false
10
+ AlignEscapedNewlines: Right
11
+ AlignOperands: Align
12
+ AlignTrailingComments: true
13
+ AllowAllArgumentsOnNextLine: true
14
+ AllowAllConstructorInitializersOnNextLine: true
15
+ AllowAllParametersOfDeclarationOnNextLine: true
16
+ AllowShortEnumsOnASingleLine: true
17
+ AllowShortBlocksOnASingleLine: Never
18
+ AllowShortCaseLabelsOnASingleLine: false
19
+ AllowShortFunctionsOnASingleLine: All
20
+ AllowShortLambdasOnASingleLine: All
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLoopsOnASingleLine: false
23
+ AlwaysBreakAfterDefinitionReturnType: None
24
+ AlwaysBreakAfterReturnType: None
25
+ AlwaysBreakBeforeMultilineStrings: false
26
+ AlwaysBreakTemplateDeclarations: MultiLine
27
+ BinPackArguments: true
28
+ BinPackParameters: true
29
+ BraceWrapping:
30
+ AfterCaseLabel: false
31
+ AfterClass: false
32
+ AfterControlStatement: Never
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: true
46
+ SplitEmptyRecord: true
47
+ SplitEmptyNamespace: true
48
+ BreakBeforeBinaryOperators: None
49
+ BreakBeforeBraces: Attach
50
+ BreakBeforeInheritanceComma: false
51
+ BreakInheritanceList: BeforeColon
52
+ BreakBeforeTernaryOperators: true
53
+ BreakConstructorInitializersBeforeComma: false
54
+ BreakConstructorInitializers: BeforeColon
55
+ BreakAfterJavaFieldAnnotations: false
56
+ BreakStringLiterals: true
57
+ ColumnLimit: 128
58
+ CommentPragmas: '^ IWYU pragma:'
59
+ CompactNamespaces: false
60
+ ConstructorInitializerAllOnOneLineOrOnePerLine: false
61
+ ConstructorInitializerIndentWidth: 4
62
+ ContinuationIndentWidth: 4
63
+ Cpp11BracedListStyle: true
64
+ DeriveLineEnding: true
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ ExperimentalAutoDetectBinPacking: false
68
+ FixNamespaceComments: true
69
+ ForEachMacros:
70
+ - foreach
71
+ - Q_FOREACH
72
+ - BOOST_FOREACH
73
+ IncludeBlocks: Preserve
74
+ IncludeCategories:
75
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
76
+ Priority: 2
77
+ SortPriority: 0
78
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
79
+ Priority: 3
80
+ SortPriority: 0
81
+ - Regex: '.*'
82
+ Priority: 1
83
+ SortPriority: 0
84
+ IncludeIsMainRegex: '(Test)?$'
85
+ IncludeIsMainSourceRegex: ''
86
+ IndentCaseLabels: false
87
+ IndentCaseBlocks: false
88
+ IndentGotoLabels: true
89
+ IndentPPDirectives: None
90
+ IndentExternBlock: AfterExternBlock
91
+ IndentWidth: 2
92
+ IndentWrappedFunctionNames: false
93
+ InsertTrailingCommas: None
94
+ JavaScriptQuotes: Leave
95
+ JavaScriptWrapImports: true
96
+ KeepEmptyLinesAtTheStartOfBlocks: true
97
+ MacroBlockBegin: ''
98
+ MacroBlockEnd: ''
99
+ MaxEmptyLinesToKeep: 1
100
+ NamespaceIndentation: None
101
+ ObjCBinPackProtocolList: Auto
102
+ ObjCBlockIndentWidth: 2
103
+ ObjCBreakBeforeNestedBlockParam: true
104
+ ObjCSpaceAfterProperty: false
105
+ ObjCSpaceBeforeProtocolList: true
106
+ PenaltyBreakAssignment: 2
107
+ PenaltyBreakBeforeFirstCallParameter: 19
108
+ PenaltyBreakComment: 300
109
+ PenaltyBreakFirstLessLess: 120
110
+ PenaltyBreakString: 1000
111
+ PenaltyBreakTemplateDeclaration: 10
112
+ PenaltyExcessCharacter: 1000000
113
+ PenaltyReturnTypeOnItsOwnLine: 60
114
+ PointerAlignment: Left
115
+ ReflowComments: true
116
+ SortIncludes: true
117
+ SortUsingDeclarations: true
118
+ SpaceAfterCStyleCast: false
119
+ SpaceAfterLogicalNot: false
120
+ SpaceAfterTemplateKeyword: true
121
+ SpaceBeforeAssignmentOperators: true
122
+ SpaceBeforeCpp11BracedList: false
123
+ SpaceBeforeCtorInitializerColon: true
124
+ SpaceBeforeInheritanceColon: true
125
+ SpaceBeforeParens: ControlStatements
126
+ SpaceBeforeRangeBasedForLoopColon: true
127
+ SpaceInEmptyBlock: false
128
+ SpaceInEmptyParentheses: false
129
+ SpacesBeforeTrailingComments: 1
130
+ SpacesInAngles: false
131
+ SpacesInConditionalStatement: false
132
+ SpacesInContainerLiterals: true
133
+ SpacesInCStyleCastParentheses: false
134
+ SpacesInParentheses: false
135
+ SpacesInSquareBrackets: false
136
+ SpaceBeforeSquareBrackets: false
137
+ Standard: Latest
138
+ StatementMacros:
139
+ - Q_UNUSED
140
+ - QT_REQUIRE_VERSION
141
+ TabWidth: 8
142
+ UseCRLF: false
143
+ UseTab: Never
144
+ WhitespaceSensitiveMacros:
145
+ - STRINGIZE
146
+ - PP_STRINGIZE
147
+ - BOOST_PP_STRINGIZE
148
+ ...
149
+
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ service_name: github-ci
@@ -6,8 +6,9 @@ jobs:
6
6
  build:
7
7
  runs-on: ubuntu-latest
8
8
  strategy:
9
+ fail-fast: false
9
10
  matrix:
10
- ruby: [ '2.5', '2.6', '2.7' ]
11
+ ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
12
  steps:
12
13
  - uses: actions/checkout@v2
13
14
  - name: Install BLAS and LAPACK
@@ -17,7 +18,9 @@ jobs:
17
18
  with:
18
19
  ruby-version: ${{ matrix.ruby }}
19
20
  - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
20
23
  run: |
21
- gem install bundler
24
+ gem install --no-document bundler
22
25
  bundle install --jobs 4 --retry 3
23
26
  bundle exec rake
@@ -0,0 +1,30 @@
1
+ name: coverage
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ coverage:
11
+ runs-on: ubuntu-20.04
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Install BLAS and LAPACK
15
+ run: sudo apt-get install -y libopenblas-dev liblapacke-dev
16
+ - name: Set up Ruby 2.7
17
+ uses: actions/setup-ruby@v1
18
+ with:
19
+ ruby-version: '2.7'
20
+ - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
23
+ run: |
24
+ gem install bundler
25
+ bundle install
26
+ bundle exec rake
27
+ - name: Coveralls GitHub Action
28
+ uses: coverallsapp/github-action@v1.1.2
29
+ with:
30
+ github-token: ${{ secrets.GITHUB_TOKEN }}
data/.gitignore CHANGED
@@ -16,6 +16,7 @@
16
16
  tags
17
17
  .DS_Store
18
18
  .ruby-version
19
+ iterate.dat
19
20
  /spec/dump_dbl.t
20
21
  /spec/dump_int.t
21
22
  /spec/dump_mult_dbl.t
data/CHANGELOG.md CHANGED
@@ -1,3 +1,41 @@
1
+ # 0.23.1
2
+ - Fix all estimators to return inference results in a contiguous narray.
3
+ - Fix to use until statement instead of recursive call on apply methods of tree estimators.
4
+ - Rename native extension files.
5
+ - Introduce clang-format for native extension code.
6
+
7
+ # 0.23.0
8
+ ## Breaking change
9
+ - Change automatically selected solver from sgd to lbfgs in
10
+ [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html) and
11
+ [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html).
12
+ - When 'auto' is given to the solver parameter, these estimators select the 'svd' solver if Numo::Linalg is loaded.
13
+ Otherwise, they select the 'lbfgs' solver.
14
+
15
+ # 0.22.5
16
+ - Add transformer class for calculating kernel matrix.
17
+ - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
18
+ - Add classifier class based on Ridge regression.
19
+ - [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
20
+ - Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
21
+ - Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
22
+
23
+ # 0.22.4
24
+ - Add classifier and regressor classes for voting ensemble method.
25
+ - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
26
+ - [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
27
+ - Refactor some code.
28
+ - Fix some typos on API documentation.
29
+
30
+ # 0.22.3
31
+ - Add regressor class for non-negative least squares method.
32
+ - [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
33
+ - Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
34
+ - In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
35
+ the solver parameter. If you use 'sgd' solver, you need to specify it explicitly.
36
+ - Add GC guard to native extension code.
37
+ - Update API documentation.
38
+
1
39
  # 0.22.2
2
40
  - Add classifier and regressor classes for stacking method.
3
41
  - [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
data/Gemfile CHANGED
@@ -6,11 +6,12 @@ gemspec
6
6
  gem 'mmh3', '>= 1.0'
7
7
  gem 'numo-linalg', '>= 0.1.4'
8
8
  gem 'parallel', '>= 1.17.0'
9
- gem 'rake', '~> 12.0'
9
+ gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'rspec', '~> 3.0'
12
12
  gem 'rubocop', '~> 1.0'
13
13
  gem 'rubocop-performance', '~> 1.8'
14
14
  gem 'rubocop-rake', '~> 0.5'
15
15
  gem 'rubocop-rspec', '~> 2.0'
16
- gem 'simplecov', '~> 0.19'
16
+ gem 'simplecov', '~> 0.21'
17
+ gem 'simplecov-lcov', '~> 0.8'
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017-2020 Atsushi Tatsuma
1
+ Copyright (c) 2017-2021 Atsushi Tatsuma
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -2,7 +2,8 @@
2
2
 
3
3
  ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
4
4
 
5
- [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
5
+ [![Build Status](https://github.com/yoshoku/rumale/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
6
+ [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
6
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
7
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
8
9
  [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
@@ -176,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
176
177
 
177
178
  Install OpenBLAS library.
178
179
 
179
- Mac:
180
+ macOS:
180
181
 
181
182
  ```bash
182
183
  $ brew install openblas
@@ -185,12 +186,13 @@ $ brew install openblas
185
186
  Ubuntu:
186
187
 
187
188
  ```bash
188
- $ sudo apt-get install gcc gfortran
189
- $ wget https://github.com/xianyi/OpenBLAS/archive/v0.3.5.tar.gz
190
- $ tar xzf v0.3.5.tar.gz
191
- $ cd OpenBLAS-0.3.5
192
- $ make USE_OPENMP=1
193
- $ sudo make PREFIX=/usr/local install
189
+ $ sudo apt-get install libopenblas-dev liblapacke-dev
190
+ ```
191
+
192
+ Windows (MSYS2):
193
+
194
+ ```bash
195
+ $ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
194
196
  ```
195
197
 
196
198
  Install Numo::Linalg gem.
@@ -206,6 +208,37 @@ require 'numo/linalg/autoloader'
206
208
  require 'rumale'
207
209
  ```
208
210
 
211
+ ### Numo::OpenBLAS
212
+ [Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
213
+ and uses that as a background library for Numo::Linalg.
214
+
215
+ Install compilers for building OpenBLAS.
216
+
217
+ macOS:
218
+
219
+ ```bash
220
+ $ brew install gcc gfortran make
221
+ ```
222
+
223
+ Ubuntu:
224
+
225
+ ```bash
226
+ $ sudo apt-get install gcc gfortran make
227
+ ```
228
+
229
+ Install Numo::OpenBLAS gem.
230
+
231
+ ```bash
232
+ $ gem install numo-openblas
233
+ ```
234
+
235
+ Load Numo::OpenBLAS gem instead of Numo::Linalg.
236
+
237
+ ```ruby
238
+ require 'numo/openblas'
239
+ require 'rumale'
240
+ ```
241
+
209
242
  ### Parallel
210
243
  Several estimators in Rumale support parallel processing.
211
244
  Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
@@ -227,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
227
260
  estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
228
261
  ```
229
262
 
263
+ ## Related Projects
264
+ - [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
265
+ - [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
266
+
230
267
  ## Novelties
231
268
 
232
269
  * [Rumale SHOP](https://suzuri.jp/yoshoku)
data/Rakefile CHANGED
@@ -7,7 +7,8 @@ require 'rake/extensiontask'
7
7
 
8
8
  task :build => :compile
9
9
 
10
- Rake::ExtensionTask.new('rumale') do |ext|
10
+ Rake::ExtensionTask.new('rumaleext') do |ext|
11
+ ext.ext_dir = 'ext/rumale'
11
12
  ext.lib_dir = 'lib/rumale'
12
13
  end
13
14
 
@@ -28,4 +28,4 @@ if RUBY_PLATFORM =~ /mswin|cygwin|mingw/
28
28
  end
29
29
  end
30
30
 
31
- create_makefile('rumale/rumale')
31
+ create_makefile('rumale/rumaleext')
@@ -1,9 +1,8 @@
1
- #include "rumale.h"
1
+ #include "rumaleext.h"
2
2
 
3
3
  VALUE mRumale;
4
4
 
5
- void Init_rumale(void)
6
- {
5
+ void Init_rumaleext(void) {
7
6
  mRumale = rb_define_module("Rumale");
8
7
 
9
8
  init_tree_module();
@@ -5,4 +5,4 @@
5
5
 
6
6
  #include "tree.h"
7
7
 
8
- #endif /* RUMALE_H */
8
+ #endif /* RUMALEEXT_H */
data/ext/rumale/tree.c CHANGED
@@ -2,18 +2,13 @@
2
2
 
3
3
  RUBY_EXTERN VALUE mRumale;
4
4
 
5
- double*
6
- alloc_dbl_array(const long n_dimensions)
7
- {
8
- long i;
5
+ double* alloc_dbl_array(const long n_dimensions) {
9
6
  double* arr = ALLOC_N(double, n_dimensions);
10
- for (i = 0; i < n_dimensions; i++) { arr[i] = 0.0; }
7
+ memset(arr, 0, n_dimensions * sizeof(double));
11
8
  return arr;
12
9
  }
13
10
 
14
- double
15
- calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
16
- {
11
+ double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
17
12
  long i;
18
13
  double el;
19
14
  double gini = 0.0;
@@ -26,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
26
21
  return 1.0 - gini;
27
22
  }
28
23
 
29
- double
30
- calc_entropy(double* histogram, const long n_elements, const long n_classes)
31
- {
24
+ double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
32
25
  long i;
33
26
  double el;
34
27
  double entropy = 0.0;
@@ -42,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
42
35
  }
43
36
 
44
37
  VALUE
45
- calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
46
- {
38
+ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
47
39
  long i;
48
40
  VALUE mean_vec = rb_ary_new2(n_dimensions);
49
41
 
@@ -54,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
54
46
  return mean_vec;
55
47
  }
56
48
 
57
- double
58
- calc_vec_mae(VALUE vec_a, VALUE vec_b)
59
- {
49
+ double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
60
50
  long i;
61
51
  const long n_dimensions = RARRAY_LEN(vec_a);
62
52
  double sum = 0.0;
@@ -70,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
70
60
  return sum / n_dimensions;
71
61
  }
72
62
 
73
- double
74
- calc_vec_mse(VALUE vec_a, VALUE vec_b)
75
- {
63
+ double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
76
64
  long i;
77
65
  const long n_dimensions = RARRAY_LEN(vec_a);
78
66
  double sum = 0.0;
@@ -86,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
86
74
  return sum / n_dimensions;
87
75
  }
88
76
 
89
- double
90
- calc_mae(VALUE target_vecs, VALUE mean_vec)
91
- {
77
+ double calc_mae(VALUE target_vecs, VALUE mean_vec) {
92
78
  long i;
93
79
  const long n_elements = RARRAY_LEN(target_vecs);
94
80
  double sum = 0.0;
@@ -100,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
100
86
  return sum / n_elements;
101
87
  }
102
88
 
103
- double
104
- calc_mse(VALUE target_vecs, VALUE mean_vec)
105
- {
89
+ double calc_mse(VALUE target_vecs, VALUE mean_vec) {
106
90
  long i;
107
91
  const long n_elements = RARRAY_LEN(target_vecs);
108
92
  double sum = 0.0;
@@ -114,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
114
98
  return sum / n_elements;
115
99
  }
116
100
 
117
- double
118
- calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
119
- {
101
+ double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
120
102
  if (strcmp(criterion, "entropy") == 0) {
121
103
  return calc_entropy(histogram, n_elements, n_classes);
122
104
  }
123
105
  return calc_gini_coef(histogram, n_elements, n_classes);
124
106
  }
125
107
 
126
- double
127
- calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
128
- {
108
+ double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
129
109
  const long n_elements = RARRAY_LEN(target_vecs);
130
110
  const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
131
111
  VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
@@ -136,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
136
116
  return calc_mse(target_vecs, mean_vec);
137
117
  }
138
118
 
139
- void
140
- add_sum_vec(double* sum_vec, VALUE target)
141
- {
119
+ void add_sum_vec(double* sum_vec, VALUE target) {
142
120
  long i;
143
121
  const long n_dimensions = RARRAY_LEN(target);
144
122
 
@@ -147,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
147
125
  }
148
126
  }
149
127
 
150
- void
151
- sub_sum_vec(double* sum_vec, VALUE target)
152
- {
128
+ void sub_sum_vec(double* sum_vec, VALUE target) {
153
129
  long i;
154
130
  const long n_dimensions = RARRAY_LEN(target);
155
131
 
@@ -169,9 +145,7 @@ typedef struct {
169
145
  /**
170
146
  * @!visibility private
171
147
  */
172
- static void
173
- iter_find_split_params_cls(na_loop_t const* lp)
174
- {
148
+ static void iter_find_split_params_cls(na_loop_t const* lp) {
175
149
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
176
150
  const double* f = (double*)NDL_PTR(lp, 1);
177
151
  const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
@@ -201,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
201
175
  params[3] = 0.0; /* gain */
202
176
 
203
177
  /* Initialize child node variables. */
204
- for (i = 0; i < n_elements; i++) { r_histogram[y[o[i]]] += 1.0; }
178
+ for (i = 0; i < n_elements; i++) {
179
+ r_histogram[y[o[i]]] += 1.0;
180
+ }
205
181
 
206
182
  /* Find optimal parameters. */
207
183
  while (curr_pos < n_elements && curr_el != last_el) {
@@ -225,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
225
201
  params[2] = 0.5 * (curr_el + next_el);
226
202
  params[3] = gain;
227
203
  }
228
- if (next_pos == n_elements) break;
204
+ if (next_pos == n_elements)
205
+ break;
229
206
  curr_pos = next_pos;
230
207
  curr_el = f[o[curr_pos]];
231
208
  }
@@ -247,20 +224,22 @@ iter_find_split_params_cls(na_loop_t const* lp)
247
224
  * @param n_classes [Integer] The number of classes.
248
225
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
249
226
  */
250
- static VALUE
251
- find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels, VALUE n_classes)
252
- {
253
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1} };
254
- size_t out_shape[1] = { 4 };
255
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
256
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout };
257
- split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
227
+ static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
228
+ VALUE n_classes) {
229
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
230
+ size_t out_shape[1] = {4};
231
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
232
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
233
+ split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
258
234
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
259
235
  VALUE results = rb_ary_new2(4);
260
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
261
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
262
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
263
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
236
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
237
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
238
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
239
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
240
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
241
+ RB_GC_GUARD(params);
242
+ RB_GC_GUARD(criterion);
264
243
  return results;
265
244
  }
266
245
 
@@ -274,9 +253,7 @@ typedef struct {
274
253
  /**
275
254
  * @!visibility private
276
255
  */
277
- static void
278
- iter_find_split_params_reg(na_loop_t const* lp)
279
- {
256
+ static void iter_find_split_params_reg(na_loop_t const* lp) {
280
257
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
281
258
  const double* f = (double*)NDL_PTR(lp, 1);
282
259
  const double* y = (double*)NDL_PTR(lp, 2);
@@ -344,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
344
321
  params[2] = 0.5 * (curr_el + next_el);
345
322
  params[3] = gain;
346
323
  }
347
- if (next_pos == n_elements) break;
324
+ if (next_pos == n_elements)
325
+ break;
348
326
  curr_pos = next_pos;
349
327
  curr_el = f[o[curr_pos]];
350
328
  }
@@ -365,29 +343,28 @@ iter_find_split_params_reg(na_loop_t const* lp)
365
343
  * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
366
344
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
367
345
  */
368
- static VALUE
369
- find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets)
370
- {
371
- ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2} };
372
- size_t out_shape[1] = { 4 };
373
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
374
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
375
- split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
346
+ static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
347
+ ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
348
+ size_t out_shape[1] = {4};
349
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
350
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
351
+ split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
376
352
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
377
353
  VALUE results = rb_ary_new2(4);
378
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
379
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
380
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
381
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
354
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
355
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
356
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
357
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
358
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
359
+ RB_GC_GUARD(params);
360
+ RB_GC_GUARD(criterion);
382
361
  return results;
383
362
  }
384
363
 
385
364
  /**
386
365
  * @!visibility private
387
366
  */
388
- static void
389
- iter_find_split_params_grad_reg(na_loop_t const* lp)
390
- {
367
+ static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
391
368
  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
392
369
  const double* f = (double*)NDL_PTR(lp, 1);
393
370
  const double* g = (double*)NDL_PTR(lp, 2);
@@ -422,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
422
399
  /* Calculate gain of new split. */
423
400
  r_grad = s_grad - l_grad;
424
401
  r_hess = s_hess - l_hess;
425
- gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
426
- (r_grad * r_grad) / (r_hess + reg_lambda) -
402
+ gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
427
403
  (s_grad * s_grad) / (s_hess + reg_lambda);
428
404
  /* Update optimal parameters. */
429
405
  if (gain > gain_max) {
430
406
  threshold = 0.5 * (curr_el + next_el);
431
407
  gain_max = gain;
432
408
  }
433
- if (next_pos == n_elements) break;
409
+ if (next_pos == n_elements) {
410
+ break;
411
+ }
434
412
  curr_pos = next_pos;
435
413
  curr_el = f[o[curr_pos]];
436
414
  }
@@ -453,19 +431,19 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
453
431
  * @param reg_lambda [Float] The L2 regularization term on weight.
454
432
  * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
455
433
  */
456
- static VALUE
457
- find_split_params_grad_reg
458
- (VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians, VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda)
459
- {
460
- ndfunc_arg_in_t ain[4] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1} };
461
- size_t out_shape[1] = { 2 };
462
- ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
463
- ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
464
- double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
434
+ static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
435
+ VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
436
+ ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
437
+ size_t out_shape[1] = {2};
438
+ ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
439
+ ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
440
+ double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
465
441
  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
466
442
  VALUE results = rb_ary_new2(2);
467
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
468
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
443
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
444
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
445
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
446
+ RB_GC_GUARD(params);
469
447
  return results;
470
448
  }
471
449
 
@@ -481,9 +459,7 @@ find_split_params_grad_reg
481
459
  * @param n_classes_ [Integer] The number of classes.
482
460
  * @return [Float] impurity
483
461
  */
484
- static VALUE
485
- node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
486
- {
462
+ static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
487
463
  long i;
488
464
  const long n_classes = NUM2LONG(n_classes_);
489
465
  const long n_elements = NUM2LONG(n_elements_);
@@ -491,12 +467,17 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
491
467
  double* histogram = alloc_dbl_array(n_classes);
492
468
  VALUE ret;
493
469
 
494
- for (i = 0; i < n_elements; i++) { histogram[y[i]] += 1; }
470
+ for (i = 0; i < n_elements; i++) {
471
+ histogram[y[i]] += 1;
472
+ }
495
473
 
496
474
  ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
497
475
 
498
476
  xfree(histogram);
499
477
 
478
+ RB_GC_GUARD(y_nary);
479
+ RB_GC_GUARD(criterion);
480
+
500
481
  return ret;
501
482
  }
502
483
 
@@ -510,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
510
491
  * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The taget values.
511
492
  * @return [Float] impurity
512
493
  */
513
- static VALUE
514
- node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
515
- {
494
+ static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
516
495
  long i;
517
496
  const long n_elements = RARRAY_LEN(y);
518
497
  const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
@@ -531,11 +510,12 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
531
510
 
532
511
  xfree(sum_vec);
533
512
 
513
+ RB_GC_GUARD(criterion);
514
+
534
515
  return ret;
535
516
  }
536
517
 
537
- void init_tree_module()
538
- {
518
+ void init_tree_module() {
539
519
  VALUE mTree = rb_define_module_under(mRumale, "Tree");
540
520
  /**
541
521
  * Document-module: Rumale::Tree::ExtDecisionTreeClassifier