rumale 0.22.2 → 0.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
|
4
|
+
data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
|
7
|
+
data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
|
data/.clang-format
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
---
|
2
|
+
Language: Cpp
|
3
|
+
# BasedOnStyle: LLVM
|
4
|
+
AccessModifierOffset: -2
|
5
|
+
AlignAfterOpenBracket: Align
|
6
|
+
AlignConsecutiveMacros: false
|
7
|
+
AlignConsecutiveAssignments: false
|
8
|
+
AlignConsecutiveBitFields: false
|
9
|
+
AlignConsecutiveDeclarations: false
|
10
|
+
AlignEscapedNewlines: Right
|
11
|
+
AlignOperands: Align
|
12
|
+
AlignTrailingComments: true
|
13
|
+
AllowAllArgumentsOnNextLine: true
|
14
|
+
AllowAllConstructorInitializersOnNextLine: true
|
15
|
+
AllowAllParametersOfDeclarationOnNextLine: true
|
16
|
+
AllowShortEnumsOnASingleLine: true
|
17
|
+
AllowShortBlocksOnASingleLine: Never
|
18
|
+
AllowShortCaseLabelsOnASingleLine: false
|
19
|
+
AllowShortFunctionsOnASingleLine: All
|
20
|
+
AllowShortLambdasOnASingleLine: All
|
21
|
+
AllowShortIfStatementsOnASingleLine: Never
|
22
|
+
AllowShortLoopsOnASingleLine: false
|
23
|
+
AlwaysBreakAfterDefinitionReturnType: None
|
24
|
+
AlwaysBreakAfterReturnType: None
|
25
|
+
AlwaysBreakBeforeMultilineStrings: false
|
26
|
+
AlwaysBreakTemplateDeclarations: MultiLine
|
27
|
+
BinPackArguments: true
|
28
|
+
BinPackParameters: true
|
29
|
+
BraceWrapping:
|
30
|
+
AfterCaseLabel: false
|
31
|
+
AfterClass: false
|
32
|
+
AfterControlStatement: Never
|
33
|
+
AfterEnum: false
|
34
|
+
AfterFunction: false
|
35
|
+
AfterNamespace: false
|
36
|
+
AfterObjCDeclaration: false
|
37
|
+
AfterStruct: false
|
38
|
+
AfterUnion: false
|
39
|
+
AfterExternBlock: false
|
40
|
+
BeforeCatch: false
|
41
|
+
BeforeElse: false
|
42
|
+
BeforeLambdaBody: false
|
43
|
+
BeforeWhile: false
|
44
|
+
IndentBraces: false
|
45
|
+
SplitEmptyFunction: true
|
46
|
+
SplitEmptyRecord: true
|
47
|
+
SplitEmptyNamespace: true
|
48
|
+
BreakBeforeBinaryOperators: None
|
49
|
+
BreakBeforeBraces: Attach
|
50
|
+
BreakBeforeInheritanceComma: false
|
51
|
+
BreakInheritanceList: BeforeColon
|
52
|
+
BreakBeforeTernaryOperators: true
|
53
|
+
BreakConstructorInitializersBeforeComma: false
|
54
|
+
BreakConstructorInitializers: BeforeColon
|
55
|
+
BreakAfterJavaFieldAnnotations: false
|
56
|
+
BreakStringLiterals: true
|
57
|
+
ColumnLimit: 128
|
58
|
+
CommentPragmas: '^ IWYU pragma:'
|
59
|
+
CompactNamespaces: false
|
60
|
+
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
61
|
+
ConstructorInitializerIndentWidth: 4
|
62
|
+
ContinuationIndentWidth: 4
|
63
|
+
Cpp11BracedListStyle: true
|
64
|
+
DeriveLineEnding: true
|
65
|
+
DerivePointerAlignment: false
|
66
|
+
DisableFormat: false
|
67
|
+
ExperimentalAutoDetectBinPacking: false
|
68
|
+
FixNamespaceComments: true
|
69
|
+
ForEachMacros:
|
70
|
+
- foreach
|
71
|
+
- Q_FOREACH
|
72
|
+
- BOOST_FOREACH
|
73
|
+
IncludeBlocks: Preserve
|
74
|
+
IncludeCategories:
|
75
|
+
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
76
|
+
Priority: 2
|
77
|
+
SortPriority: 0
|
78
|
+
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
79
|
+
Priority: 3
|
80
|
+
SortPriority: 0
|
81
|
+
- Regex: '.*'
|
82
|
+
Priority: 1
|
83
|
+
SortPriority: 0
|
84
|
+
IncludeIsMainRegex: '(Test)?$'
|
85
|
+
IncludeIsMainSourceRegex: ''
|
86
|
+
IndentCaseLabels: false
|
87
|
+
IndentCaseBlocks: false
|
88
|
+
IndentGotoLabels: true
|
89
|
+
IndentPPDirectives: None
|
90
|
+
IndentExternBlock: AfterExternBlock
|
91
|
+
IndentWidth: 2
|
92
|
+
IndentWrappedFunctionNames: false
|
93
|
+
InsertTrailingCommas: None
|
94
|
+
JavaScriptQuotes: Leave
|
95
|
+
JavaScriptWrapImports: true
|
96
|
+
KeepEmptyLinesAtTheStartOfBlocks: true
|
97
|
+
MacroBlockBegin: ''
|
98
|
+
MacroBlockEnd: ''
|
99
|
+
MaxEmptyLinesToKeep: 1
|
100
|
+
NamespaceIndentation: None
|
101
|
+
ObjCBinPackProtocolList: Auto
|
102
|
+
ObjCBlockIndentWidth: 2
|
103
|
+
ObjCBreakBeforeNestedBlockParam: true
|
104
|
+
ObjCSpaceAfterProperty: false
|
105
|
+
ObjCSpaceBeforeProtocolList: true
|
106
|
+
PenaltyBreakAssignment: 2
|
107
|
+
PenaltyBreakBeforeFirstCallParameter: 19
|
108
|
+
PenaltyBreakComment: 300
|
109
|
+
PenaltyBreakFirstLessLess: 120
|
110
|
+
PenaltyBreakString: 1000
|
111
|
+
PenaltyBreakTemplateDeclaration: 10
|
112
|
+
PenaltyExcessCharacter: 1000000
|
113
|
+
PenaltyReturnTypeOnItsOwnLine: 60
|
114
|
+
PointerAlignment: Left
|
115
|
+
ReflowComments: true
|
116
|
+
SortIncludes: true
|
117
|
+
SortUsingDeclarations: true
|
118
|
+
SpaceAfterCStyleCast: false
|
119
|
+
SpaceAfterLogicalNot: false
|
120
|
+
SpaceAfterTemplateKeyword: true
|
121
|
+
SpaceBeforeAssignmentOperators: true
|
122
|
+
SpaceBeforeCpp11BracedList: false
|
123
|
+
SpaceBeforeCtorInitializerColon: true
|
124
|
+
SpaceBeforeInheritanceColon: true
|
125
|
+
SpaceBeforeParens: ControlStatements
|
126
|
+
SpaceBeforeRangeBasedForLoopColon: true
|
127
|
+
SpaceInEmptyBlock: false
|
128
|
+
SpaceInEmptyParentheses: false
|
129
|
+
SpacesBeforeTrailingComments: 1
|
130
|
+
SpacesInAngles: false
|
131
|
+
SpacesInConditionalStatement: false
|
132
|
+
SpacesInContainerLiterals: true
|
133
|
+
SpacesInCStyleCastParentheses: false
|
134
|
+
SpacesInParentheses: false
|
135
|
+
SpacesInSquareBrackets: false
|
136
|
+
SpaceBeforeSquareBrackets: false
|
137
|
+
Standard: Latest
|
138
|
+
StatementMacros:
|
139
|
+
- Q_UNUSED
|
140
|
+
- QT_REQUIRE_VERSION
|
141
|
+
TabWidth: 8
|
142
|
+
UseCRLF: false
|
143
|
+
UseTab: Never
|
144
|
+
WhitespaceSensitiveMacros:
|
145
|
+
- STRINGIZE
|
146
|
+
- PP_STRINGIZE
|
147
|
+
- BOOST_PP_STRINGIZE
|
148
|
+
...
|
149
|
+
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: github-ci
|
data/.github/workflows/build.yml
CHANGED
@@ -6,8 +6,9 @@ jobs:
|
|
6
6
|
build:
|
7
7
|
runs-on: ubuntu-latest
|
8
8
|
strategy:
|
9
|
+
fail-fast: false
|
9
10
|
matrix:
|
10
|
-
ruby: [ '2.5', '2.6', '2.7' ]
|
11
|
+
ruby: [ '2.5', '2.6', '2.7', '3.0' ]
|
11
12
|
steps:
|
12
13
|
- uses: actions/checkout@v2
|
13
14
|
- name: Install BLAS and LAPACK
|
@@ -17,7 +18,9 @@ jobs:
|
|
17
18
|
with:
|
18
19
|
ruby-version: ${{ matrix.ruby }}
|
19
20
|
- name: Build and test with Rake
|
21
|
+
env:
|
22
|
+
LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
|
20
23
|
run: |
|
21
|
-
gem install bundler
|
24
|
+
gem install --no-document bundler
|
22
25
|
bundle install --jobs 4 --retry 3
|
23
26
|
bundle exec rake
|
@@ -0,0 +1,30 @@
|
|
1
|
+
name: coverage
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ main ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ main ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
coverage:
|
11
|
+
runs-on: ubuntu-20.04
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v2
|
14
|
+
- name: Install BLAS and LAPACK
|
15
|
+
run: sudo apt-get install -y libopenblas-dev liblapacke-dev
|
16
|
+
- name: Set up Ruby 2.7
|
17
|
+
uses: actions/setup-ruby@v1
|
18
|
+
with:
|
19
|
+
ruby-version: '2.7'
|
20
|
+
- name: Build and test with Rake
|
21
|
+
env:
|
22
|
+
LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
|
23
|
+
run: |
|
24
|
+
gem install bundler
|
25
|
+
bundle install
|
26
|
+
bundle exec rake
|
27
|
+
- name: Coveralls GitHub Action
|
28
|
+
uses: coverallsapp/github-action@v1.1.2
|
29
|
+
with:
|
30
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1
|
+
# 0.23.1
|
2
|
+
- Fix all estimators to return inference results in a contiguous narray.
|
3
|
+
- Fix to use until statement instead of recursive call on apply methods of tree estimators.
|
4
|
+
- Rename native extension files.
|
5
|
+
- Introduce clang-format for native extension codes.
|
6
|
+
|
7
|
+
# 0.23.0
|
8
|
+
## Breaking change
|
9
|
+
- Change automatically selected solver from sgd to lbfgs in
|
10
|
+
[LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html) and
|
11
|
+
[Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html).
|
12
|
+
- When 'auto' is given to the solver parameter, these estimators select the 'svd' solver if Numo::Linalg is loaded.
|
13
|
+
Otherwise, they select the 'lbfgs' solver.
|
14
|
+
|
15
|
+
# 0.22.5
|
16
|
+
- Add transformer class for calculating kernel matrix.
|
17
|
+
- [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
|
18
|
+
- Add classifier class based on Ridge regression.
|
19
|
+
- [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
|
20
|
+
- Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
|
21
|
+
- Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
|
22
|
+
|
23
|
+
# 0.22.4
|
24
|
+
- Add classifier and regressor classes for voting ensemble method.
|
25
|
+
- [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
|
26
|
+
- [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
|
27
|
+
- Refactor some codes.
|
28
|
+
- Fix some typos on API documentation.
|
29
|
+
|
30
|
+
# 0.22.3
|
31
|
+
- Add regressor class for non-negative least square method.
|
32
|
+
- [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
|
33
|
+
- Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
|
34
|
+
- In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
|
35
|
+
the solver parameter. If you use the 'sgd' solver, you need to specify it explicitly.
|
36
|
+
- Add GC guard to native extension codes.
|
37
|
+
- Update API documentation.
|
38
|
+
|
1
39
|
# 0.22.2
|
2
40
|
- Add classifier and regressor classes for stacking method.
|
3
41
|
- [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
|
data/Gemfile
CHANGED
@@ -6,11 +6,12 @@ gemspec
|
|
6
6
|
gem 'mmh3', '>= 1.0'
|
7
7
|
gem 'numo-linalg', '>= 0.1.4'
|
8
8
|
gem 'parallel', '>= 1.17.0'
|
9
|
-
gem 'rake', '~>
|
9
|
+
gem 'rake', '~> 13.0'
|
10
10
|
gem 'rake-compiler', '~> 1.0'
|
11
11
|
gem 'rspec', '~> 3.0'
|
12
12
|
gem 'rubocop', '~> 1.0'
|
13
13
|
gem 'rubocop-performance', '~> 1.8'
|
14
14
|
gem 'rubocop-rake', '~> 0.5'
|
15
15
|
gem 'rubocop-rspec', '~> 2.0'
|
16
|
-
gem 'simplecov', '~> 0.
|
16
|
+
gem 'simplecov', '~> 0.21'
|
17
|
+
gem 'simplecov-lcov', '~> 0.8'
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions
|
5
|
+
[![Build Status](https://github.com/yoshoku/rumale/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
|
6
|
+
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
|
6
7
|
[![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
|
7
8
|
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
|
8
9
|
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
|
@@ -176,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
|
|
176
177
|
|
177
178
|
Install OpenBLAS library.
|
178
179
|
|
179
|
-
|
180
|
+
macOS:
|
180
181
|
|
181
182
|
```bash
|
182
183
|
$ brew install openblas
|
@@ -185,12 +186,13 @@ $ brew install openblas
|
|
185
186
|
Ubuntu:
|
186
187
|
|
187
188
|
```bash
|
188
|
-
$ sudo apt-get install
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
189
|
+
$ sudo apt-get install libopenblas-dev liblapacke-dev
|
190
|
+
```
|
191
|
+
|
192
|
+
Windows (MSYS2):
|
193
|
+
|
194
|
+
```bash
|
195
|
+
$ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
|
194
196
|
```
|
195
197
|
|
196
198
|
Install Numo::Linalg gem.
|
@@ -206,6 +208,37 @@ require 'numo/linalg/autoloader'
|
|
206
208
|
require 'rumale'
|
207
209
|
```
|
208
210
|
|
211
|
+
### Numo::OpenBLAS
|
212
|
+
[Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
|
213
|
+
and uses that as a background library for Numo::Linalg.
|
214
|
+
|
215
|
+
Install compilers for building OpenBLAS.
|
216
|
+
|
217
|
+
macOS:
|
218
|
+
|
219
|
+
```bash
|
220
|
+
$ brew install gcc gfortran make
|
221
|
+
```
|
222
|
+
|
223
|
+
Ubuntu:
|
224
|
+
|
225
|
+
```bash
|
226
|
+
$ sudo apt-get install gcc gfortran make
|
227
|
+
```
|
228
|
+
|
229
|
+
Install Numo::OpenBLAS gem.
|
230
|
+
|
231
|
+
```bash
|
232
|
+
$ gem install numo-openblas
|
233
|
+
```
|
234
|
+
|
235
|
+
Load Numo::OpenBLAS gem instead of Numo::Linalg.
|
236
|
+
|
237
|
+
```ruby
|
238
|
+
require 'numo/openblas'
|
239
|
+
require 'rumale'
|
240
|
+
```
|
241
|
+
|
209
242
|
### Parallel
|
210
243
|
Several estimators in Rumale support parallel processing.
|
211
244
|
Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
|
@@ -227,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
|
|
227
260
|
estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
|
228
261
|
```
|
229
262
|
|
263
|
+
## Related Projects
|
264
|
+
- [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
|
265
|
+
- [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
|
266
|
+
|
230
267
|
## Novelties
|
231
268
|
|
232
269
|
* [Rumale SHOP](https://suzuri.jp/yoshoku)
|
data/Rakefile
CHANGED
data/ext/rumale/extconf.rb
CHANGED
data/ext/rumale/tree.c
CHANGED
@@ -2,18 +2,13 @@
|
|
2
2
|
|
3
3
|
RUBY_EXTERN VALUE mRumale;
|
4
4
|
|
5
|
-
double*
|
6
|
-
alloc_dbl_array(const long n_dimensions)
|
7
|
-
{
|
8
|
-
long i;
|
5
|
+
double* alloc_dbl_array(const long n_dimensions) {
|
9
6
|
double* arr = ALLOC_N(double, n_dimensions);
|
10
|
-
|
7
|
+
memset(arr, 0, n_dimensions * sizeof(double));
|
11
8
|
return arr;
|
12
9
|
}
|
13
10
|
|
14
|
-
double
|
15
|
-
calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
|
16
|
-
{
|
11
|
+
double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
|
17
12
|
long i;
|
18
13
|
double el;
|
19
14
|
double gini = 0.0;
|
@@ -26,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
|
|
26
21
|
return 1.0 - gini;
|
27
22
|
}
|
28
23
|
|
29
|
-
double
|
30
|
-
calc_entropy(double* histogram, const long n_elements, const long n_classes)
|
31
|
-
{
|
24
|
+
double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
|
32
25
|
long i;
|
33
26
|
double el;
|
34
27
|
double entropy = 0.0;
|
@@ -42,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
|
|
42
35
|
}
|
43
36
|
|
44
37
|
VALUE
|
45
|
-
calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
|
46
|
-
{
|
38
|
+
calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
|
47
39
|
long i;
|
48
40
|
VALUE mean_vec = rb_ary_new2(n_dimensions);
|
49
41
|
|
@@ -54,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
|
|
54
46
|
return mean_vec;
|
55
47
|
}
|
56
48
|
|
57
|
-
double
|
58
|
-
calc_vec_mae(VALUE vec_a, VALUE vec_b)
|
59
|
-
{
|
49
|
+
double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
|
60
50
|
long i;
|
61
51
|
const long n_dimensions = RARRAY_LEN(vec_a);
|
62
52
|
double sum = 0.0;
|
@@ -70,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
|
|
70
60
|
return sum / n_dimensions;
|
71
61
|
}
|
72
62
|
|
73
|
-
double
|
74
|
-
calc_vec_mse(VALUE vec_a, VALUE vec_b)
|
75
|
-
{
|
63
|
+
double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
|
76
64
|
long i;
|
77
65
|
const long n_dimensions = RARRAY_LEN(vec_a);
|
78
66
|
double sum = 0.0;
|
@@ -86,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
|
|
86
74
|
return sum / n_dimensions;
|
87
75
|
}
|
88
76
|
|
89
|
-
double
|
90
|
-
calc_mae(VALUE target_vecs, VALUE mean_vec)
|
91
|
-
{
|
77
|
+
double calc_mae(VALUE target_vecs, VALUE mean_vec) {
|
92
78
|
long i;
|
93
79
|
const long n_elements = RARRAY_LEN(target_vecs);
|
94
80
|
double sum = 0.0;
|
@@ -100,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
|
|
100
86
|
return sum / n_elements;
|
101
87
|
}
|
102
88
|
|
103
|
-
double
|
104
|
-
calc_mse(VALUE target_vecs, VALUE mean_vec)
|
105
|
-
{
|
89
|
+
double calc_mse(VALUE target_vecs, VALUE mean_vec) {
|
106
90
|
long i;
|
107
91
|
const long n_elements = RARRAY_LEN(target_vecs);
|
108
92
|
double sum = 0.0;
|
@@ -114,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
|
|
114
98
|
return sum / n_elements;
|
115
99
|
}
|
116
100
|
|
117
|
-
double
|
118
|
-
calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
|
119
|
-
{
|
101
|
+
double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
|
120
102
|
if (strcmp(criterion, "entropy") == 0) {
|
121
103
|
return calc_entropy(histogram, n_elements, n_classes);
|
122
104
|
}
|
123
105
|
return calc_gini_coef(histogram, n_elements, n_classes);
|
124
106
|
}
|
125
107
|
|
126
|
-
double
|
127
|
-
calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
|
128
|
-
{
|
108
|
+
double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
|
129
109
|
const long n_elements = RARRAY_LEN(target_vecs);
|
130
110
|
const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
|
131
111
|
VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
|
@@ -136,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
|
|
136
116
|
return calc_mse(target_vecs, mean_vec);
|
137
117
|
}
|
138
118
|
|
139
|
-
void
|
140
|
-
add_sum_vec(double* sum_vec, VALUE target)
|
141
|
-
{
|
119
|
+
void add_sum_vec(double* sum_vec, VALUE target) {
|
142
120
|
long i;
|
143
121
|
const long n_dimensions = RARRAY_LEN(target);
|
144
122
|
|
@@ -147,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
|
|
147
125
|
}
|
148
126
|
}
|
149
127
|
|
150
|
-
void
|
151
|
-
sub_sum_vec(double* sum_vec, VALUE target)
|
152
|
-
{
|
128
|
+
void sub_sum_vec(double* sum_vec, VALUE target) {
|
153
129
|
long i;
|
154
130
|
const long n_dimensions = RARRAY_LEN(target);
|
155
131
|
|
@@ -169,9 +145,7 @@ typedef struct {
|
|
169
145
|
/**
|
170
146
|
* @!visibility private
|
171
147
|
*/
|
172
|
-
static void
|
173
|
-
iter_find_split_params_cls(na_loop_t const* lp)
|
174
|
-
{
|
148
|
+
static void iter_find_split_params_cls(na_loop_t const* lp) {
|
175
149
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
176
150
|
const double* f = (double*)NDL_PTR(lp, 1);
|
177
151
|
const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
|
@@ -201,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
201
175
|
params[3] = 0.0; /* gain */
|
202
176
|
|
203
177
|
/* Initialize child node variables. */
|
204
|
-
for (i = 0; i < n_elements; i++) {
|
178
|
+
for (i = 0; i < n_elements; i++) {
|
179
|
+
r_histogram[y[o[i]]] += 1.0;
|
180
|
+
}
|
205
181
|
|
206
182
|
/* Find optimal parameters. */
|
207
183
|
while (curr_pos < n_elements && curr_el != last_el) {
|
@@ -225,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
225
201
|
params[2] = 0.5 * (curr_el + next_el);
|
226
202
|
params[3] = gain;
|
227
203
|
}
|
228
|
-
if (next_pos == n_elements)
|
204
|
+
if (next_pos == n_elements)
|
205
|
+
break;
|
229
206
|
curr_pos = next_pos;
|
230
207
|
curr_el = f[o[curr_pos]];
|
231
208
|
}
|
@@ -247,20 +224,22 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
247
224
|
* @param n_classes [Integer] The number of classes.
|
248
225
|
* @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
|
249
226
|
*/
|
250
|
-
static VALUE
|
251
|
-
|
252
|
-
{
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
|
227
|
+
static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
|
228
|
+
VALUE n_classes) {
|
229
|
+
ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
|
230
|
+
size_t out_shape[1] = {4};
|
231
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
232
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
|
233
|
+
split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
|
258
234
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
|
259
235
|
VALUE results = rb_ary_new2(4);
|
260
|
-
|
261
|
-
rb_ary_store(results,
|
262
|
-
rb_ary_store(results,
|
263
|
-
rb_ary_store(results,
|
236
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
237
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
238
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
239
|
+
rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
|
240
|
+
rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
|
241
|
+
RB_GC_GUARD(params);
|
242
|
+
RB_GC_GUARD(criterion);
|
264
243
|
return results;
|
265
244
|
}
|
266
245
|
|
@@ -274,9 +253,7 @@ typedef struct {
|
|
274
253
|
/**
|
275
254
|
* @!visibility private
|
276
255
|
*/
|
277
|
-
static void
|
278
|
-
iter_find_split_params_reg(na_loop_t const* lp)
|
279
|
-
{
|
256
|
+
static void iter_find_split_params_reg(na_loop_t const* lp) {
|
280
257
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
281
258
|
const double* f = (double*)NDL_PTR(lp, 1);
|
282
259
|
const double* y = (double*)NDL_PTR(lp, 2);
|
@@ -344,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
|
|
344
321
|
params[2] = 0.5 * (curr_el + next_el);
|
345
322
|
params[3] = gain;
|
346
323
|
}
|
347
|
-
if (next_pos == n_elements)
|
324
|
+
if (next_pos == n_elements)
|
325
|
+
break;
|
348
326
|
curr_pos = next_pos;
|
349
327
|
curr_el = f[o[curr_pos]];
|
350
328
|
}
|
@@ -365,29 +343,28 @@ iter_find_split_params_reg(na_loop_t const* lp)
|
|
365
343
|
* @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
|
366
344
|
* @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
|
367
345
|
*/
|
368
|
-
static VALUE
|
369
|
-
|
370
|
-
{
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
|
375
|
-
split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
|
346
|
+
static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
|
347
|
+
ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
|
348
|
+
size_t out_shape[1] = {4};
|
349
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
350
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
|
351
|
+
split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
|
376
352
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
|
377
353
|
VALUE results = rb_ary_new2(4);
|
378
|
-
|
379
|
-
rb_ary_store(results,
|
380
|
-
rb_ary_store(results,
|
381
|
-
rb_ary_store(results,
|
354
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
355
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
356
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
357
|
+
rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
|
358
|
+
rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
|
359
|
+
RB_GC_GUARD(params);
|
360
|
+
RB_GC_GUARD(criterion);
|
382
361
|
return results;
|
383
362
|
}
|
384
363
|
|
385
364
|
/**
|
386
365
|
* @!visibility private
|
387
366
|
*/
|
388
|
-
static void
|
389
|
-
iter_find_split_params_grad_reg(na_loop_t const* lp)
|
390
|
-
{
|
367
|
+
static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
|
391
368
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
392
369
|
const double* f = (double*)NDL_PTR(lp, 1);
|
393
370
|
const double* g = (double*)NDL_PTR(lp, 2);
|
@@ -422,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
|
|
422
399
|
/* Calculate gain of new split. */
|
423
400
|
r_grad = s_grad - l_grad;
|
424
401
|
r_hess = s_hess - l_hess;
|
425
|
-
gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
|
426
|
-
(r_grad * r_grad) / (r_hess + reg_lambda) -
|
402
|
+
gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
|
427
403
|
(s_grad * s_grad) / (s_hess + reg_lambda);
|
428
404
|
/* Update optimal parameters. */
|
429
405
|
if (gain > gain_max) {
|
430
406
|
threshold = 0.5 * (curr_el + next_el);
|
431
407
|
gain_max = gain;
|
432
408
|
}
|
433
|
-
if (next_pos == n_elements)
|
409
|
+
if (next_pos == n_elements) {
|
410
|
+
break;
|
411
|
+
}
|
434
412
|
curr_pos = next_pos;
|
435
413
|
curr_el = f[o[curr_pos]];
|
436
414
|
}
|
@@ -453,19 +431,19 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
|
|
453
431
|
* @param reg_lambda [Float] The L2 regularization term on weight.
|
454
432
|
* @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
|
455
433
|
*/
|
456
|
-
static VALUE
|
457
|
-
|
458
|
-
|
459
|
-
{
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
|
464
|
-
double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
|
434
|
+
static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
|
435
|
+
VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
|
436
|
+
ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
|
437
|
+
size_t out_shape[1] = {2};
|
438
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
439
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
|
440
|
+
double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
|
465
441
|
VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
|
466
442
|
VALUE results = rb_ary_new2(2);
|
467
|
-
|
468
|
-
rb_ary_store(results,
|
443
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
444
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
445
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
446
|
+
RB_GC_GUARD(params);
|
469
447
|
return results;
|
470
448
|
}
|
471
449
|
|
@@ -481,9 +459,7 @@ find_split_params_grad_reg
|
|
481
459
|
* @param n_classes_ [Integer] The number of classes.
|
482
460
|
* @return [Float] impurity
|
483
461
|
*/
|
484
|
-
static VALUE
|
485
|
-
node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
|
486
|
-
{
|
462
|
+
static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
|
487
463
|
long i;
|
488
464
|
const long n_classes = NUM2LONG(n_classes_);
|
489
465
|
const long n_elements = NUM2LONG(n_elements_);
|
@@ -491,12 +467,17 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
491
467
|
double* histogram = alloc_dbl_array(n_classes);
|
492
468
|
VALUE ret;
|
493
469
|
|
494
|
-
for (i = 0; i < n_elements; i++) {
|
470
|
+
for (i = 0; i < n_elements; i++) {
|
471
|
+
histogram[y[i]] += 1;
|
472
|
+
}
|
495
473
|
|
496
474
|
ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
|
497
475
|
|
498
476
|
xfree(histogram);
|
499
477
|
|
478
|
+
RB_GC_GUARD(y_nary);
|
479
|
+
RB_GC_GUARD(criterion);
|
480
|
+
|
500
481
|
return ret;
|
501
482
|
}
|
502
483
|
|
@@ -510,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
510
491
|
* @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
|
511
492
|
* @return [Float] impurity
|
512
493
|
*/
|
513
|
-
static VALUE
|
514
|
-
node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
515
|
-
{
|
494
|
+
static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
|
516
495
|
long i;
|
517
496
|
const long n_elements = RARRAY_LEN(y);
|
518
497
|
const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
|
@@ -531,11 +510,12 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
|
531
510
|
|
532
511
|
xfree(sum_vec);
|
533
512
|
|
513
|
+
RB_GC_GUARD(criterion);
|
514
|
+
|
534
515
|
return ret;
|
535
516
|
}
|
536
517
|
|
537
|
-
void init_tree_module()
|
538
|
-
{
|
518
|
+
void init_tree_module() {
|
539
519
|
VALUE mTree = rb_define_module_under(mRumale, "Tree");
|
540
520
|
/**
|
541
521
|
* Document-module: Rumale::Tree::ExtDecisionTreeClassifier
|