rumale 0.22.2 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3c7b4dd3b452f96f88f368a9b279fc67dd7e2fd0033f7a06247e052252de18f
|
4
|
+
data.tar.gz: 88913193c9a6d33cd16cdd45b6a22bf94c072f6ebcb141571dcaef2a0f7aec71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6f824f82415c8dfca7448505a2743bd94a89e9d0575e1c8edf6cdd37bd81af991ab9ed0c4970ed4572d2296c43d5c69331b669be9f4c5a60f9b900b7d220744
|
7
|
+
data.tar.gz: bfebdfc2110f159c2aa0b3cd00b33455e1cfc38bc7bdce36be98e3a21b6138ffbc0299eae7f8b4a913629611d168095dcbc0d9e560ede89463963b2284d95689
|
data/.clang-format
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
---
|
2
|
+
Language: Cpp
|
3
|
+
# BasedOnStyle: LLVM
|
4
|
+
AccessModifierOffset: -2
|
5
|
+
AlignAfterOpenBracket: Align
|
6
|
+
AlignConsecutiveMacros: false
|
7
|
+
AlignConsecutiveAssignments: false
|
8
|
+
AlignConsecutiveBitFields: false
|
9
|
+
AlignConsecutiveDeclarations: false
|
10
|
+
AlignEscapedNewlines: Right
|
11
|
+
AlignOperands: Align
|
12
|
+
AlignTrailingComments: true
|
13
|
+
AllowAllArgumentsOnNextLine: true
|
14
|
+
AllowAllConstructorInitializersOnNextLine: true
|
15
|
+
AllowAllParametersOfDeclarationOnNextLine: true
|
16
|
+
AllowShortEnumsOnASingleLine: true
|
17
|
+
AllowShortBlocksOnASingleLine: Never
|
18
|
+
AllowShortCaseLabelsOnASingleLine: false
|
19
|
+
AllowShortFunctionsOnASingleLine: All
|
20
|
+
AllowShortLambdasOnASingleLine: All
|
21
|
+
AllowShortIfStatementsOnASingleLine: Never
|
22
|
+
AllowShortLoopsOnASingleLine: false
|
23
|
+
AlwaysBreakAfterDefinitionReturnType: None
|
24
|
+
AlwaysBreakAfterReturnType: None
|
25
|
+
AlwaysBreakBeforeMultilineStrings: false
|
26
|
+
AlwaysBreakTemplateDeclarations: MultiLine
|
27
|
+
BinPackArguments: true
|
28
|
+
BinPackParameters: true
|
29
|
+
BraceWrapping:
|
30
|
+
AfterCaseLabel: false
|
31
|
+
AfterClass: false
|
32
|
+
AfterControlStatement: Never
|
33
|
+
AfterEnum: false
|
34
|
+
AfterFunction: false
|
35
|
+
AfterNamespace: false
|
36
|
+
AfterObjCDeclaration: false
|
37
|
+
AfterStruct: false
|
38
|
+
AfterUnion: false
|
39
|
+
AfterExternBlock: false
|
40
|
+
BeforeCatch: false
|
41
|
+
BeforeElse: false
|
42
|
+
BeforeLambdaBody: false
|
43
|
+
BeforeWhile: false
|
44
|
+
IndentBraces: false
|
45
|
+
SplitEmptyFunction: true
|
46
|
+
SplitEmptyRecord: true
|
47
|
+
SplitEmptyNamespace: true
|
48
|
+
BreakBeforeBinaryOperators: None
|
49
|
+
BreakBeforeBraces: Attach
|
50
|
+
BreakBeforeInheritanceComma: false
|
51
|
+
BreakInheritanceList: BeforeColon
|
52
|
+
BreakBeforeTernaryOperators: true
|
53
|
+
BreakConstructorInitializersBeforeComma: false
|
54
|
+
BreakConstructorInitializers: BeforeColon
|
55
|
+
BreakAfterJavaFieldAnnotations: false
|
56
|
+
BreakStringLiterals: true
|
57
|
+
ColumnLimit: 128
|
58
|
+
CommentPragmas: '^ IWYU pragma:'
|
59
|
+
CompactNamespaces: false
|
60
|
+
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
61
|
+
ConstructorInitializerIndentWidth: 4
|
62
|
+
ContinuationIndentWidth: 4
|
63
|
+
Cpp11BracedListStyle: true
|
64
|
+
DeriveLineEnding: true
|
65
|
+
DerivePointerAlignment: false
|
66
|
+
DisableFormat: false
|
67
|
+
ExperimentalAutoDetectBinPacking: false
|
68
|
+
FixNamespaceComments: true
|
69
|
+
ForEachMacros:
|
70
|
+
- foreach
|
71
|
+
- Q_FOREACH
|
72
|
+
- BOOST_FOREACH
|
73
|
+
IncludeBlocks: Preserve
|
74
|
+
IncludeCategories:
|
75
|
+
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
76
|
+
Priority: 2
|
77
|
+
SortPriority: 0
|
78
|
+
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
79
|
+
Priority: 3
|
80
|
+
SortPriority: 0
|
81
|
+
- Regex: '.*'
|
82
|
+
Priority: 1
|
83
|
+
SortPriority: 0
|
84
|
+
IncludeIsMainRegex: '(Test)?$'
|
85
|
+
IncludeIsMainSourceRegex: ''
|
86
|
+
IndentCaseLabels: false
|
87
|
+
IndentCaseBlocks: false
|
88
|
+
IndentGotoLabels: true
|
89
|
+
IndentPPDirectives: None
|
90
|
+
IndentExternBlock: AfterExternBlock
|
91
|
+
IndentWidth: 2
|
92
|
+
IndentWrappedFunctionNames: false
|
93
|
+
InsertTrailingCommas: None
|
94
|
+
JavaScriptQuotes: Leave
|
95
|
+
JavaScriptWrapImports: true
|
96
|
+
KeepEmptyLinesAtTheStartOfBlocks: true
|
97
|
+
MacroBlockBegin: ''
|
98
|
+
MacroBlockEnd: ''
|
99
|
+
MaxEmptyLinesToKeep: 1
|
100
|
+
NamespaceIndentation: None
|
101
|
+
ObjCBinPackProtocolList: Auto
|
102
|
+
ObjCBlockIndentWidth: 2
|
103
|
+
ObjCBreakBeforeNestedBlockParam: true
|
104
|
+
ObjCSpaceAfterProperty: false
|
105
|
+
ObjCSpaceBeforeProtocolList: true
|
106
|
+
PenaltyBreakAssignment: 2
|
107
|
+
PenaltyBreakBeforeFirstCallParameter: 19
|
108
|
+
PenaltyBreakComment: 300
|
109
|
+
PenaltyBreakFirstLessLess: 120
|
110
|
+
PenaltyBreakString: 1000
|
111
|
+
PenaltyBreakTemplateDeclaration: 10
|
112
|
+
PenaltyExcessCharacter: 1000000
|
113
|
+
PenaltyReturnTypeOnItsOwnLine: 60
|
114
|
+
PointerAlignment: Left
|
115
|
+
ReflowComments: true
|
116
|
+
SortIncludes: true
|
117
|
+
SortUsingDeclarations: true
|
118
|
+
SpaceAfterCStyleCast: false
|
119
|
+
SpaceAfterLogicalNot: false
|
120
|
+
SpaceAfterTemplateKeyword: true
|
121
|
+
SpaceBeforeAssignmentOperators: true
|
122
|
+
SpaceBeforeCpp11BracedList: false
|
123
|
+
SpaceBeforeCtorInitializerColon: true
|
124
|
+
SpaceBeforeInheritanceColon: true
|
125
|
+
SpaceBeforeParens: ControlStatements
|
126
|
+
SpaceBeforeRangeBasedForLoopColon: true
|
127
|
+
SpaceInEmptyBlock: false
|
128
|
+
SpaceInEmptyParentheses: false
|
129
|
+
SpacesBeforeTrailingComments: 1
|
130
|
+
SpacesInAngles: false
|
131
|
+
SpacesInConditionalStatement: false
|
132
|
+
SpacesInContainerLiterals: true
|
133
|
+
SpacesInCStyleCastParentheses: false
|
134
|
+
SpacesInParentheses: false
|
135
|
+
SpacesInSquareBrackets: false
|
136
|
+
SpaceBeforeSquareBrackets: false
|
137
|
+
Standard: Latest
|
138
|
+
StatementMacros:
|
139
|
+
- Q_UNUSED
|
140
|
+
- QT_REQUIRE_VERSION
|
141
|
+
TabWidth: 8
|
142
|
+
UseCRLF: false
|
143
|
+
UseTab: Never
|
144
|
+
WhitespaceSensitiveMacros:
|
145
|
+
- STRINGIZE
|
146
|
+
- PP_STRINGIZE
|
147
|
+
- BOOST_PP_STRINGIZE
|
148
|
+
...
|
149
|
+
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: github-ci
|
data/.github/workflows/build.yml
CHANGED
@@ -6,8 +6,9 @@ jobs:
|
|
6
6
|
build:
|
7
7
|
runs-on: ubuntu-latest
|
8
8
|
strategy:
|
9
|
+
fail-fast: false
|
9
10
|
matrix:
|
10
|
-
ruby: [ '2.5', '2.6', '2.7' ]
|
11
|
+
ruby: [ '2.5', '2.6', '2.7', '3.0' ]
|
11
12
|
steps:
|
12
13
|
- uses: actions/checkout@v2
|
13
14
|
- name: Install BLAS and LAPACK
|
@@ -17,7 +18,9 @@ jobs:
|
|
17
18
|
with:
|
18
19
|
ruby-version: ${{ matrix.ruby }}
|
19
20
|
- name: Build and test with Rake
|
21
|
+
env:
|
22
|
+
LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
|
20
23
|
run: |
|
21
|
-
gem install bundler
|
24
|
+
gem install --no-document bundler
|
22
25
|
bundle install --jobs 4 --retry 3
|
23
26
|
bundle exec rake
|
@@ -0,0 +1,30 @@
|
|
1
|
+
name: coverage
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ main ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ main ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
coverage:
|
11
|
+
runs-on: ubuntu-20.04
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v2
|
14
|
+
- name: Install BLAS and LAPACK
|
15
|
+
run: sudo apt-get install -y libopenblas-dev liblapacke-dev
|
16
|
+
- name: Set up Ruby 2.7
|
17
|
+
uses: actions/setup-ruby@v1
|
18
|
+
with:
|
19
|
+
ruby-version: '2.7'
|
20
|
+
- name: Build and test with Rake
|
21
|
+
env:
|
22
|
+
LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
|
23
|
+
run: |
|
24
|
+
gem install bundler
|
25
|
+
bundle install
|
26
|
+
bundle exec rake
|
27
|
+
- name: Coveralls GitHub Action
|
28
|
+
uses: coverallsapp/github-action@v1.1.2
|
29
|
+
with:
|
30
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1
|
+
# 0.23.1
|
2
|
+
- Fix all estimators to return inference results in a contiguous narray.
|
3
|
+
- Fix to use until statement instead of recursive call on apply methods of tree estimators.
|
4
|
+
- Rename native extension files.
|
5
|
+
- Introduce clang-format for native extension codes.
|
6
|
+
|
7
|
+
# 0.23.0
|
8
|
+
## Breaking change
|
9
|
+
- Change automatically selected solver from sgd to lbfgs in
|
10
|
+
[LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html) and
|
11
|
+
[Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html).
|
12
|
+
- When 'auto' is given to the solver parameter, these estimators select the 'svd' solver if Numo::Linalg is loaded.
|
13
|
+
Otherwise, they select the 'lbfgs' solver.
|
14
|
+
|
15
|
+
# 0.22.5
|
16
|
+
- Add transformer class for calculating kernel matrix.
|
17
|
+
- [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
|
18
|
+
- Add classifier class based on Ridge regression.
|
19
|
+
- [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
|
20
|
+
- Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
|
21
|
+
- Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
|
22
|
+
|
23
|
+
# 0.22.4
|
24
|
+
- Add classifier and regressor classes for voting ensemble method.
|
25
|
+
- [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
|
26
|
+
- [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
|
27
|
+
- Refactor some codes.
|
28
|
+
- Fix some typos on API documentation.
|
29
|
+
|
30
|
+
# 0.22.3
|
31
|
+
- Add regressor class for non-negative least squares method.
|
32
|
+
- [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
|
33
|
+
- Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
|
34
|
+
- In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
|
35
|
+
the solver parameter. If you use the 'sgd' solver, you need to specify it explicitly.
|
36
|
+
- Add GC guard to native extension codes.
|
37
|
+
- Update API documentation.
|
38
|
+
|
1
39
|
# 0.22.2
|
2
40
|
- Add classifier and regressor classes for stacking method.
|
3
41
|
- [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
|
data/Gemfile
CHANGED
@@ -6,11 +6,12 @@ gemspec
|
|
6
6
|
gem 'mmh3', '>= 1.0'
|
7
7
|
gem 'numo-linalg', '>= 0.1.4'
|
8
8
|
gem 'parallel', '>= 1.17.0'
|
9
|
-
gem 'rake', '~>
|
9
|
+
gem 'rake', '~> 13.0'
|
10
10
|
gem 'rake-compiler', '~> 1.0'
|
11
11
|
gem 'rspec', '~> 3.0'
|
12
12
|
gem 'rubocop', '~> 1.0'
|
13
13
|
gem 'rubocop-performance', '~> 1.8'
|
14
14
|
gem 'rubocop-rake', '~> 0.5'
|
15
15
|
gem 'rubocop-rspec', '~> 2.0'
|
16
|
-
gem 'simplecov', '~> 0.
|
16
|
+
gem 'simplecov', '~> 0.21'
|
17
|
+
gem 'simplecov-lcov', '~> 0.8'
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|

|
4
4
|
|
5
|
-
[](https://github.com/yoshoku/rumale/actions
|
5
|
+
[](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
|
6
|
+
[](https://coveralls.io/github/yoshoku/rumale?branch=main)
|
6
7
|
[](https://badge.fury.io/rb/rumale)
|
7
8
|
[](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
|
8
9
|
[](https://yoshoku.github.io/rumale/doc/)
|
@@ -176,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
|
|
176
177
|
|
177
178
|
Install OpenBLAS library.
|
178
179
|
|
179
|
-
|
180
|
+
macOS:
|
180
181
|
|
181
182
|
```bash
|
182
183
|
$ brew install openblas
|
@@ -185,12 +186,13 @@ $ brew install openblas
|
|
185
186
|
Ubuntu:
|
186
187
|
|
187
188
|
```bash
|
188
|
-
$ sudo apt-get install
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
189
|
+
$ sudo apt-get install libopenblas-dev liblapacke-dev
|
190
|
+
```
|
191
|
+
|
192
|
+
Windows (MSYS2):
|
193
|
+
|
194
|
+
```bash
|
195
|
+
$ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
|
194
196
|
```
|
195
197
|
|
196
198
|
Install Numo::Linalg gem.
|
@@ -206,6 +208,37 @@ require 'numo/linalg/autoloader'
|
|
206
208
|
require 'rumale'
|
207
209
|
```
|
208
210
|
|
211
|
+
### Numo::OpenBLAS
|
212
|
+
[Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
|
213
|
+
and uses that as a background library for Numo::Linalg.
|
214
|
+
|
215
|
+
Install compilers for building OpenBLAS.
|
216
|
+
|
217
|
+
macOS:
|
218
|
+
|
219
|
+
```bash
|
220
|
+
$ brew install gcc gfortran make
|
221
|
+
```
|
222
|
+
|
223
|
+
Ubuntu:
|
224
|
+
|
225
|
+
```bash
|
226
|
+
$ sudo apt-get install gcc gfortran make
|
227
|
+
```
|
228
|
+
|
229
|
+
Install Numo::OpenBLAS gem.
|
230
|
+
|
231
|
+
```bash
|
232
|
+
$ gem install numo-openblas
|
233
|
+
```
|
234
|
+
|
235
|
+
Load Numo::OpenBLAS gem instead of Numo::Linalg.
|
236
|
+
|
237
|
+
```ruby
|
238
|
+
require 'numo/openblas'
|
239
|
+
require 'rumale'
|
240
|
+
```
|
241
|
+
|
209
242
|
### Parallel
|
210
243
|
Several estimators in Rumale support parallel processing.
|
211
244
|
Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
|
@@ -227,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
|
|
227
260
|
estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
|
228
261
|
```
|
229
262
|
|
263
|
+
## Related Projects
|
264
|
+
- [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
|
265
|
+
- [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
|
266
|
+
|
230
267
|
## Novelties
|
231
268
|
|
232
269
|
* [Rumale SHOP](https://suzuri.jp/yoshoku)
|
data/Rakefile
CHANGED
data/ext/rumale/extconf.rb
CHANGED
data/ext/rumale/tree.c
CHANGED
@@ -2,18 +2,13 @@
|
|
2
2
|
|
3
3
|
RUBY_EXTERN VALUE mRumale;
|
4
4
|
|
5
|
-
double*
|
6
|
-
alloc_dbl_array(const long n_dimensions)
|
7
|
-
{
|
8
|
-
long i;
|
5
|
+
double* alloc_dbl_array(const long n_dimensions) {
|
9
6
|
double* arr = ALLOC_N(double, n_dimensions);
|
10
|
-
|
7
|
+
memset(arr, 0, n_dimensions * sizeof(double));
|
11
8
|
return arr;
|
12
9
|
}
|
13
10
|
|
14
|
-
double
|
15
|
-
calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
|
16
|
-
{
|
11
|
+
double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
|
17
12
|
long i;
|
18
13
|
double el;
|
19
14
|
double gini = 0.0;
|
@@ -26,9 +21,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
|
|
26
21
|
return 1.0 - gini;
|
27
22
|
}
|
28
23
|
|
29
|
-
double
|
30
|
-
calc_entropy(double* histogram, const long n_elements, const long n_classes)
|
31
|
-
{
|
24
|
+
double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
|
32
25
|
long i;
|
33
26
|
double el;
|
34
27
|
double entropy = 0.0;
|
@@ -42,8 +35,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
|
|
42
35
|
}
|
43
36
|
|
44
37
|
VALUE
|
45
|
-
calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
|
46
|
-
{
|
38
|
+
calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
|
47
39
|
long i;
|
48
40
|
VALUE mean_vec = rb_ary_new2(n_dimensions);
|
49
41
|
|
@@ -54,9 +46,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
|
|
54
46
|
return mean_vec;
|
55
47
|
}
|
56
48
|
|
57
|
-
double
|
58
|
-
calc_vec_mae(VALUE vec_a, VALUE vec_b)
|
59
|
-
{
|
49
|
+
double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
|
60
50
|
long i;
|
61
51
|
const long n_dimensions = RARRAY_LEN(vec_a);
|
62
52
|
double sum = 0.0;
|
@@ -70,9 +60,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
|
|
70
60
|
return sum / n_dimensions;
|
71
61
|
}
|
72
62
|
|
73
|
-
double
|
74
|
-
calc_vec_mse(VALUE vec_a, VALUE vec_b)
|
75
|
-
{
|
63
|
+
double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
|
76
64
|
long i;
|
77
65
|
const long n_dimensions = RARRAY_LEN(vec_a);
|
78
66
|
double sum = 0.0;
|
@@ -86,9 +74,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
|
|
86
74
|
return sum / n_dimensions;
|
87
75
|
}
|
88
76
|
|
89
|
-
double
|
90
|
-
calc_mae(VALUE target_vecs, VALUE mean_vec)
|
91
|
-
{
|
77
|
+
double calc_mae(VALUE target_vecs, VALUE mean_vec) {
|
92
78
|
long i;
|
93
79
|
const long n_elements = RARRAY_LEN(target_vecs);
|
94
80
|
double sum = 0.0;
|
@@ -100,9 +86,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
|
|
100
86
|
return sum / n_elements;
|
101
87
|
}
|
102
88
|
|
103
|
-
double
|
104
|
-
calc_mse(VALUE target_vecs, VALUE mean_vec)
|
105
|
-
{
|
89
|
+
double calc_mse(VALUE target_vecs, VALUE mean_vec) {
|
106
90
|
long i;
|
107
91
|
const long n_elements = RARRAY_LEN(target_vecs);
|
108
92
|
double sum = 0.0;
|
@@ -114,18 +98,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
|
|
114
98
|
return sum / n_elements;
|
115
99
|
}
|
116
100
|
|
117
|
-
double
|
118
|
-
calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
|
119
|
-
{
|
101
|
+
double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
|
120
102
|
if (strcmp(criterion, "entropy") == 0) {
|
121
103
|
return calc_entropy(histogram, n_elements, n_classes);
|
122
104
|
}
|
123
105
|
return calc_gini_coef(histogram, n_elements, n_classes);
|
124
106
|
}
|
125
107
|
|
126
|
-
double
|
127
|
-
calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
|
128
|
-
{
|
108
|
+
double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
|
129
109
|
const long n_elements = RARRAY_LEN(target_vecs);
|
130
110
|
const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
|
131
111
|
VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
|
@@ -136,9 +116,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
|
|
136
116
|
return calc_mse(target_vecs, mean_vec);
|
137
117
|
}
|
138
118
|
|
139
|
-
void
|
140
|
-
add_sum_vec(double* sum_vec, VALUE target)
|
141
|
-
{
|
119
|
+
void add_sum_vec(double* sum_vec, VALUE target) {
|
142
120
|
long i;
|
143
121
|
const long n_dimensions = RARRAY_LEN(target);
|
144
122
|
|
@@ -147,9 +125,7 @@ add_sum_vec(double* sum_vec, VALUE target)
|
|
147
125
|
}
|
148
126
|
}
|
149
127
|
|
150
|
-
void
|
151
|
-
sub_sum_vec(double* sum_vec, VALUE target)
|
152
|
-
{
|
128
|
+
void sub_sum_vec(double* sum_vec, VALUE target) {
|
153
129
|
long i;
|
154
130
|
const long n_dimensions = RARRAY_LEN(target);
|
155
131
|
|
@@ -169,9 +145,7 @@ typedef struct {
|
|
169
145
|
/**
|
170
146
|
* @!visibility private
|
171
147
|
*/
|
172
|
-
static void
|
173
|
-
iter_find_split_params_cls(na_loop_t const* lp)
|
174
|
-
{
|
148
|
+
static void iter_find_split_params_cls(na_loop_t const* lp) {
|
175
149
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
176
150
|
const double* f = (double*)NDL_PTR(lp, 1);
|
177
151
|
const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
|
@@ -201,7 +175,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
201
175
|
params[3] = 0.0; /* gain */
|
202
176
|
|
203
177
|
/* Initialize child node variables. */
|
204
|
-
for (i = 0; i < n_elements; i++) {
|
178
|
+
for (i = 0; i < n_elements; i++) {
|
179
|
+
r_histogram[y[o[i]]] += 1.0;
|
180
|
+
}
|
205
181
|
|
206
182
|
/* Find optimal parameters. */
|
207
183
|
while (curr_pos < n_elements && curr_el != last_el) {
|
@@ -225,7 +201,8 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
225
201
|
params[2] = 0.5 * (curr_el + next_el);
|
226
202
|
params[3] = gain;
|
227
203
|
}
|
228
|
-
if (next_pos == n_elements)
|
204
|
+
if (next_pos == n_elements)
|
205
|
+
break;
|
229
206
|
curr_pos = next_pos;
|
230
207
|
curr_el = f[o[curr_pos]];
|
231
208
|
}
|
@@ -247,20 +224,22 @@ iter_find_split_params_cls(na_loop_t const* lp)
|
|
247
224
|
* @param n_classes [Integer] The number of classes.
|
248
225
|
* @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
|
249
226
|
*/
|
250
|
-
static VALUE
|
251
|
-
|
252
|
-
{
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
|
227
|
+
static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
|
228
|
+
VALUE n_classes) {
|
229
|
+
ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
|
230
|
+
size_t out_shape[1] = {4};
|
231
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
232
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
|
233
|
+
split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
|
258
234
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
|
259
235
|
VALUE results = rb_ary_new2(4);
|
260
|
-
|
261
|
-
rb_ary_store(results,
|
262
|
-
rb_ary_store(results,
|
263
|
-
rb_ary_store(results,
|
236
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
237
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
238
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
239
|
+
rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
|
240
|
+
rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
|
241
|
+
RB_GC_GUARD(params);
|
242
|
+
RB_GC_GUARD(criterion);
|
264
243
|
return results;
|
265
244
|
}
|
266
245
|
|
@@ -274,9 +253,7 @@ typedef struct {
|
|
274
253
|
/**
|
275
254
|
* @!visibility private
|
276
255
|
*/
|
277
|
-
static void
|
278
|
-
iter_find_split_params_reg(na_loop_t const* lp)
|
279
|
-
{
|
256
|
+
static void iter_find_split_params_reg(na_loop_t const* lp) {
|
280
257
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
281
258
|
const double* f = (double*)NDL_PTR(lp, 1);
|
282
259
|
const double* y = (double*)NDL_PTR(lp, 2);
|
@@ -344,7 +321,8 @@ iter_find_split_params_reg(na_loop_t const* lp)
|
|
344
321
|
params[2] = 0.5 * (curr_el + next_el);
|
345
322
|
params[3] = gain;
|
346
323
|
}
|
347
|
-
if (next_pos == n_elements)
|
324
|
+
if (next_pos == n_elements)
|
325
|
+
break;
|
348
326
|
curr_pos = next_pos;
|
349
327
|
curr_el = f[o[curr_pos]];
|
350
328
|
}
|
@@ -365,29 +343,28 @@ iter_find_split_params_reg(na_loop_t const* lp)
|
|
365
343
|
* @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
|
366
344
|
* @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
|
367
345
|
*/
|
368
|
-
static VALUE
|
369
|
-
|
370
|
-
{
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
|
375
|
-
split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
|
346
|
+
static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
|
347
|
+
ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
|
348
|
+
size_t out_shape[1] = {4};
|
349
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
350
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
|
351
|
+
split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
|
376
352
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
|
377
353
|
VALUE results = rb_ary_new2(4);
|
378
|
-
|
379
|
-
rb_ary_store(results,
|
380
|
-
rb_ary_store(results,
|
381
|
-
rb_ary_store(results,
|
354
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
355
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
356
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
357
|
+
rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
|
358
|
+
rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
|
359
|
+
RB_GC_GUARD(params);
|
360
|
+
RB_GC_GUARD(criterion);
|
382
361
|
return results;
|
383
362
|
}
|
384
363
|
|
385
364
|
/**
|
386
365
|
* @!visibility private
|
387
366
|
*/
|
388
|
-
static void
|
389
|
-
iter_find_split_params_grad_reg(na_loop_t const* lp)
|
390
|
-
{
|
367
|
+
static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
|
391
368
|
const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
|
392
369
|
const double* f = (double*)NDL_PTR(lp, 1);
|
393
370
|
const double* g = (double*)NDL_PTR(lp, 2);
|
@@ -422,15 +399,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
|
|
422
399
|
/* Calculate gain of new split. */
|
423
400
|
r_grad = s_grad - l_grad;
|
424
401
|
r_hess = s_hess - l_hess;
|
425
|
-
gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
|
426
|
-
(r_grad * r_grad) / (r_hess + reg_lambda) -
|
402
|
+
gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
|
427
403
|
(s_grad * s_grad) / (s_hess + reg_lambda);
|
428
404
|
/* Update optimal parameters. */
|
429
405
|
if (gain > gain_max) {
|
430
406
|
threshold = 0.5 * (curr_el + next_el);
|
431
407
|
gain_max = gain;
|
432
408
|
}
|
433
|
-
if (next_pos == n_elements)
|
409
|
+
if (next_pos == n_elements) {
|
410
|
+
break;
|
411
|
+
}
|
434
412
|
curr_pos = next_pos;
|
435
413
|
curr_el = f[o[curr_pos]];
|
436
414
|
}
|
@@ -453,19 +431,19 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
|
|
453
431
|
* @param reg_lambda [Float] The L2 regularization term on weight.
|
454
432
|
* @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
|
455
433
|
*/
|
456
|
-
static VALUE
|
457
|
-
|
458
|
-
|
459
|
-
{
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
|
464
|
-
double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
|
434
|
+
static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
|
435
|
+
VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
|
436
|
+
ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
|
437
|
+
size_t out_shape[1] = {2};
|
438
|
+
ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
|
439
|
+
ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
|
440
|
+
double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
|
465
441
|
VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
|
466
442
|
VALUE results = rb_ary_new2(2);
|
467
|
-
|
468
|
-
rb_ary_store(results,
|
443
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
444
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
445
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
446
|
+
RB_GC_GUARD(params);
|
469
447
|
return results;
|
470
448
|
}
|
471
449
|
|
@@ -481,9 +459,7 @@ find_split_params_grad_reg
|
|
481
459
|
* @param n_classes_ [Integer] The number of classes.
|
482
460
|
* @return [Float] impurity
|
483
461
|
*/
|
484
|
-
static VALUE
|
485
|
-
node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
|
486
|
-
{
|
462
|
+
static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
|
487
463
|
long i;
|
488
464
|
const long n_classes = NUM2LONG(n_classes_);
|
489
465
|
const long n_elements = NUM2LONG(n_elements_);
|
@@ -491,12 +467,17 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
491
467
|
double* histogram = alloc_dbl_array(n_classes);
|
492
468
|
VALUE ret;
|
493
469
|
|
494
|
-
for (i = 0; i < n_elements; i++) {
|
470
|
+
for (i = 0; i < n_elements; i++) {
|
471
|
+
histogram[y[i]] += 1;
|
472
|
+
}
|
495
473
|
|
496
474
|
ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
|
497
475
|
|
498
476
|
xfree(histogram);
|
499
477
|
|
478
|
+
RB_GC_GUARD(y_nary);
|
479
|
+
RB_GC_GUARD(criterion);
|
480
|
+
|
500
481
|
return ret;
|
501
482
|
}
|
502
483
|
|
@@ -510,9 +491,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
510
491
|
* @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
|
511
492
|
* @return [Float] impurity
|
512
493
|
*/
|
513
|
-
static VALUE
|
514
|
-
node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
515
|
-
{
|
494
|
+
static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
|
516
495
|
long i;
|
517
496
|
const long n_elements = RARRAY_LEN(y);
|
518
497
|
const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
|
@@ -531,11 +510,12 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
|
531
510
|
|
532
511
|
xfree(sum_vec);
|
533
512
|
|
513
|
+
RB_GC_GUARD(criterion);
|
514
|
+
|
534
515
|
return ret;
|
535
516
|
}
|
536
517
|
|
537
|
-
void init_tree_module()
|
538
|
-
{
|
518
|
+
void init_tree_module() {
|
539
519
|
VALUE mTree = rb_define_module_under(mRumale, "Tree");
|
540
520
|
/**
|
541
521
|
* Document-module: Rumale::Tree::ExtDecisionTreeClassifier
|