hybridforest 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/Gemfile.lock +1 -1
- data/lib/hybridforest/trees/feature_selectors/random_feature_subspace.rb +1 -0
- data/lib/hybridforest/utilities/utils.rb +38 -26
- data/lib/hybridforest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b32e830c0e55cd23122b879bae7d42407085026f573d5565522ba64fe71a255e
|
4
|
+
data.tar.gz: 55c965cbe81e2f2a2e18ebecd21b1027577249f2aa6de4bb0e57b14ab6409f30
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fd27d1de1bcc9f1776607475bc3e03731c8053dbd309171265d1eeb1abc1498965b62d251f1f731ea708ea3914372cba1843b73be1acedcb90d417e79cc54bf
|
7
|
+
data.tar.gz: 75a6a949ccce8e0369966814fdd9a40944a9163dacb82de7a30fad31d54844e5fdc396ea546dbfcad19258e249a60b452973ee3df4e9fe8f07da71d4e888c274
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -12,17 +12,16 @@ module HybridForest
|
|
12
12
|
# of independent features and an array of labels. Returns [+training_set+, +testing_set+, +testing_set_labels+]
|
13
13
|
#
|
14
14
|
def self.train_test_split(dataset, test_set_size = 0.20)
|
15
|
-
# TODO:
|
15
|
+
# TODO: Offer stratify param
|
16
16
|
dataset = to_dataframe(dataset)
|
17
|
+
all_rows = (0...dataset.count).to_a
|
17
18
|
|
18
19
|
test_set_count = (dataset.count * test_set_size).floor
|
19
|
-
|
20
|
-
test_set = dataset[
|
21
|
-
test_set_labels = test_set.
|
22
|
-
test_set.except!(test_set.label)
|
20
|
+
test_set_rows = rand_uniq_nums(test_set_count, 0...dataset.count)
|
21
|
+
test_set = dataset[test_set_rows]
|
22
|
+
test_set, test_set_labels = test_set.disconnect_labels
|
23
23
|
|
24
|
-
|
25
|
-
train_set = dataset[train_set_indices]
|
24
|
+
train_set = dataset[all_rows - test_set_rows]
|
26
25
|
|
27
26
|
[train_set, test_set, test_set_labels]
|
28
27
|
end
|
@@ -37,20 +36,13 @@ module HybridForest
|
|
37
36
|
dataset = to_dataframe(dataset)
|
38
37
|
all_rows = (0...dataset.count).to_a
|
39
38
|
|
40
|
-
|
41
|
-
|
42
|
-
dataset.count.times do
|
43
|
-
row = all_rows.sample
|
44
|
-
train_set_rows << row
|
45
|
-
train_set.concat(dataset[row])
|
46
|
-
end
|
39
|
+
train_set_rows = rand_nums(dataset.count, 0...dataset.count)
|
40
|
+
train_set = dataset[train_set_rows]
|
47
41
|
|
48
42
|
return train_test_split(dataset) if train_set_rows.sort == all_rows
|
49
43
|
|
50
|
-
|
51
|
-
test_set =
|
52
|
-
test_set_labels = test_set.class_labels
|
53
|
-
test_set.except!(test_set.label)
|
44
|
+
test_set = dataset[all_rows - train_set_rows]
|
45
|
+
test_set, test_set_labels = test_set.disconnect_labels
|
54
46
|
|
55
47
|
[train_set, test_set, test_set_labels]
|
56
48
|
end
|
@@ -88,16 +80,12 @@ module HybridForest
|
|
88
80
|
def self.random_sample(data:, size:, with_replacement: true)
|
89
81
|
raise ArgumentError, "Invalid sample size" if size < 1 || size > data.count
|
90
82
|
|
91
|
-
if with_replacement
|
92
|
-
|
93
|
-
data[rows]
|
83
|
+
rows = if with_replacement
|
84
|
+
rand_nums(size, 0...data.count)
|
94
85
|
else
|
95
|
-
|
96
|
-
until rows.size == size
|
97
|
-
rows << rand(0...data.count)
|
98
|
-
end
|
99
|
-
data[rows.to_a]
|
86
|
+
rand_uniq_nums(size, 0...data.count)
|
100
87
|
end
|
88
|
+
data[rows]
|
101
89
|
end
|
102
90
|
|
103
91
|
# Outputs a report of common prediction metrics.
|
@@ -168,6 +156,12 @@ module HybridForest
|
|
168
156
|
def class_labels
|
169
157
|
self[label].to_a
|
170
158
|
end
|
159
|
+
|
160
|
+
def disconnect_labels
|
161
|
+
labels = class_labels
|
162
|
+
except!(label)
|
163
|
+
[self, labels]
|
164
|
+
end
|
171
165
|
end
|
172
166
|
end
|
173
167
|
|
@@ -202,5 +196,23 @@ module HybridForest
|
|
202
196
|
def false_label?(label)
|
203
197
|
[false, 0].include? label
|
204
198
|
end
|
199
|
+
|
200
|
+
##
|
201
|
+
# Returns an array of +n+ random numbers in the exclusive +range+.
|
202
|
+
def rand_nums(n, range)
|
203
|
+
n.times.collect { rand(range) }
|
204
|
+
end
|
205
|
+
|
206
|
+
##
|
207
|
+
# Returns an array of +n+ _unique_ random numbers in the exclusive +range+.
|
208
|
+
def rand_uniq_nums(n, range)
|
209
|
+
raise ArgumentError if n > range.size
|
210
|
+
|
211
|
+
nums = Set.new
|
212
|
+
until nums.size == n
|
213
|
+
nums << rand(range)
|
214
|
+
end
|
215
|
+
nums.to_a
|
216
|
+
end
|
205
217
|
end
|
206
218
|
end
|
data/lib/hybridforest/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hybridforest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hi-tech-jazz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|