rbbt-dm 1.1.53 → 1.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/tensorflow.rb +1 -1
- data/lib/rbbt/vector/model/random_forest.rb +26 -0
- data/lib/rbbt/vector/model/svm.rb +3 -3
- data/lib/rbbt/vector/model/tensorflow.rb +1 -1
- data/lib/rbbt/vector/model.rb +137 -48
- data/test/rbbt/vector/model/test_svm.rb +3 -3
- data/test/rbbt/vector/model/test_tensorflow.rb +3 -3
- data/test/rbbt/vector/test_model.rb +162 -77
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40ee19cdf9fd742bfa844428a16e61f398f76d53ca0bfdda5499f5bb03db1c2b
|
4
|
+
data.tar.gz: 7be309bf582ce9e547cf316a36b88ad3043fb0bdc9cd5d5590d507f0f78a7a71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 801e3c9a8541b3b87b12b961c31c4932c17b36db6c9d65dd9a88ef1adb5811188d79570099cf96a1dea3d71d6323edfe2707fd46ebe9e9faa96f38ade84684eb
|
7
|
+
data.tar.gz: e1955147d51d34595cf169e818da48584cf9f1597dd2a714d91de88cf90846e9845f3147715ea95769cfb453adf170879c926e02419ff1437b95968cf9bf9ffc
|
data/lib/rbbt/tensorflow.rb
CHANGED
data/lib/rbbt/vector/model/random_forest.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rbbt/vector/model'
|
2
|
+
class RFModel < VectorModel
|
3
|
+
def initialize(dir)
|
4
|
+
super(dir)
|
5
|
+
|
6
|
+
@extract_features = Proc.new{|element|
|
7
|
+
element
|
8
|
+
}
|
9
|
+
|
10
|
+
@train_model =<<-EOF
|
11
|
+
rbbt.require("randomForest");
|
12
|
+
model = randomForest(as.factor(label) ~ ., data = features);
|
13
|
+
EOF
|
14
|
+
|
15
|
+
@eval_model =<<-EOF
|
16
|
+
rbbt.require("randomForest");
|
17
|
+
pred = names(model$forest$xlevels)
|
18
|
+
for (p in pred) {
|
19
|
+
if (class(features[[p]]) == "factor") {
|
20
|
+
features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
|
21
|
+
}
|
22
|
+
}
|
23
|
+
label = predict(model, features);
|
24
|
+
EOF
|
25
|
+
end
|
26
|
+
end
|
data/lib/rbbt/vector/model/svm.rb
CHANGED
@@ -8,12 +8,12 @@ class SVMModel < VectorModel
|
|
8
8
|
}
|
9
9
|
|
10
10
|
@train_model =<<-EOF
|
11
|
-
|
12
|
-
model = svm(as.factor(
|
11
|
+
rbbt.require('e1071');
|
12
|
+
model = svm(as.factor(label) ~ ., data = features);
|
13
13
|
EOF
|
14
14
|
|
15
15
|
@eval_model =<<-EOF
|
16
|
-
|
16
|
+
rbbt.require('e1071');
|
17
17
|
label = predict(model, features);
|
18
18
|
EOF
|
19
19
|
end
|
data/lib/rbbt/vector/model/tensorflow.rb
CHANGED
@@ -32,7 +32,7 @@ class TensorFlowModel < VectorModel
|
|
32
32
|
end
|
33
33
|
@graph ||= keras_graph
|
34
34
|
@graph.compile(**@compile_options)
|
35
|
-
@graph.fit(features, labels, :epochs => @epochs, :verbose =>
|
35
|
+
@graph.fit(features, labels, :epochs => @epochs, :verbose => true)
|
36
36
|
@graph.save(file)
|
37
37
|
end
|
38
38
|
|
data/lib/rbbt/vector/model.rb
CHANGED
@@ -2,54 +2,85 @@ require 'rbbt/util/R'
|
|
2
2
|
|
3
3
|
class VectorModel
|
4
4
|
attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
|
5
|
-
attr_accessor :features, :labels
|
5
|
+
attr_accessor :features, :names, :labels, :factor_levels
|
6
6
|
|
7
|
-
def self.R_run(model_file, features, labels, code)
|
7
|
+
def self.R_run(model_file, features, labels, code, names = nil, factor_levels = nil)
|
8
8
|
TmpFile.with_file do |feature_file|
|
9
9
|
Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
|
10
|
-
Open.write(feature_file + '.
|
10
|
+
Open.write(feature_file + '.label', labels * "\n" + "\n")
|
11
|
+
Open.write(feature_file + '.names', names * "\n" + "\n") if names
|
12
|
+
|
13
|
+
|
14
|
+
what = case labels.first
|
15
|
+
when Numeric, Integer, Float
|
16
|
+
'numeric()'
|
17
|
+
else
|
18
|
+
'character()'
|
19
|
+
end
|
11
20
|
|
12
21
|
R.run <<-EOF
|
13
|
-
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=
|
14
|
-
|
15
|
-
|
22
|
+
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
|
23
|
+
#{"names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names }
|
24
|
+
#{ factor_levels.collect do |name,levels|
|
25
|
+
"features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
|
26
|
+
end * "\n" if factor_levels }
|
27
|
+
labels = scan("#{ feature_file }.label", what=#{what});
|
28
|
+
features = cbind(features, label = labels);
|
16
29
|
#{code}
|
17
30
|
EOF
|
18
31
|
end
|
19
32
|
end
|
20
33
|
|
21
|
-
def self.R_train(model_file, features, labels, code)
|
34
|
+
def self.R_train(model_file, features, labels, code, names = nil, factor_levels = nil)
|
22
35
|
TmpFile.with_file do |feature_file|
|
23
36
|
Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
|
24
|
-
Open.write(feature_file + '.
|
37
|
+
Open.write(feature_file + '.label', labels * "\n" + "\n")
|
38
|
+
Open.write(feature_file + '.names', names * "\n" + "\n") if names
|
39
|
+
|
40
|
+
what = case labels.first
|
41
|
+
when Numeric, Integer, Float
|
42
|
+
'numeric()'
|
43
|
+
else
|
44
|
+
'character()'
|
45
|
+
end
|
25
46
|
|
26
47
|
R.run <<-EOF
|
27
|
-
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=
|
28
|
-
labels = scan("#{ feature_file }.
|
29
|
-
features =
|
48
|
+
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
|
49
|
+
labels = scan("#{ feature_file }.label", what=#{what});
|
50
|
+
#{"names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names }
|
51
|
+
features = cbind(features, label = labels);
|
52
|
+
#{ factor_levels.collect do |name,levels|
|
53
|
+
"features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
|
54
|
+
end * "\n" if factor_levels }
|
30
55
|
#{code}
|
31
56
|
save(model, file='#{model_file}')
|
32
57
|
EOF
|
33
58
|
end
|
34
59
|
end
|
35
60
|
|
36
|
-
def self.R_eval(model_file, features, list, code)
|
61
|
+
def self.R_eval(model_file, features, list, code, names = nil, factor_levels = nil)
|
37
62
|
TmpFile.with_file do |feature_file|
|
63
|
+
if list
|
64
|
+
Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
|
65
|
+
else
|
66
|
+
Open.write(feature_file, features * "\t" + "\n")
|
67
|
+
end
|
68
|
+
Open.write(feature_file + '.names', names * "\n" + "\n") if names
|
69
|
+
|
38
70
|
TmpFile.with_file do |results|
|
39
|
-
if list
|
40
|
-
Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
|
41
|
-
else
|
42
|
-
Open.write(feature_file, features * "\t" + "\n")
|
43
|
-
end
|
44
71
|
|
45
72
|
io = R.run <<-EOF
|
46
|
-
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=
|
73
|
+
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
|
74
|
+
#{"names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names }
|
75
|
+
#{ factor_levels.collect do |name,levels|
|
76
|
+
"features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
|
77
|
+
end * "\n" if factor_levels }
|
47
78
|
load(file="#{model_file}");
|
48
79
|
#{code}
|
49
80
|
cat(paste(label, sep="\\n", collapse="\\n"));
|
50
81
|
EOF
|
51
82
|
txt = io.read
|
52
|
-
res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/)
|
83
|
+
res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/)
|
53
84
|
|
54
85
|
if list
|
55
86
|
res
|
@@ -66,7 +97,7 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
66
97
|
instance_eval code, file
|
67
98
|
end
|
68
99
|
|
69
|
-
def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
|
100
|
+
def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil, names = nil, factor_levels = nil)
|
70
101
|
@directory = directory
|
71
102
|
FileUtils.mkdir_p @directory unless File.exists? @directory
|
72
103
|
|
@@ -76,6 +107,8 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
76
107
|
@eval_model_file = File.join(@directory, "eval_model")
|
77
108
|
@train_model_file_R = File.join(@directory, "train_model.R")
|
78
109
|
@eval_model_file_R = File.join(@directory, "eval_model.R")
|
110
|
+
@names_file = File.join(@directory, "feature_names")
|
111
|
+
@levels_file = File.join(@directory, "levels")
|
79
112
|
|
80
113
|
if extract_features.nil?
|
81
114
|
if File.exists?(@extract_features_file)
|
@@ -105,6 +138,22 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
105
138
|
@eval_model = eval_model
|
106
139
|
end
|
107
140
|
|
141
|
+
if names.nil?
|
142
|
+
if File.exists?(@names_file)
|
143
|
+
@names = Open.read(@names_file).split("\n")
|
144
|
+
end
|
145
|
+
else
|
146
|
+
@extract_features = names
|
147
|
+
end
|
148
|
+
|
149
|
+
if factor_levels.nil?
|
150
|
+
if File.exists?(@levels_file)
|
151
|
+
@factor_levels = YAML.load(Open.read(@levels_file))
|
152
|
+
end
|
153
|
+
else
|
154
|
+
@factor_levels = factor_levels
|
155
|
+
end
|
156
|
+
|
108
157
|
@features = []
|
109
158
|
@labels = []
|
110
159
|
end
|
@@ -156,28 +205,31 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
156
205
|
when String === eval_model
|
157
206
|
Open.write(@eval_model_file_R, eval_model)
|
158
207
|
end
|
208
|
+
|
209
|
+
Open.write(@levels_file, @factor_levels.to_yaml) if @factor_levels
|
210
|
+
Open.write(@names_file, @names * "\n" + "\n") if @names
|
159
211
|
end
|
160
212
|
|
161
213
|
def train
|
162
214
|
case
|
163
215
|
when Proc === train_model
|
164
|
-
train_model.call(@model_file, @features, @labels)
|
216
|
+
train_model.call(@model_file, @features, @labels, @names, @factor_levels)
|
165
217
|
when String === train_model
|
166
|
-
VectorModel.R_train(@model_file, @features, @labels, train_model)
|
218
|
+
VectorModel.R_train(@model_file, @features, @labels, train_model, @names, @factor_levels)
|
167
219
|
end
|
168
220
|
save_models
|
169
221
|
end
|
170
222
|
|
171
223
|
def run(code)
|
172
|
-
VectorModel.R_run(@model_file, @features, @labels, code)
|
224
|
+
VectorModel.R_run(@model_file, @features, @labels, code, @names, @factor_levels)
|
173
225
|
end
|
174
226
|
|
175
227
|
def eval(element)
|
176
228
|
case
|
177
229
|
when Proc === @eval_model
|
178
|
-
@eval_model.call(@model_file, @extract_features.call(element), false)
|
230
|
+
@eval_model.call(@model_file, @extract_features.call(element), false, nil, @names, @factor_levels)
|
179
231
|
when String === @eval_model
|
180
|
-
VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model)
|
232
|
+
VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model, @names, @factor_levels)
|
181
233
|
end
|
182
234
|
end
|
183
235
|
|
@@ -195,9 +247,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
195
247
|
|
196
248
|
case
|
197
249
|
when Proc === eval_model
|
198
|
-
eval_model.call(@model_file, features, true)
|
250
|
+
eval_model.call(@model_file, features, true, nil, @names, @factor_levels)
|
199
251
|
when String === eval_model
|
200
|
-
VectorModel.R_eval(@model_file, features, true, eval_model)
|
252
|
+
VectorModel.R_eval(@model_file, features, true, eval_model, @names, @factor_levels)
|
201
253
|
end
|
202
254
|
end
|
203
255
|
|
@@ -231,14 +283,61 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
231
283
|
|
232
284
|
# acc
|
233
285
|
#end
|
286
|
+
#
|
287
|
+
|
288
|
+
def self.f1_metrics(test, predicted, good_label = nil)
|
289
|
+
tp, tn, fp, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
|
290
|
+
|
291
|
+
labels = (test + predicted).uniq
|
292
|
+
|
293
|
+
if labels.length == 2 || good_label
|
294
|
+
good_label = labels.uniq.select{|l| l.to_s == "true"}.first if good_label.nil?
|
295
|
+
good_label = labels.uniq.select{|l| l.to_s == "1"}.first if good_label.nil?
|
296
|
+
good_label = labels.uniq.sort.first if good_label.nil?
|
297
|
+
|
298
|
+
test.zip(predicted).each do |gs,pred|
|
299
|
+
gs = gs.to_s
|
300
|
+
pred = pred.to_s
|
301
|
+
|
302
|
+
tp += 1 if gs == pred && gs == good_label
|
303
|
+
tn += 1 if gs == pred && gs != good_label
|
304
|
+
fp += 1 if gs != good_label && pred == good_label
|
305
|
+
fn += 1 if gs == good_label && pred != good_label
|
306
|
+
end
|
307
|
+
|
308
|
+
p = tp + fn
|
309
|
+
pp = tp + fp
|
234
310
|
|
235
|
-
|
311
|
+
pr = tp.to_f / pp
|
312
|
+
re = tp.to_f / p
|
236
313
|
|
237
|
-
|
314
|
+
f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
|
315
|
+
|
316
|
+
[tp, tn, fp, fn, pr, re, f1]
|
317
|
+
else
|
318
|
+
num = labels.length
|
319
|
+
acc = []
|
320
|
+
labels.each do |good_label|
|
321
|
+
values = VectorModel.f1_metrics(test, predicted, good_label)
|
322
|
+
acc << values
|
323
|
+
end
|
324
|
+
Misc.zip_fields(acc).collect{|s| Misc.mean(s)}
|
325
|
+
end
|
326
|
+
end
|
327
|
+
|
328
|
+
def cross_validation(folds = 10, good_label = nil)
|
238
329
|
|
239
330
|
orig_features = @features
|
240
331
|
orig_labels = @labels
|
241
332
|
|
333
|
+
multiclass = @labels.uniq.length > 2
|
334
|
+
|
335
|
+
if multiclass
|
336
|
+
res = TSV.setup({}, "Fold~P,R,F1#:type=:list")
|
337
|
+
else
|
338
|
+
res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
|
339
|
+
end
|
340
|
+
|
242
341
|
begin
|
243
342
|
feature_folds = Misc.divide(@features, folds)
|
244
343
|
labels_folds = Misc.divide(@labels, folds)
|
@@ -253,8 +352,6 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
253
352
|
test_labels = labels_folds[fix]
|
254
353
|
train_labels = labels_folds.values_at(*rest).flatten
|
255
354
|
|
256
|
-
tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
|
257
|
-
|
258
355
|
@features = train_set
|
259
356
|
@labels = train_labels
|
260
357
|
self.train
|
@@ -262,26 +359,18 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
262
359
|
|
263
360
|
raise "Number of predictions (#{predictions.length}) and test labels (#{test_labels.length}) do not match" if predictions.length != test_labels.length
|
264
361
|
|
265
|
-
test_labels.
|
266
|
-
gs = gs.to_i
|
267
|
-
pred = pred > 0.5 ? 1 : 0
|
268
|
-
tp += 1 if gs == pred && gs == 1
|
269
|
-
tn += 1 if gs == pred && gs == 0
|
270
|
-
fp += 1 if gs == 0 && pred == 1
|
271
|
-
fn += 1 if gs == 1 && pred == 0
|
272
|
-
end
|
273
|
-
|
274
|
-
p = tp + fn
|
275
|
-
pp = tp + fp
|
276
|
-
|
277
|
-
pr = tp.to_f / pp
|
278
|
-
re = tp.to_f / p
|
362
|
+
different_labels = test_labels.uniq
|
279
363
|
|
280
|
-
|
364
|
+
tp, tn, fp, fn, pr, re, f1 = VectorModel.f1_metrics(test_labels, predictions, good_label)
|
281
365
|
|
282
|
-
|
366
|
+
if multiclass
|
367
|
+
Log.low "Multi-class CV Fold #{fix} - Average P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
|
368
|
+
res[fix] = [pr,re,f1]
|
369
|
+
else
|
370
|
+
Log.low "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
|
371
|
+
res[fix] = [tp,tn,fp,fn,pr,re,f1]
|
372
|
+
end
|
283
373
|
|
284
|
-
res[fix] = [tp,tn,fp,fn,pr,re,f1]
|
285
374
|
end
|
286
375
|
ensure
|
287
376
|
@features = orig_features
|
data/test/rbbt/vector/model/test_svm.rb
CHANGED
@@ -33,10 +33,10 @@ class TestSVMModel < Test::Unit::TestCase
|
|
33
33
|
|
34
34
|
model.train
|
35
35
|
|
36
|
-
assert model.eval("1;1;1")
|
37
|
-
assert model.eval("0;0;0")
|
36
|
+
assert model.eval("1;1;1") == "1"
|
37
|
+
assert model.eval("0;0;0") == "0"
|
38
38
|
|
39
|
-
assert_equal [true, false], model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v
|
39
|
+
assert_equal [true, false], model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v == "1"}
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
data/test/rbbt/vector/model/test_tensorflow.rb
CHANGED
@@ -4,12 +4,13 @@ require 'rbbt/vector/model/tensorflow'
|
|
4
4
|
class TestTensorflowModel < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def test_keras
|
7
|
+
Log.severity = 0
|
7
8
|
TmpFile.with_file() do |dir|
|
8
9
|
FileUtils.mkdir_p dir
|
9
10
|
|
10
11
|
model = TensorFlowModel.new(
|
11
12
|
dir,
|
12
|
-
optimizer:'adam',
|
13
|
+
optimizer: 'adam',
|
13
14
|
loss: 'sparse_categorical_crossentropy',
|
14
15
|
metrics: ['accuracy']
|
15
16
|
)
|
@@ -42,6 +43,7 @@ class TestTensorflowModel < Test::Unit::TestCase
|
|
42
43
|
|
43
44
|
predictions = model.eval_list x_test.tolist()
|
44
45
|
sum = 0
|
46
|
+
|
45
47
|
predictions.zip(y_test.tolist()).each do |pred,label|
|
46
48
|
sum += 1 if label.to_i == pred
|
47
49
|
end
|
@@ -49,8 +51,6 @@ class TestTensorflowModel < Test::Unit::TestCase
|
|
49
51
|
end
|
50
52
|
|
51
53
|
assert sum.to_f / predictions.length > 0.7
|
52
|
-
|
53
|
-
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
data/test/rbbt/vector/test_model.rb
CHANGED
@@ -211,7 +211,7 @@ cat(label, file="#{results}");
|
|
211
211
|
end
|
212
212
|
end
|
213
213
|
|
214
|
-
def
|
214
|
+
def test_model_save
|
215
215
|
text =<<-EOF
|
216
216
|
1 0;1;1
|
217
217
|
1 1;0;1
|
@@ -243,9 +243,9 @@ cat(label, file="#{results}");
|
|
243
243
|
R.run <<-EOF
|
244
244
|
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
245
245
|
labels = scan("#{ feature_file }.class", what=numeric());
|
246
|
-
features = cbind(features,
|
246
|
+
features = cbind(features, label = labels);
|
247
247
|
rbbt.require('e1071')
|
248
|
-
model = svm(
|
248
|
+
model = svm(label ~ ., data = features)
|
249
249
|
save(model, file="#{ model_file }");
|
250
250
|
EOF
|
251
251
|
end
|
@@ -276,12 +276,18 @@ cat(label, file="#{results}");
|
|
276
276
|
|
277
277
|
model.train
|
278
278
|
|
279
|
+
model = VectorModel.new(dir)
|
280
|
+
pairs = text.split(/\n/).collect do |line|
|
281
|
+
label, features = line.split(" ")
|
282
|
+
model.add features, label
|
283
|
+
end
|
284
|
+
|
279
285
|
assert model.eval("1;1;1").to_f > 0.5
|
280
286
|
assert model.eval("0;0;0").to_f < 0.5
|
281
287
|
end
|
282
288
|
end
|
283
289
|
|
284
|
-
def
|
290
|
+
def test_model_name
|
285
291
|
text =<<-EOF
|
286
292
|
1 0;1;1
|
287
293
|
1 1;0;1
|
@@ -298,50 +304,31 @@ cat(label, file="#{results}");
|
|
298
304
|
FileUtils.mkdir_p dir
|
299
305
|
model = VectorModel.new(dir)
|
300
306
|
|
301
|
-
model.
|
302
|
-
element.split(";")
|
303
|
-
}
|
307
|
+
model.names = %w(Var1 Var2 Var3)
|
304
308
|
|
305
|
-
model.
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
311
|
-
labels = scan("#{ feature_file }.class", what=numeric());
|
312
|
-
features = cbind(features, class = labels);
|
313
|
-
rbbt.require('e1071')
|
314
|
-
model = svm(class ~ ., data = features)
|
315
|
-
save(model, file="#{ model_file }");
|
316
|
-
EOF
|
309
|
+
model.extract_features = Proc.new{|element,list|
|
310
|
+
if element
|
311
|
+
element.split(";")
|
312
|
+
elsif list
|
313
|
+
list.collect{|e| e.split(";") }
|
317
314
|
end
|
318
315
|
}
|
319
316
|
|
320
|
-
model.
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
317
|
+
model.train_model =<<-EOF
|
318
|
+
rbbt.require('e1071')
|
319
|
+
model = svm(as.factor(label) ~ Var1 + Var2, data = features)
|
320
|
+
EOF
|
321
|
+
|
322
|
+
model.eval_model = <<-EOF
|
326
323
|
library(e1071)
|
327
|
-
load(file="#{ model_file }")
|
328
324
|
label = predict(model, features);
|
329
|
-
|
330
|
-
EOF
|
331
|
-
).read
|
332
|
-
Open.read(results)
|
333
|
-
end
|
334
|
-
end
|
335
|
-
|
336
|
-
}
|
325
|
+
EOF
|
337
326
|
|
338
327
|
pairs = text.split(/\n/).collect do |line|
|
339
328
|
label, features = line.split(" ")
|
340
|
-
|
329
|
+
model.add features, label
|
341
330
|
end
|
342
331
|
|
343
|
-
model.add_list(*Misc.zip_fields(pairs))
|
344
|
-
|
345
332
|
model.train
|
346
333
|
|
347
334
|
assert model.eval("1;1;1").to_f > 0.5
|
@@ -349,23 +336,25 @@ cat(label, file="#{results}");
|
|
349
336
|
end
|
350
337
|
end
|
351
338
|
|
352
|
-
def
|
339
|
+
def test_model_cv
|
353
340
|
text =<<-EOF
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
1
|
358
|
-
1 1;1;
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
341
|
+
0 0;1;0;0
|
342
|
+
0 1;0;0;0
|
343
|
+
0 0;1;0;0
|
344
|
+
0 1;0;0;0
|
345
|
+
1 0;1;1;0
|
346
|
+
1 1;0;1;0
|
347
|
+
1 1;1;1;0
|
348
|
+
1 0;1;1;0
|
349
|
+
1 1;1;1;0
|
363
350
|
EOF
|
364
351
|
|
365
352
|
TmpFile.with_file() do |dir|
|
366
353
|
FileUtils.mkdir_p dir
|
367
354
|
model = VectorModel.new(dir)
|
368
355
|
|
356
|
+
model.names = %w(Var1 Var2 Var3 Var4)
|
357
|
+
|
369
358
|
model.extract_features = Proc.new{|element,list|
|
370
359
|
if element
|
371
360
|
element.split(";")
|
@@ -374,55 +363,151 @@ cat(label, file="#{results}");
|
|
374
363
|
end
|
375
364
|
}
|
376
365
|
|
377
|
-
model.train_model
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
R.run <<-EOF
|
382
|
-
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
383
|
-
labels = scan("#{ feature_file }.class", what=numeric());
|
384
|
-
features = cbind(features, class = labels);
|
385
|
-
rbbt.require('e1071')
|
386
|
-
model = svm(class ~ ., data = features)
|
387
|
-
save(model, file="#{ model_file }");
|
388
|
-
EOF
|
389
|
-
end
|
390
|
-
}
|
366
|
+
model.train_model =<<-EOF
|
367
|
+
rbbt.require('randomForest')
|
368
|
+
model = randomForest(as.factor(label) ~ ., data = features)
|
369
|
+
EOF
|
391
370
|
|
392
|
-
model.eval_model =
|
393
|
-
|
394
|
-
TmpFile.with_file do |results|
|
395
|
-
Open.write(feature_file, features * "\t")
|
396
|
-
puts R.run(<<-EOF
|
397
|
-
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
398
|
-
library(e1071)
|
399
|
-
load(file="#{ model_file }")
|
371
|
+
model.eval_model = <<-EOF
|
372
|
+
rbbt.require('randomForest')
|
400
373
|
label = predict(model, features);
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
374
|
+
EOF
|
375
|
+
|
376
|
+
pairs = text.split(/\n/).collect do |line|
|
377
|
+
label, features = line.split(" ")
|
378
|
+
model.add features, label
|
379
|
+
end
|
380
|
+
|
381
|
+
model.train
|
382
|
+
|
383
|
+
assert_equal "0", model.eval("1;1;0;0")
|
384
|
+
assert_equal "1", model.eval("1;1;1;0")
|
385
|
+
|
386
|
+
Log.with_severity 1 do
|
387
|
+
model.cross_validation(2)
|
388
|
+
end
|
407
389
|
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
def test_model_mclass
|
394
|
+
text =<<-EOF
|
395
|
+
0 0;1;0;0
|
396
|
+
0 1;0;0;0
|
397
|
+
0 0;1;0;0
|
398
|
+
0 1;0;0;0
|
399
|
+
1 0;1;1;0
|
400
|
+
1 1;0;1;0
|
401
|
+
1 1;1;1;0
|
402
|
+
1 0;1;1;0
|
403
|
+
1 1;1;1;0
|
404
|
+
2 0;1;0;1
|
405
|
+
2 1;0;0;1
|
406
|
+
2 1;1;0;1
|
407
|
+
2 0;1;0;1
|
408
|
+
2 1;1;0;1
|
409
|
+
EOF
|
410
|
+
|
411
|
+
TmpFile.with_file() do |dir|
|
412
|
+
FileUtils.mkdir_p dir
|
413
|
+
model = VectorModel.new(dir)
|
414
|
+
|
415
|
+
model.names = %w(Var1 Var2 Var3 Var4)
|
416
|
+
|
417
|
+
model.extract_features = Proc.new{|element,list|
|
418
|
+
if element
|
419
|
+
element.split(";")
|
420
|
+
elsif list
|
421
|
+
list.collect{|e| e.split(";") }
|
422
|
+
end
|
408
423
|
}
|
409
424
|
|
425
|
+
model.train_model =<<-EOF
|
426
|
+
rbbt.require('randomForest')
|
427
|
+
model = randomForest(as.factor(label) ~ ., data = features)
|
428
|
+
EOF
|
429
|
+
|
430
|
+
model.eval_model = <<-EOF
|
431
|
+
rbbt.require('randomForest')
|
432
|
+
label = predict(model, features);
|
433
|
+
EOF
|
434
|
+
|
410
435
|
pairs = text.split(/\n/).collect do |line|
|
411
436
|
label, features = line.split(" ")
|
412
437
|
model.add features, label
|
413
438
|
end
|
414
439
|
|
415
440
|
model.train
|
441
|
+
|
442
|
+
assert_equal "0", model.eval("1;1;0;0")
|
443
|
+
assert_equal "1", model.eval("1;1;1;0")
|
444
|
+
assert_equal "2", model.eval("1;1;0;1")
|
445
|
+
|
446
|
+
Log.with_severity 1 do
|
447
|
+
model.cross_validation(2)
|
448
|
+
end
|
416
449
|
|
450
|
+
end
|
451
|
+
end
|
452
|
+
|
453
|
+
def test_model_factor_levels
|
454
|
+
text =<<-EOF
|
455
|
+
0 0;1;0;f1
|
456
|
+
0 1;0;0;f1
|
457
|
+
0 0;1;0;f1
|
458
|
+
0 1;0;0;f1
|
459
|
+
1 0;1;1;f2
|
460
|
+
1 1;0;1;f2
|
461
|
+
1 1;1;1;f2
|
462
|
+
1 0;1;1;f2
|
463
|
+
1 1;1;1;f2
|
464
|
+
EOF
|
465
|
+
|
466
|
+
TmpFile.with_file() do |dir|
|
467
|
+
FileUtils.mkdir_p dir
|
417
468
|
model = VectorModel.new(dir)
|
469
|
+
|
470
|
+
model.names = %w(Var1 Var2 Var3 Factor)
|
471
|
+
|
472
|
+
model.extract_features = Proc.new{|element,list|
|
473
|
+
if element
|
474
|
+
element.split(";")
|
475
|
+
elsif list
|
476
|
+
list.collect{|e| e.split(";") }
|
477
|
+
end
|
478
|
+
}
|
479
|
+
|
480
|
+
model.train_model =<<-EOF
|
481
|
+
rbbt.require('randomForest')
|
482
|
+
model = randomForest(as.factor(label) ~ ., data = features)
|
483
|
+
EOF
|
484
|
+
|
485
|
+
model.eval_model = <<-EOF
|
486
|
+
rbbt.require('randomForest')
|
487
|
+
label = predict(model, features);
|
488
|
+
EOF
|
489
|
+
|
418
490
|
pairs = text.split(/\n/).collect do |line|
|
419
491
|
label, features = line.split(" ")
|
420
492
|
model.add features, label
|
421
493
|
end
|
422
494
|
|
423
|
-
|
424
|
-
|
495
|
+
Log.with_severity 0 do
|
496
|
+
model.train
|
497
|
+
model.cross_validation(2)
|
498
|
+
|
499
|
+
assert_raise do
|
500
|
+
assert_equal "0", model.eval("1;1;0;f1")
|
501
|
+
end
|
502
|
+
|
503
|
+
model.factor_levels = {"Factor" => %w(f1 f2)}
|
504
|
+
model.train
|
505
|
+
model = VectorModel.new(dir)
|
506
|
+
assert_equal "1", model.eval("1;1;1;f2")
|
507
|
+
end
|
508
|
+
|
425
509
|
end
|
426
510
|
end
|
427
511
|
|
512
|
+
|
428
513
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.54
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- lib/rbbt/statistics/rank_product.rb
|
108
108
|
- lib/rbbt/tensorflow.rb
|
109
109
|
- lib/rbbt/vector/model.rb
|
110
|
+
- lib/rbbt/vector/model/random_forest.rb
|
110
111
|
- lib/rbbt/vector/model/spaCy.rb
|
111
112
|
- lib/rbbt/vector/model/svm.rb
|
112
113
|
- lib/rbbt/vector/model/tensorflow.rb
|