rbbt-dm 1.1.53 → 1.1.54

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b00fd271a576dd8e92f3e24e863ce59128c39edba34c14c75b3f0414f45e2ccf
4
- data.tar.gz: 19a2825592b122ab485abaffa432cf19a71afc6f9fc30d6e7a63793fc70de914
3
+ metadata.gz: 40ee19cdf9fd742bfa844428a16e61f398f76d53ca0bfdda5499f5bb03db1c2b
4
+ data.tar.gz: 7be309bf582ce9e547cf316a36b88ad3043fb0bdc9cd5d5590d507f0f78a7a71
5
5
  SHA512:
6
- metadata.gz: b7b327a6de2ea159266ae41f38a8139e84552ece97b94bdab809dd26474be59b4a62456628257e788384e2cd2d5ea3c6d4f915dfbca1fb24fcad283d75c539ad
7
- data.tar.gz: 53581538c5d4ac0a9ff7acda9039565f608caa04e5bfa5721f1e9efc29fb69ff7e1ff88de0379c8ca9be217feb4c273d44fefc5d30f901dcd6c723e0db28abab
6
+ metadata.gz: 801e3c9a8541b3b87b12b961c31c4932c17b36db6c9d65dd9a88ef1adb5811188d79570099cf96a1dea3d71d6323edfe2707fd46ebe9e9faa96f38ade84684eb
7
+ data.tar.gz: e1955147d51d34595cf169e818da48584cf9f1597dd2a714d91de88cf90846e9845f3147715ea95769cfb453adf170879c926e02419ff1437b95968cf9bf9ffc
@@ -27,7 +27,7 @@ module RbbtTensorflow
27
27
  mod.compile(optimizer='adam',
28
28
  loss='sparse_categorical_crossentropy',
29
29
  metrics=['accuracy'])
30
- mod.fit(x_train, y_train, epochs:1)
30
+ mod.fit(x_train, y_train, epochs:3)
31
31
  mod
32
32
  end
33
33
 
@@ -0,0 +1,26 @@
1
+ require 'rbbt/vector/model'
2
# VectorModel preconfigured to train and evaluate a random forest classifier
# through the R package `randomForest`.
#
# Elements are used as feature vectors as-is (the feature extractor is the
# identity), and the label column is always treated as a factor, so the
# forest is fitted as a classifier.
class RFModel < VectorModel
  # dir - directory handed to VectorModel#initialize.
  def initialize(dir)
    super(dir)

    # Elements are already feature vectors; nothing to extract.
    @extract_features = Proc.new{|element|
      element
    }

    @train_model =<<-EOF
rbbt.require("randomForest");
model = randomForest(as.factor(label) ~ ., data = features);
    EOF

    # Before predicting, re-encode every factor predictor with the levels
    # stored in the fitted forest (model$forest$xlevels).
    # is.factor is used instead of comparing class() to a string because
    # class() can return several values (e.g. ordered factors), which makes
    # the comparison a vector inside if().
    @eval_model =<<-EOF
rbbt.require("randomForest");
pred = names(model$forest$xlevels)
for (p in pred) {
  if (is.factor(features[[p]])) {
    features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
  }
}
label = predict(model, features);
    EOF
  end
end
@@ -8,12 +8,12 @@ class SVMModel < VectorModel
8
8
  }
9
9
 
10
10
  @train_model =<<-EOF
11
- library(e1071);
12
- model = svm(as.factor(class) ~ ., data = features);
11
+ rbbt.require('e1071');
12
+ model = svm(as.factor(label) ~ ., data = features);
13
13
  EOF
14
14
 
15
15
  @eval_model =<<-EOF
16
- library(e1071);
16
+ rbbt.require('e1071');
17
17
  label = predict(model, features);
18
18
  EOF
19
19
  end
@@ -32,7 +32,7 @@ class TensorFlowModel < VectorModel
32
32
  end
33
33
  @graph ||= keras_graph
34
34
  @graph.compile(**@compile_options)
35
- @graph.fit(features, labels, :epochs => @epochs, :verbose => false)
35
+ @graph.fit(features, labels, :epochs => @epochs, :verbose => true)
36
36
  @graph.save(file)
37
37
  end
38
38
 
@@ -2,54 +2,85 @@ require 'rbbt/util/R'
2
2
 
3
3
  class VectorModel
4
4
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
5
- attr_accessor :features, :labels
5
+ attr_accessor :features, :names, :labels, :factor_levels
6
6
 
7
# Run arbitrary R +code+ with the model's data loaded as a data frame.
#
# The feature rows, the labels and (when given) the column names are written
# to temporary files and read back in R into a data frame called `features`,
# whose last column `label` holds the labels. +code+ runs after that setup.
#
# model_file    - not referenced here; kept so the signature parallels
#                 R_train and R_eval.
# features      - Array of feature rows, each an Array of values.
# labels        - Array of labels; scanned in R as numeric when the first
#                 label is Numeric, as character otherwise.
# code          - String with the R code to execute.
# names         - optional Array of column names (applied through R's
#                 make.names).
# factor_levels - optional Hash of column name => Array of levels; each
#                 listed column is re-encoded as a factor with those levels.
def self.R_run(model_file, features, labels, code, names = nil, factor_levels = nil)
  TmpFile.with_file do |feature_file|
    # Every file ends with a newline, as R_eval already does for features.
    Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n" + "\n")
    Open.write(feature_file + '.label', labels * "\n" + "\n")
    Open.write(feature_file + '.names', names * "\n" + "\n") if names

    what = labels.first.is_a?(Numeric) ? 'numeric()' : 'character()'

    set_names = names ? "names(features) = make.names(readLines('#{feature_file + '.names'}'))" : ""

    # Columns were renamed with make.names, so look them up the same way;
    # otherwise a name such as "My Var" would not match "My.Var".
    set_levels = (factor_levels || {}).collect do |name, levels|
      "features[[make.names('#{name}')]] = factor(features[[make.names('#{name}')]], levels=#{R.ruby2R levels})"
    end * "\n"

    R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
#{set_names}
#{set_levels}
labels = scan("#{ feature_file }.label", what=#{what});
features = cbind(features, label = labels);
#{code}
    EOF
  end
end
20
33
 
21
# Train a model in R and save it to +model_file+.
#
# The feature rows, the labels and (when given) the column names are written
# to temporary files and read back in R into a data frame called `features`,
# whose last column `label` holds the labels. +code+ then runs and must leave
# the fitted model in an R variable named `model`, which is stored with R's
# save().
#
# model_file    - path the R `model` object is saved to.
# features      - Array of feature rows, each an Array of values.
# labels        - Array of labels; scanned in R as numeric when the first
#                 label is Numeric, as character otherwise.
# code          - String with the R training code.
# names         - optional Array of column names (applied through R's
#                 make.names).
# factor_levels - optional Hash of column name => Array of levels; each
#                 listed column is re-encoded as a factor with those levels.
def self.R_train(model_file, features, labels, code, names = nil, factor_levels = nil)
  TmpFile.with_file do |feature_file|
    # Every file ends with a newline, as R_eval already does for features.
    Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n" + "\n")
    Open.write(feature_file + '.label', labels * "\n" + "\n")
    Open.write(feature_file + '.names', names * "\n" + "\n") if names

    what = labels.first.is_a?(Numeric) ? 'numeric()' : 'character()'

    set_names = names ? "names(features) = make.names(readLines('#{feature_file + '.names'}'))" : ""

    # Columns were renamed with make.names, so look them up the same way;
    # otherwise a name such as "My Var" would not match "My.Var".
    set_levels = (factor_levels || {}).collect do |name, levels|
      "features[[make.names('#{name}')]] = factor(features[[make.names('#{name}')]], levels=#{R.ruby2R levels})"
    end * "\n"

    R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
labels = scan("#{ feature_file }.label", what=#{what});
#{set_names}
features = cbind(features, label = labels);
#{set_levels}
#{code}
save(model, file='#{model_file}')
    EOF
  end
end
35
60
 
36
- def self.R_eval(model_file, features, list, code)
61
+ def self.R_eval(model_file, features, list, code, names = nil, factor_levels = nil)
37
62
  TmpFile.with_file do |feature_file|
63
+ if list
64
+ Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
65
+ else
66
+ Open.write(feature_file, features * "\t" + "\n")
67
+ end
68
+ Open.write(feature_file + '.names', names * "\n" + "\n") if names
69
+
38
70
  TmpFile.with_file do |results|
39
- if list
40
- Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
41
- else
42
- Open.write(feature_file, features * "\t" + "\n")
43
- end
44
71
 
45
72
  io = R.run <<-EOF
46
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
73
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
74
+ #{"names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names }
75
+ #{ factor_levels.collect do |name,levels|
76
+ "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
77
+ end * "\n" if factor_levels }
47
78
  load(file="#{model_file}");
48
79
  #{code}
49
80
  cat(paste(label, sep="\\n", collapse="\\n"));
50
81
  EOF
51
82
  txt = io.read
52
- res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/).collect{|l| l.to_f}
83
+ res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/)
53
84
 
54
85
  if list
55
86
  res
@@ -66,7 +97,7 @@ cat(paste(label, sep="\\n", collapse="\\n"));
66
97
  instance_eval code, file
67
98
  end
68
99
 
69
- def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
100
+ def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil, names = nil, factor_levels = nil)
70
101
  @directory = directory
71
102
  FileUtils.mkdir_p @directory unless File.exists? @directory
72
103
 
@@ -76,6 +107,8 @@ cat(paste(label, sep="\\n", collapse="\\n"));
76
107
  @eval_model_file = File.join(@directory, "eval_model")
77
108
  @train_model_file_R = File.join(@directory, "train_model.R")
78
109
  @eval_model_file_R = File.join(@directory, "eval_model.R")
110
+ @names_file = File.join(@directory, "feature_names")
111
+ @levels_file = File.join(@directory, "levels")
79
112
 
80
113
  if extract_features.nil?
81
114
  if File.exists?(@extract_features_file)
@@ -105,6 +138,22 @@ cat(paste(label, sep="\\n", collapse="\\n"));
105
138
  @eval_model = eval_model
106
139
  end
107
140
 
141
+ if names.nil?
142
+ if File.exists?(@names_file)
143
+ @names = Open.read(@names_file).split("\n")
144
+ end
145
+ else
146
+ @extract_features = names
147
+ end
148
+
149
+ if factor_levels.nil?
150
+ if File.exists?(@levels_file)
151
+ @factor_levels = YAML.load(Open.read(@levels_file))
152
+ end
153
+ else
154
+ @factor_levels = factor_levels
155
+ end
156
+
108
157
  @features = []
109
158
  @labels = []
110
159
  end
@@ -156,28 +205,31 @@ cat(paste(label, sep="\\n", collapse="\\n"));
156
205
  when String === eval_model
157
206
  Open.write(@eval_model_file_R, eval_model)
158
207
  end
208
+
209
+ Open.write(@levels_file, @factor_levels.to_yaml) if @factor_levels
210
+ Open.write(@names_file, @names * "\n" + "\n") if @names
159
211
  end
160
212
 
161
213
  def train
162
214
  case
163
215
  when Proc === train_model
164
- train_model.call(@model_file, @features, @labels)
216
+ train_model.call(@model_file, @features, @labels, @names, @factor_levels)
165
217
  when String === train_model
166
- VectorModel.R_train(@model_file, @features, @labels, train_model)
218
+ VectorModel.R_train(@model_file, @features, @labels, train_model, @names, @factor_levels)
167
219
  end
168
220
  save_models
169
221
  end
170
222
 
171
223
  def run(code)
172
- VectorModel.R_run(@model_file, @features, @labels, code)
224
+ VectorModel.R_run(@model_file, @features, @labels, code, @names, @factor_levels)
173
225
  end
174
226
 
175
227
  def eval(element)
176
228
  case
177
229
  when Proc === @eval_model
178
- @eval_model.call(@model_file, @extract_features.call(element), false)
230
+ @eval_model.call(@model_file, @extract_features.call(element), false, nil, @names, @factor_levels)
179
231
  when String === @eval_model
180
- VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model)
232
+ VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model, @names, @factor_levels)
181
233
  end
182
234
  end
183
235
 
@@ -195,9 +247,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
195
247
 
196
248
  case
197
249
  when Proc === eval_model
198
- eval_model.call(@model_file, features, true)
250
+ eval_model.call(@model_file, features, true, nil, @names, @factor_levels)
199
251
  when String === eval_model
200
- VectorModel.R_eval(@model_file, features, true, eval_model)
252
+ VectorModel.R_eval(@model_file, features, true, eval_model, @names, @factor_levels)
201
253
  end
202
254
  end
203
255
 
@@ -231,14 +283,61 @@ cat(paste(label, sep="\\n", collapse="\\n"));
231
283
 
232
284
  # acc
233
285
  #end
286
+ #
287
+
288
# Confusion-matrix counts plus precision, recall and F1 for a set of
# predictions.
#
# test       - Array of gold-standard labels.
# predicted  - Array of predicted labels, aligned with +test+.
# good_label - label to treat as the positive class. When nil and exactly
#              two distinct labels occur, the positive class is "true" if
#              present, else "1" if present, else the first label in sort
#              order.
#
# All labels, including +good_label+, are compared by their string form, so
# gold labels such as 1 or true match predictions such as "1" or "true".
#
# Returns [tp, tn, fp, fn, precision, recall, f1]. With a zero denominator
# the ratios are Float NaN. When +good_label+ is nil and the number of
# distinct labels is not two, the metrics are computed once per label (each
# taken as the positive class) and the seven values are averaged across
# labels.
def self.f1_metrics(test, predicted, good_label = nil)
  # Census of labels by string form; mixing 1 and "1" must count as one label.
  labels = (test + predicted).collect{|l| l.to_s }.uniq

  # nil check rather than truthiness so that `false` can be the good label.
  if labels.length == 2 || ! good_label.nil?
    good_label = labels.find{|l| l == "true" } if good_label.nil?
    good_label = labels.find{|l| l == "1" } if good_label.nil?
    good_label = labels.sort.first if good_label.nil?
    good_label = good_label.to_s

    tp = tn = fp = fn = 0

    test.zip(predicted).each do |gs,pred|
      gs = gs.to_s
      pred = pred.to_s

      tp += 1 if gs == pred && gs == good_label
      tn += 1 if gs == pred && gs != good_label
      fp += 1 if gs != good_label && pred == good_label
      fn += 1 if gs == good_label && pred != good_label
    end

    pr = tp.to_f / (tp + fp)
    re = tp.to_f / (tp + fn)

    f1 = (2.0 * tp) / (2.0 * tp + fp + fn)

    [tp, tn, fp, fn, pr, re, f1]
  else
    # One-vs-rest: score each label as the positive class, then average.
    acc = labels.collect do |label|
      VectorModel.f1_metrics(test, predicted, label)
    end
    Misc.zip_fields(acc).collect{|s| Misc.mean(s)}
  end
end
327
+
328
+ def cross_validation(folds = 10, good_label = nil)
238
329
 
239
330
  orig_features = @features
240
331
  orig_labels = @labels
241
332
 
333
+ multiclass = @labels.uniq.length > 2
334
+
335
+ if multiclass
336
+ res = TSV.setup({}, "Fold~P,R,F1#:type=:list")
337
+ else
338
+ res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
339
+ end
340
+
242
341
  begin
243
342
  feature_folds = Misc.divide(@features, folds)
244
343
  labels_folds = Misc.divide(@labels, folds)
@@ -253,8 +352,6 @@ cat(paste(label, sep="\\n", collapse="\\n"));
253
352
  test_labels = labels_folds[fix]
254
353
  train_labels = labels_folds.values_at(*rest).flatten
255
354
 
256
- tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
257
-
258
355
  @features = train_set
259
356
  @labels = train_labels
260
357
  self.train
@@ -262,26 +359,18 @@ cat(paste(label, sep="\\n", collapse="\\n"));
262
359
 
263
360
  raise "Number of predictions (#{predictions.length}) and test labels (#{test_labels.length}) do not match" if predictions.length != test_labels.length
264
361
 
265
- test_labels.zip(predictions).each do |gs,pred|
266
- gs = gs.to_i
267
- pred = pred > 0.5 ? 1 : 0
268
- tp += 1 if gs == pred && gs == 1
269
- tn += 1 if gs == pred && gs == 0
270
- fp += 1 if gs == 0 && pred == 1
271
- fn += 1 if gs == 1 && pred == 0
272
- end
273
-
274
- p = tp + fn
275
- pp = tp + fp
276
-
277
- pr = tp.to_f / pp
278
- re = tp.to_f / p
362
+ different_labels = test_labels.uniq
279
363
 
280
- f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
364
+ tp, tn, fp, fn, pr, re, f1 = VectorModel.f1_metrics(test_labels, predictions, good_label)
281
365
 
282
- Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
366
+ if multiclass
367
+ Log.low "Multi-class CV Fold #{fix} - Average P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
368
+ res[fix] = [pr,re,f1]
369
+ else
370
+ Log.low "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
371
+ res[fix] = [tp,tn,fp,fn,pr,re,f1]
372
+ end
283
373
 
284
- res[fix] = [tp,tn,fp,fn,pr,re,f1]
285
374
  end
286
375
  ensure
287
376
  @features = orig_features
@@ -33,10 +33,10 @@ class TestSVMModel < Test::Unit::TestCase
33
33
 
34
34
  model.train
35
35
 
36
- assert model.eval("1;1;1") > 0.5
37
- assert model.eval("0;0;0") < 0.5
36
+ assert model.eval("1;1;1") == "1"
37
+ assert model.eval("0;0;0") == "0"
38
38
 
39
- assert_equal [true, false], model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v > 0.5}
39
+ assert_equal [true, false], model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v == "1"}
40
40
  end
41
41
  end
42
42
 
@@ -4,12 +4,13 @@ require 'rbbt/vector/model/tensorflow'
4
4
  class TestTensorflowModel < Test::Unit::TestCase
5
5
 
6
6
  def test_keras
7
+ Log.severity = 0
7
8
  TmpFile.with_file() do |dir|
8
9
  FileUtils.mkdir_p dir
9
10
 
10
11
  model = TensorFlowModel.new(
11
12
  dir,
12
- optimizer:'adam',
13
+ optimizer: 'adam',
13
14
  loss: 'sparse_categorical_crossentropy',
14
15
  metrics: ['accuracy']
15
16
  )
@@ -42,6 +43,7 @@ class TestTensorflowModel < Test::Unit::TestCase
42
43
 
43
44
  predictions = model.eval_list x_test.tolist()
44
45
  sum = 0
46
+
45
47
  predictions.zip(y_test.tolist()).each do |pred,label|
46
48
  sum += 1 if label.to_i == pred
47
49
  end
@@ -49,8 +51,6 @@ class TestTensorflowModel < Test::Unit::TestCase
49
51
  end
50
52
 
51
53
  assert sum.to_f / predictions.length > 0.7
52
-
53
-
54
54
  end
55
55
  end
56
56
  end
@@ -211,7 +211,7 @@ cat(label, file="#{results}");
211
211
  end
212
212
  end
213
213
 
214
- def test_model_list
214
+ def test_model_save
215
215
  text =<<-EOF
216
216
  1 0;1;1
217
217
  1 1;0;1
@@ -243,9 +243,9 @@ cat(label, file="#{results}");
243
243
  R.run <<-EOF
244
244
  features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
245
245
  labels = scan("#{ feature_file }.class", what=numeric());
246
- features = cbind(features, class = labels);
246
+ features = cbind(features, label = labels);
247
247
  rbbt.require('e1071')
248
- model = svm(class ~ ., data = features)
248
+ model = svm(label ~ ., data = features)
249
249
  save(model, file="#{ model_file }");
250
250
  EOF
251
251
  end
@@ -276,12 +276,18 @@ cat(label, file="#{results}");
276
276
 
277
277
  model.train
278
278
 
279
+ model = VectorModel.new(dir)
280
+ pairs = text.split(/\n/).collect do |line|
281
+ label, features = line.split(" ")
282
+ model.add features, label
283
+ end
284
+
279
285
  assert model.eval("1;1;1").to_f > 0.5
280
286
  assert model.eval("0;0;0").to_f < 0.5
281
287
  end
282
288
  end
283
289
 
284
- def test_model_save
290
+ def test_model_name
285
291
  text =<<-EOF
286
292
  1 0;1;1
287
293
  1 1;0;1
@@ -298,50 +304,31 @@ cat(label, file="#{results}");
298
304
  FileUtils.mkdir_p dir
299
305
  model = VectorModel.new(dir)
300
306
 
301
- model.extract_features = Proc.new{|element|
302
- element.split(";")
303
- }
307
+ model.names = %w(Var1 Var2 Var3)
304
308
 
305
- model.train_model = Proc.new{|model_file,features,labels|
306
- TmpFile.with_file do |feature_file|
307
- Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
308
- Open.write(feature_file + '.class', labels * "\n")
309
- R.run <<-EOF
310
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
311
- labels = scan("#{ feature_file }.class", what=numeric());
312
- features = cbind(features, class = labels);
313
- rbbt.require('e1071')
314
- model = svm(class ~ ., data = features)
315
- save(model, file="#{ model_file }");
316
- EOF
309
+ model.extract_features = Proc.new{|element,list|
310
+ if element
311
+ element.split(";")
312
+ elsif list
313
+ list.collect{|e| e.split(";") }
317
314
  end
318
315
  }
319
316
 
320
- model.eval_model = Proc.new{|model_file,features|
321
- TmpFile.with_file do |feature_file|
322
- TmpFile.with_file do |results|
323
- Open.write(feature_file, features * "\t")
324
- puts R.run(<<-EOF
325
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
317
+ model.train_model =<<-EOF
318
+ rbbt.require('e1071')
319
+ model = svm(as.factor(label) ~ Var1 + Var2, data = features)
320
+ EOF
321
+
322
+ model.eval_model = <<-EOF
326
323
  library(e1071)
327
- load(file="#{ model_file }")
328
324
  label = predict(model, features);
329
- cat(label, file="#{results}");
330
- EOF
331
- ).read
332
- Open.read(results)
333
- end
334
- end
335
-
336
- }
325
+ EOF
337
326
 
338
327
  pairs = text.split(/\n/).collect do |line|
339
328
  label, features = line.split(" ")
340
- [features, label]
329
+ model.add features, label
341
330
  end
342
331
 
343
- model.add_list(*Misc.zip_fields(pairs))
344
-
345
332
  model.train
346
333
 
347
334
  assert model.eval("1;1;1").to_f > 0.5
@@ -349,23 +336,25 @@ cat(label, file="#{results}");
349
336
  end
350
337
  end
351
338
 
352
- def test_model_save
339
+ def test_model_cv
353
340
  text =<<-EOF
354
- 1 0;1;1
355
- 1 1;0;1
356
- 1 1;1;1
357
- 1 0;1;1
358
- 1 1;1;1
359
- 0 0;1;0
360
- 0 1;0;0
361
- 0 0;1;0
362
- 0 1;0;0
341
+ 0 0;1;0;0
342
+ 0 1;0;0;0
343
+ 0 0;1;0;0
344
+ 0 1;0;0;0
345
+ 1 0;1;1;0
346
+ 1 1;0;1;0
347
+ 1 1;1;1;0
348
+ 1 0;1;1;0
349
+ 1 1;1;1;0
363
350
  EOF
364
351
 
365
352
  TmpFile.with_file() do |dir|
366
353
  FileUtils.mkdir_p dir
367
354
  model = VectorModel.new(dir)
368
355
 
356
+ model.names = %w(Var1 Var2 Var3 Var4)
357
+
369
358
  model.extract_features = Proc.new{|element,list|
370
359
  if element
371
360
  element.split(";")
@@ -374,55 +363,151 @@ cat(label, file="#{results}");
374
363
  end
375
364
  }
376
365
 
377
- model.train_model = Proc.new{|model_file,features,labels|
378
- TmpFile.with_file do |feature_file|
379
- Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
380
- Open.write(feature_file + '.class', labels * "\n")
381
- R.run <<-EOF
382
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
383
- labels = scan("#{ feature_file }.class", what=numeric());
384
- features = cbind(features, class = labels);
385
- rbbt.require('e1071')
386
- model = svm(class ~ ., data = features)
387
- save(model, file="#{ model_file }");
388
- EOF
389
- end
390
- }
366
+ model.train_model =<<-EOF
367
+ rbbt.require('randomForest')
368
+ model = randomForest(as.factor(label) ~ ., data = features)
369
+ EOF
391
370
 
392
- model.eval_model = Proc.new{|model_file,features|
393
- TmpFile.with_file do |feature_file|
394
- TmpFile.with_file do |results|
395
- Open.write(feature_file, features * "\t")
396
- puts R.run(<<-EOF
397
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
398
- library(e1071)
399
- load(file="#{ model_file }")
371
+ model.eval_model = <<-EOF
372
+ rbbt.require('randomForest')
400
373
  label = predict(model, features);
401
- cat(label, file="#{results}");
402
- EOF
403
- ).read
404
- Open.read(results)
405
- end
406
- end
374
+ EOF
375
+
376
+ pairs = text.split(/\n/).collect do |line|
377
+ label, features = line.split(" ")
378
+ model.add features, label
379
+ end
380
+
381
+ model.train
382
+
383
+ assert_equal "0", model.eval("1;1;0;0")
384
+ assert_equal "1", model.eval("1;1;1;0")
385
+
386
+ Log.with_severity 1 do
387
+ model.cross_validation(2)
388
+ end
407
389
 
390
+ end
391
+ end
392
+
393
+ def test_model_mclass
394
+ text =<<-EOF
395
+ 0 0;1;0;0
396
+ 0 1;0;0;0
397
+ 0 0;1;0;0
398
+ 0 1;0;0;0
399
+ 1 0;1;1;0
400
+ 1 1;0;1;0
401
+ 1 1;1;1;0
402
+ 1 0;1;1;0
403
+ 1 1;1;1;0
404
+ 2 0;1;0;1
405
+ 2 1;0;0;1
406
+ 2 1;1;0;1
407
+ 2 0;1;0;1
408
+ 2 1;1;0;1
409
+ EOF
410
+
411
+ TmpFile.with_file() do |dir|
412
+ FileUtils.mkdir_p dir
413
+ model = VectorModel.new(dir)
414
+
415
+ model.names = %w(Var1 Var2 Var3 Var4)
416
+
417
+ model.extract_features = Proc.new{|element,list|
418
+ if element
419
+ element.split(";")
420
+ elsif list
421
+ list.collect{|e| e.split(";") }
422
+ end
408
423
  }
409
424
 
425
+ model.train_model =<<-EOF
426
+ rbbt.require('randomForest')
427
+ model = randomForest(as.factor(label) ~ ., data = features)
428
+ EOF
429
+
430
+ model.eval_model = <<-EOF
431
+ rbbt.require('randomForest')
432
+ label = predict(model, features);
433
+ EOF
434
+
410
435
  pairs = text.split(/\n/).collect do |line|
411
436
  label, features = line.split(" ")
412
437
  model.add features, label
413
438
  end
414
439
 
415
440
  model.train
441
+
442
+ assert_equal "0", model.eval("1;1;0;0")
443
+ assert_equal "1", model.eval("1;1;1;0")
444
+ assert_equal "2", model.eval("1;1;0;1")
445
+
446
+ Log.with_severity 1 do
447
+ model.cross_validation(2)
448
+ end
416
449
 
450
+ end
451
+ end
452
+
453
+ def test_model_factor_levels
454
+ text =<<-EOF
455
+ 0 0;1;0;f1
456
+ 0 1;0;0;f1
457
+ 0 0;1;0;f1
458
+ 0 1;0;0;f1
459
+ 1 0;1;1;f2
460
+ 1 1;0;1;f2
461
+ 1 1;1;1;f2
462
+ 1 0;1;1;f2
463
+ 1 1;1;1;f2
464
+ EOF
465
+
466
+ TmpFile.with_file() do |dir|
467
+ FileUtils.mkdir_p dir
417
468
  model = VectorModel.new(dir)
469
+
470
+ model.names = %w(Var1 Var2 Var3 Factor)
471
+
472
+ model.extract_features = Proc.new{|element,list|
473
+ if element
474
+ element.split(";")
475
+ elsif list
476
+ list.collect{|e| e.split(";") }
477
+ end
478
+ }
479
+
480
+ model.train_model =<<-EOF
481
+ rbbt.require('randomForest')
482
+ model = randomForest(as.factor(label) ~ ., data = features)
483
+ EOF
484
+
485
+ model.eval_model = <<-EOF
486
+ rbbt.require('randomForest')
487
+ label = predict(model, features);
488
+ EOF
489
+
418
490
  pairs = text.split(/\n/).collect do |line|
419
491
  label, features = line.split(" ")
420
492
  model.add features, label
421
493
  end
422
494
 
423
- assert model.eval("1;1;1").to_f > 0.5
424
- assert model.eval("0;0;0").to_f < 0.5
495
+ Log.with_severity 0 do
496
+ model.train
497
+ model.cross_validation(2)
498
+
499
+ assert_raise do
500
+ assert_equal "0", model.eval("1;1;0;f1")
501
+ end
502
+
503
+ model.factor_levels = {"Factor" => %w(f1 f2)}
504
+ model.train
505
+ model = VectorModel.new(dir)
506
+ assert_equal "1", model.eval("1;1;1;f2")
507
+ end
508
+
425
509
  end
426
510
  end
427
511
 
512
+
428
513
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.53
4
+ version: 1.1.54
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-25 00:00:00.000000000 Z
11
+ date: 2021-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -107,6 +107,7 @@ files:
107
107
  - lib/rbbt/statistics/rank_product.rb
108
108
  - lib/rbbt/tensorflow.rb
109
109
  - lib/rbbt/vector/model.rb
110
+ - lib/rbbt/vector/model/random_forest.rb
110
111
  - lib/rbbt/vector/model/spaCy.rb
111
112
  - lib/rbbt/vector/model/svm.rb
112
113
  - lib/rbbt/vector/model/tensorflow.rb