rbbt-dm 1.1.53 → 1.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b00fd271a576dd8e92f3e24e863ce59128c39edba34c14c75b3f0414f45e2ccf
4
- data.tar.gz: 19a2825592b122ab485abaffa432cf19a71afc6f9fc30d6e7a63793fc70de914
3
+ metadata.gz: 40ee19cdf9fd742bfa844428a16e61f398f76d53ca0bfdda5499f5bb03db1c2b
4
+ data.tar.gz: 7be309bf582ce9e547cf316a36b88ad3043fb0bdc9cd5d5590d507f0f78a7a71
5
5
  SHA512:
6
- metadata.gz: b7b327a6de2ea159266ae41f38a8139e84552ece97b94bdab809dd26474be59b4a62456628257e788384e2cd2d5ea3c6d4f915dfbca1fb24fcad283d75c539ad
7
- data.tar.gz: 53581538c5d4ac0a9ff7acda9039565f608caa04e5bfa5721f1e9efc29fb69ff7e1ff88de0379c8ca9be217feb4c273d44fefc5d30f901dcd6c723e0db28abab
6
+ metadata.gz: 801e3c9a8541b3b87b12b961c31c4932c17b36db6c9d65dd9a88ef1adb5811188d79570099cf96a1dea3d71d6323edfe2707fd46ebe9e9faa96f38ade84684eb
7
+ data.tar.gz: e1955147d51d34595cf169e818da48584cf9f1597dd2a714d91de88cf90846e9845f3147715ea95769cfb453adf170879c926e02419ff1437b95968cf9bf9ffc
@@ -27,7 +27,7 @@ module RbbtTensorflow
27
27
  mod.compile(optimizer='adam',
28
28
  loss='sparse_categorical_crossentropy',
29
29
  metrics=['accuracy'])
30
- mod.fit(x_train, y_train, epochs:1)
30
+ mod.fit(x_train, y_train, epochs:3)
31
31
  mod
32
32
  end
33
33
 
@@ -0,0 +1,26 @@
1
+ require 'rbbt/vector/model'
2
# Random-forest classifier built on R's `randomForest` package.
#
# Elements are used directly as feature vectors. Training and evaluation are
# R snippets; VectorModel runs them with a `features` data frame in scope
# (carrying a `label` column at training time) and expects the training code
# to define `model` and the evaluation code to define `label`.
class RFModel < VectorModel
  # @param dir [String] model directory, passed on to VectorModel#initialize
  def initialize(dir)
    super(dir)

    # Identity extraction: callers hand over ready-made feature vectors.
    @extract_features = Proc.new { |element| element }

    @train_model = <<-EOF
rbbt.require("randomForest");
model = randomForest(as.factor(label) ~ ., data = features);
    EOF

    # Before predicting, factor columns are re-coded with the levels stored in
    # the trained forest. is.factor() is used rather than comparing class() to
    # a string, because class() may return more than one element (e.g. for
    # ordered factors), which is not a valid `if` condition.
    @eval_model = <<-EOF
rbbt.require("randomForest");
pred = names(model$forest$xlevels)
for (p in pred) {
  if (is.factor(features[[p]])) {
    features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
  }
}
label = predict(model, features);
    EOF
  end
end
@@ -8,12 +8,12 @@ class SVMModel < VectorModel
8
8
  }
9
9
 
10
10
  @train_model =<<-EOF
11
- library(e1071);
12
- model = svm(as.factor(class) ~ ., data = features);
11
+ rbbt.require('e1071');
12
+ model = svm(as.factor(label) ~ ., data = features);
13
13
  EOF
14
14
 
15
15
  @eval_model =<<-EOF
16
- library(e1071);
16
+ rbbt.require('e1071');
17
17
  label = predict(model, features);
18
18
  EOF
19
19
  end
@@ -32,7 +32,7 @@ class TensorFlowModel < VectorModel
32
32
  end
33
33
  @graph ||= keras_graph
34
34
  @graph.compile(**@compile_options)
35
- @graph.fit(features, labels, :epochs => @epochs, :verbose => false)
35
+ @graph.fit(features, labels, :epochs => @epochs, :verbose => true)
36
36
  @graph.save(file)
37
37
  end
38
38
 
@@ -2,54 +2,85 @@ require 'rbbt/util/R'
2
2
 
3
3
  class VectorModel
4
4
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
5
- attr_accessor :features, :labels
5
+ attr_accessor :features, :names, :labels, :factor_levels
6
6
 
7
- def self.R_run(model_file, features, labels, code)
7
# Runs arbitrary R +code+ against the given features and labels.
#
# Features are written tab-separated to a temporary file, labels to
# "<file>.label" and, when provided, column names to "<file>.names". The R
# script loads them into a `features` data frame (names go through
# make.names, and columns listed in +factor_levels+ are re-coded as factors
# with the given levels), appends the labels as a `label` column and then
# executes +code+.
#
# +model_file+ is accepted for signature symmetry but is not used here.
def self.R_run(model_file, features, labels, code, names = nil, factor_levels = nil)
  TmpFile.with_file do |feature_file|
    rows = features.collect { |feats| feats * "\t" }
    Open.write(feature_file, rows * "\n")
    Open.write(feature_file + '.label', labels * "\n" + "\n")
    Open.write(feature_file + '.names', names * "\n" + "\n") if names

    # scan() must be told whether the labels are numbers or strings
    what = Numeric === labels.first ? 'numeric()' : 'character()'

    # Optional script fragments; nil interpolates as an empty line
    set_names = nil
    set_names = "names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names

    set_levels = nil
    if factor_levels
      level_lines = factor_levels.collect { |name, levels|
        "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
      }
      set_levels = level_lines * "\n"
    end

    R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
#{ set_names }
#{ set_levels }
labels = scan("#{ feature_file }.label", what=#{what});
features = cbind(features, label = labels);
#{code}
    EOF
  end
end
20
33
 
21
- def self.R_train(model_file, features, labels, code)
34
# Trains a model in R and saves it to +model_file+.
#
# Features, labels and (optionally) column names are written to temporary
# files and loaded into a `features` data frame with a `label` column;
# columns listed in +factor_levels+ are re-coded as factors with the given
# levels. +code+ is then executed and must define an R object called
# `model`, which is stored with save().
def self.R_train(model_file, features, labels, code, names = nil, factor_levels = nil)
  TmpFile.with_file do |feature_file|
    rows = features.collect { |feats| feats * "\t" }
    Open.write(feature_file, rows * "\n")
    Open.write(feature_file + '.label', labels * "\n" + "\n")
    Open.write(feature_file + '.names', names * "\n" + "\n") if names

    # scan() must be told whether the labels are numbers or strings
    what = Numeric === labels.first ? 'numeric()' : 'character()'

    # Optional script fragments; nil interpolates as an empty line
    set_names = nil
    set_names = "names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names

    set_levels = nil
    if factor_levels
      level_lines = factor_levels.collect { |name, levels|
        "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
      }
      set_levels = level_lines * "\n"
    end

    R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
labels = scan("#{ feature_file }.label", what=#{what});
#{ set_names }
features = cbind(features, label = labels);
#{ set_levels }
#{code}
save(model, file='#{model_file}')
    EOF
  end
end
35
60
 
36
- def self.R_eval(model_file, features, list, code)
61
+ def self.R_eval(model_file, features, list, code, names = nil, factor_levels = nil)
37
62
  TmpFile.with_file do |feature_file|
63
+ if list
64
+ Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
65
+ else
66
+ Open.write(feature_file, features * "\t" + "\n")
67
+ end
68
+ Open.write(feature_file + '.names', names * "\n" + "\n") if names
69
+
38
70
  TmpFile.with_file do |results|
39
- if list
40
- Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
41
- else
42
- Open.write(feature_file, features * "\t" + "\n")
43
- end
44
71
 
45
72
  io = R.run <<-EOF
46
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
73
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=TRUE);
74
+ #{"names(features) = make.names(readLines('#{feature_file + '.names'}'))" if names }
75
+ #{ factor_levels.collect do |name,levels|
76
+ "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
77
+ end * "\n" if factor_levels }
47
78
  load(file="#{model_file}");
48
79
  #{code}
49
80
  cat(paste(label, sep="\\n", collapse="\\n"));
50
81
  EOF
51
82
  txt = io.read
52
- res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/).collect{|l| l.to_f}
83
+ res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/)
53
84
 
54
85
  if list
55
86
  res
@@ -66,7 +97,7 @@ cat(paste(label, sep="\\n", collapse="\\n"));
66
97
  instance_eval code, file
67
98
  end
68
99
 
69
- def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
100
+ def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil, names = nil, factor_levels = nil)
70
101
  @directory = directory
71
102
  FileUtils.mkdir_p @directory unless File.exists? @directory
72
103
 
@@ -76,6 +107,8 @@ cat(paste(label, sep="\\n", collapse="\\n"));
76
107
  @eval_model_file = File.join(@directory, "eval_model")
77
108
  @train_model_file_R = File.join(@directory, "train_model.R")
78
109
  @eval_model_file_R = File.join(@directory, "eval_model.R")
110
+ @names_file = File.join(@directory, "feature_names")
111
+ @levels_file = File.join(@directory, "levels")
79
112
 
80
113
  if extract_features.nil?
81
114
  if File.exists?(@extract_features_file)
@@ -105,6 +138,22 @@ cat(paste(label, sep="\\n", collapse="\\n"));
105
138
  @eval_model = eval_model
106
139
  end
107
140
 
141
# Feature names: an explicit argument wins, otherwise fall back to the names
# saved in the model directory. (The explicit value used to be assigned to
# @extract_features, which replaced the feature extractor and left @names
# unset.) File.exist? is used because File.exists? was removed in Ruby 3.2.
if names.nil?
  @names = Open.read(@names_file).split("\n") if File.exist?(@names_file)
else
  @names = names
end
148
+
149
+ if factor_levels.nil?
150
+ if File.exists?(@levels_file)
151
+ @factor_levels = YAML.load(Open.read(@levels_file))
152
+ end
153
+ else
154
+ @factor_levels = factor_levels
155
+ end
156
+
108
157
  @features = []
109
158
  @labels = []
110
159
  end
@@ -156,28 +205,31 @@ cat(paste(label, sep="\\n", collapse="\\n"));
156
205
  when String === eval_model
157
206
  Open.write(@eval_model_file_R, eval_model)
158
207
  end
208
+
209
+ Open.write(@levels_file, @factor_levels.to_yaml) if @factor_levels
210
+ Open.write(@names_file, @names * "\n" + "\n") if @names
159
211
  end
160
212
 
161
213
  def train
162
214
  case
163
215
  when Proc === train_model
164
- train_model.call(@model_file, @features, @labels)
216
+ train_model.call(@model_file, @features, @labels, @names, @factor_levels)
165
217
  when String === train_model
166
- VectorModel.R_train(@model_file, @features, @labels, train_model)
218
+ VectorModel.R_train(@model_file, @features, @labels, train_model, @names, @factor_levels)
167
219
  end
168
220
  save_models
169
221
  end
170
222
 
171
223
  def run(code)
172
- VectorModel.R_run(@model_file, @features, @labels, code)
224
+ VectorModel.R_run(@model_file, @features, @labels, code, @names, @factor_levels)
173
225
  end
174
226
 
175
227
  def eval(element)
176
228
  case
177
229
  when Proc === @eval_model
178
- @eval_model.call(@model_file, @extract_features.call(element), false)
230
+ @eval_model.call(@model_file, @extract_features.call(element), false, nil, @names, @factor_levels)
179
231
  when String === @eval_model
180
- VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model)
232
+ VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model, @names, @factor_levels)
181
233
  end
182
234
  end
183
235
 
@@ -195,9 +247,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
195
247
 
196
248
  case
197
249
  when Proc === eval_model
198
- eval_model.call(@model_file, features, true)
250
+ eval_model.call(@model_file, features, true, nil, @names, @factor_levels)
199
251
  when String === eval_model
200
- VectorModel.R_eval(@model_file, features, true, eval_model)
252
+ VectorModel.R_eval(@model_file, features, true, eval_model, @names, @factor_levels)
201
253
  end
202
254
  end
203
255
 
@@ -231,14 +283,61 @@ cat(paste(label, sep="\\n", collapse="\\n"));
231
283
 
232
284
  # acc
233
285
  #end
286
+ #
287
+
288
# Computes confusion counts, precision, recall and F1 for a set of predictions.
#
# test       - Array of gold-standard labels.
# predicted  - Array of predicted labels, aligned with +test+.
# good_label - label to treat as the positive class. When nil and exactly two
#              distinct labels are present, "true" is used if present, then
#              "1", then the first label in sort order.
#
# Labels are compared by their string form, so 1 and "1" are the same label
# (previously only the gold/predicted values were stringified, so a
# non-String positive label never matched and TP was always 0).
#
# Returns [tp, tn, fp, fn, precision, recall, f1]. Precision, recall and F1
# are Float::NAN when their denominator is zero. With more than two labels
# and no +good_label+, each label is scored one-vs-rest and the seven values
# are averaged field by field.
def self.f1_metrics(test, predicted, good_label = nil)
  labels = (test + predicted).collect { |l| l.to_s }.uniq

  if labels.length == 2 || good_label
    good_label = labels.find { |l| l == "true" } if good_label.nil?
    good_label = labels.find { |l| l == "1" } if good_label.nil?
    good_label = labels.sort.first if good_label.nil?
    positive = good_label.to_s

    tp = tn = fp = fn = 0
    test.zip(predicted).each do |gs, pred|
      actual = gs.to_s == positive
      called = pred.to_s == positive

      if actual
        called ? tp += 1 : fn += 1
      else
        # Anything not predicted as the positive label is a true negative,
        # even when the two negative labels differ from each other.
        called ? fp += 1 : tn += 1
      end
    end

    precision = tp.to_f / (tp + fp)
    recall    = tp.to_f / (tp + fn)
    f1        = (2.0 * tp) / (2.0 * tp + fp + fn)

    [tp, tn, fp, fn, precision, recall, f1]
  else
    per_label = labels.collect { |label| f1_metrics(test, predicted, label) }
    Misc.zip_fields(per_label).collect { |s| Misc.mean(s) }
  end
end
327
+
328
+ def cross_validation(folds = 10, good_label = nil)
238
329
 
239
330
  orig_features = @features
240
331
  orig_labels = @labels
241
332
 
333
+ multiclass = @labels.uniq.length > 2
334
+
335
+ if multiclass
336
+ res = TSV.setup({}, "Fold~P,R,F1#:type=:list")
337
+ else
338
+ res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
339
+ end
340
+
242
341
  begin
243
342
  feature_folds = Misc.divide(@features, folds)
244
343
  labels_folds = Misc.divide(@labels, folds)
@@ -253,8 +352,6 @@ cat(paste(label, sep="\\n", collapse="\\n"));
253
352
  test_labels = labels_folds[fix]
254
353
  train_labels = labels_folds.values_at(*rest).flatten
255
354
 
256
- tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
257
-
258
355
  @features = train_set
259
356
  @labels = train_labels
260
357
  self.train
@@ -262,26 +359,18 @@ cat(paste(label, sep="\\n", collapse="\\n"));
262
359
 
263
360
  raise "Number of predictions (#{predictions.length}) and test labels (#{test_labels.length}) do not match" if predictions.length != test_labels.length
264
361
 
265
- test_labels.zip(predictions).each do |gs,pred|
266
- gs = gs.to_i
267
- pred = pred > 0.5 ? 1 : 0
268
- tp += 1 if gs == pred && gs == 1
269
- tn += 1 if gs == pred && gs == 0
270
- fp += 1 if gs == 0 && pred == 1
271
- fn += 1 if gs == 1 && pred == 0
272
- end
273
-
274
- p = tp + fn
275
- pp = tp + fp
276
-
277
- pr = tp.to_f / pp
278
- re = tp.to_f / p
362
+ different_labels = test_labels.uniq
279
363
 
280
- f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
364
+ tp, tn, fp, fn, pr, re, f1 = VectorModel.f1_metrics(test_labels, predictions, good_label)
281
365
 
282
- Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
366
+ if multiclass
367
+ Log.low "Multi-class CV Fold #{fix} - Average P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
368
+ res[fix] = [pr,re,f1]
369
+ else
370
+ Log.low "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
371
+ res[fix] = [tp,tn,fp,fn,pr,re,f1]
372
+ end
283
373
 
284
- res[fix] = [tp,tn,fp,fn,pr,re,f1]
285
374
  end
286
375
  ensure
287
376
  @features = orig_features
@@ -33,10 +33,10 @@ class TestSVMModel < Test::Unit::TestCase
33
33
 
34
34
  model.train
35
35
 
36
- assert model.eval("1;1;1") > 0.5
37
- assert model.eval("0;0;0") < 0.5
36
+ assert model.eval("1;1;1") == "1"
37
+ assert model.eval("0;0;0") == "0"
38
38
 
39
- assert_equal [true, false], model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v > 0.5}
39
+ assert_equal [true, false], model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v == "1"}
40
40
  end
41
41
  end
42
42
 
@@ -4,12 +4,13 @@ require 'rbbt/vector/model/tensorflow'
4
4
  class TestTensorflowModel < Test::Unit::TestCase
5
5
 
6
6
  def test_keras
7
+ Log.severity = 0
7
8
  TmpFile.with_file() do |dir|
8
9
  FileUtils.mkdir_p dir
9
10
 
10
11
  model = TensorFlowModel.new(
11
12
  dir,
12
- optimizer:'adam',
13
+ optimizer: 'adam',
13
14
  loss: 'sparse_categorical_crossentropy',
14
15
  metrics: ['accuracy']
15
16
  )
@@ -42,6 +43,7 @@ class TestTensorflowModel < Test::Unit::TestCase
42
43
 
43
44
  predictions = model.eval_list x_test.tolist()
44
45
  sum = 0
46
+
45
47
  predictions.zip(y_test.tolist()).each do |pred,label|
46
48
  sum += 1 if label.to_i == pred
47
49
  end
@@ -49,8 +51,6 @@ class TestTensorflowModel < Test::Unit::TestCase
49
51
  end
50
52
 
51
53
  assert sum.to_f / predictions.length > 0.7
52
-
53
-
54
54
  end
55
55
  end
56
56
  end
@@ -211,7 +211,7 @@ cat(label, file="#{results}");
211
211
  end
212
212
  end
213
213
 
214
- def test_model_list
214
+ def test_model_save
215
215
  text =<<-EOF
216
216
  1 0;1;1
217
217
  1 1;0;1
@@ -243,9 +243,9 @@ cat(label, file="#{results}");
243
243
  R.run <<-EOF
244
244
  features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
245
245
  labels = scan("#{ feature_file }.class", what=numeric());
246
- features = cbind(features, class = labels);
246
+ features = cbind(features, label = labels);
247
247
  rbbt.require('e1071')
248
- model = svm(class ~ ., data = features)
248
+ model = svm(label ~ ., data = features)
249
249
  save(model, file="#{ model_file }");
250
250
  EOF
251
251
  end
@@ -276,12 +276,18 @@ cat(label, file="#{results}");
276
276
 
277
277
  model.train
278
278
 
279
+ model = VectorModel.new(dir)
280
+ pairs = text.split(/\n/).collect do |line|
281
+ label, features = line.split(" ")
282
+ model.add features, label
283
+ end
284
+
279
285
  assert model.eval("1;1;1").to_f > 0.5
280
286
  assert model.eval("0;0;0").to_f < 0.5
281
287
  end
282
288
  end
283
289
 
284
- def test_model_save
290
+ def test_model_name
285
291
  text =<<-EOF
286
292
  1 0;1;1
287
293
  1 1;0;1
@@ -298,50 +304,31 @@ cat(label, file="#{results}");
298
304
  FileUtils.mkdir_p dir
299
305
  model = VectorModel.new(dir)
300
306
 
301
- model.extract_features = Proc.new{|element|
302
- element.split(";")
303
- }
307
+ model.names = %w(Var1 Var2 Var3)
304
308
 
305
- model.train_model = Proc.new{|model_file,features,labels|
306
- TmpFile.with_file do |feature_file|
307
- Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
308
- Open.write(feature_file + '.class', labels * "\n")
309
- R.run <<-EOF
310
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
311
- labels = scan("#{ feature_file }.class", what=numeric());
312
- features = cbind(features, class = labels);
313
- rbbt.require('e1071')
314
- model = svm(class ~ ., data = features)
315
- save(model, file="#{ model_file }");
316
- EOF
309
+ model.extract_features = Proc.new{|element,list|
310
+ if element
311
+ element.split(";")
312
+ elsif list
313
+ list.collect{|e| e.split(";") }
317
314
  end
318
315
  }
319
316
 
320
- model.eval_model = Proc.new{|model_file,features|
321
- TmpFile.with_file do |feature_file|
322
- TmpFile.with_file do |results|
323
- Open.write(feature_file, features * "\t")
324
- puts R.run(<<-EOF
325
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
317
+ model.train_model =<<-EOF
318
+ rbbt.require('e1071')
319
+ model = svm(as.factor(label) ~ Var1 + Var2, data = features)
320
+ EOF
321
+
322
+ model.eval_model = <<-EOF
326
323
  library(e1071)
327
- load(file="#{ model_file }")
328
324
  label = predict(model, features);
329
- cat(label, file="#{results}");
330
- EOF
331
- ).read
332
- Open.read(results)
333
- end
334
- end
335
-
336
- }
325
+ EOF
337
326
 
338
327
  pairs = text.split(/\n/).collect do |line|
339
328
  label, features = line.split(" ")
340
- [features, label]
329
+ model.add features, label
341
330
  end
342
331
 
343
- model.add_list(*Misc.zip_fields(pairs))
344
-
345
332
  model.train
346
333
 
347
334
  assert model.eval("1;1;1").to_f > 0.5
@@ -349,23 +336,25 @@ cat(label, file="#{results}");
349
336
  end
350
337
  end
351
338
 
352
- def test_model_save
339
+ def test_model_cv
353
340
  text =<<-EOF
354
- 1 0;1;1
355
- 1 1;0;1
356
- 1 1;1;1
357
- 1 0;1;1
358
- 1 1;1;1
359
- 0 0;1;0
360
- 0 1;0;0
361
- 0 0;1;0
362
- 0 1;0;0
341
+ 0 0;1;0;0
342
+ 0 1;0;0;0
343
+ 0 0;1;0;0
344
+ 0 1;0;0;0
345
+ 1 0;1;1;0
346
+ 1 1;0;1;0
347
+ 1 1;1;1;0
348
+ 1 0;1;1;0
349
+ 1 1;1;1;0
363
350
  EOF
364
351
 
365
352
  TmpFile.with_file() do |dir|
366
353
  FileUtils.mkdir_p dir
367
354
  model = VectorModel.new(dir)
368
355
 
356
+ model.names = %w(Var1 Var2 Var3 Var4)
357
+
369
358
  model.extract_features = Proc.new{|element,list|
370
359
  if element
371
360
  element.split(";")
@@ -374,55 +363,151 @@ cat(label, file="#{results}");
374
363
  end
375
364
  }
376
365
 
377
- model.train_model = Proc.new{|model_file,features,labels|
378
- TmpFile.with_file do |feature_file|
379
- Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
380
- Open.write(feature_file + '.class', labels * "\n")
381
- R.run <<-EOF
382
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
383
- labels = scan("#{ feature_file }.class", what=numeric());
384
- features = cbind(features, class = labels);
385
- rbbt.require('e1071')
386
- model = svm(class ~ ., data = features)
387
- save(model, file="#{ model_file }");
388
- EOF
389
- end
390
- }
366
+ model.train_model =<<-EOF
367
+ rbbt.require('randomForest')
368
+ model = randomForest(as.factor(label) ~ ., data = features)
369
+ EOF
391
370
 
392
- model.eval_model = Proc.new{|model_file,features|
393
- TmpFile.with_file do |feature_file|
394
- TmpFile.with_file do |results|
395
- Open.write(feature_file, features * "\t")
396
- puts R.run(<<-EOF
397
- features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
398
- library(e1071)
399
- load(file="#{ model_file }")
371
+ model.eval_model = <<-EOF
372
+ rbbt.require('randomForest')
400
373
  label = predict(model, features);
401
- cat(label, file="#{results}");
402
- EOF
403
- ).read
404
- Open.read(results)
405
- end
406
- end
374
+ EOF
375
+
376
+ pairs = text.split(/\n/).collect do |line|
377
+ label, features = line.split(" ")
378
+ model.add features, label
379
+ end
380
+
381
+ model.train
382
+
383
+ assert_equal "0", model.eval("1;1;0;0")
384
+ assert_equal "1", model.eval("1;1;1;0")
385
+
386
+ Log.with_severity 1 do
387
+ model.cross_validation(2)
388
+ end
407
389
 
390
+ end
391
+ end
392
+
393
# Trains a random forest on a three-class toy data set, checks one prediction
# per class and runs a 2-fold cross-validation.
def test_model_mclass
  samples = <<-EOF
0 0;1;0;0
0 1;0;0;0
0 0;1;0;0
0 1;0;0;0
1 0;1;1;0
1 1;0;1;0
1 1;1;1;0
1 0;1;1;0
1 1;1;1;0
2 0;1;0;1
2 1;0;0;1
2 1;1;0;1
2 0;1;0;1
2 1;1;0;1
  EOF

  TmpFile.with_file() do |dir|
    FileUtils.mkdir_p dir

    rf = VectorModel.new(dir)
    rf.names = %w(Var1 Var2 Var3 Var4)

    # Accepts either a single "a;b;c" element or a list of them
    rf.extract_features = Proc.new { |element, list|
      if element
        element.split(";")
      elsif list
        list.collect { |e| e.split(";") }
      end
    }

    rf.train_model = <<-EOF
rbbt.require('randomForest')
model = randomForest(as.factor(label) ~ ., data = features)
    EOF

    rf.eval_model = <<-EOF
rbbt.require('randomForest')
label = predict(model, features);
    EOF

    # Each line is "<label> <features>"
    samples.split(/\n/).each do |line|
      label, features = line.split(" ")
      rf.add features, label
    end

    rf.train

    assert_equal "0", rf.eval("1;1;0;0")
    assert_equal "1", rf.eval("1;1;1;0")
    assert_equal "2", rf.eval("1;1;0;1")

    Log.with_severity(1) { rf.cross_validation(2) }
  end
end
452
+
453
# Exercises a model with a categorical feature: evaluation is expected to
# raise until factor levels are declared; once they are set and the model is
# retrained, a model re-created from the same directory must predict correctly.
def test_model_factor_levels
  samples = <<-EOF
0 0;1;0;f1
0 1;0;0;f1
0 0;1;0;f1
0 1;0;0;f1
1 0;1;1;f2
1 1;0;1;f2
1 1;1;1;f2
1 0;1;1;f2
1 1;1;1;f2
  EOF

  TmpFile.with_file() do |dir|
    FileUtils.mkdir_p dir

    rf = VectorModel.new(dir)
    rf.names = %w(Var1 Var2 Var3 Factor)

    # Accepts either a single "a;b;c" element or a list of them
    rf.extract_features = Proc.new { |element, list|
      if element
        element.split(";")
      elsif list
        list.collect { |e| e.split(";") }
      end
    }

    rf.train_model = <<-EOF
rbbt.require('randomForest')
model = randomForest(as.factor(label) ~ ., data = features)
    EOF

    rf.eval_model = <<-EOF
rbbt.require('randomForest')
label = predict(model, features);
    EOF

    # Each line is "<label> <features>"
    samples.split(/\n/).each do |line|
      label, features = line.split(" ")
      rf.add features, label
    end

    Log.with_severity 0 do
      rf.train
      rf.cross_validation(2)

      # No factor levels declared yet
      assert_raise do
        assert_equal "0", rf.eval("1;1;0;f1")
      end

      rf.factor_levels = {"Factor" => %w(f1 f2)}
      rf.train

      # A fresh instance over the same directory
      rf = VectorModel.new(dir)
      assert_equal "1", rf.eval("1;1;1;f2")
    end
  end
end
427
511
 
512
+
428
513
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.53
4
+ version: 1.1.54
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-25 00:00:00.000000000 Z
11
+ date: 2021-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -107,6 +107,7 @@ files:
107
107
  - lib/rbbt/statistics/rank_product.rb
108
108
  - lib/rbbt/tensorflow.rb
109
109
  - lib/rbbt/vector/model.rb
110
+ - lib/rbbt/vector/model/random_forest.rb
110
111
  - lib/rbbt/vector/model/spaCy.rb
111
112
  - lib/rbbt/vector/model/svm.rb
112
113
  - lib/rbbt/vector/model/tensorflow.rb