rbbt-dm 1.1.49 → 1.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 73f692fa2323508c640c594a056975f7bd24659aaffe8e9c4fb5e1c19d5b4ee9
4
- data.tar.gz: 9cc5cb88059630f1c65e6e2c6e9b9af5e1766aa97cc1192f088ee7043ee2cbef
3
+ metadata.gz: 2efbfe29d382655f78a91a1e8e381a304f4610f2d13d6d7c5fae8be40a22aa29
4
+ data.tar.gz: cee71dc48458dad863d6cc04bf8d1c113430e3f78309dac4cad1d575d9f4b198
5
5
  SHA512:
6
- metadata.gz: bab237c5d577abab5e4fed7f4567f15a95c20faa64dfbc3ba58dfb4054dc715093c2685e9166ce0df91148a3de0ea60aba1ea3de20a40120eb563806f15dd5a2
7
- data.tar.gz: 20fe2789e521288b50ff070b5f4ad930115ffcc782fbb656048a601f17e8f7408cbdc14181023e3430194855e40fd3dee8fbea2cb7d28981296ceafa10c6abc7
6
+ metadata.gz: 953cf89ffc4a04e38aff7020bd25d8ab8db5a6383db3a9a9fce5e7bab3bc668a763d660953bdbeedfc9d32beaacc6e83142c773dac6663b118c3a02c58405611
7
+ data.tar.gz: c899b27449bb378c9c757438aed976d5375e8d49bb99edf80ff7dbf9f2b03adeac9ec2000aa35bed4206b13e399f025e326952e35acfaefe04d1e5612597367f
@@ -32,7 +32,7 @@ module Paths
32
32
  if end_node
33
33
  end_node = end_node.select{|n| parents.keys.include? n}.first unless String === end_node
34
34
  return nil if not parents.include? end_node
35
- extract_path(parents, start_node, u)
35
+ extract_path(parents, start_node, end_node)
36
36
  else
37
37
  parents
38
38
  end
@@ -0,0 +1,43 @@
1
+ require 'rbbt/util/python'
2
+
3
+ module RbbtTensorflow
4
+
5
+ def self.init
6
+ RbbtPython.run do
7
+ pyimport "tensorflow", as: "tf"
8
+ end
9
+ end
10
+
11
+ def self.test
12
+
13
+ mod = x_test = y_test = nil
14
+ RbbtPython.run do
15
+
16
+ mnist_db = tf.keras.datasets.mnist
17
+
18
+ (x_train, y_train), (x_test, y_test) = mnist_db.load_data()
19
+ x_train, x_test = x_train / 255.0, x_test / 255.0
20
+
21
+ mod = tf.keras.models.Sequential.new([
22
+ tf.keras.layers.Flatten.new(input_shape: [28, 28]),
23
+ tf.keras.layers.Dense.new(128, activation:'relu'),
24
+ tf.keras.layers.Dropout.new(0.2),
25
+ tf.keras.layers.Dense.new(10, activation:'softmax')
26
+ ])
27
+ mod.compile(optimizer='adam',
28
+ loss='sparse_categorical_crossentropy',
29
+ metrics=['accuracy'])
30
+ mod.fit(x_train, y_train, epochs:1)
31
+ mod
32
+ end
33
+
34
+ RbbtPython.run do
35
+ mod.evaluate(x_test, y_test, verbose:2)
36
+ end
37
+ end
38
+ end
39
+
40
+ if __FILE__ == $0
41
+ RbbtTensorflow.init
42
+ RbbtTensorflow.test
43
+ end
@@ -46,10 +46,10 @@ save(model, file='#{model_file}')
46
46
  features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
47
47
  load(file="#{model_file}");
48
48
  #{code}
49
- cat(paste(label, sep="\\n"));
49
+ cat(paste(label, sep="\\n", collapse="\\n"));
50
50
  EOF
51
-
52
- res = io.read.sub(/WARNING: .*?\n/s,'').split(/\s+/).collect{|l| l.to_f}
51
+ txt = io.read
52
+ res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/).collect{|l| l.to_f}
53
53
 
54
54
  if list
55
55
  res
@@ -60,13 +60,51 @@ cat(paste(label, sep="\\n"));
60
60
  end
61
61
  end
62
62
 
63
+ def __load_method(file)
64
+ code = Open.read(file)
65
+ code.sub!(/.*Proc\.new/, "Proc.new")
66
+ instance_eval code, file
67
+ end
68
+
63
69
  def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
64
70
  @directory = directory
65
71
  FileUtils.mkdir_p @directory unless File.exists? @directory
72
+
66
73
  @model_file = File.join(@directory, "model")
67
- extract_features = @extract_features
68
- train_model = @train_model
69
- eval_model = @eval_model
74
+ @extract_features_file = File.join(@directory, "features")
75
+ @train_model_file = File.join(@directory, "train_model")
76
+ @eval_model_file = File.join(@directory, "eval_model")
77
+ @train_model_file_R = File.join(@directory, "train_model.R")
78
+ @eval_model_file_R = File.join(@directory, "eval_model.R")
79
+
80
+ if extract_features.nil?
81
+ if File.exists?(@extract_features_file)
82
+ @extract_features = __load_method @extract_features_file
83
+ end
84
+ else
85
+ @extract_features = extract_features
86
+ end
87
+
88
+ if train_model.nil?
89
+ if File.exists?(@train_model_file)
90
+ @train_model = __load_method @train_model_file
91
+ elsif File.exists?(@train_model_file_R)
92
+ @train_model = Open.read(@train_model_file_R)
93
+ end
94
+ else
95
+ @train_model = train_model
96
+ end
97
+
98
+ if eval_model.nil?
99
+ if File.exists?(@eval_model_file)
100
+ @eval_model = __load_method @eval_model_file
101
+ elsif File.exists?(@eval_model_file_R)
102
+ @eval_model = Open.read(@eval_model_file_R)
103
+ end
104
+ else
105
+ @eval_model = eval_model
106
+ end
107
+
70
108
  @features = []
71
109
  @labels = []
72
110
  end
@@ -77,8 +115,47 @@ cat(paste(label, sep="\\n"));
77
115
  end
78
116
 
79
117
  def add(element, label = nil)
80
- @features << extract_features.call(element)
81
- @labels << label unless label.nil?
118
+ features = @extract_features ? extract_features.call(element) : element
119
+ @features << features
120
+ @labels << label
121
+ end
122
+
123
+ def add_list(elements, labels = nil)
124
+ if @extract_features.nil? || @extract_features.arity == 1
125
+ elements.zip(labels || [nil]).each do |elem,label|
126
+ add(elem, label)
127
+ end
128
+ else
129
+ features = @extract_features.call(nil, elements)
130
+ @features.concat features
131
+ @labels.concat labels if labels
132
+ end
133
+ end
134
+
135
+ def save_models
136
+ require 'method_source'
137
+
138
+ case
139
+ when Proc === train_model
140
+ begin
141
+ Open.write(@train_model_file, train_model.source)
142
+ rescue
143
+ end
144
+ when String === train_model
145
+ Open.write(@train_model_file_R, @train_model)
146
+ end
147
+
148
+ Open.write(@extract_features_file, @extract_features.source) if @extract_features
149
+
150
+ case
151
+ when Proc === eval_model
152
+ begin
153
+ Open.write(@eval_model_file, eval_model.source)
154
+ rescue
155
+ end
156
+ when String === eval_model
157
+ Open.write(@eval_model_file_R, eval_model)
158
+ end
82
159
  end
83
160
 
84
161
  def train
@@ -88,6 +165,7 @@ cat(paste(label, sep="\\n"));
88
165
  when String === train_model
89
166
  VectorModel.R_train(@model_file, @features, @labels, train_model)
90
167
  end
168
+ save_models
91
169
  end
92
170
 
93
171
  def run(code)
@@ -96,99 +174,120 @@ cat(paste(label, sep="\\n"));
96
174
 
97
175
  def eval(element)
98
176
  case
99
- when Proc === eval_model
100
- eval_model.call(@model_file, extract_features.call(element), false)
101
- when String === eval_model
102
- VectorModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
177
+ when Proc === @eval_model
178
+ @eval_model.call(@model_file, @extract_features.call(element), false)
179
+ when String === @eval_model
180
+ VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model)
103
181
  end
104
182
  end
105
183
 
106
184
  def eval_list(elements, extract = true)
185
+
186
+ if extract && ! @extract_features.nil?
187
+ features = if @extract_features.arity == 1
188
+ elements.collect{|element| @extract_features.call(element) }
189
+ else
190
+ @extract_features.call(nil, elements)
191
+ end
192
+ else
193
+ features = elements
194
+ end
195
+
107
196
  case
108
197
  when Proc === eval_model
109
- eval_model.call(@model_file, extract ? elements.collect{|element| extract_features.call(element)} : elements, true)
198
+ eval_model.call(@model_file, features, true)
110
199
  when String === eval_model
111
- SVMModel.R_eval(@model_file, extract ? elements.collect{|element| extract_features.call(element)} : elements, true, eval_model)
200
+ VectorModel.R_eval(@model_file, features, true, eval_model)
112
201
  end
113
202
  end
114
203
 
115
- def cross_validation(folds = 10)
116
- saved_features = @features
117
- saved_labels = @labels
118
- seq = (0..features.length - 1).to_a
204
+ #def cross_validation(folds = 10)
205
+ # saved_features = @features
206
+ # saved_labels = @labels
207
+ # seq = (0..features.length - 1).to_a
119
208
 
120
- chunk_size = features.length / folds
209
+ # chunk_size = features.length / folds
121
210
 
122
- acc = []
123
- folds.times do
124
- seq = seq.shuffle
125
- eval_chunk = seq[0..chunk_size]
126
- train_chunk = seq[chunk_size.. -1]
211
+ # acc = []
212
+ # folds.times do
213
+ # seq = seq.shuffle
214
+ # eval_chunk = seq[0..chunk_size]
215
+ # train_chunk = seq[chunk_size.. -1]
127
216
 
128
- eval_features = @features.values_at *eval_chunk
129
- eval_labels = @labels.values_at *eval_chunk
217
+ # eval_features = @features.values_at *eval_chunk
218
+ # eval_labels = @labels.values_at *eval_chunk
130
219
 
131
- @features = @features.values_at *train_chunk
132
- @labels = @labels.values_at *train_chunk
220
+ # @features = @features.values_at *train_chunk
221
+ # @labels = @labels.values_at *train_chunk
133
222
 
134
- train
135
- predictions = eval_list eval_features, false
223
+ # train
224
+ # predictions = eval_list eval_features, false
136
225
 
137
- acc << predictions.zip(eval_labels).collect{|pred,lab| pred - lab < 0.5 ? 1 : 0}.inject(0){|acc,e| acc +=e} / chunk_size
226
+ # acc << predictions.zip(eval_labels).collect{|pred,lab| pred - lab < 0.5 ? 1 : 0}.inject(0){|acc,e| acc +=e} / chunk_size
138
227
 
139
- @features = saved_features
140
- @labels = saved_labels
141
- end
228
+ # @features = saved_features
229
+ # @labels = saved_labels
230
+ # end
142
231
 
143
- acc
144
- end
232
+ # acc
233
+ #end
145
234
 
146
235
  def cross_validation(folds = 10)
147
236
 
148
237
  res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
149
238
 
150
- feature_folds = Misc.divide(@features, folds)
151
- labels_folds = Misc.divide(@labels, folds)
239
+ orig_features = @features
240
+ orig_labels = @labels
152
241
 
153
- folds.times do |fix|
242
+ begin
243
+ feature_folds = Misc.divide(@features, folds)
244
+ labels_folds = Misc.divide(@labels, folds)
154
245
 
155
- test_set = feature_folds[fix]
156
- train_set = feature_folds.values_at(*((0..9).to_a - [fix])).inject([]){|acc,e| acc += e; acc}
246
+ folds.times do |fix|
157
247
 
158
- test_labels = labels_folds[fix]
159
- train_labels = labels_folds.values_at(*((0..9).to_a - [fix])).flatten
248
+ rest = (0..(folds-1)).to_a - [fix]
160
249
 
161
- tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
250
+ test_set = feature_folds[fix]
251
+ train_set = feature_folds.values_at(*rest).inject([]){|acc,e| acc += e; acc}
162
252
 
163
- @features = train_set
164
- @labels = train_labels
165
- self.train
166
- predictions = self.eval_list test_set, false
253
+ test_labels = labels_folds[fix]
254
+ train_labels = labels_folds.values_at(*rest).flatten
167
255
 
168
- test_labels.zip(predictions).each do |gs,pred|
169
- gs = gs.to_i
170
- pred = pred > 0.5 ? 1 : 0
171
- tp += 1 if gs == pred && gs == 1
172
- tn += 1 if gs == pred && gs == 0
173
- fp += 1 if gs == 0 && pred == 1
174
- fn += 1 if gs == 1 && pred == 0
175
- end
256
+ tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
176
257
 
177
- p = tp + fn
178
- pp = tp + fp
258
+ @features = train_set
259
+ @labels = train_labels
260
+ self.train
261
+ predictions = self.eval_list test_set, false
179
262
 
180
- pr = tp.to_f / pp
181
- re = tp.to_f / p
263
+ raise "Number of predictions (#{predictions.length}) and test labels (#{test_labels.length}) do not match" if predictions.length != test_labels.length
182
264
 
183
- f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
265
+ test_labels.zip(predictions).each do |gs,pred|
266
+ gs = gs.to_i
267
+ pred = pred > 0.5 ? 1 : 0
268
+ tp += 1 if gs == pred && gs == 1
269
+ tn += 1 if gs == pred && gs == 0
270
+ fp += 1 if gs == 0 && pred == 1
271
+ fn += 1 if gs == 1 && pred == 0
272
+ end
184
273
 
185
- Misc.fingerprint([tp,tn,fp,fn,pr,re,f1])
274
+ p = tp + fn
275
+ pp = tp + fp
186
276
 
187
- Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
277
+ pr = tp.to_f / pp
278
+ re = tp.to_f / p
188
279
 
189
- res[fix] = [tp,tn,fp,fn,pr,re,f1]
190
- end
280
+ f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
281
+
282
+ Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
191
283
 
284
+ res[fix] = [tp,tn,fp,fn,pr,re,f1]
285
+ end
286
+ ensure
287
+ @features = orig_features
288
+ @labels = orig_labels
289
+ end
290
+ self.train
192
291
  res
193
292
  end
194
293
  end
@@ -0,0 +1,72 @@
1
+ require 'rbbt/vector/model'
2
+ require 'rbbt/nlp/spaCy'
3
+
4
+ class SpaCyModel < VectorModel
5
+ attr_accessor :config
6
+
7
+ def spacy(&block)
8
+ RbbtPython.run "spacy" do
9
+ RbbtPython.module_eval(&block)
10
+ end
11
+ end
12
+
13
+ def initialize(dir, config, lang = 'en_core_web_md')
14
+ @config = case
15
+ when Path === config
16
+ config.read
17
+ when Misc.is_filename?(config)
18
+ Open.read(config)
19
+ when (Misc.is_filename?(config, false) && Rbbt.share.spaCy.cpu[config].exists?)
20
+ Rbbt.share.spaCy.cpu[config].read
21
+ when (Misc.is_filename?(config, false) && Rbbt.share.spaCy[config].exists?)
22
+ Rbbt.share.spaCy[config].read
23
+ else
24
+ config
25
+ end
26
+ @lang = lang
27
+
28
+ super(dir)
29
+
30
+ @train_model = Proc.new do |file, features, labels|
31
+ texts = features
32
+ docs = []
33
+ tmpconfig = File.join(file, 'config')
34
+ tmptrain = File.join(file, 'train.spacy')
35
+ SpaCy.config(@config, tmpconfig)
36
+ spacy do
37
+ nlp = SpaCy.nlp(lang)
38
+ docs = []
39
+ RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
40
+ if %w(1 true pos).include?(label.to_s.downcase)
41
+ doc.cats["positive"] = 1
42
+ doc.cats["negative"] = 0
43
+ else
44
+ doc.cats["positive"] = 0
45
+ doc.cats["negative"] = 1
46
+ end
47
+ docs << doc
48
+ end
49
+
50
+ doc_bin = spacy.tokens.DocBin.new(docs: docs)
51
+ doc_bin.to_disk(tmptrain)
52
+ end
53
+
54
+ CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}")
55
+ end
56
+
57
+ @eval_model = Proc.new do |file, features|
58
+ texts = features
59
+
60
+ docs = []
61
+ spacy do
62
+ nlp = spacy.load("#{file}/model-best")
63
+
64
+ texts.collect do |text|
65
+ cats = nlp.(text).cats
66
+ cats['positive'] > cats['negative'] ? 1 : 0
67
+ end
68
+ end
69
+ end
70
+ end
71
+
72
+ end
@@ -9,7 +9,7 @@ class SVMModel < VectorModel
9
9
 
10
10
  @train_model =<<-EOF
11
11
  library(e1071);
12
- model = svm(class ~ ., data = features, scale=c(0));
12
+ model = svm(as.factor(class) ~ ., data = features);
13
13
  EOF
14
14
 
15
15
  @eval_model =<<-EOF
@@ -0,0 +1,55 @@
1
+ require 'rbbt/vector/model'
2
+ require 'rbbt/tensorflow'
3
+
4
+ class TensorFlowModel < VectorModel
5
+ attr_accessor :graph, :epochs, :compile_options
6
+
7
+ def tensorflow(&block)
8
+ RbbtPython.run "tensorflow" do
9
+ RbbtPython.module_eval(&block)
10
+ end
11
+ end
12
+
13
+ def keras(&block)
14
+ RbbtPython.run "tensorflow.keras", as: 'keras' do
15
+ RbbtPython.run "tensorflow" do
16
+ RbbtPython.module_eval(&block)
17
+ end
18
+ end
19
+ end
20
+
21
+ def initialize(dir, graph = nil, epochs = 3, **compile_options)
22
+ @graph = graph
23
+ @epochs = epochs
24
+ @compile_options = compile_options
25
+
26
+ super(dir)
27
+
28
+ @train_model = Proc.new do |file, features, labels|
29
+ tensorflow do
30
+ features = tensorflow.convert_to_tensor(features)
31
+ labels = tensorflow.convert_to_tensor(labels)
32
+ end
33
+ @graph ||= keras_graph
34
+ @graph.compile(**@compile_options)
35
+ @graph.fit(features, labels, :epochs => @epochs, :verbose => false)
36
+ @graph.save(file)
37
+ end
38
+
39
+ @eval_model = Proc.new do |file, features|
40
+ tensorflow do
41
+ features = tensorflow.convert_to_tensor(features)
42
+ end
43
+ keras do
44
+ @graph ||= keras.models.load_model(file)
45
+ indices = @graph.predict(features, :verbose => false).tolist()
46
+ labels = indices.collect{|p| p.length > 1 ? p.index(p.max): p.first }
47
+ labels
48
+ end
49
+ end
50
+ end
51
+
52
+ def keras_graph(&block)
53
+ @graph = keras(&block)
54
+ end
55
+ end
@@ -20,7 +20,7 @@ N4 N5
20
20
  end_node = "N5"
21
21
 
22
22
  path = Paths.dijkstra(network, start_node, [end_node])
23
- assert_equal %w(N1 N2 N4), path.reverse
23
+ assert_equal %w(N1 N2 N4 N5), path.reverse
24
24
  end
25
25
 
26
26
  def test_weighted_dijsktra
@@ -0,0 +1,88 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/vector/model/spaCy'
3
+ require 'rbbt/vector/model/svm'
4
+
5
+ class TestSpaCyModel < Test::Unit::TestCase
6
+
7
+ def test_spyCy
8
+ TmpFile.with_file() do |dir|
9
+ Log.severity = 0
10
+ FileUtils.mkdir_p dir
11
+
12
+ model = SpaCyModel.new(
13
+ dir,
14
+ "cpu/textcat_efficiency.conf"
15
+ )
16
+
17
+
18
+ require 'rbbt/tsv/csv'
19
+ url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
20
+ tsv = TSV.csv(Open.open(url))
21
+ tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
22
+
23
+ good = tsv.select("Recommended IND" => '1')
24
+ bad = tsv.select("Recommended IND" => '0')
25
+
26
+ gsize = 2000
27
+ bsize = 500
28
+ good.keys[0..gsize-1].each do |text|
29
+ next if text.nil? || text.empty?
30
+ model.add text, '1'
31
+ end
32
+
33
+ bad.keys[0..bsize-1].each do |text|
34
+ model.add text, '0'
35
+ end
36
+
37
+ model.cross_validation
38
+ end
39
+
40
+ def _test_svm_spacy
41
+
42
+ require 'rbbt/tsv/csv'
43
+ url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
44
+ tsv = TSV.csv(Open.open(url))
45
+ tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
46
+
47
+ good = tsv.select("Recommended IND" => '1')
48
+ bad = tsv.select("Recommended IND" => '0')
49
+
50
+ gsize = 2000
51
+ bsize = 500
52
+ model = SVMModel.new(
53
+ dir
54
+ )
55
+
56
+ nlp = RbbtPython.run "spacy" do
57
+ spacy.load('en_core_web_md')
58
+ end
59
+
60
+ model.extract_features = Proc.new do |text|
61
+ vs = RbbtPython.run do
62
+ RbbtPython.collect nlp.(text).__iter__ do |token|
63
+ token.vector.tolist()
64
+ end
65
+ end
66
+ length = vs.length
67
+
68
+ v = vs.inject(nil){|acc,ev| acc = acc.nil? ? ev : acc.zip(ev).collect{|a,b| a + b } }
69
+
70
+ v.collect{|e| e / length }
71
+ end
72
+
73
+ TSV.traverse good.keys[0..gsize-1], :type => :array, :bar => true do |text|
74
+ next if text.nil? || text.empty?
75
+ model.add text, '1'
76
+ end
77
+
78
+ TSV.traverse bad.keys[0..bsize-1], :type => :array, :bar => true do |text|
79
+ model.add text, '0'
80
+ end
81
+
82
+ model.cross_validation
83
+
84
+ end
85
+ end
86
+
87
+ end
88
+
@@ -0,0 +1,57 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/vector/model/tensorflow'
3
+
4
+ class TestTensorflowModel < Test::Unit::TestCase
5
+
6
+ def test_keras
7
+ TmpFile.with_file() do |dir|
8
+ FileUtils.mkdir_p dir
9
+
10
+ model = TensorFlowModel.new(
11
+ dir,
12
+ optimizer:'adam',
13
+ loss: 'sparse_categorical_crossentropy',
14
+ metrics: ['accuracy']
15
+ )
16
+
17
+ model.keras_graph do
18
+ tf = tensorflow
19
+ tf.keras.models.Sequential.new([
20
+ tf.keras.layers.Flatten.new(input_shape: [28, 28]),
21
+ tf.keras.layers.Dense.new(128, activation:'relu'),
22
+ tf.keras.layers.Dropout.new(0.2),
23
+ tf.keras.layers.Dense.new(10, activation:'softmax')
24
+ ])
25
+ end
26
+
27
+ sum = predictions = nil
28
+ model.tensorflow do
29
+ tf = tensorflow
30
+ mnist_db = tf.keras.datasets.mnist
31
+
32
+ (x_train, y_train), (x_test, y_test) = mnist_db.load_data()
33
+ x_train, x_test = x_train / 255.0, x_test / 255.0
34
+
35
+ num = PyCall.len(x_train)
36
+
37
+ num.times do |i|
38
+ model.add x_train[i], y_train[i]
39
+ end
40
+
41
+ model.train
42
+
43
+ predictions = model.eval_list x_test.tolist()
44
+ sum = 0
45
+ predictions.zip(y_test.tolist()).each do |pred,label|
46
+ sum += 1 if label.to_i == pred
47
+ end
48
+
49
+ end
50
+
51
+ assert sum.to_f / predictions.length > 0.7
52
+
53
+
54
+ end
55
+ end
56
+ end
57
+
@@ -71,4 +71,358 @@ cat(label, file="#{results}");
71
71
  end
72
72
  end
73
73
 
74
+ def test_model_list
75
+ text =<<-EOF
76
+ 1 0;1;1
77
+ 1 1;0;1
78
+ 1 1;1;1
79
+ 1 0;1;1
80
+ 1 1;1;1
81
+ 0 0;1;0
82
+ 0 1;0;0
83
+ 0 0;1;0
84
+ 0 1;0;0
85
+ EOF
86
+
87
+ TmpFile.with_file() do |dir|
88
+ FileUtils.mkdir_p dir
89
+ model = VectorModel.new(dir)
90
+
91
+ model.extract_features = Proc.new{|element,list|
92
+ if element
93
+ element.split(";")
94
+ elsif list
95
+ list.collect{|e| e.split(";") }
96
+ end
97
+ }
98
+
99
+ model.train_model = Proc.new{|model_file,features,labels|
100
+ TmpFile.with_file do |feature_file|
101
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
102
+ Open.write(feature_file + '.class', labels * "\n")
103
+ R.run <<-EOF
104
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
105
+ labels = scan("#{ feature_file }.class", what=numeric());
106
+ features = cbind(features, class = labels);
107
+ rbbt.require('e1071')
108
+ model = svm(class ~ ., data = features)
109
+ save(model, file="#{ model_file }");
110
+ EOF
111
+ end
112
+ }
113
+
114
+ model.eval_model = Proc.new{|model_file,features|
115
+ TmpFile.with_file do |feature_file|
116
+ TmpFile.with_file do |results|
117
+ Open.write(feature_file, features * "\t")
118
+ puts R.run(<<-EOF
119
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
120
+ library(e1071)
121
+ load(file="#{ model_file }")
122
+ label = predict(model, features);
123
+ cat(label, file="#{results}");
124
+ EOF
125
+ ).read
126
+ Open.read(results)
127
+ end
128
+ end
129
+
130
+ }
131
+
132
+ pairs = text.split(/\n/).collect do |line|
133
+ label, features = line.split(" ")
134
+ [features, label]
135
+ end
136
+
137
+ model.add_list(*Misc.zip_fields(pairs))
138
+
139
+ model.train
140
+
141
+ assert model.eval("1;1;1").to_f > 0.5
142
+ assert model.eval("0;0;0").to_f < 0.5
143
+ end
144
+ end
145
+
146
+ def test_model_list2
147
+ text =<<-EOF
148
+ 1 0;1;1
149
+ 1 1;0;1
150
+ 1 1;1;1
151
+ 1 0;1;1
152
+ 1 1;1;1
153
+ 0 0;1;0
154
+ 0 1;0;0
155
+ 0 0;1;0
156
+ 0 1;0;0
157
+ EOF
158
+
159
+ TmpFile.with_file() do |dir|
160
+ FileUtils.mkdir_p dir
161
+ model = VectorModel.new(dir)
162
+
163
+ model.extract_features = Proc.new{|element|
164
+ element.split(";")
165
+ }
166
+
167
+ model.train_model = Proc.new{|model_file,features,labels|
168
+ TmpFile.with_file do |feature_file|
169
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
170
+ Open.write(feature_file + '.class', labels * "\n")
171
+ R.run <<-EOF
172
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
173
+ labels = scan("#{ feature_file }.class", what=numeric());
174
+ features = cbind(features, class = labels);
175
+ rbbt.require('e1071')
176
+ model = svm(class ~ ., data = features)
177
+ save(model, file="#{ model_file }");
178
+ EOF
179
+ end
180
+ }
181
+
182
+ model.eval_model = Proc.new{|model_file,features|
183
+ TmpFile.with_file do |feature_file|
184
+ TmpFile.with_file do |results|
185
+ Open.write(feature_file, features * "\t")
186
+ puts R.run(<<-EOF
187
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
188
+ library(e1071)
189
+ load(file="#{ model_file }")
190
+ label = predict(model, features);
191
+ cat(label, file="#{results}");
192
+ EOF
193
+ ).read
194
+ Open.read(results)
195
+ end
196
+ end
197
+
198
+ }
199
+
200
+ pairs = text.split(/\n/).collect do |line|
201
+ label, features = line.split(" ")
202
+ [features, label]
203
+ end
204
+
205
+ model.add_list(*Misc.zip_fields(pairs))
206
+
207
+ model.train
208
+
209
+ assert model.eval("1;1;1").to_f > 0.5
210
+ assert model.eval("0;0;0").to_f < 0.5
211
+ end
212
+ end
213
+
214
+ def test_model_list
215
+ text =<<-EOF
216
+ 1 0;1;1
217
+ 1 1;0;1
218
+ 1 1;1;1
219
+ 1 0;1;1
220
+ 1 1;1;1
221
+ 0 0;1;0
222
+ 0 1;0;0
223
+ 0 0;1;0
224
+ 0 1;0;0
225
+ EOF
226
+
227
+ TmpFile.with_file() do |dir|
228
+ FileUtils.mkdir_p dir
229
+ model = VectorModel.new(dir)
230
+
231
+ model.extract_features = Proc.new{|element,list|
232
+ if element
233
+ element.split(";")
234
+ elsif list
235
+ list.collect{|e| e.split(";") }
236
+ end
237
+ }
238
+
239
+ model.train_model = Proc.new{|model_file,features,labels|
240
+ TmpFile.with_file do |feature_file|
241
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
242
+ Open.write(feature_file + '.class', labels * "\n")
243
+ R.run <<-EOF
244
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
245
+ labels = scan("#{ feature_file }.class", what=numeric());
246
+ features = cbind(features, class = labels);
247
+ rbbt.require('e1071')
248
+ model = svm(class ~ ., data = features)
249
+ save(model, file="#{ model_file }");
250
+ EOF
251
+ end
252
+ }
253
+
254
+ model.eval_model = Proc.new{|model_file,features|
255
+ TmpFile.with_file do |feature_file|
256
+ TmpFile.with_file do |results|
257
+ Open.write(feature_file, features * "\t")
258
+ puts R.run(<<-EOF
259
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
260
+ library(e1071)
261
+ load(file="#{ model_file }")
262
+ label = predict(model, features);
263
+ cat(label, file="#{results}");
264
+ EOF
265
+ ).read
266
+ Open.read(results)
267
+ end
268
+ end
269
+
270
+ }
271
+
272
+ pairs = text.split(/\n/).collect do |line|
273
+ label, features = line.split(" ")
274
+ model.add features, label
275
+ end
276
+
277
+ model.train
278
+
279
+ assert model.eval("1;1;1").to_f > 0.5
280
+ assert model.eval("0;0;0").to_f < 0.5
281
+ end
282
+ end
283
+
284
+ def test_model_save
285
+ text =<<-EOF
286
+ 1 0;1;1
287
+ 1 1;0;1
288
+ 1 1;1;1
289
+ 1 0;1;1
290
+ 1 1;1;1
291
+ 0 0;1;0
292
+ 0 1;0;0
293
+ 0 0;1;0
294
+ 0 1;0;0
295
+ EOF
296
+
297
+ TmpFile.with_file() do |dir|
298
+ FileUtils.mkdir_p dir
299
+ model = VectorModel.new(dir)
300
+
301
+ model.extract_features = Proc.new{|element|
302
+ element.split(";")
303
+ }
304
+
305
+ model.train_model = Proc.new{|model_file,features,labels|
306
+ TmpFile.with_file do |feature_file|
307
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
308
+ Open.write(feature_file + '.class', labels * "\n")
309
+ R.run <<-EOF
310
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
311
+ labels = scan("#{ feature_file }.class", what=numeric());
312
+ features = cbind(features, class = labels);
313
+ rbbt.require('e1071')
314
+ model = svm(class ~ ., data = features)
315
+ save(model, file="#{ model_file }");
316
+ EOF
317
+ end
318
+ }
319
+
320
+ model.eval_model = Proc.new{|model_file,features|
321
+ TmpFile.with_file do |feature_file|
322
+ TmpFile.with_file do |results|
323
+ Open.write(feature_file, features * "\t")
324
+ puts R.run(<<-EOF
325
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
326
+ library(e1071)
327
+ load(file="#{ model_file }")
328
+ label = predict(model, features);
329
+ cat(label, file="#{results}");
330
+ EOF
331
+ ).read
332
+ Open.read(results)
333
+ end
334
+ end
335
+
336
+ }
337
+
338
+ pairs = text.split(/\n/).collect do |line|
339
+ label, features = line.split(" ")
340
+ [features, label]
341
+ end
342
+
343
+ model.add_list(*Misc.zip_fields(pairs))
344
+
345
+ model.train
346
+
347
+ assert model.eval("1;1;1").to_f > 0.5
348
+ assert model.eval("0;0;0").to_f < 0.5
349
+ end
350
+ end
351
+
352
+ def test_model_save
353
+ text =<<-EOF
354
+ 1 0;1;1
355
+ 1 1;0;1
356
+ 1 1;1;1
357
+ 1 0;1;1
358
+ 1 1;1;1
359
+ 0 0;1;0
360
+ 0 1;0;0
361
+ 0 0;1;0
362
+ 0 1;0;0
363
+ EOF
364
+
365
+ TmpFile.with_file() do |dir|
366
+ FileUtils.mkdir_p dir
367
+ model = VectorModel.new(dir)
368
+
369
+ model.extract_features = Proc.new{|element,list|
370
+ if element
371
+ element.split(";")
372
+ elsif list
373
+ list.collect{|e| e.split(";") }
374
+ end
375
+ }
376
+
377
+ model.train_model = Proc.new{|model_file,features,labels|
378
+ TmpFile.with_file do |feature_file|
379
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
380
+ Open.write(feature_file + '.class', labels * "\n")
381
+ R.run <<-EOF
382
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
383
+ labels = scan("#{ feature_file }.class", what=numeric());
384
+ features = cbind(features, class = labels);
385
+ rbbt.require('e1071')
386
+ model = svm(class ~ ., data = features)
387
+ save(model, file="#{ model_file }");
388
+ EOF
389
+ end
390
+ }
391
+
392
+ model.eval_model = Proc.new{|model_file,features|
393
+ TmpFile.with_file do |feature_file|
394
+ TmpFile.with_file do |results|
395
+ Open.write(feature_file, features * "\t")
396
+ puts R.run(<<-EOF
397
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
398
+ library(e1071)
399
+ load(file="#{ model_file }")
400
+ label = predict(model, features);
401
+ cat(label, file="#{results}");
402
+ EOF
403
+ ).read
404
+ Open.read(results)
405
+ end
406
+ end
407
+
408
+ }
409
+
410
+ pairs = text.split(/\n/).collect do |line|
411
+ label, features = line.split(" ")
412
+ model.add features, label
413
+ end
414
+
415
+ model.train
416
+
417
+ model = VectorModel.new(dir)
418
+ pairs = text.split(/\n/).collect do |line|
419
+ label, features = line.split(" ")
420
+ model.add features, label
421
+ end
422
+
423
+ assert model.eval("1;1;1").to_f > 0.5
424
+ assert model.eval("0;0;0").to_f < 0.5
425
+ end
426
+ end
427
+
74
428
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.49
4
+ version: 1.1.50
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-14 00:00:00.000000000 Z
11
+ date: 2021-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -105,8 +105,11 @@ files:
105
105
  - lib/rbbt/statistics/hypergeometric.rb
106
106
  - lib/rbbt/statistics/random_walk.rb
107
107
  - lib/rbbt/statistics/rank_product.rb
108
+ - lib/rbbt/tensorflow.rb
108
109
  - lib/rbbt/vector/model.rb
110
+ - lib/rbbt/vector/model/spaCy.rb
109
111
  - lib/rbbt/vector/model/svm.rb
112
+ - lib/rbbt/vector/model/tensorflow.rb
110
113
  - share/R/MA.R
111
114
  - share/R/barcode.R
112
115
  - share/R/heatmap.3.R
@@ -118,7 +121,9 @@ files:
118
121
  - test/rbbt/statistics/test_random_walk.rb
119
122
  - test/rbbt/test_ml_task.rb
120
123
  - test/rbbt/test_stan.rb
124
+ - test/rbbt/vector/model/test_spaCy.rb
121
125
  - test/rbbt/vector/model/test_svm.rb
126
+ - test/rbbt/vector/model/test_tensorflow.rb
122
127
  - test/rbbt/vector/test_model.rb
123
128
  - test/test_helper.rb
124
129
  homepage: http://github.com/mikisvaz/rbbt-phgx
@@ -139,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
144
  - !ruby/object:Gem::Version
140
145
  version: '0'
141
146
  requirements: []
142
- rubygems_version: 3.0.6
147
+ rubygems_version: 3.1.4
143
148
  signing_key:
144
149
  specification_version: 4
145
150
  summary: Data-mining and statistics
@@ -152,6 +157,8 @@ test_files:
152
157
  - test/rbbt/statistics/test_hypergeometric.rb
153
158
  - test/rbbt/test_ml_task.rb
154
159
  - test/rbbt/vector/test_model.rb
160
+ - test/rbbt/vector/model/test_spaCy.rb
161
+ - test/rbbt/vector/model/test_tensorflow.rb
155
162
  - test/rbbt/vector/model/test_svm.rb
156
163
  - test/rbbt/test_stan.rb
157
164
  - test/test_helper.rb