rbbt-dm 1.1.47 → 1.1.52

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: eefb23100b9c066739e3d7fbeec940d6362d61b093770635f91e33c61d3a44a6
-  data.tar.gz: 4a7d70a7c252ef9a9e3edb4580eb9977186aa502d8e40c53d367a81244a26d2b
+  metadata.gz: b24501336450d017789e30104fd8681c1fc6c20163c96e134fddfbbe84930da4
+  data.tar.gz: b4b15f1b3f94d07c40b779be25919f6809d2750885be59dab0d0f67749329b30
 SHA512:
-  metadata.gz: 3acf95a4b443a7bcf289a8f24dceec824a1d08453ff7cddbb470d608dedd324db5945893ea8c58668ad27d382e5fb1672fd327f207ac380f182be93ddf90b72c
-  data.tar.gz: 6b04078b2458a9c78481a1585ced18a6c5c7a81d1749422be29be510d735a8e8245d8a74fc446ed8895c801acb65ef5b33280e579abf6f8116cd1472d5efbef0
+  metadata.gz: 8f83514ee5511773598c04675ac03db34e9593cc65c67dfe84a933b107f3137355025de2c79f03c3afa1173f01128c4c7e36776fb549991032c546e7a72bab71
+  data.tar.gz: e84f508a5ee4a68163e8d409c897cc8c3716d5da6ac20ea5aa9eb159c7baf9b8cc8cc8feec0fcd4c05f9ca43f1686a2e5b32f381c716c825198fbcbafa0d6055
@@ -38,7 +38,7 @@ rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.r
       end
     end
     key = key.first if Array === key
-    [key, bars]
+    [key, bars]
   end
 end
 
@@ -32,7 +32,7 @@ module Paths
     if end_node
       end_node = end_node.select{|n| parents.keys.include? n}.first unless String === end_node
       return nil if not parents.include? end_node
-      extract_path(parents, start_node, u)
+      extract_path(parents, start_node, end_node)
     else
       parents
     end
@@ -0,0 +1,43 @@
+require 'rbbt/util/python'
+
+module RbbtTensorflow
+
+  def self.init
+    RbbtPython.run do
+      pyimport "tensorflow", as: "tf"
+    end
+  end
+
+  def self.test
+
+    mod = x_test = y_test = nil
+    RbbtPython.run do
+
+      mnist_db = tf.keras.datasets.mnist
+
+      (x_train, y_train), (x_test, y_test) = mnist_db.load_data()
+      x_train, x_test = x_train / 255.0, x_test / 255.0
+
+      mod = tf.keras.models.Sequential.new([
+        tf.keras.layers.Flatten.new(input_shape: [28, 28]),
+        tf.keras.layers.Dense.new(128, activation:'relu'),
+        tf.keras.layers.Dropout.new(0.2),
+        tf.keras.layers.Dense.new(10, activation:'softmax')
+      ])
+      mod.compile(optimizer='adam',
+        loss='sparse_categorical_crossentropy',
+        metrics=['accuracy'])
+      mod.fit(x_train, y_train, epochs:1)
+      mod
+    end
+
+    RbbtPython.run do
+      mod.evaluate(x_test, y_test, verbose:2)
+    end
+  end
+end
+
+if __FILE__ == $0
+  RbbtTensorflow.init
+  RbbtTensorflow.test
+end
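
Editor's note: the new RbbtTensorflow module above already exercises itself through the trailing "if __FILE__ == $0" block. A minimal sketch of driving it from another script follows; the require path is an assumption (the hunk does not name the new file), and TensorFlow must be importable from the Python environment RbbtPython uses.

    # Sketch only: assumed require path, not stated in the diff.
    require 'rbbt/tensorflow'

    RbbtTensorflow.init   # pyimports tensorflow as "tf" inside the RbbtPython session
    RbbtTensorflow.test   # trains one epoch on MNIST and evaluates on the held-out split
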
@@ -46,10 +46,10 @@ save(model, file='#{model_file}')
 features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
 load(file="#{model_file}");
 #{code}
-cat(paste(label, sep="\\n"));
+cat(paste(label, sep="\\n", collapse="\\n"));
 EOF
-
-    res = io.read.sub(/WARNING: .*?\n/s,'').split(/\s+/).collect{|l| l.to_f}
+    txt = io.read
+    res = txt.sub(/WARNING: .*?\n/s,'').split(/\s+/).collect{|l| l.to_f}
 
     if list
       res
@@ -60,13 +60,51 @@ cat(paste(label, sep="\\n"));
     end
   end
 
+  def __load_method(file)
+    code = Open.read(file)
+    code.sub!(/.*Proc\.new/, "Proc.new")
+    instance_eval code, file
+  end
+
   def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
     @directory = directory
     FileUtils.mkdir_p @directory unless File.exists? @directory
+
     @model_file = File.join(@directory, "model")
-    extract_features = @extract_features
-    train_model = @train_model
-    eval_model = @eval_model
+    @extract_features_file = File.join(@directory, "features")
+    @train_model_file = File.join(@directory, "train_model")
+    @eval_model_file = File.join(@directory, "eval_model")
+    @train_model_file_R = File.join(@directory, "train_model.R")
+    @eval_model_file_R = File.join(@directory, "eval_model.R")
+
+    if extract_features.nil?
+      if File.exists?(@extract_features_file)
+        @extract_features = __load_method @extract_features_file
+      end
+    else
+      @extract_features = extract_features
+    end
+
+    if train_model.nil?
+      if File.exists?(@train_model_file)
+        @train_model = __load_method @train_model_file
+      elsif File.exists?(@train_model_file_R)
+        @train_model = Open.read(@train_model_file_R)
+      end
+    else
+      @train_model = train_model
+    end
+
+    if eval_model.nil?
+      if File.exists?(@eval_model_file)
+        @eval_model = __load_method @eval_model_file
+      elsif File.exists?(@eval_model_file_R)
+        @eval_model = Open.read(@eval_model_file_R)
+      end
+    else
+      @eval_model = eval_model
+    end
+
     @features = []
     @labels = []
   end
@@ -77,8 +115,47 @@ cat(paste(label, sep="\\n"));
   end
 
   def add(element, label = nil)
-    @features << extract_features.call(element)
-    @labels << label unless label.nil?
+    features = @extract_features ? extract_features.call(element) : element
+    @features << features
+    @labels << label
+  end
+
+  def add_list(elements, labels = nil)
+    if @extract_features.nil? || @extract_features.arity == 1
+      elements.zip(labels || [nil]).each do |elem,label|
+        add(elem, label)
+      end
+    else
+      features = @extract_features.call(nil, elements)
+      @features.concat features
+      @labels.concat labels if labels
+    end
+  end
+
+  def save_models
+    require 'method_source'
+
+    case
+    when Proc === train_model
+      begin
+        Open.write(@train_model_file, train_model.source)
+      rescue
+      end
+    when String === train_model
+      Open.write(@train_model_file_R, @train_model)
+    end
+
+    Open.write(@extract_features_file, @extract_features.source) if @extract_features
+
+    case
+    when Proc === eval_model
+      begin
+        Open.write(@eval_model_file, eval_model.source)
+      rescue
+      end
+    when String === eval_model
+      Open.write(@eval_model_file_R, eval_model)
+    end
   end
 
   def train
@@ -88,6 +165,7 @@ cat(paste(label, sep="\\n"));
     when String === train_model
       VectorModel.R_train(@model_file, @features, @labels, train_model)
     end
+    save_models
   end
 
   def run(code)
@@ -96,99 +174,120 @@ cat(paste(label, sep="\\n"));
 
   def eval(element)
     case
-    when Proc === eval_model
-      eval_model.call(@model_file, extract_features.call(element), false)
-    when String === eval_model
-      VectorModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
+    when Proc === @eval_model
+      @eval_model.call(@model_file, @extract_features.call(element), false)
+    when String === @eval_model
+      VectorModel.R_eval(@model_file, @extract_features.call(element), false, eval_model)
     end
   end
 
   def eval_list(elements, extract = true)
+
+    if extract && ! @extract_features.nil?
+      features = if @extract_features.arity == 1
+                   elements.collect{|element| @extract_features.call(element) }
+                 else
+                   @extract_features.call(nil, elements)
+                 end
+    else
+      features = elements
+    end
+
     case
     when Proc === eval_model
-      eval_model.call(@model_file, extract ? elements.collect{|element| extract_features.call(element)} : elements, true)
+      eval_model.call(@model_file, features, true)
     when String === eval_model
-      SVMModel.R_eval(@model_file, extract ? elements.collect{|element| extract_features.call(element)} : elements, true, eval_model)
+      VectorModel.R_eval(@model_file, features, true, eval_model)
     end
   end
 
-  def cross_validation(folds = 10)
-    saved_features = @features
-    saved_labels = @labels
-    seq = (0..features.length - 1).to_a
+  #def cross_validation(folds = 10)
+  #  saved_features = @features
+  #  saved_labels = @labels
+  #  seq = (0..features.length - 1).to_a
 
-    chunk_size = features.length / folds
+  #  chunk_size = features.length / folds
 
-    acc = []
-    folds.times do
-      seq = seq.shuffle
-      eval_chunk = seq[0..chunk_size]
-      train_chunk = seq[chunk_size.. -1]
+  #  acc = []
+  #  folds.times do
+  #    seq = seq.shuffle
+  #    eval_chunk = seq[0..chunk_size]
+  #    train_chunk = seq[chunk_size.. -1]
 
-      eval_features = @features.values_at *eval_chunk
-      eval_labels = @labels.values_at *eval_chunk
+  #    eval_features = @features.values_at *eval_chunk
+  #    eval_labels = @labels.values_at *eval_chunk
 
-      @features = @features.values_at *train_chunk
-      @labels = @labels.values_at *train_chunk
+  #    @features = @features.values_at *train_chunk
+  #    @labels = @labels.values_at *train_chunk
 
-      train
-      predictions = eval_list eval_features, false
+  #    train
+  #    predictions = eval_list eval_features, false
 
-      acc << predictions.zip(eval_labels).collect{|pred,lab| pred - lab < 0.5 ? 1 : 0}.inject(0){|acc,e| acc +=e} / chunk_size
+  #    acc << predictions.zip(eval_labels).collect{|pred,lab| pred - lab < 0.5 ? 1 : 0}.inject(0){|acc,e| acc +=e} / chunk_size
 
-      @features = saved_features
-      @labels = saved_labels
-    end
+  #    @features = saved_features
+  #    @labels = saved_labels
+  #  end
 
-    acc
-  end
+  #  acc
+  #end
 
   def cross_validation(folds = 10)
 
     res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
 
-    feature_folds = Misc.divide(@features, folds)
-    labels_folds = Misc.divide(@labels, folds)
+    orig_features = @features
+    orig_labels = @labels
 
-    folds.times do |fix|
+    begin
+      feature_folds = Misc.divide(@features, folds)
+      labels_folds = Misc.divide(@labels, folds)
 
-      test_set = feature_folds[fix]
-      train_set = feature_folds.values_at(*((0..9).to_a - [fix])).inject([]){|acc,e| acc += e; acc}
+      folds.times do |fix|
 
-      test_labels = labels_folds[fix]
-      train_labels = labels_folds.values_at(*((0..9).to_a - [fix])).flatten
+        rest = (0..(folds-1)).to_a - [fix]
 
-      tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
+        test_set = feature_folds[fix]
+        train_set = feature_folds.values_at(*rest).inject([]){|acc,e| acc += e; acc}
 
-      @features = train_set
-      @labels = train_labels
-      self.train
-      predictions = self.eval_list test_set, false
+        test_labels = labels_folds[fix]
+        train_labels = labels_folds.values_at(*rest).flatten
 
-      test_labels.zip(predictions).each do |gs,pred|
-        gs = gs.to_i
-        pred = pred > 0.5 ? 1 : 0
-        tp += 1 if gs == pred && gs == 1
-        tn += 1 if gs == pred && gs == 0
-        fp += 1 if gs == 0 && pred == 1
-        fn += 1 if gs == 1 && pred == 0
-      end
+        tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
 
-      p = tp + fn
-      pp = tp + fp
+        @features = train_set
+        @labels = train_labels
+        self.train
+        predictions = self.eval_list test_set, false
 
-      pr = tp.to_f / pp
-      re = tp.to_f / p
+        raise "Number of predictions (#{predictions.length}) and test labels (#{test_labels.length}) do not match" if predictions.length != test_labels.length
 
-      f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
+        test_labels.zip(predictions).each do |gs,pred|
+          gs = gs.to_i
+          pred = pred > 0.5 ? 1 : 0
+          tp += 1 if gs == pred && gs == 1
+          tn += 1 if gs == pred && gs == 0
+          fp += 1 if gs == 0 && pred == 1
+          fn += 1 if gs == 1 && pred == 0
+        end
 
-      Misc.fingerprint([tp,tn,fp,fn,pr,re,f1])
+        p = tp + fn
+        pp = tp + fp
 
-      Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
+        pr = tp.to_f / pp
+        re = tp.to_f / p
 
-      res[fix] = [tp,tn,fp,fn,pr,re,f1]
-    end
+        f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
+
+        Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1} - #{[tp.to_s, tn.to_s, fp.to_s, fn.to_s] * " "}"
 
+        res[fix] = [tp,tn,fp,fn,pr,re,f1]
+      end
+    ensure
+      @features = orig_features
+      @labels = orig_labels
+    end
+    self.train
     res
   end
 end
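
Editor's note: a brief usage sketch (not part of the diff) of the VectorModel behavior added above, written only against the API visible in these hunks: the constructor takes a directory plus optional extract/train/eval callbacks, train now persists the callbacks through save_models, and cross_validation restores the original features and labels in its ensure block. The toy feature extractor, labels, and /tmp path are made up for illustration.

    require 'rbbt/vector/model'

    # Hypothetical toy classifier: label 1 when the string is longer than 5 characters.
    extract = Proc.new{|element| [element.length] }

    train = Proc.new do |model_file, features, labels|
      # train callbacks receive the model file plus the accumulated features and labels
      Open.write(model_file, labels.zip(features).collect{|l, f| ([l] + f) * "\t"} * "\n")
    end

    evalp = Proc.new do |model_file, features, list|
      # eval callbacks receive the model file, the features, and a list flag
      list ? features.collect{|f| f.first > 5 ? 1 : 0} : (features.first > 5 ? 1 : 0)
    end

    model = VectorModel.new("/tmp/toy_model", extract, train, evalp)
    model.add("short", 0)
    model.add("a much longer string", 1)
    model.train                    # also calls save_models, writing the callbacks under the directory
    model.eval("another string")   # => 0 or 1
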
@@ -0,0 +1,73 @@
+require 'rbbt/vector/model'
+require 'rbbt/nlp/spaCy'
+
+class SpaCyModel < VectorModel
+  attr_accessor :config
+
+  def spacy(&block)
+    RbbtPython.run "spacy" do
+      RbbtPython.module_eval(&block)
+    end
+  end
+
+  def initialize(dir, config, lang = 'en_core_web_md')
+    @config = case
+              when Path === config
+                config.read
+              when Misc.is_filename?(config)
+                Open.read(config)
+              when (Misc.is_filename?(config, false) && Rbbt.share.spaCy.cpu[config].exists?)
+                Rbbt.share.spaCy.cpu[config].read
+              when (Misc.is_filename?(config, false) && Rbbt.share.spaCy[config].exists?)
+                Rbbt.share.spaCy[config].read
+              else
+                config
+              end
+    @lang = lang
+
+    super(dir)
+
+    @train_model = Proc.new do |file, features, labels|
+      texts = features
+      docs = []
+      tmpconfig = File.join(file, 'config')
+      tmptrain = File.join(file, 'train.spacy')
+      SpaCy.config(@config, tmpconfig)
+      spacy do
+        nlp = SpaCy.nlp(lang)
+        docs = []
+        RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
+          if %w(1 true pos).include?(label.to_s.downcase)
+            doc.cats["positive"] = 1
+            doc.cats["negative"] = 0
+          else
+            doc.cats["positive"] = 0
+            doc.cats["negative"] = 1
+          end
+          docs << doc
+        end
+
+        doc_bin = spacy.tokens.DocBin.new(docs: docs)
+        doc_bin.to_disk(tmptrain)
+      end
+
+      gpu = Rbbt::Config.get('gpu_id', :spacy, :spacy_train)
+      CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}", "--gpu-id" => gpu)
+    end
+
+    @eval_model = Proc.new do |file, features|
+      texts = features
+
+      docs = []
+      spacy do
+        nlp = spacy.load("#{file}/model-best")
+
+        texts.collect do |text|
+          cats = nlp.(text).cats
+          cats['positive'] > cats['negative'] ? 1 : 0
+        end
+      end
+    end
+  end
+
+end
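
Editor's note: a brief usage sketch (not part of the diff) for the new SpaCyModel, based on the constructor and the train/eval Procs above. The require path, config name, directory, and example texts are hypothetical; an actual run needs spaCy installed and a textcat config resolvable through Rbbt.share.spaCy.

    require 'rbbt/vector/model/spaCy'   # assumed path; the diff does not name the new file

    # Hypothetical binary text classification with a config shipped under share/spaCy
    model = SpaCyModel.new("/tmp/spacy_textcat", "textcat_accuracy.conf")

    model.add_list(["the treatment was effective", "no response was observed"], %w(pos neg))
    model.train                                   # writes train.spacy and shells out to spacy train
    model.eval_list(["the patient improved"])     # => e.g. [1]
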