rbbt-dm 1.2.7 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/matrix/barcode.rb +2 -2
- data/lib/rbbt/matrix/differential.rb +3 -3
- data/lib/rbbt/matrix/knowledge_base.rb +1 -1
- data/lib/rbbt/plots/bar.rb +1 -1
- data/lib/rbbt/stan.rb +1 -1
- data/lib/rbbt/statistics/hypergeometric.rb +2 -1
- data/lib/rbbt/vector/model/huggingface/masked_lm.rb +50 -0
- data/lib/rbbt/vector/model/huggingface.rb +39 -52
- data/lib/rbbt/vector/model/python.rb +33 -0
- data/lib/rbbt/vector/model/pytorch_lightning.rb +31 -0
- data/lib/rbbt/vector/model/random_forest.rb +1 -1
- data/lib/rbbt/vector/model/spaCy.rb +8 -6
- data/lib/rbbt/vector/model/tensorflow.rb +6 -5
- data/lib/rbbt/vector/model/torch/dataloader.rb +58 -0
- data/lib/rbbt/vector/model/torch/helpers.rb +52 -0
- data/lib/rbbt/vector/model/torch/introspection.rb +31 -0
- data/lib/rbbt/vector/model/torch/load_and_save.rb +30 -0
- data/lib/rbbt/vector/model/torch.rb +71 -0
- data/lib/rbbt/vector/model.rb +84 -54
- data/python/rbbt_dm/__init__.py +31 -1
- data/python/rbbt_dm/atcold/__init__.py +0 -0
- data/python/rbbt_dm/atcold/plot_lib.py +141 -0
- data/python/rbbt_dm/atcold/spiral.py +27 -0
- data/python/rbbt_dm/huggingface.py +64 -28
- data/python/rbbt_dm/language_model.py +70 -0
- data/python/rbbt_dm/util.py +32 -0
- data/share/spaCy/gpu/textcat_accuracy.conf +2 -1
- data/test/rbbt/vector/model/huggingface/test_masked_lm.rb +41 -0
- data/test/rbbt/vector/model/test_huggingface.rb +258 -27
- data/test/rbbt/vector/model/test_python.rb +31 -0
- data/test/rbbt/vector/model/test_pytorch_lightning.rb +97 -0
- data/test/rbbt/vector/model/test_spaCy.rb +1 -1
- data/test/rbbt/vector/model/test_tensorflow.rb +3 -0
- data/test/rbbt/vector/model/test_torch.rb +61 -0
- data/test/rbbt/vector/test_model.rb +25 -26
- data/test/test_helper.rb +13 -0
- metadata +35 -16
- data/lib/rbbt/tensorflow.rb +0 -43
- data/lib/rbbt/vector/model/huggingface.old.rb +0 -160
@@ -3,7 +3,7 @@ require 'rbbt/vector/model/huggingface'
|
|
3
3
|
|
4
4
|
class TestHuggingface < Test::Unit::TestCase
|
5
5
|
|
6
|
-
def
|
6
|
+
def _test_options
|
7
7
|
TmpFile.with_file do |dir|
|
8
8
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
9
9
|
task = "SequenceClassification"
|
@@ -11,20 +11,20 @@ class TestHuggingface < Test::Unit::TestCase
|
|
11
11
|
model = HuggingfaceModel.new task, checkpoint, dir, :class_labels => %w(bad good)
|
12
12
|
iii model.eval "This is dog"
|
13
13
|
iii model.eval "This is cat"
|
14
|
-
iii model.
|
14
|
+
iii model.eval_list(["This is dog", "This is cat"])
|
15
15
|
|
16
16
|
model = VectorModel.new dir
|
17
|
-
iii model.
|
17
|
+
iii model.eval_list(["This is dog", "This is cat"])
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
21
|
+
def _test_pipeline
|
22
22
|
require 'rbbt/util/python'
|
23
23
|
model = VectorModel.new
|
24
24
|
model.post_process do |elements|
|
25
25
|
elements.collect{|e| e['label'] }
|
26
26
|
end
|
27
|
-
model.eval_model do |
|
27
|
+
model.eval_model do |elements|
|
28
28
|
RbbtPython.run :transformers do
|
29
29
|
classifier ||= transformers.pipeline("sentiment-analysis")
|
30
30
|
classifier.call(elements)
|
@@ -33,21 +33,53 @@ class TestHuggingface < Test::Unit::TestCase
|
|
33
33
|
|
34
34
|
assert_equal ["POSITIVE"], model.eval("I've been waiting for a HuggingFace course my whole life.")
|
35
35
|
end
|
36
|
+
|
37
|
+
def _test_tokenizer_size
|
38
|
+
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
39
|
+
tokenizer = RbbtPython.call_method("rbbt_dm.huggingface", :load_tokenizer,
|
40
|
+
"MaskedLM", checkpoint, :max_length => 5, :model_max_length => 5)
|
41
|
+
assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true, max_length: 5)["input_ids"].__len__
|
42
|
+
assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true)["input_ids"].__len__
|
43
|
+
end
|
36
44
|
|
37
45
|
def test_sst_eval
|
38
46
|
TmpFile.with_file do |dir|
|
39
47
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
40
48
|
|
41
|
-
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
49
|
+
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir, :tokenizer_args => {:max_length => 16}
|
42
50
|
|
43
51
|
model.model_options[:class_labels] = ["Bad", "Good"]
|
44
52
|
|
45
|
-
assert_equal
|
53
|
+
assert_equal "Bad", model.eval("This is dog")
|
54
|
+
assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
49
58
|
|
50
|
-
def
|
59
|
+
def _test_sst_train
|
60
|
+
TmpFile.with_file do |dir|
|
61
|
+
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
62
|
+
|
63
|
+
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir, max_length: 128
|
64
|
+
|
65
|
+
model.model_options[:class_labels] = %w(Bad Good)
|
66
|
+
|
67
|
+
assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
|
68
|
+
|
69
|
+
100.times do
|
70
|
+
model.add "Dog is good", "Good"
|
71
|
+
end
|
72
|
+
|
73
|
+
model.train
|
74
|
+
|
75
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
76
|
+
|
77
|
+
model = VectorModel.new dir
|
78
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def _test_sst_train_with_labels
|
51
83
|
TmpFile.with_file do |dir|
|
52
84
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
53
85
|
|
@@ -55,28 +87,29 @@ class TestHuggingface < Test::Unit::TestCase
|
|
55
87
|
|
56
88
|
model.model_options[:class_labels] = %w(Bad Good)
|
57
89
|
|
58
|
-
assert_equal ["Bad", "Good"], model.
|
90
|
+
assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
|
59
91
|
|
60
92
|
100.times do
|
61
|
-
model.add "Dog is good",
|
93
|
+
model.add "Dog is good", "Good"
|
62
94
|
end
|
63
95
|
|
64
96
|
model.train
|
65
97
|
|
66
|
-
assert_equal ["Good", "Good"], model.
|
98
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
67
99
|
|
68
100
|
model = VectorModel.new dir
|
69
|
-
assert_equal ["Good", "Good"], model.
|
101
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
70
102
|
end
|
71
103
|
end
|
72
104
|
|
73
|
-
|
105
|
+
|
106
|
+
def _test_sst_train_no_save
|
74
107
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
75
108
|
|
76
109
|
model = HuggingfaceModel.new "SequenceClassification", checkpoint
|
77
110
|
model.model_options[:class_labels] = ["Bad", "Good"]
|
78
111
|
|
79
|
-
assert_equal ["Bad", "Good"], model.
|
112
|
+
assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
|
80
113
|
|
81
114
|
100.times do
|
82
115
|
model.add "Dog is good", 1
|
@@ -84,48 +117,50 @@ class TestHuggingface < Test::Unit::TestCase
|
|
84
117
|
|
85
118
|
model.train
|
86
119
|
|
87
|
-
assert_equal ["Good", "Good"], model.
|
120
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
88
121
|
end
|
89
122
|
|
90
|
-
def
|
123
|
+
def _test_sst_train_save_and_load
|
91
124
|
TmpFile.with_file do |dir|
|
92
125
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
93
126
|
|
94
127
|
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
95
128
|
model.model_options[:class_labels] = ["Bad", "Good"]
|
96
129
|
|
97
|
-
assert_equal ["Bad", "Good"], model.
|
130
|
+
assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
|
98
131
|
|
99
132
|
100.times do
|
100
|
-
model.add "Dog is good",
|
133
|
+
model.add "Dog is good", "Good"
|
101
134
|
end
|
102
135
|
|
103
136
|
model.train
|
104
137
|
|
105
138
|
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
106
139
|
|
107
|
-
assert_equal ["Good", "Good"], model.
|
140
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
108
141
|
|
109
|
-
|
142
|
+
model_path = model.model_path
|
110
143
|
|
111
|
-
model = HuggingfaceModel.new "SequenceClassification",
|
144
|
+
model = HuggingfaceModel.new "SequenceClassification", model_path
|
112
145
|
model.model_options[:class_labels] = ["Bad", "Good"]
|
113
146
|
|
114
|
-
assert_equal ["Good", "Good"], model.
|
147
|
+
assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
|
115
148
|
|
116
149
|
model = VectorModel.new dir
|
117
150
|
|
118
|
-
assert_equal "Good", model.
|
151
|
+
assert_equal "Good", model.eval_list("This is dog")
|
119
152
|
|
120
153
|
end
|
121
154
|
end
|
122
155
|
|
123
|
-
def
|
156
|
+
def _test_sst_stress_test
|
124
157
|
TmpFile.with_file do |dir|
|
125
158
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
126
159
|
|
127
160
|
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
128
161
|
|
162
|
+
assert_equal 0, model.eval("This is dog")
|
163
|
+
|
129
164
|
100.times do
|
130
165
|
model.add "Dog is good", 1
|
131
166
|
model.add "Cat is bad", 0
|
@@ -136,18 +171,214 @@ class TestHuggingface < Test::Unit::TestCase
|
|
136
171
|
end
|
137
172
|
|
138
173
|
Misc.benchmark 1000 do
|
139
|
-
model.
|
174
|
+
model.eval_list(["This is good", "This is terrible", "This is dog", "This is cat", "Very different stuff", "Dog is bad", "Cat is good"])
|
140
175
|
end
|
141
176
|
end
|
142
177
|
end
|
143
178
|
|
144
|
-
def
|
179
|
+
def _test_mask_eval
|
145
180
|
checkpoint = "bert-base-uncased"
|
146
181
|
|
147
182
|
model = HuggingfaceModel.new "MaskedLM", checkpoint
|
148
|
-
assert_equal 3, model.
|
183
|
+
assert_equal 3, model.eval_list(["Paris is the [MASK] of the France.", "The [MASK] worked very hard all the time.", "The [MASK] arrested the dangerous [MASK]."]).
|
149
184
|
reject{|v| v.empty?}.length
|
150
185
|
end
|
151
186
|
|
187
|
+
def _test_mask_eval_tokenizer
|
188
|
+
checkpoint = "bert-base-uncased"
|
189
|
+
|
190
|
+
model = HuggingfaceModel.new "MaskedLM", checkpoint
|
191
|
+
|
192
|
+
mod, tokenizer = model.init
|
193
|
+
|
194
|
+
orig = tokenizer.call("Hi [GENE]")["input_ids"]
|
195
|
+
tokenizer.add_tokens(["[GENE]"])
|
196
|
+
mod.resize_token_embeddings(tokenizer.__len__)
|
197
|
+
new = tokenizer.call("Hi [GENE]")["input_ids"]
|
198
|
+
|
199
|
+
assert orig.length > new.length
|
200
|
+
end
|
201
|
+
|
202
|
+
|
203
|
+
def _test_custom_class
|
204
|
+
TmpFile.with_file do |dir|
|
205
|
+
Open.write File.join(dir, "mypkg/__init__.py"), ""
|
206
|
+
|
207
|
+
Open.write File.join(dir, "mypkg/mymodel.py"), <<~EOF
|
208
|
+
|
209
|
+
# Esta clase es igual que la de RobertaForTokenClassification
|
210
|
+
# Importamos los métodos necesarios
|
211
|
+
import torch.nn as nn
|
212
|
+
from transformers import RobertaConfig
|
213
|
+
from transformers.modeling_outputs import TokenClassifierOutput
|
214
|
+
from transformers.models.roberta.modeling_roberta import RobertaModel, RobertaPreTrainedModel
|
215
|
+
|
216
|
+
# Creamos una clase que herede de RobertaPreTrainedModel
|
217
|
+
class RobertaForTokenClassification_NER(RobertaPreTrainedModel):
|
218
|
+
config_class = RobertaConfig
|
219
|
+
|
220
|
+
def __init__(self, config):
|
221
|
+
# Se usa para inicializar el modelo Roberta
|
222
|
+
super().__init__(config)
|
223
|
+
# Numero de etiquetas que se van a clasificar (sería el número de etiquetas del corpus*2)
|
224
|
+
# Una correspondiente a la etiqueta I y otra a la B.
|
225
|
+
self.num_labels = config.num_labels
|
226
|
+
# No incorporamos pooling layer para devolver los hidden states de cada token (no sólo el CLS)
|
227
|
+
self.roberta = RobertaModel(config, add_pooling_layer=False)
|
228
|
+
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
229
|
+
self.classifier = nn.Linear(config.hidden_size, config.num_labels)
|
230
|
+
self.init_weights()
|
231
|
+
|
232
|
+
def forward(self, input_ids = None, attention_mask = None, token_type_ids = None, labels = None,
|
233
|
+
**kwargs):
|
234
|
+
# Obtenemos una codificación del input (los hidden states)
|
235
|
+
outputs = self.roberta(input_ids, attention_mask = attention_mask,
|
236
|
+
token_type_ids = token_type_ids, **kwargs)
|
237
|
+
|
238
|
+
# A la salida de los hidden states le aplicamos la capa de dropout
|
239
|
+
sequence_output = self.dropout(outputs[0])
|
240
|
+
# Y posteriormente la capa de clasificación.
|
241
|
+
logits = self.classifier(sequence_output)
|
242
|
+
# Si labels tiene algún valor (lo que se hará durante el proceso de entrenamiento), se calculan las Loss
|
243
|
+
# para justar los pesos en el backprop.
|
244
|
+
loss = None
|
245
|
+
if labels is not None:
|
246
|
+
loss_fct = nn.CrossEntropyLoss()
|
247
|
+
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
248
|
+
|
249
|
+
return TokenClassifierOutput(loss=loss, logits=logits,
|
250
|
+
hidden_states=outputs.hidden_states,
|
251
|
+
attentions=outputs.attentions)
|
252
|
+
EOF
|
253
|
+
|
254
|
+
RbbtPython.add_path dir
|
255
|
+
|
256
|
+
biomedical_roberta = "PlanTL-GOB-ES/bsc-bio-ehr-es-cantemist"
|
257
|
+
model = HuggingfaceModel.new "mypkg.mymodel:RobertaForTokenClassification_NER", biomedical_roberta
|
258
|
+
|
259
|
+
model.post_process do |result,is_list|
|
260
|
+
if is_list
|
261
|
+
RbbtPython.numpy2ruby result.predictions
|
262
|
+
else
|
263
|
+
result["logits"][0]
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
texto = "El paciente tiene un cáncer del pulmon"
|
268
|
+
assert model.eval(texto)[5][1] > 0
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
def _test_sst_train_word_embeddings
|
273
|
+
TmpFile.with_file do |dir|
|
274
|
+
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
275
|
+
|
276
|
+
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
277
|
+
model.model_options[:class_labels] = %w(Bad Good)
|
278
|
+
|
279
|
+
mod, tokenizer = model.init
|
280
|
+
|
281
|
+
orig = HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings')
|
282
|
+
orig = RbbtPython.numpy2ruby(orig.cpu.detach.numpy)
|
283
|
+
|
284
|
+
100.times do
|
285
|
+
model.add "Dog is good", "Good"
|
286
|
+
end
|
287
|
+
|
288
|
+
model.train
|
289
|
+
|
290
|
+
new = HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings')
|
291
|
+
new = RbbtPython.numpy2ruby(new.cpu.detach.numpy)
|
292
|
+
|
293
|
+
diff = []
|
294
|
+
new.each_with_index do |row,i|
|
295
|
+
diff << i if row != orig[i]
|
296
|
+
end
|
297
|
+
|
298
|
+
assert diff.length > 0
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
def _test_sst_freeze_word_embeddings
|
303
|
+
TmpFile.with_file do |dir|
|
304
|
+
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
305
|
+
|
306
|
+
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
307
|
+
model.model_options[:class_labels] = %w(Bad Good)
|
308
|
+
|
309
|
+
mod, tokenizer = model.init
|
310
|
+
|
311
|
+
layer = HuggingfaceModel.freeze_layer(mod, 'distilbert')
|
312
|
+
|
313
|
+
orig = HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings')
|
314
|
+
orig = RbbtPython.numpy2ruby(orig.cpu.detach.numpy)
|
315
|
+
|
316
|
+
100.times do
|
317
|
+
model.add "Dog is good", "Good"
|
318
|
+
end
|
319
|
+
|
320
|
+
model.train
|
321
|
+
|
322
|
+
new = HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings')
|
323
|
+
new = RbbtPython.numpy2ruby(new.cpu.detach.numpy)
|
324
|
+
|
325
|
+
diff = []
|
326
|
+
new.each_with_index do |row,i|
|
327
|
+
diff << i if row != orig[i]
|
328
|
+
end
|
329
|
+
|
330
|
+
assert diff.length == 0
|
331
|
+
end
|
332
|
+
end
|
333
|
+
|
334
|
+
def _test_sst_save_word_embeddings
|
335
|
+
TmpFile.with_file do |dir|
|
336
|
+
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
337
|
+
|
338
|
+
model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
|
339
|
+
model.model_options[:class_labels] = %w(Bad Good)
|
340
|
+
|
341
|
+
mod, tokenizer = model.init
|
342
|
+
|
343
|
+
100.times do
|
344
|
+
model.add "Dog is good", "Good"
|
345
|
+
end
|
346
|
+
|
347
|
+
model.train
|
348
|
+
|
349
|
+
orig = RbbtPython.numpy2ruby(
|
350
|
+
HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
|
351
|
+
|
352
|
+
model = HuggingfaceModel.new "MaskedLM", checkpoint, dir
|
353
|
+
|
354
|
+
mod, tokenizer = model.init
|
355
|
+
|
356
|
+
new = RbbtPython.numpy2ruby(
|
357
|
+
HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
|
358
|
+
|
359
|
+
|
360
|
+
diff = []
|
361
|
+
new.each_with_index do |row,i|
|
362
|
+
diff << i if row != orig[i]
|
363
|
+
end
|
364
|
+
|
365
|
+
assert diff.length == 0
|
366
|
+
|
367
|
+
model = HuggingfaceModel.new "MaskedLM", checkpoint
|
368
|
+
|
369
|
+
mod, tokenizer = model.init
|
370
|
+
|
371
|
+
new = RbbtPython.numpy2ruby(
|
372
|
+
HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
|
373
|
+
|
374
|
+
|
375
|
+
diff = []
|
376
|
+
new.each_with_index do |row,i|
|
377
|
+
diff << i if row != orig[i]
|
378
|
+
end
|
379
|
+
|
380
|
+
assert diff.length > 0
|
381
|
+
end
|
382
|
+
end
|
152
383
|
end
|
153
384
|
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
class TestPythonModel < Test::Unit::TestCase
|
5
|
+
def test_linear
|
6
|
+
model = nil
|
7
|
+
|
8
|
+
TmpFile.with_dir do |dir|
|
9
|
+
|
10
|
+
Misc.in_dir dir do
|
11
|
+
Open.write 'model.py', <<-EOF
|
12
|
+
class TestModel:
|
13
|
+
def __init__(self, delta):
|
14
|
+
self.delta = delta
|
15
|
+
|
16
|
+
def eval(self, x):
|
17
|
+
return [e + self.delta for e in x]
|
18
|
+
EOF
|
19
|
+
model = PythonModel.new dir, 'TestModel', :model, delta: 1
|
20
|
+
|
21
|
+
assert_equal 2, model.eval(1)
|
22
|
+
assert_equal [4, 6], model.eval_list([3, 5])
|
23
|
+
|
24
|
+
model = PythonModel.new dir, 'TestModel', :model, delta: 2
|
25
|
+
|
26
|
+
assert_equal 3, model.eval(1)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/vector/model/pytorch_lightning'
|
3
|
+
|
4
|
+
class TestPytorchLightning < Test::Unit::TestCase
|
5
|
+
def test_regresion
|
6
|
+
points = 10
|
7
|
+
a = 1
|
8
|
+
b = 1
|
9
|
+
|
10
|
+
x = (0..points - 1)
|
11
|
+
y = points.times.collect{|p| p }
|
12
|
+
|
13
|
+
python = <<~EOF
|
14
|
+
import pytorch_lightning as pl
|
15
|
+
import numpy as np
|
16
|
+
import torch
|
17
|
+
from torch.nn import MSELoss
|
18
|
+
from torch.optim import Adam
|
19
|
+
from torch.utils.data import DataLoader, Dataset
|
20
|
+
import torch.nn as nn
|
21
|
+
|
22
|
+
|
23
|
+
class SimpleDataset(Dataset):
|
24
|
+
def __init__(self):
|
25
|
+
X = np.arange(10000)
|
26
|
+
y = X * 2
|
27
|
+
X = [[_] for _ in X]
|
28
|
+
y = [[_] for _ in y]
|
29
|
+
self.X = torch.Tensor(X)
|
30
|
+
self.y = torch.Tensor(y)
|
31
|
+
|
32
|
+
def __len__(self):
|
33
|
+
return len(self.y)
|
34
|
+
|
35
|
+
def __getitem__(self, idx):
|
36
|
+
return {"X": self.X[idx], "y": self.y[idx]}
|
37
|
+
|
38
|
+
|
39
|
+
class TestPytorchLightningModel(pl.LightningModule):
|
40
|
+
def __init__(self):
|
41
|
+
super().__init__()
|
42
|
+
self.fc = nn.Linear(1, 1)
|
43
|
+
self.criterion = MSELoss()
|
44
|
+
|
45
|
+
def forward(self, inputs, labels=None):
|
46
|
+
outputs = self.fc(inputs)
|
47
|
+
loss = 0
|
48
|
+
if labels is not None:
|
49
|
+
loss = self.criterion(outputs, labels)
|
50
|
+
return loss, outputs
|
51
|
+
|
52
|
+
def train_dataloader(self):
|
53
|
+
dataset = SimpleDataset()
|
54
|
+
return DataLoader(dataset, batch_size=1000)
|
55
|
+
|
56
|
+
def training_step(self, batch, batch_idx):
|
57
|
+
input_ids = batch["X"]
|
58
|
+
labels = batch["y"]
|
59
|
+
loss, outputs = self(input_ids, labels)
|
60
|
+
return {"loss": loss}
|
61
|
+
|
62
|
+
def configure_optimizers(self):
|
63
|
+
optimizer = Adam(self.parameters(), lr=0.1)
|
64
|
+
return optimizer
|
65
|
+
EOF
|
66
|
+
|
67
|
+
TmpFile.with_dir do |dir|
|
68
|
+
Open.write(File.join(dir, 'model.py'), python)
|
69
|
+
model = PytorchLightningModel.new dir, "TestPytorchLightningModel"
|
70
|
+
model.init
|
71
|
+
|
72
|
+
model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", max_epochs: 10, precision: 16)
|
73
|
+
model.init
|
74
|
+
|
75
|
+
model.train
|
76
|
+
|
77
|
+
w = model.get_weights('fc').to_ruby.first.first
|
78
|
+
|
79
|
+
assert w > 1.8
|
80
|
+
assert w < 2.2
|
81
|
+
|
82
|
+
res = model.eval(10.0)
|
83
|
+
assert_equal res, (10 * w)
|
84
|
+
assert res > 1.8 * 10.0
|
85
|
+
assert res < 2.2 * 10.0
|
86
|
+
|
87
|
+
res = model.eval([10.0])
|
88
|
+
res = model.eval_list([[10.0], [11.2], [14.3]])
|
89
|
+
assert_equal 3, RbbtPython.numpy2ruby(res).length
|
90
|
+
|
91
|
+
model = VectorModel.new dir
|
92
|
+
model.init
|
93
|
+
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
@@ -100,7 +100,7 @@ class TestSpaCyModel < Test::Unit::TestCase
|
|
100
100
|
)
|
101
101
|
|
102
102
|
|
103
|
-
Rbbt::Config.set 'gpu_id',
|
103
|
+
Rbbt::Config.set 'gpu_id', 0, :spacy
|
104
104
|
require 'rbbt/tsv/csv'
|
105
105
|
url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
|
106
106
|
tsv = TSV.csv(Open.open(url))
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
2
|
require 'rbbt/vector/model/tensorflow'
|
3
|
+
require 'rbbt/util/python'
|
3
4
|
|
4
5
|
class TestTensorflowModel < Test::Unit::TestCase
|
5
6
|
|
@@ -10,6 +11,7 @@ class TestTensorflowModel < Test::Unit::TestCase
|
|
10
11
|
|
11
12
|
model = TensorFlowModel.new(
|
12
13
|
dir,
|
14
|
+
jit_compile: true,
|
13
15
|
optimizer: 'adam',
|
14
16
|
loss: 'sparse_categorical_crossentropy',
|
15
17
|
metrics: ['accuracy']
|
@@ -53,5 +55,6 @@ class TestTensorflowModel < Test::Unit::TestCase
|
|
53
55
|
assert sum.to_f / predictions.length > 0.7
|
54
56
|
end
|
55
57
|
end
|
58
|
+
|
56
59
|
end
|
57
60
|
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
class TestTorch < Test::Unit::TestCase
|
5
|
+
def test_linear
|
6
|
+
model = nil
|
7
|
+
|
8
|
+
TmpFile.with_dir do |dir|
|
9
|
+
|
10
|
+
# Create model
|
11
|
+
|
12
|
+
model = TorchModel.new dir
|
13
|
+
model.model = RbbtPython.torch.nn.Linear.new(1, 1)
|
14
|
+
model.criterion = RbbtPython.torch.nn.MSELoss.new()
|
15
|
+
|
16
|
+
model.extract_features do |f|
|
17
|
+
[f]
|
18
|
+
end
|
19
|
+
|
20
|
+
model.post_process do |v,list|
|
21
|
+
list ? v.to_ruby.collect{|vv| vv.first } : v.to_ruby.first
|
22
|
+
end
|
23
|
+
|
24
|
+
# Train model
|
25
|
+
|
26
|
+
model.add 5.0, [10.0]
|
27
|
+
model.add 10.0, [20.0]
|
28
|
+
|
29
|
+
model.training_args[:epochs] = 1000
|
30
|
+
model.train
|
31
|
+
|
32
|
+
w = model.get_weights.to_ruby.first.first
|
33
|
+
|
34
|
+
assert w > 1.8
|
35
|
+
assert w < 2.2
|
36
|
+
|
37
|
+
# Load the model again
|
38
|
+
|
39
|
+
model = VectorModel.new dir
|
40
|
+
|
41
|
+
# Test model
|
42
|
+
|
43
|
+
y = model.eval(100.0)
|
44
|
+
|
45
|
+
assert(y > 150.0)
|
46
|
+
assert(y < 250.0)
|
47
|
+
|
48
|
+
test = [1.0, 5.0, 10.0, 20.0]
|
49
|
+
input_sum = Misc.sum(test)
|
50
|
+
sum = Misc.sum(model.eval_list(test))
|
51
|
+
assert sum > 0.8 * input_sum * 2
|
52
|
+
assert sum < 1.2 * input_sum * 2
|
53
|
+
|
54
|
+
w = TorchModel.get_weights(model.model).to_ruby.first.first
|
55
|
+
|
56
|
+
assert w > 1.8
|
57
|
+
assert w < 2.2
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|