rbbt-dm 1.2.9 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/vector/model/huggingface.rb +10 -40
- data/lib/rbbt/vector/model/python.rb +33 -0
- data/lib/rbbt/vector/model/pytorch_lightning.rb +19 -23
- data/lib/rbbt/vector/model/torch/dataloader.rb +58 -0
- data/lib/rbbt/vector/model/torch/helpers.rb +52 -0
- data/lib/rbbt/vector/model/torch/introspection.rb +31 -0
- data/lib/rbbt/vector/model/torch/load_and_save.rb +30 -0
- data/lib/rbbt/vector/model/torch.rb +60 -26
- data/lib/rbbt/vector/model.rb +2 -2
- data/python/rbbt_dm/__init__.py +4 -21
- data/python/rbbt_dm/huggingface.py +9 -4
- data/python/rbbt_dm/util.py +2 -0
- data/test/rbbt/vector/model/test_huggingface.rb +2 -2
- data/test/rbbt/vector/model/test_python.rb +31 -0
- data/test/rbbt/vector/model/test_pytorch_lightning.rb +80 -66
- data/test/rbbt/vector/model/test_torch.rb +61 -0
- metadata +12 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9b8071884e4e9d7a8c04f175fe262aad9e2b77911dca787a957a5c5f797fb9b
|
4
|
+
data.tar.gz: 1c7334d62036d3ae07b7f625b310f401b5078022f909be34cd78bb66c5b2af06
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22c73d01543e93a2a7b10ecaa88db9a663b35c8264b6d0e5e9d4b00096f34955250105dec4787242529c594c1a959feb23a4b5cd46298850eee7a813dc551d0f
|
7
|
+
data.tar.gz: 545663b2ee93dd0e6e6b54e353cb3bfafab9001c7031b42e7f895fb95ea85ffb6c1dcdb54bb671ee5cace49561cca018212e25ee43592b457e4e1abe83277076
|
@@ -2,53 +2,23 @@ require 'rbbt/vector/model/torch'
|
|
2
2
|
|
3
3
|
class HuggingfaceModel < TorchModel
|
4
4
|
|
5
|
-
def self.tsv_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
|
6
|
-
|
7
|
-
if labels
|
8
|
-
labels = case class_labels
|
9
|
-
when Array
|
10
|
-
labels.collect{|l| class_labels.index l}
|
11
|
-
when Hash
|
12
|
-
inverse_class_labels = {}
|
13
|
-
class_labels.each{|c,l| inverse_class_labels[l] = c }
|
14
|
-
labels.collect{|l| inverse_class_labels[l]}
|
15
|
-
else
|
16
|
-
labels
|
17
|
-
end
|
18
|
-
|
19
|
-
Open.write(tsv_dataset_file) do |ffile|
|
20
|
-
ffile.puts ["label", "text"].flatten * "\t"
|
21
|
-
elements.zip(labels).each do |element,label|
|
22
|
-
element = element.gsub("\n", " ")
|
23
|
-
ffile.puts [label, element].flatten * "\t"
|
24
|
-
end
|
25
|
-
ffile.sync
|
26
|
-
end
|
27
|
-
else
|
28
|
-
Open.write(tsv_dataset_file) do |ffile|
|
29
|
-
ffile.puts ["text"].flatten * "\t"
|
30
|
-
elements.each do |element|
|
31
|
-
element = element.gsub("\n", " ")
|
32
|
-
ffile.puts element
|
33
|
-
end
|
34
|
-
ffile.sync
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
tsv_dataset_file
|
39
|
-
end
|
40
|
-
|
41
5
|
def initialize(task, checkpoint, dir = nil, model_options = {})
|
42
|
-
super(dir, model_options)
|
6
|
+
super(dir, nil, model_options)
|
7
|
+
|
8
|
+
checkpoint = checkpoint.find if Path === checkpoint
|
43
9
|
|
44
10
|
@model_options = Misc.add_defaults @model_options, :task => task, :checkpoint => checkpoint
|
45
11
|
|
46
12
|
init_model do
|
47
13
|
checkpoint = @model_path && File.directory?(@model_path) ? @model_path : @model_options[:checkpoint]
|
14
|
+
|
48
15
|
model = RbbtPython.call_method("rbbt_dm.huggingface", :load_model,
|
49
16
|
@model_options[:task], checkpoint, **(IndiferentHash.setup(model_options[:model_args]) || {}))
|
17
|
+
|
18
|
+
tokenizer_checkpoint = @model_options[:tokenizer_checkpoint] || checkpoint
|
19
|
+
|
50
20
|
tokenizer = RbbtPython.call_method("rbbt_dm.huggingface", :load_tokenizer,
|
51
|
-
@model_options[:task],
|
21
|
+
@model_options[:task], tokenizer_checkpoint, **(IndiferentHash.setup(model_options[:tokenizer_args]) || {}))
|
52
22
|
|
53
23
|
[model, tokenizer]
|
54
24
|
end
|
@@ -75,7 +45,7 @@ class HuggingfaceModel < TorchModel
|
|
75
45
|
checkpoint_dir = File.join(tmpdir, 'checkpoints')
|
76
46
|
end
|
77
47
|
|
78
|
-
dataset_file = HuggingfaceModel.tsv_dataset(tsv_file, texts)
|
48
|
+
dataset_file = TorchModel.text_dataset(tsv_file, texts)
|
79
49
|
training_args_obj = RbbtPython.call_method("rbbt_dm.huggingface", :training_args, checkpoint_dir, @model_options[:training_args])
|
80
50
|
|
81
51
|
begin
|
@@ -102,7 +72,7 @@ class HuggingfaceModel < TorchModel
|
|
102
72
|
end
|
103
73
|
|
104
74
|
training_args_obj = RbbtPython.call_method("rbbt_dm.huggingface", :training_args, checkpoint_dir, @model_options[:training_args])
|
105
|
-
dataset_file = HuggingfaceModel.tsv_dataset(tsv_file, texts, labels, @model_options[:class_labels])
|
75
|
+
dataset_file = HuggingfaceModel.text_dataset(tsv_file, texts, labels, @model_options[:class_labels])
|
106
76
|
|
107
77
|
RbbtPython.call_method("rbbt_dm.huggingface", :train_model, model, tokenizer, training_args_obj, dataset_file, @model_options[:class_weights])
|
108
78
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'rbbt/vector/model'
|
2
|
+
require 'rbbt/util/python'
|
3
|
+
|
4
|
+
RbbtPython.add_path Rbbt.python.find(:lib)
|
5
|
+
RbbtPython.init_rbbt
|
6
|
+
|
7
|
+
class PythonModel < VectorModel
|
8
|
+
attr_accessor :python_class, :python_module
|
9
|
+
def initialize(dir, python_class = nil, python_module = nil, model_options = nil)
|
10
|
+
python_module = :model if python_module.nil?
|
11
|
+
model_options, python_module = python_module, :model if model_options.nil? && Hash === python_module
|
12
|
+
model_options = {} if model_options.nil?
|
13
|
+
|
14
|
+
super(dir, model_options)
|
15
|
+
|
16
|
+
@python_class = python_class
|
17
|
+
@python_module = python_module
|
18
|
+
|
19
|
+
init_model do
|
20
|
+
RbbtPython.add_path @directory
|
21
|
+
RbbtPython.class_new_obj(@python_module, @python_class, **model_options)
|
22
|
+
end if python_class
|
23
|
+
|
24
|
+
eval_model do |features,list=false|
|
25
|
+
init
|
26
|
+
if list
|
27
|
+
model.eval(features)
|
28
|
+
else
|
29
|
+
model.eval([features])[0]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -2,34 +2,30 @@ require 'rbbt/vector/model/torch'
|
|
2
2
|
|
3
3
|
class PytorchLightningModel < TorchModel
|
4
4
|
attr_accessor :loader, :val_loader, :trainer
|
5
|
-
def initialize(
|
6
|
-
super(
|
7
|
-
@module_name = module_name
|
8
|
-
@class_name = class_name
|
9
|
-
|
10
|
-
init_model do
|
11
|
-
RbbtPython.pyimport @module_name
|
12
|
-
RbbtPython.class_new_obj(@module_name, @class_name, @model_options[:model_args] || {})
|
13
|
-
end
|
5
|
+
def initialize(...)
|
6
|
+
super(...)
|
14
7
|
|
15
8
|
train_model do |features,labels|
|
16
9
|
model = init
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
model.call(RbbtPython.call_method(:torch, :tensor, features))
|
26
|
-
else
|
27
|
-
model.call(RbbtPython.call_method(:torch, :tensor, [features]))
|
10
|
+
loader = self.loader
|
11
|
+
val_loader = self.val_loader
|
12
|
+
if (features && features.any?) && loader.nil?
|
13
|
+
TmpFile.with_file do |tsv_dataset_file|
|
14
|
+
TorchModel.feature_dataset(tsv_dataset_file, features, labels)
|
15
|
+
RbbtPython.pyimport :rbbt_dm
|
16
|
+
loader = RbbtPython.rbbt_dm.tsv(tsv_dataset_file)
|
17
|
+
end
|
28
18
|
end
|
19
|
+
trainer.fit(model, loader, val_loader)
|
20
|
+
TorchModel.save_architecture(model, model_path) if @directory
|
21
|
+
TorchModel.save_state(model, model_path) if @directory
|
29
22
|
end
|
30
|
-
|
31
23
|
end
|
32
|
-
end
|
33
24
|
|
34
|
-
|
25
|
+
def trainer
|
26
|
+
@trainer ||= begin
|
27
|
+
options = @model_options[:training_args] || @model_options[:trainer_args]
|
28
|
+
RbbtPython.class_new_obj("pytorch_lightning", "Trainer", options || {})
|
29
|
+
end
|
30
|
+
end
|
35
31
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
class TorchModel
|
2
|
+
def self.feature_tsv(elements, labels = nil, class_labels = nil)
|
3
|
+
tsv = TSV.setup({}, :key_field => "ID", :fields => ["features"], :type => :flat)
|
4
|
+
if labels
|
5
|
+
tsv.fields = tsv.fields + ["label"]
|
6
|
+
labels = case class_labels
|
7
|
+
when Array
|
8
|
+
labels.collect{|l| class_labels.index l}
|
9
|
+
when Hash
|
10
|
+
inverse_class_labels = {}
|
11
|
+
class_labels.each{|c,l| inverse_class_labels[l] = c }
|
12
|
+
labels.collect{|l| inverse_class_labels[l]}
|
13
|
+
else
|
14
|
+
labels
|
15
|
+
end
|
16
|
+
elements.zip(labels).each_with_index do |p,i|
|
17
|
+
features, label = p
|
18
|
+
id = i
|
19
|
+
if Array === features
|
20
|
+
tsv[id] = features + [label]
|
21
|
+
else
|
22
|
+
tsv[id] = [features, label]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
else
|
26
|
+
elements.each_with_index do |features,i|
|
27
|
+
id = i
|
28
|
+
if Array === features
|
29
|
+
tsv[id] = features
|
30
|
+
else
|
31
|
+
tsv[id] = [features]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
tsv
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.feature_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
|
39
|
+
tsv = feature_tsv(elements, labels, class_labels)
|
40
|
+
Open.write(tsv_dataset_file, tsv.to_s)
|
41
|
+
tsv_dataset_file
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.text_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
|
45
|
+
elements = elements.collect{|e| e.gsub("\n", ' ') }
|
46
|
+
tsv = feature_tsv(elements, labels, class_labels)
|
47
|
+
if labels.nil?
|
48
|
+
tsv.fields[0] = "text"
|
49
|
+
tsv.type = :single
|
50
|
+
else
|
51
|
+
tsv.fields[0] = "text"
|
52
|
+
tsv.type = :list
|
53
|
+
end
|
54
|
+
Open.write(tsv_dataset_file, tsv.to_s)
|
55
|
+
tsv_dataset_file
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
class TorchModel
|
2
|
+
module Tensor
|
3
|
+
def to_ruby
|
4
|
+
RbbtPython.numpy2ruby(self)
|
5
|
+
end
|
6
|
+
def self.setup(obj)
|
7
|
+
obj.extend Tensor
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.init_python
|
12
|
+
RbbtPython.pyimport :torch
|
13
|
+
RbbtPython.pyimport :rbbt
|
14
|
+
RbbtPython.pyimport :rbbt_dm
|
15
|
+
RbbtPython.pyfrom :rbbt_dm, import: :util
|
16
|
+
RbbtPython.pyfrom :torch, import: :nn
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.optimizer(model, training_args)
|
20
|
+
begin
|
21
|
+
learning_rate = training_args[:learning_rate] || 0.01
|
22
|
+
RbbtPython.torch.optim.SGD.new(model.parameters(), lr: learning_rate)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.device(model_options)
|
27
|
+
case model_options[:device]
|
28
|
+
when String, Symbol
|
29
|
+
RbbtPython.torch.device(model_options[:device].to_s)
|
30
|
+
when nil
|
31
|
+
RbbtPython.rbbt_dm.util.device()
|
32
|
+
else
|
33
|
+
model_options[:device]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.dtype(model_options)
|
38
|
+
case model_options[:dtype]
|
39
|
+
when String, Symbol
|
40
|
+
RbbtPython.torch.call(model_options[:dtype])
|
41
|
+
when nil
|
42
|
+
nil
|
43
|
+
else
|
44
|
+
model_options[:dtype]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.tensor(obj, device, dtype)
|
49
|
+
RbbtPython.torch.tensor(obj, dtype: dtype, device: device)
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
class TorchModel
|
2
|
+
def self.get_layer(model, layer = nil)
|
3
|
+
if layer.nil?
|
4
|
+
model
|
5
|
+
else
|
6
|
+
layer.split(".").inject(model){|acc,l| PyCall.getattr(acc, l.to_sym) }
|
7
|
+
end
|
8
|
+
end
|
9
|
+
def get_layer(...); TorchModel.get_layer(model, ...); end
|
10
|
+
|
11
|
+
def self.get_weights(model, layer = nil)
|
12
|
+
Tensor.setup PyCall.getattr(get_layer(model, layer), :weight)
|
13
|
+
end
|
14
|
+
def get_weights(...); TorchModel.get_weights(model, ...); end
|
15
|
+
|
16
|
+
def self.freeze(layer)
|
17
|
+
begin
|
18
|
+
PyCall.getattr(layer, :weight).requires_grad = false
|
19
|
+
rescue
|
20
|
+
end
|
21
|
+
RbbtPython.iterate(layer.children) do |layer|
|
22
|
+
freeze(layer)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
def self.freeze_layer(model, layer)
|
26
|
+
layer = get_layer(model, layer)
|
27
|
+
freeze(layer)
|
28
|
+
end
|
29
|
+
def freeze_layer(...); TorchModel.freeze_layer(model, ...); end
|
30
|
+
|
31
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class TorchModel
|
2
|
+
def self.model_architecture(model_path)
|
3
|
+
model_path + '.architecture'
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.save_state(model, model_path)
|
7
|
+
Log.debug "Saving model state into #{model_path}"
|
8
|
+
RbbtPython.torch.save(model.state_dict(), model_path)
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.load_state(model, model_path)
|
12
|
+
return model unless Open.exists?(model_path)
|
13
|
+
Log.debug "Loading model state from #{model_path}"
|
14
|
+
model.load_state_dict(RbbtPython.torch.load(model_path))
|
15
|
+
model
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.save_architecture(model, model_path)
|
19
|
+
model_architecture = model_architecture(model_path)
|
20
|
+
Log.debug "Saving model architecture into #{model_architecture}"
|
21
|
+
RbbtPython.torch.save(model, model_architecture)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.load_architecture(model_path)
|
25
|
+
model_architecture = model_architecture(model_path)
|
26
|
+
return unless Open.exists?(model_architecture)
|
27
|
+
Log.debug "Loading model architecture from #{model_architecture}"
|
28
|
+
RbbtPython.torch.load(model_architecture)
|
29
|
+
end
|
30
|
+
end
|
@@ -1,37 +1,71 @@
|
|
1
|
-
|
2
|
-
require 'rbbt/util/python'
|
1
|
+
require_relative 'python'
|
3
2
|
|
4
|
-
|
5
|
-
RbbtPython.init_rbbt
|
3
|
+
class TorchModel < PythonModel
|
6
4
|
|
7
|
-
|
5
|
+
attr_accessor :model, :criterion, :optimizer, :training_args
|
8
6
|
|
9
|
-
|
7
|
+
def initialize(...)
|
8
|
+
TorchModel.init_python
|
9
|
+
super(...)
|
10
|
+
@training_args = model_options[:training_args] || {}
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
12
|
+
init_model do
|
13
|
+
model = TorchModel.load_architecture(model_path)
|
14
|
+
if model.nil?
|
15
|
+
RbbtPython.add_path @directory
|
16
|
+
RbbtPython.class_new_obj(@python_module, @python_class, **model_options)
|
17
|
+
else
|
18
|
+
TorchModel.load_state(model, model_path)
|
19
|
+
end
|
20
|
+
end
|
14
21
|
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
eval_model do |features,list=false|
|
23
|
+
init
|
24
|
+
@device ||= TorchModel.device(model_options)
|
25
|
+
@dtype ||= TorchModel.dtype(model_options)
|
26
|
+
model.to(@device)
|
18
27
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
28
|
+
tensor = list ? TorchModel.tensor(features, @device, @dtype) : TorchModel.tensor([features], @device, @dtype)
|
29
|
+
|
30
|
+
loss, res = model.call(tensor)
|
31
|
+
|
32
|
+
res = loss if res.nil?
|
33
|
+
|
34
|
+
res = TorchModel::Tensor.setup(list ? res : res[0])
|
35
|
+
|
36
|
+
res
|
26
37
|
end
|
27
|
-
end
|
28
38
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
39
|
+
train_model do |features,labels|
|
40
|
+
init
|
41
|
+
@device ||= TorchModel.device(model_options)
|
42
|
+
@dtype ||= TorchModel.dtype(model_options)
|
43
|
+
model.to(@device)
|
44
|
+
@optimizer ||= TorchModel.optimizer(model, training_args)
|
45
|
+
epochs = training_args[:epochs] || 3
|
46
|
+
|
47
|
+
inputs = TorchModel.tensor(features, @device, @dtype)
|
48
|
+
#target = TorchModel.tensor(labels.collect{|v| [v] }, @device, @dtype)
|
49
|
+
target = TorchModel.tensor(labels, @device, @dtype)
|
33
50
|
|
34
|
-
|
35
|
-
|
51
|
+
Log::ProgressBar.with_bar epochs, :desc => "Training" do |bar|
|
52
|
+
epochs.times do |i|
|
53
|
+
@optimizer.zero_grad()
|
54
|
+
outputs = model.call(inputs)
|
55
|
+
outputs = outputs.squeeze() if target.dim() == 1
|
56
|
+
loss = criterion.call(outputs, target)
|
57
|
+
loss.backward()
|
58
|
+
@optimizer.step
|
59
|
+
Log.debug "Epoch #{i}, loss #{loss}"
|
60
|
+
bar.tick
|
61
|
+
end
|
62
|
+
end
|
63
|
+
TorchModel.save_architecture(model, model_path) if @directory
|
64
|
+
TorchModel.save_state(model, model_path) if @directory
|
65
|
+
end
|
36
66
|
end
|
37
67
|
end
|
68
|
+
require_relative 'torch/helpers'
|
69
|
+
require_relative 'torch/dataloader'
|
70
|
+
require_relative 'torch/introspection'
|
71
|
+
require_relative 'torch/load_and_save'
|
data/lib/rbbt/vector/model.rb
CHANGED
@@ -448,10 +448,10 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
448
448
|
end
|
449
449
|
|
450
450
|
test_set = feature_folds[fix]
|
451
|
-
train_set = feature_folds.values_at(*rest).
|
451
|
+
train_set = feature_folds.values_at(*rest).flatten(1)
|
452
452
|
|
453
453
|
test_labels = labels_folds[fix]
|
454
|
-
train_labels = labels_folds.values_at(*rest).flatten
|
454
|
+
train_labels = labels_folds.values_at(*rest).flatten(1)
|
455
455
|
|
456
456
|
@features = train_set
|
457
457
|
@labels = train_labels
|
data/python/rbbt_dm/__init__.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
|
1
|
+
import rbbt
|
2
|
+
import torch
|
3
|
+
from .util import *
|
2
4
|
|
3
|
-
class TSVDataset(Dataset):
|
5
|
+
class TSVDataset(torch.utils.data.Dataset):
|
4
6
|
def __init__(self, tsv):
|
5
7
|
self.tsv = tsv
|
6
8
|
|
@@ -20,29 +22,10 @@ class TSVDataset(Dataset):
|
|
20
22
|
return len(self.tsv)
|
21
23
|
|
22
24
|
def tsv_dataset(filename, *args, **kwargs):
|
23
|
-
import rbbt
|
24
25
|
return TSVDataset(rbbt.tsv(filename, *args, **kwargs))
|
25
26
|
|
26
27
|
def tsv(*args, **kwargs):
|
27
28
|
return tsv_dataset(*args, **kwargs)
|
28
29
|
|
29
30
|
def data_dir():
|
30
|
-
import rbbt
|
31
31
|
return rbbt.path('var/rbbt_dm/data')
|
32
|
-
|
33
|
-
if __name__ == "__main__":
|
34
|
-
import rbbt
|
35
|
-
|
36
|
-
filename = "/home/miki/test/numeric.tsv"
|
37
|
-
ds = tsv(filename)
|
38
|
-
|
39
|
-
dl = DataLoader(ds, batch_size=1)
|
40
|
-
|
41
|
-
for f, l in iter(dl):
|
42
|
-
print(".")
|
43
|
-
print(f[0,:])
|
44
|
-
print(l[0])
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
@@ -1,4 +1,6 @@
|
|
1
1
|
#{{{ LOAD MODEL
|
2
|
+
import datasets
|
3
|
+
import rbbt
|
2
4
|
|
3
5
|
def import_module_class(module, class_name):
|
4
6
|
if (not module == None):
|
@@ -57,12 +59,15 @@ def eval_model(model, tokenizer, texts, return_logits = True):
|
|
57
59
|
#{{{ TRAIN AND PREDICT
|
58
60
|
|
59
61
|
def load_tsv(tsv_file):
|
60
|
-
|
61
|
-
|
62
|
+
tsv = rbbt.tsv(tsv_file)
|
63
|
+
print(tsv)
|
64
|
+
ds = datasets.Dataset.from_pandas(tsv)
|
65
|
+
d = datasets.DatasetDict()
|
66
|
+
d["train"] = ds
|
67
|
+
return d
|
62
68
|
|
63
69
|
def load_json(json_file):
|
64
|
-
|
65
|
-
return load_dataset('json', data_files=[json_file])
|
70
|
+
return datasets.load_dataset('json', data_files=[json_file])
|
66
71
|
|
67
72
|
def tokenize_dataset(tokenizer, dataset):
|
68
73
|
return dataset.map(lambda subset: subset if ("input_ids" in subset.keys()) else tokenizer(subset["text"], truncation=True), batched=True)
|
data/python/rbbt_dm/util.py
CHANGED
@@ -42,7 +42,7 @@ class TestHuggingface < Test::Unit::TestCase
|
|
42
42
|
assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true)["input_ids"].__len__
|
43
43
|
end
|
44
44
|
|
45
|
-
def
|
45
|
+
def test_sst_eval
|
46
46
|
TmpFile.with_file do |dir|
|
47
47
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
48
48
|
|
@@ -56,7 +56,7 @@ class TestHuggingface < Test::Unit::TestCase
|
|
56
56
|
end
|
57
57
|
|
58
58
|
|
59
|
-
def
|
59
|
+
def _test_sst_train
|
60
60
|
TmpFile.with_file do |dir|
|
61
61
|
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
62
62
|
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
class TestPythonModel < Test::Unit::TestCase
|
5
|
+
def test_linear
|
6
|
+
model = nil
|
7
|
+
|
8
|
+
TmpFile.with_dir do |dir|
|
9
|
+
|
10
|
+
Misc.in_dir dir do
|
11
|
+
Open.write 'model.py', <<-EOF
|
12
|
+
class TestModel:
|
13
|
+
def __init__(self, delta):
|
14
|
+
self.delta = delta
|
15
|
+
|
16
|
+
def eval(self, x):
|
17
|
+
return [e + self.delta for e in x]
|
18
|
+
EOF
|
19
|
+
model = PythonModel.new dir, 'TestModel', :model, delta: 1
|
20
|
+
|
21
|
+
assert_equal 2, model.eval(1)
|
22
|
+
assert_equal [4, 6], model.eval_list([3, 5])
|
23
|
+
|
24
|
+
model = PythonModel.new dir, 'TestModel', :model, delta: 2
|
25
|
+
|
26
|
+
assert_equal 3, model.eval(1)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
@@ -2,82 +2,96 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_he
|
|
2
2
|
require 'rbbt/vector/model/pytorch_lightning'
|
3
3
|
|
4
4
|
class TestPytorchLightning < Test::Unit::TestCase
|
5
|
-
def
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
data = TSV.setup({}, :key_field => "Gene", :fields => samples + ["cluster"], :type => :list, :cast => :to_f)
|
10
|
-
|
11
|
-
profiles = []
|
12
|
-
p0 = 3
|
13
|
-
p1 = 7
|
14
|
-
profiles[0] = nsamples.times.collect{ rand() + p0 }
|
15
|
-
profiles[1] = nsamples.times.collect{ rand() + p1 }
|
16
|
-
|
17
|
-
ngenes.times do |genen|
|
18
|
-
gene = "Gene-#{genen}"
|
19
|
-
cluster = genen % 2
|
20
|
-
values = profiles[cluster].collect do |m|
|
21
|
-
rand() + m
|
22
|
-
end
|
23
|
-
data[gene] = values + [cluster]
|
24
|
-
end
|
5
|
+
def test_regresion
|
6
|
+
points = 10
|
7
|
+
a = 1
|
8
|
+
b = 1
|
25
9
|
|
10
|
+
x = (0..points - 1)
|
11
|
+
y = points.times.collect{|p| p }
|
12
|
+
|
26
13
|
python = <<~EOF
|
27
|
-
import torch
|
28
|
-
from torch import nn
|
29
|
-
from torch.nn import functional as F
|
30
|
-
from torch.utils.data import DataLoader
|
31
|
-
from torch.utils.data import random_split
|
32
|
-
from torchvision.datasets import MNIST
|
33
|
-
from torchvision import transforms
|
34
14
|
import pytorch_lightning as pl
|
15
|
+
import numpy as np
|
16
|
+
import torch
|
17
|
+
from torch.nn import MSELoss
|
18
|
+
from torch.optim import Adam
|
19
|
+
from torch.utils.data import DataLoader, Dataset
|
20
|
+
import torch.nn as nn
|
21
|
+
|
22
|
+
|
23
|
+
class SimpleDataset(Dataset):
|
24
|
+
def __init__(self):
|
25
|
+
X = np.arange(10000)
|
26
|
+
y = X * 2
|
27
|
+
X = [[_] for _ in X]
|
28
|
+
y = [[_] for _ in y]
|
29
|
+
self.X = torch.Tensor(X)
|
30
|
+
self.y = torch.Tensor(y)
|
31
|
+
|
32
|
+
def __len__(self):
|
33
|
+
return len(self.y)
|
34
|
+
|
35
|
+
def __getitem__(self, idx):
|
36
|
+
return {"X": self.X[idx], "y": self.y[idx]}
|
37
|
+
|
35
38
|
|
36
39
|
class TestPytorchLightningModel(pl.LightningModule):
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
x, y = train_batch
|
53
|
-
x = x.to(self.dtype)
|
54
|
-
y = y.to(self.dtype)
|
55
|
-
y_hat = self.model(x).squeeze()
|
56
|
-
loss = F.mse_loss(y, y_hat)
|
57
|
-
self.log('train_loss', loss)
|
58
|
-
return loss
|
59
|
-
|
60
|
-
@torch.cuda.amp.custom_fwd(cast_inputs=torch.float64)
|
61
|
-
def validation_step(self, val_batch, batch_idx):
|
62
|
-
x, y = train_batch
|
63
|
-
y_hat = self.model(x)
|
64
|
-
loss = F.mse_loss(y, y_hat)
|
65
|
-
self.log('val_loss', loss)
|
40
|
+
def __init__(self):
|
41
|
+
super().__init__()
|
42
|
+
self.fc = nn.Linear(1, 1)
|
43
|
+
self.criterion = MSELoss()
|
44
|
+
|
45
|
+
def forward(self, inputs, labels=None):
|
46
|
+
outputs = self.fc(inputs)
|
47
|
+
loss = 0
|
48
|
+
if labels is not None:
|
49
|
+
loss = self.criterion(outputs, labels)
|
50
|
+
return loss, outputs
|
51
|
+
|
52
|
+
def train_dataloader(self):
|
53
|
+
dataset = SimpleDataset()
|
54
|
+
return DataLoader(dataset, batch_size=1000)
|
66
55
|
|
56
|
+
def training_step(self, batch, batch_idx):
|
57
|
+
input_ids = batch["X"]
|
58
|
+
labels = batch["y"]
|
59
|
+
loss, outputs = self(input_ids, labels)
|
60
|
+
return {"loss": loss}
|
61
|
+
|
62
|
+
def configure_optimizers(self):
|
63
|
+
optimizer = Adam(self.parameters(), lr=0.1)
|
64
|
+
return optimizer
|
67
65
|
EOF
|
68
66
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
67
|
+
TmpFile.with_dir do |dir|
|
68
|
+
Open.write(File.join(dir, 'model.py'), python)
|
69
|
+
model = PytorchLightningModel.new dir, "TestPytorchLightningModel"
|
70
|
+
model.init
|
71
|
+
|
72
|
+
model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", max_epochs: 10, precision: 16)
|
73
|
+
model.init
|
74
|
+
|
76
75
|
model.train
|
77
|
-
|
78
|
-
|
76
|
+
|
77
|
+
w = model.get_weights('fc').to_ruby.first.first
|
78
|
+
|
79
|
+
assert w > 1.8
|
80
|
+
assert w < 2.2
|
81
|
+
|
82
|
+
res = model.eval(10.0)
|
83
|
+
assert_equal res, (10 * w)
|
84
|
+
assert res > 1.8 * 10.0
|
85
|
+
assert res < 2.2 * 10.0
|
86
|
+
|
87
|
+
res = model.eval([10.0])
|
88
|
+
res = model.eval_list([[10.0], [11.2], [14.3]])
|
89
|
+
assert_equal 3, RbbtPython.numpy2ruby(res).length
|
90
|
+
|
91
|
+
model = VectorModel.new dir
|
92
|
+
model.init
|
93
|
+
|
79
94
|
end
|
80
95
|
end
|
81
|
-
|
82
96
|
end
|
83
97
|
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
class TestTorch < Test::Unit::TestCase
|
5
|
+
def test_linear
|
6
|
+
model = nil
|
7
|
+
|
8
|
+
TmpFile.with_dir do |dir|
|
9
|
+
|
10
|
+
# Create model
|
11
|
+
|
12
|
+
model = TorchModel.new dir
|
13
|
+
model.model = RbbtPython.torch.nn.Linear.new(1, 1)
|
14
|
+
model.criterion = RbbtPython.torch.nn.MSELoss.new()
|
15
|
+
|
16
|
+
model.extract_features do |f|
|
17
|
+
[f]
|
18
|
+
end
|
19
|
+
|
20
|
+
model.post_process do |v,list|
|
21
|
+
list ? v.to_ruby.collect{|vv| vv.first } : v.to_ruby.first
|
22
|
+
end
|
23
|
+
|
24
|
+
# Train model
|
25
|
+
|
26
|
+
model.add 5.0, [10.0]
|
27
|
+
model.add 10.0, [20.0]
|
28
|
+
|
29
|
+
model.training_args[:epochs] = 1000
|
30
|
+
model.train
|
31
|
+
|
32
|
+
w = model.get_weights.to_ruby.first.first
|
33
|
+
|
34
|
+
assert w > 1.8
|
35
|
+
assert w < 2.2
|
36
|
+
|
37
|
+
# Load the model again
|
38
|
+
|
39
|
+
model = VectorModel.new dir
|
40
|
+
|
41
|
+
# Test model
|
42
|
+
|
43
|
+
y = model.eval(100.0)
|
44
|
+
|
45
|
+
assert(y > 150.0)
|
46
|
+
assert(y < 250.0)
|
47
|
+
|
48
|
+
test = [1.0, 5.0, 10.0, 20.0]
|
49
|
+
input_sum = Misc.sum(test)
|
50
|
+
sum = Misc.sum(model.eval_list(test))
|
51
|
+
assert sum > 0.8 * input_sum * 2
|
52
|
+
assert sum < 1.2 * input_sum * 2
|
53
|
+
|
54
|
+
w = TorchModel.get_weights(model.model).to_ruby.first.first
|
55
|
+
|
56
|
+
assert w > 1.8
|
57
|
+
assert w < 2.2
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.9
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -108,12 +108,17 @@ files:
|
|
108
108
|
- lib/rbbt/vector/model.rb
|
109
109
|
- lib/rbbt/vector/model/huggingface.rb
|
110
110
|
- lib/rbbt/vector/model/huggingface/masked_lm.rb
|
111
|
+
- lib/rbbt/vector/model/python.rb
|
111
112
|
- lib/rbbt/vector/model/pytorch_lightning.rb
|
112
113
|
- lib/rbbt/vector/model/random_forest.rb
|
113
114
|
- lib/rbbt/vector/model/spaCy.rb
|
114
115
|
- lib/rbbt/vector/model/svm.rb
|
115
116
|
- lib/rbbt/vector/model/tensorflow.rb
|
116
117
|
- lib/rbbt/vector/model/torch.rb
|
118
|
+
- lib/rbbt/vector/model/torch/dataloader.rb
|
119
|
+
- lib/rbbt/vector/model/torch/helpers.rb
|
120
|
+
- lib/rbbt/vector/model/torch/introspection.rb
|
121
|
+
- lib/rbbt/vector/model/torch/load_and_save.rb
|
117
122
|
- lib/rbbt/vector/model/util.rb
|
118
123
|
- python/rbbt_dm/__init__.py
|
119
124
|
- python/rbbt_dm/atcold/__init__.py
|
@@ -143,10 +148,12 @@ files:
|
|
143
148
|
- test/rbbt/test_stan.rb
|
144
149
|
- test/rbbt/vector/model/huggingface/test_masked_lm.rb
|
145
150
|
- test/rbbt/vector/model/test_huggingface.rb
|
151
|
+
- test/rbbt/vector/model/test_python.rb
|
146
152
|
- test/rbbt/vector/model/test_pytorch_lightning.rb
|
147
153
|
- test/rbbt/vector/model/test_spaCy.rb
|
148
154
|
- test/rbbt/vector/model/test_svm.rb
|
149
155
|
- test/rbbt/vector/model/test_tensorflow.rb
|
156
|
+
- test/rbbt/vector/model/test_torch.rb
|
150
157
|
- test/rbbt/vector/test_model.rb
|
151
158
|
- test/test_helper.rb
|
152
159
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
@@ -167,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
167
174
|
- !ruby/object:Gem::Version
|
168
175
|
version: '0'
|
169
176
|
requirements: []
|
170
|
-
rubygems_version: 3.
|
177
|
+
rubygems_version: 3.5.0.dev
|
171
178
|
signing_key:
|
172
179
|
specification_version: 4
|
173
180
|
summary: Data-mining and statistics
|
@@ -182,9 +189,11 @@ test_files:
|
|
182
189
|
- test/rbbt/test_stan.rb
|
183
190
|
- test/rbbt/vector/model/huggingface/test_masked_lm.rb
|
184
191
|
- test/rbbt/vector/model/test_huggingface.rb
|
192
|
+
- test/rbbt/vector/model/test_python.rb
|
185
193
|
- test/rbbt/vector/model/test_pytorch_lightning.rb
|
186
194
|
- test/rbbt/vector/model/test_spaCy.rb
|
187
195
|
- test/rbbt/vector/model/test_svm.rb
|
188
196
|
- test/rbbt/vector/model/test_tensorflow.rb
|
197
|
+
- test/rbbt/vector/model/test_torch.rb
|
189
198
|
- test/rbbt/vector/test_model.rb
|
190
199
|
- test/test_helper.rb
|