rbbt-dm 1.2.9 → 1.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: db2cbab94e21fd2ca67f7306fa9941b59cbfb2865382e5439edf6313f50309e7
-  data.tar.gz: f4acf3651daa90ef23bc454c62df68e208976a977d51e2e85d02558d48897187
+  metadata.gz: f9b8071884e4e9d7a8c04f175fe262aad9e2b77911dca787a957a5c5f797fb9b
+  data.tar.gz: 1c7334d62036d3ae07b7f625b310f401b5078022f909be34cd78bb66c5b2af06
 SHA512:
-  metadata.gz: 7786759636450821aabca306cd210ab3d201e094b81bb70052d57d7bfb6e4de73a198576fe4b002487baf7997138f9c53b91644632cd12cc79b40ff62141a70a
-  data.tar.gz: 9870745068a897909170f3a6187e520e6530b121f8ad4ab40224c3369a16a8bb1c1e55bb8ca8fd892943ec1fcead9f4661e49c3d97d07d3740236a1ec4f69a34
+  metadata.gz: 22c73d01543e93a2a7b10ecaa88db9a663b35c8264b6d0e5e9d4b00096f34955250105dec4787242529c594c1a959feb23a4b5cd46298850eee7a813dc551d0f
+  data.tar.gz: 545663b2ee93dd0e6e6b54e353cb3bfafab9001c7031b42e7f895fb95ea85ffb6c1dcdb54bb671ee5cace49561cca018212e25ee43592b457e4e1abe83277076
@@ -2,53 +2,23 @@ require 'rbbt/vector/model/torch'
 
 class HuggingfaceModel < TorchModel
 
-  def self.tsv_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
-
-    if labels
-      labels = case class_labels
-               when Array
-                 labels.collect{|l| class_labels.index l}
-               when Hash
-                 inverse_class_labels = {}
-                 class_labels.each{|c,l| inverse_class_labels[l] = c }
-                 labels.collect{|l| inverse_class_labels[l]}
-               else
-                 labels
-               end
-
-      Open.write(tsv_dataset_file) do |ffile|
-        ffile.puts ["label", "text"].flatten * "\t"
-        elements.zip(labels).each do |element,label|
-          element = element.gsub("\n", " ")
-          ffile.puts [label, element].flatten * "\t"
-        end
-        ffile.sync
-      end
-    else
-      Open.write(tsv_dataset_file) do |ffile|
-        ffile.puts ["text"].flatten * "\t"
-        elements.each do |element|
-          element = element.gsub("\n", " ")
-          ffile.puts element
-        end
-        ffile.sync
-      end
-    end
-
-    tsv_dataset_file
-  end
-
   def initialize(task, checkpoint, dir = nil, model_options = {})
-    super(dir, model_options)
+    super(dir, nil, model_options)
+
+    checkpoint = checkpoint.find if Path === checkpoint
 
     @model_options = Misc.add_defaults @model_options, :task => task, :checkpoint => checkpoint
 
     init_model do
       checkpoint = @model_path && File.directory?(@model_path) ? @model_path : @model_options[:checkpoint]
+
       model = RbbtPython.call_method("rbbt_dm.huggingface", :load_model,
         @model_options[:task], checkpoint, **(IndiferentHash.setup(model_options[:model_args]) || {}))
+
+      tokenizer_checkpoint = @model_options[:tokenizer_checkpoint] || checkpoint
+
       tokenizer = RbbtPython.call_method("rbbt_dm.huggingface", :load_tokenizer,
-        @model_options[:task], checkpoint, **(IndiferentHash.setup(model_options[:tokenizer_args]) || {}))
+        @model_options[:task], tokenizer_checkpoint, **(IndiferentHash.setup(model_options[:tokenizer_args]) || {}))
 
       [model, tokenizer]
     end
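
A rough usage sketch (not part of the diff): the constructor now forwards `dir` to `PythonModel` and resolves the tokenizer from the new `:tokenizer_checkpoint` option, falling back to the model checkpoint. The task name and checkpoint ids below are illustrative.

    model = HuggingfaceModel.new "SequenceClassification",
      "distilbert-base-uncased-finetuned-sst-2-english", dir,
      tokenizer_checkpoint: "distilbert-base-uncased"
    model.eval "An illustrative sentence"
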
@@ -75,7 +45,7 @@ class HuggingfaceModel < TorchModel
       checkpoint_dir = File.join(tmpdir, 'checkpoints')
     end
 
-    dataset_file = HuggingfaceModel.tsv_dataset(tsv_file, texts)
+    dataset_file = TorchModel.text_dataset(tsv_file, texts)
     training_args_obj = RbbtPython.call_method("rbbt_dm.huggingface", :training_args, checkpoint_dir, @model_options[:training_args])
 
     begin
@@ -102,7 +72,7 @@ class HuggingfaceModel < TorchModel
     end
 
     training_args_obj = RbbtPython.call_method("rbbt_dm.huggingface", :training_args, checkpoint_dir, @model_options[:training_args])
-    dataset_file = HuggingfaceModel.tsv_dataset(tsv_file, texts, labels, @model_options[:class_labels])
+    dataset_file = HuggingfaceModel.text_dataset(tsv_file, texts, labels, @model_options[:class_labels])
 
     RbbtPython.call_method("rbbt_dm.huggingface", :train_model, model, tokenizer, training_args_obj, dataset_file, @model_options[:class_weights])
 
@@ -0,0 +1,33 @@
+require 'rbbt/vector/model'
+require 'rbbt/util/python'
+
+RbbtPython.add_path Rbbt.python.find(:lib)
+RbbtPython.init_rbbt
+
+class PythonModel < VectorModel
+  attr_accessor :python_class, :python_module
+  def initialize(dir, python_class = nil, python_module = nil, model_options = nil)
+    python_module = :model if python_module.nil?
+    model_options, python_module = python_module, :model if model_options.nil? && Hash === python_module
+    model_options = {} if model_options.nil?
+
+    super(dir, model_options)
+
+    @python_class = python_class
+    @python_module = python_module
+
+    init_model do
+      RbbtPython.add_path @directory
+      RbbtPython.class_new_obj(@python_module, @python_class, **model_options)
+    end if python_class
+
+    eval_model do |features,list=false|
+      init
+      if list
+        model.eval(features)
+      else
+        model.eval([features])[0]
+      end
+    end
+  end
+end
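
A note on the argument juggling above: a Hash passed in the `python_module` slot is re-interpreted as `model_options`, so the two calls below (directory and class name are hypothetical) end up identical, with `python_module` defaulting to `:model`.

    PythonModel.new dir, 'TestModel', :model, delta: 1
    PythonModel.new dir, 'TestModel', delta: 1
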
@@ -2,34 +2,30 @@ require 'rbbt/vector/model/torch'
 
 class PytorchLightningModel < TorchModel
   attr_accessor :loader, :val_loader, :trainer
-  def initialize(module_name, class_name, dir = nil, model_options = {})
-    super(dir, model_options)
-    @module_name = module_name
-    @class_name = class_name
-
-    init_model do
-      RbbtPython.pyimport @module_name
-      RbbtPython.class_new_obj(@module_name, @class_name, @model_options[:model_args] || {})
-    end
+  def initialize(...)
+    super(...)
 
     train_model do |features,labels|
       model = init
-      raise "Use the loader" if @loader.nil?
-      raise "Use the trainer" if @trainer.nil?
-
-      trainer.fit(model, @loader, @val_loader)
-    end
-
-    eval_model do |features,list|
-      if list
-        model.call(RbbtPython.call_method(:torch, :tensor, features))
-      else
-        model.call(RbbtPython.call_method(:torch, :tensor, [features]))
+      loader = self.loader
+      val_loader = self.val_loader
+      if (features && features.any?) && loader.nil?
+        TmpFile.with_file do |tsv_dataset_file|
+          TorchModel.feature_dataset(tsv_dataset_file, features, labels)
+          RbbtPython.pyimport :rbbt_dm
+          loader = RbbtPython.rbbt_dm.tsv(tsv_dataset_file)
+        end
       end
+      trainer.fit(model, loader, val_loader)
+      TorchModel.save_architecture(model, model_path) if @directory
+      TorchModel.save_state(model, model_path) if @directory
     end
-
   end
-end
 
-if __FILE__ == $0
+  def trainer
+    @trainer ||= begin
+      options = @model_options[:training_args] || @model_options[:trainer_args]
+      RbbtPython.class_new_obj("pytorch_lightning", "Trainer", options || {})
+    end
+  end
 end
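
A sketch of how the lazy pieces above fit together (module name and option values are illustrative): the `trainer` accessor builds a `pytorch_lightning.Trainer` from `:training_args`/`:trainer_args` on first use, and `train` only needs an explicit `loader` when no features were added to the model.

    model = PytorchLightningModel.new dir, "MyLightningModule", :model, trainer_args: { max_epochs: 10 }
    model.trainer             # builds pytorch_lightning.Trainer(max_epochs=10) lazily
    model.loader = my_loader  # optional; otherwise features added to the model are written
                              # with TorchModel.feature_dataset and loaded through rbbt_dm.tsv
    model.train
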
@@ -0,0 +1,58 @@
+class TorchModel
+  def self.feature_tsv(elements, labels = nil, class_labels = nil)
+    tsv = TSV.setup({}, :key_field => "ID", :fields => ["features"], :type => :flat)
+    if labels
+      tsv.fields = tsv.fields + ["label"]
+      labels = case class_labels
+               when Array
+                 labels.collect{|l| class_labels.index l}
+               when Hash
+                 inverse_class_labels = {}
+                 class_labels.each{|c,l| inverse_class_labels[l] = c }
+                 labels.collect{|l| inverse_class_labels[l]}
+               else
+                 labels
+               end
+      elements.zip(labels).each_with_index do |p,i|
+        features, label = p
+        id = i
+        if Array === features
+          tsv[id] = features + [label]
+        else
+          tsv[id] = [features, label]
+        end
+      end
+    else
+      elements.each_with_index do |features,i|
+        id = i
+        if Array === features
+          tsv[id] = features
+        else
+          tsv[id] = [features]
+        end
+      end
+    end
+    tsv
+  end
+
+  def self.feature_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
+    tsv = feature_tsv(elements, labels, class_labels)
+    Open.write(tsv_dataset_file, tsv.to_s)
+    tsv_dataset_file
+  end
+
+  def self.text_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
+    elements = elements.collect{|e| e.gsub("\n", ' ') }
+    tsv = feature_tsv(elements, labels, class_labels)
+    if labels.nil?
+      tsv.fields[0] = "text"
+      tsv.type = :single
+    else
+      tsv.fields[0] = "text"
+      tsv.type = :list
+    end
+    Open.write(tsv_dataset_file, tsv.to_s)
+    tsv_dataset_file
+  end
+
+end
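
For reference, a sketch of what `feature_dataset` produces (path and values are illustrative): a flat TSV keyed by a running integer ID, one column per feature value, and, when labels are given, a trailing label column with class labels translated to indices through `class_labels`.

    TorchModel.feature_dataset '/tmp/dataset.tsv', [[1.0, 2.0], [3.0, 4.0]], %w(tumor normal), %w(normal tumor)
    # row "0" holds 1.0, 2.0 and label 1; row "1" holds 3.0, 4.0 and label 0
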
@@ -0,0 +1,52 @@
+class TorchModel
+  module Tensor
+    def to_ruby
+      RbbtPython.numpy2ruby(self)
+    end
+    def self.setup(obj)
+      obj.extend Tensor
+    end
+  end
+
+  def self.init_python
+    RbbtPython.pyimport :torch
+    RbbtPython.pyimport :rbbt
+    RbbtPython.pyimport :rbbt_dm
+    RbbtPython.pyfrom :rbbt_dm, import: :util
+    RbbtPython.pyfrom :torch, import: :nn
+  end
+
+  def self.optimizer(model, training_args)
+    begin
+      learning_rate = training_args[:learning_rate] || 0.01
+      RbbtPython.torch.optim.SGD.new(model.parameters(), lr: learning_rate)
+    end
+  end
+
+  def self.device(model_options)
+    case model_options[:device]
+    when String, Symbol
+      RbbtPython.torch.device(model_options[:device].to_s)
+    when nil
+      RbbtPython.rbbt_dm.util.device()
+    else
+      model_options[:device]
+    end
+  end
+
+  def self.dtype(model_options)
+    case model_options[:dtype]
+    when String, Symbol
+      RbbtPython.torch.call(model_options[:dtype])
+    when nil
+      nil
+    else
+      model_options[:dtype]
+    end
+  end
+
+  def self.tensor(obj, device, dtype)
+    RbbtPython.torch.tensor(obj, dtype: dtype, device: device)
+  end
+
+end
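
A sketch of how these helpers are driven from `model_options` (the values here are illustrative): `:device` may be a string or symbol, an already-built torch device object, or omitted to let `rbbt_dm.util.device()` choose, and `:learning_rate` feeds the default SGD optimizer.

    model = TorchModel.new dir, nil, nil, device: :cpu, training_args: { learning_rate: 0.05, epochs: 10 }
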
@@ -0,0 +1,31 @@
+class TorchModel
+  def self.get_layer(model, layer = nil)
+    if layer.nil?
+      model
+    else
+      layer.split(".").inject(model){|acc,l| PyCall.getattr(acc, l.to_sym) }
+    end
+  end
+  def get_layer(...); TorchModel.get_layer(model, ...); end
+
+  def self.get_weights(model, layer = nil)
+    Tensor.setup PyCall.getattr(get_layer(model, layer), :weight)
+  end
+  def get_weights(...); TorchModel.get_weights(model, ...); end
+
+  def self.freeze(layer)
+    begin
+      PyCall.getattr(layer, :weight).requires_grad = false
+    rescue
+    end
+    RbbtPython.iterate(layer.children) do |layer|
+      freeze(layer)
+    end
+  end
+  def self.freeze_layer(model, layer)
+    layer = get_layer(model, layer)
+    freeze(layer)
+  end
+  def freeze_layer(...); TorchModel.freeze_layer(model, ...); end
+
+end
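
Sketch of the introspection helpers on a built model (the `'fc'` layer name is illustrative and must match an attribute of the underlying Python module); dotted names descend through nested attributes.

    model.get_weights('fc').to_ruby   # weight matrix as nested Ruby arrays
    model.freeze_layer('fc')          # sets requires_grad = false on that layer and its children
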
@@ -0,0 +1,30 @@
+class TorchModel
+  def self.model_architecture(model_path)
+    model_path + '.architecture'
+  end
+
+  def self.save_state(model, model_path)
+    Log.debug "Saving model state into #{model_path}"
+    RbbtPython.torch.save(model.state_dict(), model_path)
+  end
+
+  def self.load_state(model, model_path)
+    return model unless Open.exists?(model_path)
+    Log.debug "Loading model state from #{model_path}"
+    model.load_state_dict(RbbtPython.torch.load(model_path))
+    model
+  end
+
+  def self.save_architecture(model, model_path)
+    model_architecture = model_architecture(model_path)
+    Log.debug "Saving model architecture into #{model_architecture}"
+    RbbtPython.torch.save(model, model_architecture)
+  end
+
+  def self.load_architecture(model_path)
+    model_architecture = model_architecture(model_path)
+    return unless Open.exists?(model_architecture)
+    Log.debug "Loading model architecture from #{model_architecture}"
+    RbbtPython.torch.load(model_architecture)
+  end
+end
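
These map onto two files per model: the state dict saved at `model_path` itself and the pickled module at `model_path + '.architecture'`.

    TorchModel.save_architecture(model, model_path)    # writes model_path + '.architecture'
    TorchModel.save_state(model, model_path)           # writes the state_dict to model_path
    model = TorchModel.load_architecture(model_path)   # nil when the file is missing
    TorchModel.load_state(model, model_path)           # no-op when there is no saved state
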
@@ -1,37 +1,71 @@
-require 'rbbt/vector/model'
-require 'rbbt/util/python'
+require_relative 'python'
 
-RbbtPython.add_path Rbbt.python.find(:lib)
-RbbtPython.init_rbbt
+class TorchModel < PythonModel
 
-class TorchModel < VectorModel
+  attr_accessor :model, :criterion, :optimizer, :training_args
 
-  attr_accessor :model
+  def initialize(...)
+    TorchModel.init_python
+    super(...)
+    @training_args = model_options[:training_args] || {}
 
-  def self.get_layer(model, layer)
-    layer.split(".").inject(model){|acc,l| PyCall.getattr(acc, l.to_sym) }
-  end
+    init_model do
+      model = TorchModel.load_architecture(model_path)
+      if model.nil?
+        RbbtPython.add_path @directory
+        RbbtPython.class_new_obj(@python_module, @python_class, **model_options)
+      else
+        TorchModel.load_state(model, model_path)
+      end
+    end
 
-  def self.get_weights(model, layer)
-    PyCall.getattr(get_layer(model, layer), :weight)
-  end
+    eval_model do |features,list=false|
+      init
+      @device ||= TorchModel.device(model_options)
+      @dtype ||= TorchModel.dtype(model_options)
+      model.to(@device)
 
-  def self.freeze(layer)
-    begin
-      PyCall.getattr(layer, :weight).requires_grad = false
-    rescue
-    end
-    RbbtPython.iterate(layer.children) do |layer|
-      freeze(layer)
+      tensor = list ? TorchModel.tensor(features, @device, @dtype) : TorchModel.tensor([features], @device, @dtype)
+
+      loss, res = model.call(tensor)
+
+      res = loss if res.nil?
+
+      res = TorchModel::Tensor.setup(list ? res : res[0])
+
+      res
     end
-  end
 
-  def self.freeze_layer(model, layer)
-    layer = get_layer(model, layer)
-    freeze(layer)
-  end
+    train_model do |features,labels|
+      init
+      @device ||= TorchModel.device(model_options)
+      @dtype ||= TorchModel.dtype(model_options)
+      model.to(@device)
+      @optimizer ||= TorchModel.optimizer(model, training_args)
+      epochs = training_args[:epochs] || 3
+
+      inputs = TorchModel.tensor(features, @device, @dtype)
+      #target = TorchModel.tensor(labels.collect{|v| [v] }, @device, @dtype)
+      target = TorchModel.tensor(labels, @device, @dtype)
 
-  def initialize(dir, model_options = {})
-    super(dir, model_options)
+      Log::ProgressBar.with_bar epochs, :desc => "Training" do |bar|
+        epochs.times do |i|
+          @optimizer.zero_grad()
+          outputs = model.call(inputs)
+          outputs = outputs.squeeze() if target.dim() == 1
+          loss = criterion.call(outputs, target)
+          loss.backward()
+          @optimizer.step
+          Log.debug "Epoch #{i}, loss #{loss}"
+          bar.tick
+        end
+      end
+      TorchModel.save_architecture(model, model_path) if @directory
+      TorchModel.save_state(model, model_path) if @directory
+    end
   end
 end
+require_relative 'torch/helpers'
+require_relative 'torch/dataloader'
+require_relative 'torch/introspection'
+require_relative 'torch/load_and_save'
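
A note on the calling convention `eval_model` relies on above: the Python module may return either a bare tensor or a `(loss, output)` pair; a bare tensor lands in `loss` during destructuring and is recovered by `res = loss if res.nil?`. A minimal wiring sketch, mirroring the test added later in this diff (values illustrative):

    model = TorchModel.new dir
    model.model = RbbtPython.torch.nn.Linear.new(1, 1)    # forward returns a bare tensor
    model.criterion = RbbtPython.torch.nn.MSELoss.new()   # used by train_model
    model.training_args[:epochs] = 100
    model.add [5.0], [10.0]
    model.train
    model.eval [7.0]
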
@@ -448,10 +448,10 @@ cat(paste(label, sep="\\n", collapse="\\n"));
     end
 
     test_set = feature_folds[fix]
-    train_set = feature_folds.values_at(*rest).inject([]){|acc,e| acc += e; acc}
+    train_set = feature_folds.values_at(*rest).flatten(1)
 
     test_labels = labels_folds[fix]
-    train_labels = labels_folds.values_at(*rest).flatten
+    train_labels = labels_folds.values_at(*rest).flatten(1)
 
     @features = train_set
     @labels = train_labels
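
Why `flatten(1)`: each fold holds an array of per-sample feature vectors, so a full `flatten` would also merge the vectors themselves; flattening one level only concatenates the folds (the same reasoning applies to list-valued labels).

    [[[1, 2], [3, 4]], [[5, 6]]].flatten(1)  # => [[1, 2], [3, 4], [5, 6]]
    [[[1, 2], [3, 4]], [[5, 6]]].flatten     # => [1, 2, 3, 4, 5, 6]
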
@@ -1,6 +1,8 @@
-from torch.utils.data import Dataset, DataLoader
+import rbbt
+import torch
+from .util import *
 
-class TSVDataset(Dataset):
+class TSVDataset(torch.utils.data.Dataset):
     def __init__(self, tsv):
         self.tsv = tsv
 
@@ -20,29 +22,10 @@ class TSVDataset(Dataset):
         return len(self.tsv)
 
 def tsv_dataset(filename, *args, **kwargs):
-    import rbbt
     return TSVDataset(rbbt.tsv(filename, *args, **kwargs))
 
 def tsv(*args, **kwargs):
     return tsv_dataset(*args, **kwargs)
 
 def data_dir():
-    import rbbt
     return rbbt.path('var/rbbt_dm/data')
-
-if __name__ == "__main__":
-    import rbbt
-
-    filename = "/home/miki/test/numeric.tsv"
-    ds = tsv(filename)
-
-    dl = DataLoader(ds, batch_size=1)
-
-    for f, l in iter(dl):
-        print(".")
-        print(f[0,:])
-        print(l[0])
-
-
-
-
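
From the Ruby side this module is reached through `RbbtPython` (the file path below is illustrative): `rbbt_dm.tsv` wraps a TSV file in the `TSVDataset` above so it can feed a torch `DataLoader`, which is how `PytorchLightningModel` builds its default loader.

    RbbtPython.pyimport :rbbt_dm
    ds = RbbtPython.rbbt_dm.tsv('/tmp/dataset.tsv')
    loader = RbbtPython.class_new_obj("torch.utils.data", :DataLoader, dataset: ds, batch_size: 2)
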
@@ -1,4 +1,6 @@
 #{{{ LOAD MODEL
+import datasets
+import rbbt
 
 def import_module_class(module, class_name):
     if (not module == None):
@@ -57,12 +59,15 @@ def eval_model(model, tokenizer, texts, return_logits = True):
 #{{{ TRAIN AND PREDICT
 
 def load_tsv(tsv_file):
-    from datasets import load_dataset
-    return load_dataset('csv', data_files=[tsv_file], sep="\t")
+    tsv = rbbt.tsv(tsv_file)
+    print(tsv)
+    ds = datasets.Dataset.from_pandas(tsv)
+    d = datasets.DatasetDict()
+    d["train"] = ds
+    return d
 
 def load_json(json_file):
-    from datasets import load_dataset
-    return load_dataset('json', data_files=[json_file])
+    return datasets.load_dataset('json', data_files=[json_file])
 
 def tokenize_dataset(tokenizer, dataset):
     return dataset.map(lambda subset: subset if ("input_ids" in subset.keys()) else tokenizer(subset["text"], truncation=True), batched=True)
@@ -28,3 +28,5 @@ def data_directory():
     from pathlib import Path
     print(Path.home())
 
+def model_device(model):
+    return next(model.parameters()).device
@@ -42,7 +42,7 @@ class TestHuggingface < Test::Unit::TestCase
     assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true)["input_ids"].__len__
   end
 
-  def _test_sst_eval
+  def test_sst_eval
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
 
@@ -56,7 +56,7 @@ class TestHuggingface < Test::Unit::TestCase
   end
 
 
-  def test_sst_train
+  def _test_sst_train
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
 
@@ -0,0 +1,31 @@
+require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
+require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
+
+class TestPythonModel < Test::Unit::TestCase
+  def test_linear
+    model = nil
+
+    TmpFile.with_dir do |dir|
+
+      Misc.in_dir dir do
+        Open.write 'model.py', <<-EOF
+class TestModel:
+    def __init__(self, delta):
+        self.delta = delta
+
+    def eval(self, x):
+        return [e + self.delta for e in x]
+        EOF
+        model = PythonModel.new dir, 'TestModel', :model, delta: 1
+
+        assert_equal 2, model.eval(1)
+        assert_equal [4, 6], model.eval_list([3, 5])
+
+        model = PythonModel.new dir, 'TestModel', :model, delta: 2
+
+        assert_equal 3, model.eval(1)
+      end
+    end
+  end
+end
+
@@ -2,82 +2,96 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_he
 require 'rbbt/vector/model/pytorch_lightning'
 
 class TestPytorchLightning < Test::Unit::TestCase
-  def test_clustering
-    nsamples = 10
-    ngenes = 10000
-    samples = nsamples.times.collect{|i| "Sample-#{i}" }
-    data = TSV.setup({}, :key_field => "Gene", :fields => samples + ["cluster"], :type => :list, :cast => :to_f)
-
-    profiles = []
-    p0 = 3
-    p1 = 7
-    profiles[0] = nsamples.times.collect{ rand() + p0 }
-    profiles[1] = nsamples.times.collect{ rand() + p1 }
-
-    ngenes.times do |genen|
-      gene = "Gene-#{genen}"
-      cluster = genen % 2
-      values = profiles[cluster].collect do |m|
-        rand() + m
-      end
-      data[gene] = values + [cluster]
-    end
+  def test_regresion
+    points = 10
+    a = 1
+    b = 1
 
+    x = (0..points - 1)
+    y = points.times.collect{|p| p }
+
     python = <<~EOF
-import torch
-from torch import nn
-from torch.nn import functional as F
-from torch.utils.data import DataLoader
-from torch.utils.data import random_split
-from torchvision.datasets import MNIST
-from torchvision import transforms
 import pytorch_lightning as pl
+import numpy as np
+import torch
+from torch.nn import MSELoss
+from torch.optim import Adam
+from torch.utils.data import DataLoader, Dataset
+import torch.nn as nn
+
+
+class SimpleDataset(Dataset):
+    def __init__(self):
+        X = np.arange(10000)
+        y = X * 2
+        X = [[_] for _ in X]
+        y = [[_] for _ in y]
+        self.X = torch.Tensor(X)
+        self.y = torch.Tensor(y)
+
+    def __len__(self):
+        return len(self.y)
+
+    def __getitem__(self, idx):
+        return {"X": self.X[idx], "y": self.y[idx]}
+
 
 class TestPytorchLightningModel(pl.LightningModule):
-    def __init__(self, input_size=10, internal_dim=1):
-        super().__init__()
-        self.model = nn.Tanh()
-
-    def configure_optimizers(self):
-        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
-        return optimizer
-
-    @torch.cuda.amp.autocast(True)
-    def forward(self, x):
-        x = x.to(self.dtype)
-        return self.model(x).squeeze()
-
-    @torch.cuda.amp.autocast(True)
-    def training_step(self, train_batch, batch_idx):
-        x, y = train_batch
-        x = x.to(self.dtype)
-        y = y.to(self.dtype)
-        y_hat = self.model(x).squeeze()
-        loss = F.mse_loss(y, y_hat)
-        self.log('train_loss', loss)
-        return loss
-
-    @torch.cuda.amp.custom_fwd(cast_inputs=torch.float64)
-    def validation_step(self, val_batch, batch_idx):
-        x, y = train_batch
-        y_hat = self.model(x)
-        loss = F.mse_loss(y, y_hat)
-        self.log('val_loss', loss)
+    def __init__(self):
+        super().__init__()
+        self.fc = nn.Linear(1, 1)
+        self.criterion = MSELoss()
+
+    def forward(self, inputs, labels=None):
+        outputs = self.fc(inputs)
+        loss = 0
+        if labels is not None:
+            loss = self.criterion(outputs, labels)
+        return loss, outputs
+
+    def train_dataloader(self):
+        dataset = SimpleDataset()
+        return DataLoader(dataset, batch_size=1000)
 
+    def training_step(self, batch, batch_idx):
+        input_ids = batch["X"]
+        labels = batch["y"]
+        loss, outputs = self(input_ids, labels)
+        return {"loss": loss}
+
+    def configure_optimizers(self):
+        optimizer = Adam(self.parameters(), lr=0.1)
+        return optimizer
     EOF
 
-    with_python(python) do |pkg|
-      model = PytorchLightningModel.new pkg , "TestPytorchLightningModel", nil, model_args: {internal_dim: 1}
-      TmpFile.with_file(data.to_s) do |data_file|
-        ds = RbbtPython.call_method "rbbt_dm", :tsv, filename: data_file
-        model.loader = RbbtPython.class_new_obj("torch.utils.data", :DataLoader, dataset: ds, batch_size: 64)
-        model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", gpus: 1, max_epochs: 5, precision: 16)
-      end
+    TmpFile.with_dir do |dir|
+      Open.write(File.join(dir, 'model.py'), python)
+      model = PytorchLightningModel.new dir, "TestPytorchLightningModel"
+      model.init
+
+      model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", max_epochs: 10, precision: 16)
+      model.init
+
       model.train
-      encoding = model.eval_list(data.values.collect{|v| v[0..-2] }).detach().cpu().numpy()
-      iii encoding[0..10]
+
+      w = model.get_weights('fc').to_ruby.first.first
+
+      assert w > 1.8
+      assert w < 2.2
+
+      res = model.eval(10.0)
+      assert_equal res, (10 * w)
+      assert res > 1.8 * 10.0
+      assert res < 2.2 * 10.0
+
+      res = model.eval([10.0])
+      res = model.eval_list([[10.0], [11.2], [14.3]])
+      assert_equal 3, RbbtPython.numpy2ruby(res).length
+
+      model = VectorModel.new dir
+      model.init
+
     end
   end
-
 end
 
@@ -0,0 +1,61 @@
+require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
+require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
+
+class TestTorch < Test::Unit::TestCase
+  def test_linear
+    model = nil
+
+    TmpFile.with_dir do |dir|
+
+      # Create model
+
+      model = TorchModel.new dir
+      model.model = RbbtPython.torch.nn.Linear.new(1, 1)
+      model.criterion = RbbtPython.torch.nn.MSELoss.new()
+
+      model.extract_features do |f|
+        [f]
+      end
+
+      model.post_process do |v,list|
+        list ? v.to_ruby.collect{|vv| vv.first } : v.to_ruby.first
+      end
+
+      # Train model
+
+      model.add 5.0, [10.0]
+      model.add 10.0, [20.0]
+
+      model.training_args[:epochs] = 1000
+      model.train
+
+      w = model.get_weights.to_ruby.first.first
+
+      assert w > 1.8
+      assert w < 2.2
+
+      # Load the model again
+
+      model = VectorModel.new dir
+
+      # Test model
+
+      y = model.eval(100.0)
+
+      assert(y > 150.0)
+      assert(y < 250.0)
+
+      test = [1.0, 5.0, 10.0, 20.0]
+      input_sum = Misc.sum(test)
+      sum = Misc.sum(model.eval_list(test))
+      assert sum > 0.8 * input_sum * 2
+      assert sum < 1.2 * input_sum * 2
+
+      w = TorchModel.get_weights(model.model).to_ruby.first.first
+
+      assert w > 1.8
+      assert w < 2.2
+    end
+  end
+end
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-dm
 version: !ruby/object:Gem::Version
-  version: 1.2.9
+  version: 1.3.0
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-08-30 00:00:00.000000000 Z
+date: 2023-12-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util
@@ -108,12 +108,17 @@ files:
 - lib/rbbt/vector/model.rb
 - lib/rbbt/vector/model/huggingface.rb
 - lib/rbbt/vector/model/huggingface/masked_lm.rb
+- lib/rbbt/vector/model/python.rb
 - lib/rbbt/vector/model/pytorch_lightning.rb
 - lib/rbbt/vector/model/random_forest.rb
 - lib/rbbt/vector/model/spaCy.rb
 - lib/rbbt/vector/model/svm.rb
 - lib/rbbt/vector/model/tensorflow.rb
 - lib/rbbt/vector/model/torch.rb
+- lib/rbbt/vector/model/torch/dataloader.rb
+- lib/rbbt/vector/model/torch/helpers.rb
+- lib/rbbt/vector/model/torch/introspection.rb
+- lib/rbbt/vector/model/torch/load_and_save.rb
 - lib/rbbt/vector/model/util.rb
 - python/rbbt_dm/__init__.py
 - python/rbbt_dm/atcold/__init__.py
@@ -143,10 +148,12 @@ files:
 - test/rbbt/test_stan.rb
 - test/rbbt/vector/model/huggingface/test_masked_lm.rb
 - test/rbbt/vector/model/test_huggingface.rb
+- test/rbbt/vector/model/test_python.rb
 - test/rbbt/vector/model/test_pytorch_lightning.rb
 - test/rbbt/vector/model/test_spaCy.rb
 - test/rbbt/vector/model/test_svm.rb
 - test/rbbt/vector/model/test_tensorflow.rb
+- test/rbbt/vector/model/test_torch.rb
 - test/rbbt/vector/test_model.rb
 - test/test_helper.rb
 homepage: http://github.com/mikisvaz/rbbt-phgx
@@ -167,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.4.19
+rubygems_version: 3.5.0.dev
 signing_key:
 specification_version: 4
 summary: Data-mining and statistics
@@ -182,9 +189,11 @@ test_files:
 - test/rbbt/test_stan.rb
 - test/rbbt/vector/model/huggingface/test_masked_lm.rb
 - test/rbbt/vector/model/test_huggingface.rb
+- test/rbbt/vector/model/test_python.rb
 - test/rbbt/vector/model/test_pytorch_lightning.rb
 - test/rbbt/vector/model/test_spaCy.rb
 - test/rbbt/vector/model/test_svm.rb
 - test/rbbt/vector/model/test_tensorflow.rb
+- test/rbbt/vector/model/test_torch.rb
 - test/rbbt/vector/test_model.rb
 - test/test_helper.rb