rbbt-dm 1.2.9 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: db2cbab94e21fd2ca67f7306fa9941b59cbfb2865382e5439edf6313f50309e7
-  data.tar.gz: f4acf3651daa90ef23bc454c62df68e208976a977d51e2e85d02558d48897187
+  metadata.gz: f9b8071884e4e9d7a8c04f175fe262aad9e2b77911dca787a957a5c5f797fb9b
+  data.tar.gz: 1c7334d62036d3ae07b7f625b310f401b5078022f909be34cd78bb66c5b2af06
 SHA512:
-  metadata.gz: 7786759636450821aabca306cd210ab3d201e094b81bb70052d57d7bfb6e4de73a198576fe4b002487baf7997138f9c53b91644632cd12cc79b40ff62141a70a
-  data.tar.gz: 9870745068a897909170f3a6187e520e6530b121f8ad4ab40224c3369a16a8bb1c1e55bb8ca8fd892943ec1fcead9f4661e49c3d97d07d3740236a1ec4f69a34
+  metadata.gz: 22c73d01543e93a2a7b10ecaa88db9a663b35c8264b6d0e5e9d4b00096f34955250105dec4787242529c594c1a959feb23a4b5cd46298850eee7a813dc551d0f
+  data.tar.gz: 545663b2ee93dd0e6e6b54e353cb3bfafab9001c7031b42e7f895fb95ea85ffb6c1dcdb54bb671ee5cace49561cca018212e25ee43592b457e4e1abe83277076
lib/rbbt/vector/model/huggingface.rb CHANGED
@@ -2,53 +2,23 @@ require 'rbbt/vector/model/torch'
 
 class HuggingfaceModel < TorchModel
 
-  def self.tsv_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
-
-    if labels
-      labels = case class_labels
-               when Array
-                 labels.collect{|l| class_labels.index l}
-               when Hash
-                 inverse_class_labels = {}
-                 class_labels.each{|c,l| inverse_class_labels[l] = c }
-                 labels.collect{|l| inverse_class_labels[l]}
-               else
-                 labels
-               end
-
-      Open.write(tsv_dataset_file) do |ffile|
-        ffile.puts ["label", "text"].flatten * "\t"
-        elements.zip(labels).each do |element,label|
-          element = element.gsub("\n", " ")
-          ffile.puts [label, element].flatten * "\t"
-        end
-        ffile.sync
-      end
-    else
-      Open.write(tsv_dataset_file) do |ffile|
-        ffile.puts ["text"].flatten * "\t"
-        elements.each do |element|
-          element = element.gsub("\n", " ")
-          ffile.puts element
-        end
-        ffile.sync
-      end
-    end
-
-    tsv_dataset_file
-  end
-
   def initialize(task, checkpoint, dir = nil, model_options = {})
-    super(dir, model_options)
+    super(dir, nil, model_options)
+
+    checkpoint = checkpoint.find if Path === checkpoint
 
     @model_options = Misc.add_defaults @model_options, :task => task, :checkpoint => checkpoint
 
     init_model do
       checkpoint = @model_path && File.directory?(@model_path) ? @model_path : @model_options[:checkpoint]
+
       model = RbbtPython.call_method("rbbt_dm.huggingface", :load_model,
         @model_options[:task], checkpoint, **(IndiferentHash.setup(model_options[:model_args]) || {}))
+
+      tokenizer_checkpoint = @model_options[:tokenizer_checkpoint] || checkpoint
+
       tokenizer = RbbtPython.call_method("rbbt_dm.huggingface", :load_tokenizer,
-        @model_options[:task], checkpoint, **(IndiferentHash.setup(model_options[:tokenizer_args]) || {}))
+        @model_options[:task], tokenizer_checkpoint, **(IndiferentHash.setup(model_options[:tokenizer_args]) || {}))
 
       [model, tokenizer]
     end
@@ -75,7 +45,7 @@ class HuggingfaceModel < TorchModel
       checkpoint_dir = File.join(tmpdir, 'checkpoints')
     end
 
-    dataset_file = HuggingfaceModel.tsv_dataset(tsv_file, texts)
+    dataset_file = TorchModel.text_dataset(tsv_file, texts)
     training_args_obj = RbbtPython.call_method("rbbt_dm.huggingface", :training_args, checkpoint_dir, @model_options[:training_args])
 
     begin
@@ -102,7 +72,7 @@ class HuggingfaceModel < TorchModel
     end
 
     training_args_obj = RbbtPython.call_method("rbbt_dm.huggingface", :training_args, checkpoint_dir, @model_options[:training_args])
-    dataset_file = HuggingfaceModel.tsv_dataset(tsv_file, texts, labels, @model_options[:class_labels])
+    dataset_file = HuggingfaceModel.text_dataset(tsv_file, texts, labels, @model_options[:class_labels])
 
     RbbtPython.call_method("rbbt_dm.huggingface", :train_model, model, tokenizer, training_args_obj, dataset_file, @model_options[:class_weights])
 
lib/rbbt/vector/model/python.rb ADDED
@@ -0,0 +1,33 @@
+require 'rbbt/vector/model'
+require 'rbbt/util/python'
+
+RbbtPython.add_path Rbbt.python.find(:lib)
+RbbtPython.init_rbbt
+
+class PythonModel < VectorModel
+  attr_accessor :python_class, :python_module
+  def initialize(dir, python_class = nil, python_module = nil, model_options = nil)
+    python_module = :model if python_module.nil?
+    model_options, python_module = python_module, :model if model_options.nil? && Hash === python_module
+    model_options = {} if model_options.nil?
+
+    super(dir, model_options)
+
+    @python_class = python_class
+    @python_module = python_module
+
+    init_model do
+      RbbtPython.add_path @directory
+      RbbtPython.class_new_obj(@python_module, @python_class, **model_options)
+    end if python_class
+
+    eval_model do |features,list=false|
+      init
+      if list
+        model.eval(features)
+      else
+        model.eval([features])[0]
+      end
+    end
+  end
+end
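The new PythonModel wraps a plain Python class behind the VectorModel eval interface. A minimal usage sketch, assuming a model.py that defines a TestModel class with a delta argument in the model directory (both names come from test/rbbt/vector/model/test_python.rb further down, not from the library itself):

    # Hedged sketch: 'TestModel' and delta are illustrative, mirroring the test below
    model = PythonModel.new dir, 'TestModel', :model, delta: 1
    model.eval(1)              # => 2; single values go through eval_model with list=false
    model.eval_list([3, 5])    # => [4, 6]; lists are handed to the Python eval as-is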
lib/rbbt/vector/model/pytorch_lightning.rb CHANGED
@@ -2,34 +2,30 @@ require 'rbbt/vector/model/torch'
 
 class PytorchLightningModel < TorchModel
   attr_accessor :loader, :val_loader, :trainer
-  def initialize(module_name, class_name, dir = nil, model_options = {})
-    super(dir, model_options)
-    @module_name = module_name
-    @class_name = class_name
-
-    init_model do
-      RbbtPython.pyimport @module_name
-      RbbtPython.class_new_obj(@module_name, @class_name, @model_options[:model_args] || {})
-    end
+  def initialize(...)
+    super(...)
 
     train_model do |features,labels|
       model = init
-      raise "Use the loader" if @loader.nil?
-      raise "Use the trainer" if @trainer.nil?
-
-      trainer.fit(model, @loader, @val_loader)
-    end
-
-    eval_model do |features,list|
-      if list
-        model.call(RbbtPython.call_method(:torch, :tensor, features))
-      else
-        model.call(RbbtPython.call_method(:torch, :tensor, [features]))
+      loader = self.loader
+      val_loader = self.val_loader
+      if (features && features.any?) && loader.nil?
+        TmpFile.with_file do |tsv_dataset_file|
+          TorchModel.feature_dataset(tsv_dataset_file, features, labels)
+          RbbtPython.pyimport :rbbt_dm
+          loader = RbbtPython.rbbt_dm.tsv(tsv_dataset_file)
+        end
       end
+      trainer.fit(model, loader, val_loader)
+      TorchModel.save_architecture(model, model_path) if @directory
+      TorchModel.save_state(model, model_path) if @directory
     end
-
   end
-end
 
-if __FILE__ == $0
+  def trainer
+    @trainer ||= begin
+      options = @model_options[:training_args] || @model_options[:trainer_args]
+      RbbtPython.class_new_obj("pytorch_lightning", "Trainer", options || {})
+    end
+  end
 end
lib/rbbt/vector/model/torch/dataloader.rb ADDED
@@ -0,0 +1,58 @@
+class TorchModel
+  def self.feature_tsv(elements, labels = nil, class_labels = nil)
+    tsv = TSV.setup({}, :key_field => "ID", :fields => ["features"], :type => :flat)
+    if labels
+      tsv.fields = tsv.fields + ["label"]
+      labels = case class_labels
+               when Array
+                 labels.collect{|l| class_labels.index l}
+               when Hash
+                 inverse_class_labels = {}
+                 class_labels.each{|c,l| inverse_class_labels[l] = c }
+                 labels.collect{|l| inverse_class_labels[l]}
+               else
+                 labels
+               end
+      elements.zip(labels).each_with_index do |p,i|
+        features, label = p
+        id = i
+        if Array === features
+          tsv[id] = features + [label]
+        else
+          tsv[id] = [features, label]
+        end
+      end
+    else
+      elements.each_with_index do |features,i|
+        id = i
+        if Array === features
+          tsv[id] = features
+        else
+          tsv[id] = [features]
+        end
+      end
+    end
+    tsv
+  end
+
+  def self.feature_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
+    tsv = feature_tsv(elements, labels, class_labels)
+    Open.write(tsv_dataset_file, tsv.to_s)
+    tsv_dataset_file
+  end
+
+  def self.text_dataset(tsv_dataset_file, elements, labels = nil, class_labels = nil)
+    elements = elements.collect{|e| e.gsub("\n", ' ') }
+    tsv = feature_tsv(elements, labels, class_labels)
+    if labels.nil?
+      tsv.fields[0] = "text"
+      tsv.type = :single
+    else
+      tsv.fields[0] = "text"
+      tsv.type = :list
+    end
+    Open.write(tsv_dataset_file, tsv.to_s)
+    tsv_dataset_file
+  end
+
+end
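For orientation, a hedged sketch of how these dataset helpers might be called; the feature values and labels are illustrative:

    # feature_dataset writes a TSV keyed by row ID (fields "features" plus "label" when labels are given);
    # an Array of class_labels maps each label to its index
    TmpFile.with_file do |file|
      TorchModel.feature_dataset(file, [[1.0, 2.0], [3.0, 4.0]], ["pos", "neg"], ["neg", "pos"])
    end
    # text_dataset does the same for text, collapsing newlines; it is what HuggingfaceModel now uses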
lib/rbbt/vector/model/torch/helpers.rb ADDED
@@ -0,0 +1,52 @@
+class TorchModel
+  module Tensor
+    def to_ruby
+      RbbtPython.numpy2ruby(self)
+    end
+    def self.setup(obj)
+      obj.extend Tensor
+    end
+  end
+
+  def self.init_python
+    RbbtPython.pyimport :torch
+    RbbtPython.pyimport :rbbt
+    RbbtPython.pyimport :rbbt_dm
+    RbbtPython.pyfrom :rbbt_dm, import: :util
+    RbbtPython.pyfrom :torch, import: :nn
+  end
+
+  def self.optimizer(model, training_args)
+    begin
+      learning_rate = training_args[:learning_rate] || 0.01
+      RbbtPython.torch.optim.SGD.new(model.parameters(), lr: learning_rate)
+    end
+  end
+
+  def self.device(model_options)
+    case model_options[:device]
+    when String, Symbol
+      RbbtPython.torch.device(model_options[:device].to_s)
+    when nil
+      RbbtPython.rbbt_dm.util.device()
+    else
+      model_options[:device]
+    end
+  end
+
+  def self.dtype(model_options)
+    case model_options[:dtype]
+    when String, Symbol
+      RbbtPython.torch.call(model_options[:dtype])
+    when nil
+      nil
+    else
+      model_options[:dtype]
+    end
+  end
+
+  def self.tensor(obj, device, dtype)
+    RbbtPython.torch.tensor(obj, dtype: dtype, device: device)
+  end
+
+end
lib/rbbt/vector/model/torch/introspection.rb ADDED
@@ -0,0 +1,31 @@
+class TorchModel
+  def self.get_layer(model, layer = nil)
+    if layer.nil?
+      model
+    else
+      layer.split(".").inject(model){|acc,l| PyCall.getattr(acc, l.to_sym) }
+    end
+  end
+  def get_layer(...); TorchModel.get_layer(model, ...); end
+
+  def self.get_weights(model, layer = nil)
+    Tensor.setup PyCall.getattr(get_layer(model, layer), :weight)
+  end
+  def get_weights(...); TorchModel.get_weights(model, ...); end
+
+  def self.freeze(layer)
+    begin
+      PyCall.getattr(layer, :weight).requires_grad = false
+    rescue
+    end
+    RbbtPython.iterate(layer.children) do |layer|
+      freeze(layer)
+    end
+  end
+  def self.freeze_layer(model, layer)
+    layer = get_layer(model, layer)
+    freeze(layer)
+  end
+  def freeze_layer(...); TorchModel.freeze_layer(model, ...); end
+
+end
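A hedged sketch of the introspection helpers in use; the 'fc' layer name is illustrative and matches the pytorch_lightning test further down:

    w = model.get_weights('fc').to_ruby   # weight tensor of a dotted layer path, converted via Tensor#to_ruby
    model.freeze_layer('fc')              # disables gradient updates for that layer's weight and, recursively, its children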
lib/rbbt/vector/model/torch/load_and_save.rb ADDED
@@ -0,0 +1,30 @@
+class TorchModel
+  def self.model_architecture(model_path)
+    model_path + '.architecture'
+  end
+
+  def self.save_state(model, model_path)
+    Log.debug "Saving model state into #{model_path}"
+    RbbtPython.torch.save(model.state_dict(), model_path)
+  end
+
+  def self.load_state(model, model_path)
+    return model unless Open.exists?(model_path)
+    Log.debug "Loading model state from #{model_path}"
+    model.load_state_dict(RbbtPython.torch.load(model_path))
+    model
+  end
+
+  def self.save_architecture(model, model_path)
+    model_architecture = model_architecture(model_path)
+    Log.debug "Saving model architecture into #{model_architecture}"
+    RbbtPython.torch.save(model, model_architecture)
+  end
+
+  def self.load_architecture(model_path)
+    model_architecture = model_architecture(model_path)
+    return unless Open.exists?(model_architecture)
+    Log.debug "Loading model architecture from #{model_architecture}"
+    RbbtPython.torch.load(model_architecture)
+  end
+end
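A hedged sketch of the save/load round trip these helpers provide; path is an illustrative file location, the Linear module mirrors the torch test below, and TorchModel.init_python is assumed to have run so RbbtPython.torch is available:

    linear = RbbtPython.torch.nn.Linear.new(1, 1)
    TorchModel.save_architecture(linear, path)    # writes path + '.architecture'
    TorchModel.save_state(linear, path)           # writes the state_dict to path
    restored = TorchModel.load_architecture(path)
    TorchModel.load_state(restored, path)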
lib/rbbt/vector/model/torch.rb CHANGED
@@ -1,37 +1,71 @@
-require 'rbbt/vector/model'
-require 'rbbt/util/python'
+require_relative 'python'
 
-RbbtPython.add_path Rbbt.python.find(:lib)
-RbbtPython.init_rbbt
+class TorchModel < PythonModel
 
-class TorchModel < VectorModel
+  attr_accessor :model, :criterion, :optimizer, :training_args
 
-  attr_accessor :model
+  def initialize(...)
+    TorchModel.init_python
+    super(...)
+    @training_args = model_options[:training_args] || {}
 
-  def self.get_layer(model, layer)
-    layer.split(".").inject(model){|acc,l| PyCall.getattr(acc, l.to_sym) }
-  end
+    init_model do
+      model = TorchModel.load_architecture(model_path)
+      if model.nil?
+        RbbtPython.add_path @directory
+        RbbtPython.class_new_obj(@python_module, @python_class, **model_options)
+      else
+        TorchModel.load_state(model, model_path)
+      end
+    end
 
-  def self.get_weights(model, layer)
-    PyCall.getattr(get_layer(model, layer), :weight)
-  end
+    eval_model do |features,list=false|
+      init
+      @device ||= TorchModel.device(model_options)
+      @dtype ||= TorchModel.dtype(model_options)
+      model.to(@device)
 
-  def self.freeze(layer)
-    begin
-      PyCall.getattr(layer, :weight).requires_grad = false
-    rescue
-    end
-    RbbtPython.iterate(layer.children) do |layer|
-      freeze(layer)
+      tensor = list ? TorchModel.tensor(features, @device, @dtype) : TorchModel.tensor([features], @device, @dtype)
+
+      loss, res = model.call(tensor)
+
+      res = loss if res.nil?
+
+      res = TorchModel::Tensor.setup(list ? res : res[0])
+
+      res
     end
-  end
 
-  def self.freeze_layer(model, layer)
-    layer = get_layer(model, layer)
-    freeze(layer)
-  end
+    train_model do |features,labels|
+      init
+      @device ||= TorchModel.device(model_options)
+      @dtype ||= TorchModel.dtype(model_options)
+      model.to(@device)
+      @optimizer ||= TorchModel.optimizer(model, training_args)
+      epochs = training_args[:epochs] || 3
+
+      inputs = TorchModel.tensor(features, @device, @dtype)
+      #target = TorchModel.tensor(labels.collect{|v| [v] }, @device, @dtype)
+      target = TorchModel.tensor(labels, @device, @dtype)
 
-  def initialize(dir, model_options = {})
-    super(dir, model_options)
+      Log::ProgressBar.with_bar epochs, :desc => "Training" do |bar|
+        epochs.times do |i|
+          @optimizer.zero_grad()
+          outputs = model.call(inputs)
+          outputs = outputs.squeeze() if target.dim() == 1
+          loss = criterion.call(outputs, target)
+          loss.backward()
+          @optimizer.step
+          Log.debug "Epoch #{i}, loss #{loss}"
+          bar.tick
+        end
+      end
+      TorchModel.save_architecture(model, model_path) if @directory
+      TorchModel.save_state(model, model_path) if @directory
+    end
   end
 end
+require_relative 'torch/helpers'
+require_relative 'torch/dataloader'
+require_relative 'torch/introspection'
+require_relative 'torch/load_and_save'
lib/rbbt/vector/model.rb CHANGED
@@ -448,10 +448,10 @@ cat(paste(label, sep="\\n", collapse="\\n"));
       end
 
       test_set = feature_folds[fix]
-      train_set = feature_folds.values_at(*rest).inject([]){|acc,e| acc += e; acc}
+      train_set = feature_folds.values_at(*rest).flatten(1)
 
      test_labels = labels_folds[fix]
-      train_labels = labels_folds.values_at(*rest).flatten
+      train_labels = labels_folds.values_at(*rest).flatten(1)
 
      @features = train_set
      @labels = train_labels
python/rbbt_dm/__init__.py CHANGED
@@ -1,6 +1,8 @@
-from torch.utils.data import Dataset, DataLoader
+import rbbt
+import torch
+from .util import *
 
-class TSVDataset(Dataset):
+class TSVDataset(torch.utils.data.Dataset):
     def __init__(self, tsv):
         self.tsv = tsv
 
@@ -20,29 +22,10 @@ class TSVDataset(Dataset):
         return len(self.tsv)
 
 def tsv_dataset(filename, *args, **kwargs):
-    import rbbt
     return TSVDataset(rbbt.tsv(filename, *args, **kwargs))
 
 def tsv(*args, **kwargs):
     return tsv_dataset(*args, **kwargs)
 
 def data_dir():
-    import rbbt
     return rbbt.path('var/rbbt_dm/data')
-
-if __name__ == "__main__":
-    import rbbt
-
-    filename = "/home/miki/test/numeric.tsv"
-    ds = tsv(filename)
-
-    dl = DataLoader(ds, batch_size=1)
-
-    for f, l in iter(dl):
-        print(".")
-        print(f[0,:])
-        print(l[0])
-
-
-
-
python/rbbt_dm/huggingface.py CHANGED
@@ -1,4 +1,6 @@
 #{{{ LOAD MODEL
+import datasets
+import rbbt
 
 def import_module_class(module, class_name):
     if (not module == None):
@@ -57,12 +59,15 @@ def eval_model(model, tokenizer, texts, return_logits = True):
 #{{{ TRAIN AND PREDICT
 
 def load_tsv(tsv_file):
-    from datasets import load_dataset
-    return load_dataset('csv', data_files=[tsv_file], sep="\t")
+    tsv = rbbt.tsv(tsv_file)
+    print(tsv)
+    ds = datasets.Dataset.from_pandas(tsv)
+    d = datasets.DatasetDict()
+    d["train"] = ds
+    return d
 
 def load_json(json_file):
-    from datasets import load_dataset
-    return load_dataset('json', data_files=[json_file])
+    return datasets.load_dataset('json', data_files=[json_file])
 
 def tokenize_dataset(tokenizer, dataset):
     return dataset.map(lambda subset: subset if ("input_ids" in subset.keys()) else tokenizer(subset["text"], truncation=True), batched=True)
python/rbbt_dm/util.py CHANGED
@@ -28,3 +28,5 @@ def data_directory():
     from pathlib import Path
     print(Path.home())
 
+def model_device(model):
+    return next(model.parameters()).device
test/rbbt/vector/model/test_huggingface.rb CHANGED
@@ -42,7 +42,7 @@ class TestHuggingface < Test::Unit::TestCase
     assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true)["input_ids"].__len__
   end
 
-  def _test_sst_eval
+  def test_sst_eval
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
 
@@ -56,7 +56,7 @@ class TestHuggingface < Test::Unit::TestCase
   end
 
 
-  def test_sst_train
+  def _test_sst_train
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
 
test/rbbt/vector/model/test_python.rb ADDED
@@ -0,0 +1,31 @@
+require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
+require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
+
+class TestPythonModel < Test::Unit::TestCase
+  def test_linear
+    model = nil
+
+    TmpFile.with_dir do |dir|
+
+      Misc.in_dir dir do
+        Open.write 'model.py', <<-EOF
+class TestModel:
+    def __init__(self, delta):
+        self.delta = delta
+
+    def eval(self, x):
+        return [e + self.delta for e in x]
+        EOF
+        model = PythonModel.new dir, 'TestModel', :model, delta: 1
+
+        assert_equal 2, model.eval(1)
+        assert_equal [4, 6], model.eval_list([3, 5])
+
+        model = PythonModel.new dir, 'TestModel', :model, delta: 2
+
+        assert_equal 3, model.eval(1)
+      end
+    end
+  end
+end
+
test/rbbt/vector/model/test_pytorch_lightning.rb CHANGED
@@ -2,82 +2,96 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_he
 require 'rbbt/vector/model/pytorch_lightning'
 
 class TestPytorchLightning < Test::Unit::TestCase
-  def test_clustering
-    nsamples = 10
-    ngenes = 10000
-    samples = nsamples.times.collect{|i| "Sample-#{i}" }
-    data = TSV.setup({}, :key_field => "Gene", :fields => samples + ["cluster"], :type => :list, :cast => :to_f)
-
-    profiles = []
-    p0 = 3
-    p1 = 7
-    profiles[0] = nsamples.times.collect{ rand() + p0 }
-    profiles[1] = nsamples.times.collect{ rand() + p1 }
-
-    ngenes.times do |genen|
-      gene = "Gene-#{genen}"
-      cluster = genen % 2
-      values = profiles[cluster].collect do |m|
-        rand() + m
-      end
-      data[gene] = values + [cluster]
-    end
+  def test_regresion
+    points = 10
+    a = 1
+    b = 1
 
+    x = (0..points - 1)
+    y = points.times.collect{|p| p }
+
     python = <<~EOF
-      import torch
-      from torch import nn
-      from torch.nn import functional as F
-      from torch.utils.data import DataLoader
-      from torch.utils.data import random_split
-      from torchvision.datasets import MNIST
-      from torchvision import transforms
       import pytorch_lightning as pl
+      import numpy as np
+      import torch
+      from torch.nn import MSELoss
+      from torch.optim import Adam
+      from torch.utils.data import DataLoader, Dataset
+      import torch.nn as nn
+
+
+      class SimpleDataset(Dataset):
+          def __init__(self):
+              X = np.arange(10000)
+              y = X * 2
+              X = [[_] for _ in X]
+              y = [[_] for _ in y]
+              self.X = torch.Tensor(X)
+              self.y = torch.Tensor(y)
+
+          def __len__(self):
+              return len(self.y)
+
+          def __getitem__(self, idx):
+              return {"X": self.X[idx], "y": self.y[idx]}
+
 
       class TestPytorchLightningModel(pl.LightningModule):
-          def __init__(self, input_size=10, internal_dim=1):
-              super().__init__()
-              self.model = nn.Tanh()
-
-          def configure_optimizers(self):
-              optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
-              return optimizer
-
-          @torch.cuda.amp.autocast(True)
-          def forward(self, x):
-              x = x.to(self.dtype)
-              return self.model(x).squeeze()
-
-          @torch.cuda.amp.autocast(True)
-          def training_step(self, train_batch, batch_idx):
-              x, y = train_batch
-              x = x.to(self.dtype)
-              y = y.to(self.dtype)
-              y_hat = self.model(x).squeeze()
-              loss = F.mse_loss(y, y_hat)
-              self.log('train_loss', loss)
-              return loss
-
-          @torch.cuda.amp.custom_fwd(cast_inputs=torch.float64)
-          def validation_step(self, val_batch, batch_idx):
-              x, y = train_batch
-              y_hat = self.model(x)
-              loss = F.mse_loss(y, y_hat)
-              self.log('val_loss', loss)
+          def __init__(self):
+              super().__init__()
+              self.fc = nn.Linear(1, 1)
+              self.criterion = MSELoss()
+
+          def forward(self, inputs, labels=None):
+              outputs = self.fc(inputs)
+              loss = 0
+              if labels is not None:
+                  loss = self.criterion(outputs, labels)
+              return loss, outputs
+
+          def train_dataloader(self):
+              dataset = SimpleDataset()
+              return DataLoader(dataset, batch_size=1000)
 
+          def training_step(self, batch, batch_idx):
+              input_ids = batch["X"]
+              labels = batch["y"]
+              loss, outputs = self(input_ids, labels)
+              return {"loss": loss}
+
+          def configure_optimizers(self):
+              optimizer = Adam(self.parameters(), lr=0.1)
+              return optimizer
    EOF
 
-    with_python(python) do |pkg|
-      model = PytorchLightningModel.new pkg , "TestPytorchLightningModel", nil, model_args: {internal_dim: 1}
-      TmpFile.with_file(data.to_s) do |data_file|
-        ds = RbbtPython.call_method "rbbt_dm", :tsv, filename: data_file
-        model.loader = RbbtPython.class_new_obj("torch.utils.data", :DataLoader, dataset: ds, batch_size: 64)
-        model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", gpus: 1, max_epochs: 5, precision: 16)
-      end
+    TmpFile.with_dir do |dir|
+      Open.write(File.join(dir, 'model.py'), python)
+      model = PytorchLightningModel.new dir, "TestPytorchLightningModel"
+      model.init
+
+      model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", max_epochs: 10, precision: 16)
+      model.init
+
       model.train
-      encoding = model.eval_list(data.values.collect{|v| v[0..-2] }).detach().cpu().numpy()
-      iii encoding[0..10]
+
+      w = model.get_weights('fc').to_ruby.first.first
+
+      assert w > 1.8
+      assert w < 2.2
+
+      res = model.eval(10.0)
+      assert_equal res, (10 * w)
+      assert res > 1.8 * 10.0
+      assert res < 2.2 * 10.0
+
+      res = model.eval([10.0])
+      res = model.eval_list([[10.0], [11.2], [14.3]])
+      assert_equal 3, RbbtPython.numpy2ruby(res).length
+
+      model = VectorModel.new dir
+      model.init
+
    end
  end
-
 end
 
test/rbbt/vector/model/test_torch.rb ADDED
@@ -0,0 +1,61 @@
+require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
+require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
+
+class TestTorch < Test::Unit::TestCase
+  def test_linear
+    model = nil
+
+    TmpFile.with_dir do |dir|
+
+      # Create model
+
+      model = TorchModel.new dir
+      model.model = RbbtPython.torch.nn.Linear.new(1, 1)
+      model.criterion = RbbtPython.torch.nn.MSELoss.new()
+
+      model.extract_features do |f|
+        [f]
+      end
+
+      model.post_process do |v,list|
+        list ? v.to_ruby.collect{|vv| vv.first } : v.to_ruby.first
+      end
+
+      # Train model
+
+      model.add 5.0, [10.0]
+      model.add 10.0, [20.0]
+
+      model.training_args[:epochs] = 1000
+      model.train
+
+      w = model.get_weights.to_ruby.first.first
+
+      assert w > 1.8
+      assert w < 2.2
+
+      # Load the model again
+
+      model = VectorModel.new dir
+
+      # Test model
+
+      y = model.eval(100.0)
+
+      assert(y > 150.0)
+      assert(y < 250.0)
+
+      test = [1.0, 5.0, 10.0, 20.0]
+      input_sum = Misc.sum(test)
+      sum = Misc.sum(model.eval_list(test))
+      assert sum > 0.8 * input_sum * 2
+      assert sum < 1.2 * input_sum * 2
+
+      w = TorchModel.get_weights(model.model).to_ruby.first.first
+
+      assert w > 1.8
+      assert w < 2.2
+    end
+  end
+end
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-dm
 version: !ruby/object:Gem::Version
-  version: 1.2.9
+  version: 1.3.0
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-08-30 00:00:00.000000000 Z
+date: 2023-12-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util
@@ -108,12 +108,17 @@ files:
 - lib/rbbt/vector/model.rb
 - lib/rbbt/vector/model/huggingface.rb
 - lib/rbbt/vector/model/huggingface/masked_lm.rb
+- lib/rbbt/vector/model/python.rb
 - lib/rbbt/vector/model/pytorch_lightning.rb
 - lib/rbbt/vector/model/random_forest.rb
 - lib/rbbt/vector/model/spaCy.rb
 - lib/rbbt/vector/model/svm.rb
 - lib/rbbt/vector/model/tensorflow.rb
 - lib/rbbt/vector/model/torch.rb
+- lib/rbbt/vector/model/torch/dataloader.rb
+- lib/rbbt/vector/model/torch/helpers.rb
+- lib/rbbt/vector/model/torch/introspection.rb
+- lib/rbbt/vector/model/torch/load_and_save.rb
 - lib/rbbt/vector/model/util.rb
 - python/rbbt_dm/__init__.py
 - python/rbbt_dm/atcold/__init__.py
@@ -143,10 +148,12 @@ files:
 - test/rbbt/test_stan.rb
 - test/rbbt/vector/model/huggingface/test_masked_lm.rb
 - test/rbbt/vector/model/test_huggingface.rb
+- test/rbbt/vector/model/test_python.rb
 - test/rbbt/vector/model/test_pytorch_lightning.rb
 - test/rbbt/vector/model/test_spaCy.rb
 - test/rbbt/vector/model/test_svm.rb
 - test/rbbt/vector/model/test_tensorflow.rb
+- test/rbbt/vector/model/test_torch.rb
 - test/rbbt/vector/test_model.rb
 - test/test_helper.rb
 homepage: http://github.com/mikisvaz/rbbt-phgx
@@ -167,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.4.19
+rubygems_version: 3.5.0.dev
 signing_key:
 specification_version: 4
 summary: Data-mining and statistics
@@ -182,9 +189,11 @@ test_files:
 - test/rbbt/test_stan.rb
 - test/rbbt/vector/model/huggingface/test_masked_lm.rb
 - test/rbbt/vector/model/test_huggingface.rb
+- test/rbbt/vector/model/test_python.rb
 - test/rbbt/vector/model/test_pytorch_lightning.rb
 - test/rbbt/vector/model/test_spaCy.rb
 - test/rbbt/vector/model/test_svm.rb
 - test/rbbt/vector/model/test_tensorflow.rb
+- test/rbbt/vector/model/test_torch.rb
 - test/rbbt/vector/test_model.rb
 - test/test_helper.rb