RubyGems - rbbt-dm - Versions diffs - 1.2.7 → 1.2.9 - Mend

rbbt-dm 1.2.7 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

checksums.yaml +4 -4
data/lib/rbbt/matrix/barcode.rb +2 -2
data/lib/rbbt/matrix/differential.rb +3 -3
data/lib/rbbt/matrix/knowledge_base.rb +1 -1
data/lib/rbbt/plots/bar.rb +1 -1
data/lib/rbbt/stan.rb +1 -1
data/lib/rbbt/statistics/hypergeometric.rb +2 -1
data/lib/rbbt/vector/model/huggingface/masked_lm.rb +50 -0
data/lib/rbbt/vector/model/huggingface.rb +57 -38
data/lib/rbbt/vector/model/pytorch_lightning.rb +35 -0
data/lib/rbbt/vector/model/random_forest.rb +1 -1
data/lib/rbbt/vector/model/spaCy.rb +8 -6
data/lib/rbbt/vector/model/tensorflow.rb +6 -5
data/lib/rbbt/vector/model/torch.rb +37 -0
data/lib/rbbt/vector/model.rb +82 -52
data/python/rbbt_dm/__init__.py +48 -1
data/python/rbbt_dm/atcold/__init__.py +0 -0
data/python/rbbt_dm/atcold/plot_lib.py +141 -0
data/python/rbbt_dm/atcold/spiral.py +27 -0
data/python/rbbt_dm/huggingface.py +57 -26
data/python/rbbt_dm/language_model.py +70 -0
data/python/rbbt_dm/util.py +30 -0
data/share/spaCy/gpu/textcat_accuracy.conf +2 -1
data/test/rbbt/vector/model/huggingface/test_masked_lm.rb +41 -0
data/test/rbbt/vector/model/test_huggingface.rb +258 -27
data/test/rbbt/vector/model/test_pytorch_lightning.rb +83 -0
data/test/rbbt/vector/model/test_spaCy.rb +1 -1
data/test/rbbt/vector/model/test_tensorflow.rb +3 -0
data/test/rbbt/vector/test_model.rb +25 -26
data/test/test_helper.rb +13 -0
metadata +26 -16
data/lib/rbbt/tensorflow.rb +0 -43
data/lib/rbbt/vector/model/huggingface.old.rb +0 -160

data/python/rbbt_dm/language_model.py ADDED Viewed

@@ -0,0 +1,70 @@
+def group_texts(examples, block_size=None):
+    """Concatenate tokenized examples and re-split them into fixed-size blocks.
+
+    Args:
+        examples: dict mapping each feature name (must include "input_ids")
+            to a list of per-example lists.
+        block_size: length of every output chunk. When omitted, a
+            module-level ``block_size`` is used if one has been set,
+            otherwise 128.
+
+    Returns:
+        dict with the keys of ``examples`` plus "labels" (a copy of the
+        "input_ids" chunk list). Every value is a list of chunks of exactly
+        ``block_size`` items; the trailing remainder that does not fill a
+        whole block is dropped.
+    """
+    from itertools import chain
+
+    if block_size is None:
+        # This module never defines ``block_size`` itself; honour a value
+        # injected into the module namespace, else fall back to a default
+        # instead of failing with NameError.
+        block_size = globals().get("block_size", 128)
+    # Concatenate all texts (linear time, unlike sum(list_of_lists, [])).
+    concatenated_examples = {k: list(chain.from_iterable(v)) for k, v in examples.items()}
+    total_length = len(concatenated_examples[list(examples.keys())[0]])
+    # We drop the small remainder; padding could be added instead if the
+    # model supported it.
+    total_length = (total_length // block_size) * block_size
+    # Split by chunks of block_size.
+    result = {
+        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
+        for k, t in concatenated_examples.items()
+    }
+    result["labels"] = result["input_ids"].copy()
+    return result
+def whole_word_masking_data_collator(features, tokenizer=None, wwm_probability=None):
+    """Collate ``features`` after randomly masking whole words.
+
+    Each feature must carry "word_ids", "input_ids" and "labels". Tokens are
+    grouped by consecutive equal ``word_ids`` (``None`` entries are skipped).
+    Each word is selected with probability ``wwm_probability``; every token of
+    a selected word is replaced by ``tokenizer.mask_token_id`` in "input_ids"
+    and keeps its label, while all other labels become -100. The features are
+    modified in place ("word_ids" is removed).
+
+    Args:
+        features: list of feature dicts as described above.
+        tokenizer: object exposing ``mask_token_id``. When omitted, a
+            module-level ``tokenizer`` is used if one has been set.
+        wwm_probability: per-word masking probability. When omitted, a
+            module-level ``wwm_probability`` is used if set, otherwise 0.2.
+
+    Returns:
+        Whatever ``transformers.default_data_collator`` returns for the
+        masked features.
+
+    Raises:
+        ValueError: if no tokenizer is passed and none is set on the module.
+    """
+    import collections
+
+    import numpy as np
+    from transformers import default_data_collator
+
+    # This module defines neither ``tokenizer`` nor ``wwm_probability``;
+    # accept them as arguments and only fall back to injected module globals.
+    module_globals = globals()
+    if tokenizer is None:
+        tokenizer = module_globals.get("tokenizer")
+    if tokenizer is None:
+        raise ValueError("whole_word_masking_data_collator requires a tokenizer")
+    if wwm_probability is None:
+        wwm_probability = module_globals.get("wwm_probability", 0.2)
+    for feature in features:
+        word_ids = feature.pop("word_ids")
+        # Create a map between words and corresponding token indices
+        mapping = collections.defaultdict(list)
+        current_word_index = -1
+        current_word = None
+        for idx, word_id in enumerate(word_ids):
+            if word_id is not None:
+                if word_id != current_word:
+                    current_word = word_id
+                    current_word_index += 1
+                mapping[current_word_index].append(idx)
+        # Randomly mask words
+        mask = np.random.binomial(1, wwm_probability, (len(mapping),))
+        input_ids = feature["input_ids"]
+        labels = feature["labels"]
+        new_labels = [-100] * len(labels)
+        for word_id in np.where(mask)[0]:
+            word_id = word_id.item()
+            for idx in mapping[word_id]:
+                new_labels[idx] = labels[idx]
+                input_ids[idx] = tokenizer.mask_token_id
+        feature["labels"] = new_labels
+    return default_data_collator(features)
+# Manual smoke test: fill the [MASK] in a sample sentence with DistilBERT and
+# print the five highest-scoring completions.
+# NOTE(review): the guard compares against "__main__2", which is not the name
+# Python gives a script ("__main__"), so this block looks deliberately
+# disabled - confirm before relying on it.
+if __name__ == "__main__2":
+    from transformers import AutoModelForMaskedLM
+    from transformers import AutoTokenizer
+    import torch
+    model_checkpoint = "distilbert-base-uncased"
+    model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
+    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+    text = "This is a great [MASK]."
+    inputs = tokenizer(text, return_tensors="pt")
+    token_logits = model(**inputs).logits
+    # Find the location of [MASK] and extract its logits
+    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
+    mask_token_logits = token_logits[0, mask_token_index, :]
+    # Pick the [MASK] candidates with the highest logits
+    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
+    # Print the sentence once per candidate, with the mask token replaced.
+    for token in top_5_tokens:
+        print(f"'>>> {text.replace(tokenizer.mask_token, tokenizer.decode([token]))}'")

data/python/rbbt_dm/util.py ADDED Viewed

@@ -0,0 +1,30 @@
+import random
+import torch
+import numpy
+def set_seed(seed):
+    """
+    Seed the Python, NumPy and PyTorch random generators with ``seed``;
+    when CUDA is available, seed the CUDA generators too.
+    """
+    for seed_backend in (random.seed, numpy.random.seed, torch.manual_seed):
+        seed_backend(seed)
+    if not torch.cuda.is_available():
+        return
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+def deterministic():
+    """
+    Set ``cudnn.deterministic`` to True and ``cudnn.benchmark`` to False so
+    that GPU runs (if a GPU is used) are reproducible.
+    """
+    cudnn = torch.backends.cudnn
+    cudnn.benchmark = False
+    cudnn.deterministic = True
+def device():
+    """Return the ``cuda:0`` torch device when CUDA is available, else the CPU device."""
+    name = "cuda:0" if torch.cuda.is_available() else "cpu"
+    return torch.device(name)
+def data_directory():
+    """Print the current user's home directory and return it.
+
+    Returns:
+        pathlib.Path: the value of ``Path.home()``. Returning it (rather than
+        only printing) lets callers actually use the directory.
+    """
+    from pathlib import Path
+    home = Path.home()
+    print(home)
+    return home

data/share/spaCy/gpu/textcat_accuracy.conf CHANGED Viewed

@@ -20,7 +20,8 @@ factory = "transformer"
 [components.transformer.model]
 @architectures = "spacy-transformers.TransformerModel.v1"
-name = "emilyalsentzer/Bio_ClinicalBERT"
+name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
+#name = "emilyalsentzer/Bio_ClinicalBERT"
 tokenizer_config = {"use_fast": true}
 [components.transformer.model.get_spans]

data/test/rbbt/vector/model/huggingface/test_masked_lm.rb ADDED Viewed

@@ -0,0 +1,41 @@
+require File.join(File.expand_path(File.dirname(__FILE__)),'../../../..', 'test_helper.rb')
+require 'rbbt/vector/model/huggingface/masked_lm'
+# Exercises MaskedLMModel end to end: extend the vocabulary with a new token,
+# fine-tune on sentences that use it, and check predictions after reloading.
+class TestMaskedLM < Test::Unit::TestCase
+  # Trains on sentences involving a new "[GENE]" token and checks the mask
+  # predictions from the trained instance, from a new MaskedLMModel built on
+  # the same directory, and from a generic VectorModel on that directory.
+  def test_train_new_word
+    TmpFile.with_file do |dir|
+      checkpoint = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
+      mlm = MaskedLMModel.new checkpoint, dir, tokenizer_args: {max_length: 16, model_max_length: 16}
+      mod, tokenizer = mlm.init
+      # Add "[GENE]" only when the vocabulary lacks it, then resize the model
+      # embeddings to the new tokenizer length.
+      if tokenizer.vocab["[GENE]"].nil?
+        tokenizer.add_tokens("[GENE]")
+        mod.resize_token_embeddings(tokenizer.__len__)
+      end
+      # Second argument presumably lists the word(s) expected at [MASK] -
+      # verify against MaskedLMModel#add.
+      # NOTE(review): training uses "on tumor cells" while the assertions
+      # below use "in tumor cells" - confirm this is intentional.
+      100.times do
+        mlm.add "This [GENE] is [MASK] on tumor cells.", %w(expressed)
+        mlm.add "This [MASK] is expressed.", %w([GENE])
+      end
+      # Prediction expected before any training has happened.
+      assert_equal "protein", mlm.eval(["This [MASK] is expressed."])
+      mlm.train
+      assert_equal "[GENE]", mlm.eval(["This [MASK] is expressed."])
+      assert_equal "expressed", mlm.eval(["This [GENE] is [MASK] in tumor cells."])
+      # Same checks on a fresh MaskedLMModel pointing at the same directory.
+      # NOTE(review): max_length is passed here as a plain option rather than
+      # inside tokenizer_args as above - confirm both forms are supported.
+      mlm = MaskedLMModel.new checkpoint, dir, :max_length => 16
+      assert_equal "[GENE]", mlm.eval(["This [MASK] is expressed."])
+      assert_equal "expressed", mlm.eval(["This [GENE] is [MASK] in tumor cells."])
+      # Same checks through the generic VectorModel loader.
+      mlm = VectorModel.new dir
+      assert_equal "[GENE]", mlm.eval(["This [MASK] is expressed."])
+      assert_equal "expressed", mlm.eval(["This [GENE] is [MASK] in tumor cells."])
+    end
+  end
+end

data/test/rbbt/vector/model/test_huggingface.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'rbbt/vector/model/huggingface'
 class TestHuggingface < Test::Unit::TestCase
-  def test_options
+  def _test_options
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
       task = "SequenceClassification"
@@ -11,20 +11,20 @@ class TestHuggingface < Test::Unit::TestCase
       model = HuggingfaceModel.new task, checkpoint, dir, :class_labels => %w(bad good)
       iii model.eval "This is dog"
       iii model.eval "This is cat"
-      iii model.eval(["This is dog", "This is cat"])
+      iii model.eval_list(["This is dog", "This is cat"])
       model = VectorModel.new dir
-      iii model.eval(["This is dog", "This is cat"])
+      iii model.eval_list(["This is dog", "This is cat"])
     end
   end
-  def test_pipeline
+  def _test_pipeline
     require 'rbbt/util/python'
     model = VectorModel.new
     model.post_process do |elements|
       elements.collect{|e| e['label'] }
     end
-    model.eval_model do |file, elements|
+    model.eval_model do |elements|
       RbbtPython.run :transformers do
         classifier ||= transformers.pipeline("sentiment-analysis")
         classifier.call(elements)
@@ -33,21 +33,53 @@ class TestHuggingface < Test::Unit::TestCase
     assert_equal ["POSITIVE"], model.eval("I've been waiting for a HuggingFace course my whole life.")
   end
+  # Loads a tokenizer through rbbt_dm.huggingface.load_tokenizer with
+  # max_length and model_max_length set to 5, and checks that a longer
+  # sentence is truncated to 5 input ids both with an explicit max_length and
+  # with truncation alone.
+  # NOTE(review): the leading underscore presumably keeps the test runner
+  # from collecting this method - confirm it is meant to stay disabled.
+  def _test_tokenizer_size
+    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+    tokenizer = RbbtPython.call_method("rbbt_dm.huggingface", :load_tokenizer,
+                                       "MaskedLM", checkpoint, :max_length => 5, :model_max_length => 5)
+    assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true, max_length: 5)["input_ids"].__len__
+    assert_equal 5, tokenizer.call("This is a sentence that has several words", truncation: true)["input_ids"].__len__
+  end
-  def test_sst_eval
+  def _test_sst_eval
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-      model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
+      model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir, :tokenizer_args => {:max_length => 16}
       model.model_options[:class_labels] = ["Bad", "Good"]
-      assert_equal ["Bad", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal "Bad", model.eval("This is dog")
+      assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
     end
   end
+  # Fine-tunes the SST-2 checkpoint on 100 copies of one "Good" example and
+  # checks that both sample sentences are then labelled "Good", from the
+  # trained instance and from a VectorModel loaded from the same directory.
   def test_sst_train
+    TmpFile.with_file do |dir|
+      checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+      # NOTE(review): max_length is passed as a plain option here, while
+      # _test_sst_eval nests it under :tokenizer_args - confirm both work.
+      model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir, max_length: 128
+      model.model_options[:class_labels] = %w(Bad Good)
+      # Expected labels before any training.
+      assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
+      100.times do
+        model.add "Dog is good", "Good"
+      end
+      model.train
+      assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
+      # Reload through the generic VectorModel and repeat the check.
+      model = VectorModel.new dir
+      assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
+    end
+  end
+  def _test_sst_train_with_labels
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
@@ -55,28 +87,29 @@ class TestHuggingface < Test::Unit::TestCase
       model.model_options[:class_labels] = %w(Bad Good)
-      assert_equal ["Bad", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
       100.times do
-        model.add "Dog is good", 1
+        model.add "Dog is good", "Good"
       end
       model.train
-      assert_equal ["Good", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
       model = VectorModel.new dir
-      assert_equal ["Good", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
     end
   end
-  def test_sst_train_no_save
+  def _test_sst_train_no_save
     checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
     model = HuggingfaceModel.new "SequenceClassification", checkpoint
     model.model_options[:class_labels] = ["Bad", "Good"]
-    assert_equal ["Bad", "Good"], model.eval(["This is dog", "This is cat"])
+    assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
     100.times do
       model.add "Dog is good", 1
@@ -84,48 +117,50 @@ class TestHuggingface < Test::Unit::TestCase
     model.train
-    assert_equal ["Good", "Good"], model.eval(["This is dog", "This is cat"])
+    assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
   end
-  def test_sst_train_save_and_load
+  def _test_sst_train_save_and_load
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
       model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
       model.model_options[:class_labels] = ["Bad", "Good"]
-      assert_equal ["Bad", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal ["Bad", "Good"], model.eval_list(["This is dog", "This is cat"])
       100.times do
-        model.add "Dog is good", 1
+        model.add "Dog is good", "Good"
       end
       model.train
       model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
-      assert_equal ["Good", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
-      model_file = model.model_file
+      model_path = model.model_path
-      model = HuggingfaceModel.new "SequenceClassification", model_file
+      model = HuggingfaceModel.new "SequenceClassification", model_path
       model.model_options[:class_labels] = ["Bad", "Good"]
-      assert_equal ["Good", "Good"], model.eval(["This is dog", "This is cat"])
+      assert_equal ["Good", "Good"], model.eval_list(["This is dog", "This is cat"])
       model = VectorModel.new dir
-      assert_equal "Good", model.eval("This is dog")
+      assert_equal "Good", model.eval_list("This is dog")
     end
   end
-  def test_sst_stress_test
+  def _test_sst_stress_test
     TmpFile.with_file do |dir|
       checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
       model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
+      assert_equal 0, model.eval("This is dog")
       100.times do
         model.add "Dog is good", 1
         model.add "Cat is bad", 0
@@ -136,18 +171,214 @@ class TestHuggingface < Test::Unit::TestCase
       end
       Misc.benchmark 1000 do
-        model.eval(["This is good", "This is terrible", "This is dog", "This is cat", "Very different stuff", "Dog is bad", "Cat is good"])
+        model.eval_list(["This is good", "This is terrible", "This is dog", "This is cat", "Very different stuff", "Dog is bad", "Cat is good"])
       end
     end
   end
-  def test_mask_eval
+  def _test_mask_eval
     checkpoint = "bert-base-uncased"
     model = HuggingfaceModel.new "MaskedLM", checkpoint
-    assert_equal 3, model.eval(["Paris is the [MASK] of the France.", "The [MASK] worked very hard all the time.", "The [MASK] arrested the dangerous [MASK]."]).
+    assert_equal 3, model.eval_list(["Paris is the [MASK] of the France.", "The [MASK] worked very hard all the time.", "The [MASK] arrested the dangerous [MASK]."]).
       reject{|v| v.empty?}.length
   end
+  # Checks that registering "[GENE]" as a token shortens the encoding of a
+  # sentence containing it: the input ids are compared before and after
+  # add_tokens, with the model embeddings resized in between.
+  def _test_mask_eval_tokenizer
+    checkpoint = "bert-base-uncased"
+    model = HuggingfaceModel.new "MaskedLM", checkpoint
+    mod, tokenizer = model.init
+    # Encoding before the token is known to the tokenizer.
+    orig =  tokenizer.call("Hi [GENE]")["input_ids"]
+    tokenizer.add_tokens(["[GENE]"])
+    mod.resize_token_embeddings(tokenizer.__len__)
+    # Encoding after the token has been added.
+    new =  tokenizer.call("Hi [GENE]")["input_ids"]
+    assert orig.length > new.length
+  end
+  # Writes a small Python package (mypkg.mymodel) defining a custom
+  # RobertaPreTrainedModel subclass into a temporary directory, adds that
+  # directory to the Python path, and loads the class through HuggingfaceModel
+  # using the "module:Class" task string. The heredoc below is the Python
+  # source written to disk, so its contents are left exactly as authored.
+  def _test_custom_class
+    TmpFile.with_file do |dir|
+      Open.write File.join(dir, "mypkg/__init__.py"), ""
+      Open.write File.join(dir, "mypkg/mymodel.py"), <<~EOF
+# Esta clase es igual que la de RobertaForTokenClassification
+# Importamos los métodos necesarios
+import torch.nn as nn
+from transformers import RobertaConfig
+from transformers.modeling_outputs import TokenClassifierOutput
+from transformers.models.roberta.modeling_roberta import RobertaModel, RobertaPreTrainedModel
+# Creamos una clase que herede de RobertaPreTrainedModel
+class RobertaForTokenClassification_NER(RobertaPreTrainedModel):
+  config_class = RobertaConfig
+  def __init__(self, config):
+    # Se usa para inicializar el modelo Roberta
+    super().__init__(config)
+    # Numero de etiquetas que se van a clasificar (sería el número de etiquetas del corpus*2)
+    # Una correspondiente a la etiqueta I y otra a la B.
+    self.num_labels = config.num_labels
+    # No incorporamos pooling layer para devolver los hidden states de cada token (no sólo el CLS)
+    self.roberta = RobertaModel(config, add_pooling_layer=False)
+    self.dropout = nn.Dropout(config.hidden_dropout_prob)
+    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+    self.init_weights()
+  def forward(self, input_ids = None, attention_mask = None, token_type_ids = None, labels = None,
+              **kwargs):
+    # Obtenemos una codificación del input (los hidden states)
+    outputs = self.roberta(input_ids, attention_mask = attention_mask,
+                           token_type_ids = token_type_ids, **kwargs)
+    # A la salida de los hidden states le aplicamos la capa de dropout
+    sequence_output = self.dropout(outputs[0])
+    # Y posteriormente la capa de clasificación.
+    logits = self.classifier(sequence_output)
+    # Si labels tiene algún valor (lo que se hará durante el proceso de entrenamiento), se calculan las Loss
+    # para justar los pesos en el backprop.
+    loss = None
+    if labels is not None:
+      loss_fct = nn.CrossEntropyLoss()
+      loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+    return TokenClassifierOutput(loss=loss, logits=logits,
+                                 hidden_states=outputs.hidden_states,
+                                 attentions=outputs.attentions)
+      EOF
+      RbbtPython.add_path dir
+      biomedical_roberta = "PlanTL-GOB-ES/bsc-bio-ehr-es-cantemist"
+      model = HuggingfaceModel.new "mypkg.mymodel:RobertaForTokenClassification_NER", biomedical_roberta
+      # List evaluations are converted from result.predictions; single
+      # evaluations return the first entry of the "logits" output.
+      model.post_process do |result,is_list|
+        if is_list
+          RbbtPython.numpy2ruby result.predictions
+        else
+          result["logits"][0]
+        end
+      end
+      texto = "El paciente tiene un cáncer del pulmon"
+      # Position 5, class 1 of the per-token output is expected to be positive.
+      assert model.eval(texto)[5][1] > 0
+    end
+  end
+  # Checks that training changes the word-embedding weights: the
+  # 'distilbert.embeddings.word_embeddings' weights are read before and after
+  # training and at least one row must differ.
+  def _test_sst_train_word_embeddings
+    TmpFile.with_file do |dir|
+      checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+      model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
+      model.model_options[:class_labels] = %w(Bad Good)
+      mod, _tokenizer = model.init
+      weights_before = RbbtPython.numpy2ruby(
+        HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
+      100.times { model.add "Dog is good", "Good" }
+      model.train
+      weights_after = RbbtPython.numpy2ruby(
+        HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
+      # Indices of the rows whose values differ after training.
+      changed_rows = weights_after.each_with_index.select { |row, i| row != weights_before[i] }.map(&:last)
+      assert changed_rows.length > 0
+    end
+  end
+  # Checks that after HuggingfaceModel.freeze_layer(mod, 'distilbert') training
+  # leaves the word-embedding weights untouched: no row may differ between the
+  # weights read before and after training.
+  def _test_sst_freeze_word_embeddings
+    TmpFile.with_file do |dir|
+      checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+      model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
+      model.model_options[:class_labels] = %w(Bad Good)
+      mod, _tokenizer = model.init
+      HuggingfaceModel.freeze_layer(mod, 'distilbert')
+      weights_before = RbbtPython.numpy2ruby(
+        HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
+      100.times { model.add "Dog is good", "Good" }
+      model.train
+      weights_after = RbbtPython.numpy2ruby(
+        HuggingfaceModel.get_weights(mod, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
+      # Indices of the rows whose values differ after training.
+      changed_rows = weights_after.each_with_index.select { |row, i| row != weights_before[i] }.map(&:last)
+      assert changed_rows.length == 0
+    end
+  end
+  # Checks that trained word embeddings are saved with the model directory:
+  # a MaskedLM model loaded from the same directory must have identical
+  # embedding rows, while one loaded from the bare checkpoint must differ.
+  def _test_sst_save_word_embeddings
+    # Reads the word-embedding weights of a Python model as a Ruby structure.
+    embeddings_of = lambda do |py_model|
+      RbbtPython.numpy2ruby(
+        HuggingfaceModel.get_weights(py_model, 'distilbert.embeddings.word_embeddings').cpu.detach.numpy)
+    end
+    # Indices of the rows of +current+ that differ from +reference+.
+    changed_rows = lambda do |current, reference|
+      current.each_with_index.select { |row, i| row != reference[i] }.map(&:last)
+    end
+    TmpFile.with_file do |dir|
+      checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+      model = HuggingfaceModel.new "SequenceClassification", checkpoint, dir
+      model.model_options[:class_labels] = %w(Bad Good)
+      mod, _tokenizer = model.init
+      100.times { model.add "Dog is good", "Good" }
+      model.train
+      trained = embeddings_of.call(mod)
+      # Same directory: the embeddings must match the trained ones.
+      model = HuggingfaceModel.new "MaskedLM", checkpoint, dir
+      mod, _tokenizer = model.init
+      assert changed_rows.call(embeddings_of.call(mod), trained).length == 0
+      # No directory: the embeddings come from the checkpoint and must differ.
+      model = HuggingfaceModel.new "MaskedLM", checkpoint
+      mod, _tokenizer = model.init
+      assert changed_rows.call(embeddings_of.call(mod), trained).length > 0
+    end
+  end
 end

data/test/rbbt/vector/model/test_pytorch_lightning.rb ADDED Viewed

@@ -0,0 +1,83 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
+require 'rbbt/vector/model/pytorch_lightning'
+# Smoke test for PytorchLightningModel: builds a synthetic expression matrix,
+# defines a minimal LightningModule in an embedded Python source, trains it
+# and evaluates it on the same data.
+class TestPytorchLightning < Test::Unit::TestCase
+  # Generates 10000 genes x 10 samples drawn around two base profiles
+  # (alternating by gene index), with the cluster id as the last column, then
+  # trains and evaluates the embedded model on it.
+  def test_clustering
+    nsamples = 10
+    ngenes = 10000
+    samples = nsamples.times.collect{|i| "Sample-#{i}" }
+    data = TSV.setup({}, :key_field => "Gene", :fields => samples + ["cluster"], :type => :list, :cast => :to_f)
+    # Two base profiles, one per cluster, offset by p0 and p1.
+    profiles = []
+    p0 = 3
+    p1 = 7
+    profiles[0] = nsamples.times.collect{ rand() + p0 }
+    profiles[1] = nsamples.times.collect{ rand() + p1 }
+    # Each gene is its cluster's profile plus uniform noise.
+    ngenes.times do |genen|
+      gene = "Gene-#{genen}"
+      cluster = genen % 2
+      values = profiles[cluster].collect do |m|
+        rand() + m
+      end
+      data[gene] = values + [cluster]
+    end
+    # Python source of the model under test. validation_step unpacks its own
+    # val_batch argument (no train_batch name exists in that scope).
+    python = <<~EOF
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+from torch.utils.data import random_split
+from torchvision.datasets import MNIST
+from torchvision import transforms
+import pytorch_lightning as pl
+class TestPytorchLightningModel(pl.LightningModule):
+  def __init__(self, input_size=10, internal_dim=1):
+    super().__init__()
+    self.model = nn.Tanh()
+  def configure_optimizers(self):
+    optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+    return optimizer
+  @torch.cuda.amp.autocast(True)
+  def forward(self, x):
+    x = x.to(self.dtype)
+    return self.model(x).squeeze()
+  @torch.cuda.amp.autocast(True)
+  def training_step(self, train_batch, batch_idx):
+    x, y = train_batch
+    x = x.to(self.dtype)
+    y = y.to(self.dtype)
+    y_hat = self.model(x).squeeze()
+    loss = F.mse_loss(y, y_hat)
+    self.log('train_loss', loss)
+    return loss
+  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float64)
+  def validation_step(self, val_batch, batch_idx):
+    x, y = val_batch
+    y_hat = self.model(x)
+    loss = F.mse_loss(y, y_hat)
+    self.log('val_loss', loss)
+    EOF
+    with_python(python) do |pkg|
+      model = PytorchLightningModel.new pkg , "TestPytorchLightningModel", nil, model_args: {internal_dim: 1}
+      # The dataset is built from a temporary TSV dump of the data; loader and
+      # trainer are assigned while that file exists.
+      TmpFile.with_file(data.to_s) do |data_file|
+        ds = RbbtPython.call_method "rbbt_dm", :tsv, filename: data_file
+        model.loader = RbbtPython.class_new_obj("torch.utils.data", :DataLoader, dataset: ds, batch_size: 64)
+        model.trainer = RbbtPython.class_new_obj("pytorch_lightning", "Trainer", gpus: 1, max_epochs: 5, precision: 16)
+      end
+      model.train
+      # Evaluate on every row with the trailing cluster column dropped.
+      encoding = model.eval_list(data.values.collect{|v| v[0..-2] }).detach().cpu().numpy()
+      iii encoding[0..10]
+    end
+  end
+end

data/test/rbbt/vector/model/test_spaCy.rb CHANGED Viewed

@@ -100,7 +100,7 @@ class TestSpaCyModel < Test::Unit::TestCase
       )
-      Rbbt::Config.set 'gpu_id', nil, :spacy
+      Rbbt::Config.set 'gpu_id', 0, :spacy
       require 'rbbt/tsv/csv'
       url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
       tsv = TSV.csv(Open.open(url))

data/test/rbbt/vector/model/test_tensorflow.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
 require 'rbbt/vector/model/tensorflow'
+require 'rbbt/util/python'
 class TestTensorflowModel < Test::Unit::TestCase
@@ -10,6 +11,7 @@ class TestTensorflowModel < Test::Unit::TestCase
       model = TensorFlowModel.new(
         dir,
+        jit_compile: true,
         optimizer: 'adam',
         loss: 'sparse_categorical_crossentropy',
         metrics: ['accuracy']
@@ -53,5 +55,6 @@ class TestTensorflowModel < Test::Unit::TestCase
       assert sum.to_f / predictions.length > 0.7
     end
   end
 end