RubyGems - rbbt-dm - Versions diffs - 1.1.57 → 1.1.58 - Mend

rbbt-dm 1.1.57 → 1.1.58

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/LICENSE +1 -1
data/lib/rbbt/vector/model/random_forest.rb +11 -1
data/lib/rbbt/vector/model/spaCy.rb +8 -10
data/lib/rbbt/vector/model/svm.rb +3 -3
data/lib/rbbt/vector/model.rb +10 -0
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 072d57536b223931947dfd71d5e359961c6bfe44da0870cd2dbab440aa91ba6c
-  data.tar.gz: 7c5d2a3fc627992231b621b6efe4d2160aa093b11c89f31f728ede5121d2edc9
+  metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
+  data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
 SHA512:
-  metadata.gz: 5216b3179bc4a809829b79bc4c447159e88444dc7943da3d0c3643b728e3eb5a6da7c08a8538fb88db1e463f95e55e104976ac81f010aeaf729a6cb1c4ca1374
-  data.tar.gz: 12baae07f04ca3770dfef81c3166385badb7944d25b7072364631e5f93b419e04c53d5b7157934b3d1522ef83e84acf0c4e2c5d39ad3604b7ef8f5f460d2f750
+  metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
+  data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23

data/LICENSE CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2010-2013 Miguel Vázquez García
+Copyright (c) 2010-2022 Miguel Vázquez García
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the

data/lib/rbbt/vector/model/random_forest.rb CHANGED Viewed

@@ -16,11 +16,21 @@ model = randomForest(as.factor(label) ~ ., data = features);
 rbbt.require("randomForest");
 pred = names(model$forest$xlevels)
 for (p in pred) {
-  if (class(features[[p]]) == "factor") {
+  if (is.factor(features[[p]])) {
       features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
     }
 }
 label = predict(model, features);
     EOF
   end
+  def importance
+    TmpFile.with_file do |tmp|
+      tsv = R.run <<-EOF
+load(file="#{model_file}");
+rbbt.tsv.write('#{tmp}', model$importance)
+      EOF
+      TSV.open(tmp)
+    end
+  end
 end

data/lib/rbbt/vector/model/spaCy.rb CHANGED Viewed

@@ -30,6 +30,7 @@ class SpaCyModel < VectorModel
     @train_model = Proc.new do |file, features, labels|
       texts = features
       docs = []
+      unique_labels = labels.uniq
       tmpconfig = File.join(file, 'config')
       tmptrain = File.join(file, 'train.spacy')
       SpaCy.config(@config, tmpconfig)
@@ -37,14 +38,11 @@ class SpaCyModel < VectorModel
         nlp = SpaCy.nlp(lang)
         docs = []
         RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
-          doc.cats[label] = 1
-          #if %w(1 true pos).include?(label.to_s.downcase)
-          #  doc.cats["positive"] = 1
-          #  doc.cats["negative"] = 0
-          #else
-          #  doc.cats["positive"] = 0
-          #  doc.cats["negative"] = 1
-          #end
+          unique_labels.each do |other_label|
+            next if other_label == label
+            doc.cats[other_label] = false
+          end
+          doc.cats[label] = true
           docs << doc
         end
@@ -56,8 +54,9 @@ class SpaCyModel < VectorModel
       CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}",  "--gpu-id" => gpu)
     end
-    @eval_model = Proc.new do |file, features|
+    @eval_model = Proc.new do |file, features,list|
       texts = features
+      texts = [texts] unless list
       docs = []
       SpaCyModel.spacy do
@@ -68,7 +67,6 @@ class SpaCyModel < VectorModel
             cats = nlp.(text).cats
             bar.tick
             cats.sort_by{|l,v| v.to_f }.last.first
-            #cats['positive'] > cats['negative']  ? 1 : 0
           end
         end
       end

data/lib/rbbt/vector/model/svm.rb CHANGED Viewed

@@ -3,16 +3,16 @@ class SVMModel < VectorModel
   def initialize(dir)
     super(dir)
-    @extract_features = Proc.new{|element|
+    @extract_features ||= Proc.new{|element|
       element
     }
-    @train_model =<<-EOF
+    @train_model ||=<<-EOF
 rbbt.require('e1071');
 model = svm(as.factor(label) ~ ., data = features);
     EOF
-    @eval_model =<<-EOF
+    @eval_model ||=<<-EOF
 rbbt.require('e1071');
 label = predict(model, features);
     EOF

data/lib/rbbt/vector/model.rb CHANGED Viewed

@@ -53,6 +53,13 @@ features = cbind(features, label = labels);
     "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
   end * "\n" if factor_levels }
 #{code}
+# Save used factor levels
+factor_levels = c()
+for (c in names(features)){
+  if (is.factor(features[[c]]))
+    factor_levels[c] = paste(levels(features[[c]]), collapse="\t")
+}
+rbbt.tsv.write("#{model_file}.factor_levels", factor_levels, names=c('Levels'), type='flat')
 save(model, file='#{model_file}')
       EOF
     end
@@ -150,6 +157,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
       if File.exists?(@levels_file)
         @factor_levels = YAML.load(Open.read(@levels_file))
       end
+      if File.exists?(@model_file + '.factor_levels')
+        @factor_levels = TSV.open(@model_file + '.factor_levels')
+      end
     else
       @factor_levels = factor_levels
     end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-dm
 version: !ruby/object:Gem::Version
-  version: 1.1.57
+  version: 1.1.58
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-11 00:00:00.000000000 Z
+date: 2022-07-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util