rbbt-dm 1.1.58 → 1.1.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
4
- data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
3
+ metadata.gz: 1e001607266948a5221118c15d1fc95ed4266b0f8880b2fa628350d429ed3f7d
4
+ data.tar.gz: 1d56618e3039e1d99c8183aace2ae20e8cd3dafce0d574b5dbd49ce4f5a1ee14
5
5
  SHA512:
6
- metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
7
- data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
6
+ metadata.gz: d8d324c664257cb142ae7363de776ea7b6e367cd14c22026018c00de335bc3e35be428d00dad6d84a61c3f0874057612d1379e6839b1cea6fc312ea5d8e9a699
7
+ data.tar.gz: b2e52024a63f3105ac88ca1b471df0b69fe91237a1e3fa70185fc519e0740421c58755eb3560003c9f4e4f60b6479bf449fca7596684e3badba46e4ec242feee
@@ -34,10 +34,12 @@ class SpaCyModel < VectorModel
34
34
  tmpconfig = File.join(file, 'config')
35
35
  tmptrain = File.join(file, 'train.spacy')
36
36
  SpaCy.config(@config, tmpconfig)
37
+
38
+ bar = bar(features.length, "Training documents into spacy format")
37
39
  SpaCyModel.spacy do
38
40
  nlp = SpaCy.nlp(lang)
39
41
  docs = []
40
- RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
42
+ RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => bar do |doc,label|
41
43
  unique_labels.each do |other_label|
42
44
  next if other_label == label
43
45
  doc.cats[other_label] = false
@@ -59,16 +61,22 @@ class SpaCyModel < VectorModel
59
61
  texts = [texts] unless list
60
62
 
61
63
  docs = []
64
+ bar = bar(features.length, "Evaluating model")
62
65
  SpaCyModel.spacy do
63
66
  nlp = spacy.load("#{file}/model-best")
64
67
 
65
- Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
66
- texts.collect do |text|
67
- cats = nlp.(text).cats
68
- bar.tick
69
- cats.sort_by{|l,v| v.to_f }.last.first
70
- end
68
+ docs = nlp.pipe(texts)
69
+ RbbtPython.collect docs, :bar => bar do |d|
70
+ d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
71
71
  end
72
+ #nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
73
+ #Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
74
+ # texts.collect do |text|
75
+ # cats = nlp.(text).cats
76
+ # bar.tick
77
+ # cats.sort_by{|l,v| v.to_f }.last.first
78
+ # end
79
+ #end
72
80
  end
73
81
  end
74
82
  end
@@ -0,0 +1,12 @@
1
# Progress-bar helper for VectorModel: one bar object is kept per model
# instance and reconfigured each time a new phase asks for it.
class VectorModel
  # Only the writer is generated. The reader is the custom #bar below, so
  # attr_accessor would define a getter that is immediately redefined
  # (Ruby reports "method redefined" under -w).
  attr_writer :bar

  # Returns this model's progress bar, creating it on first use and
  # reconfiguring it when arguments are given.
  #
  # Call shapes:
  #   bar               - plain read; an existing bar is returned untouched
  #   bar("desc")       - a lone non-numeric argument is the description
  #   bar(100)          - a lone numeric argument is the maximum
  #   bar(100, "desc")  - maximum and description
  #
  # @param max [Numeric, nil] value assigned to the bar's +max+
  # @param desc [Object, nil] value assigned to the bar's +desc+
  # @return the bar stored in @bar (a Log::ProgressBar unless another object
  #   was injected through #bar=)
  def bar(max = nil, desc = nil)
    # A read with no arguments must not wipe desc/max or re-run init on a
    # bar that is already configured.
    return @bar if @bar && max.nil? && desc.nil?

    # Support bar("description"): swap only when the single argument is not
    # a number, so bar(100) keeps 100 as the maximum instead of the label.
    desc, max = max, nil if desc.nil? && !max.is_a?(Numeric)

    @bar ||= Log::ProgressBar.new max
    @bar.desc = desc
    @bar.max = max
    @bar.init
    @bar
  end
end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/R'
2
+ require 'rbbt/vector/model/util'
2
3
 
3
4
  class VectorModel
4
5
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
@@ -96,14 +96,16 @@ class TestSpaCyModel < Test::Unit::TestCase
96
96
 
97
97
  model = SpaCyModel.new(
98
98
  dir,
99
- "gpu/textcat_accuracy.conf"
99
+ "cpu/textcat_efficiency.conf"
100
100
  )
101
101
 
102
102
 
103
+ Rbbt::Config.set 'gpu_id', nil, :spacy
103
104
  require 'rbbt/tsv/csv'
104
105
  url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
105
106
  tsv = TSV.csv(Open.open(url))
106
107
  tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
108
+ tsv = tsv.subset(tsv.keys.sample(100))
107
109
 
108
110
  good = tsv.select("Recommended IND" => '1')
109
111
  bad = tsv.select("Recommended IND" => '0')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.58
4
+ version: 1.1.59
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-19 00:00:00.000000000 Z
11
+ date: 2022-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -111,6 +111,7 @@ files:
111
111
  - lib/rbbt/vector/model/spaCy.rb
112
112
  - lib/rbbt/vector/model/svm.rb
113
113
  - lib/rbbt/vector/model/tensorflow.rb
114
+ - lib/rbbt/vector/model/util.rb
114
115
  - share/R/MA.R
115
116
  - share/R/barcode.R
116
117
  - share/R/heatmap.3.R