rbbt-dm 1.1.58 → 1.1.59

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
4
- data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
3
+ metadata.gz: 1e001607266948a5221118c15d1fc95ed4266b0f8880b2fa628350d429ed3f7d
4
+ data.tar.gz: 1d56618e3039e1d99c8183aace2ae20e8cd3dafce0d574b5dbd49ce4f5a1ee14
5
5
  SHA512:
6
- metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
7
- data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
6
+ metadata.gz: d8d324c664257cb142ae7363de776ea7b6e367cd14c22026018c00de335bc3e35be428d00dad6d84a61c3f0874057612d1379e6839b1cea6fc312ea5d8e9a699
7
+ data.tar.gz: b2e52024a63f3105ac88ca1b471df0b69fe91237a1e3fa70185fc519e0740421c58755eb3560003c9f4e4f60b6479bf449fca7596684e3badba46e4ec242feee
@@ -34,10 +34,12 @@ class SpaCyModel < VectorModel
34
34
  tmpconfig = File.join(file, 'config')
35
35
  tmptrain = File.join(file, 'train.spacy')
36
36
  SpaCy.config(@config, tmpconfig)
37
+
38
+ bar = bar(features.length, "Training documents into spacy format")
37
39
  SpaCyModel.spacy do
38
40
  nlp = SpaCy.nlp(lang)
39
41
  docs = []
40
- RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
42
+ RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => bar do |doc,label|
41
43
  unique_labels.each do |other_label|
42
44
  next if other_label == label
43
45
  doc.cats[other_label] = false
@@ -59,16 +61,22 @@ class SpaCyModel < VectorModel
59
61
  texts = [texts] unless list
60
62
 
61
63
  docs = []
64
+ bar = bar(features.length, "Evaluating model")
62
65
  SpaCyModel.spacy do
63
66
  nlp = spacy.load("#{file}/model-best")
64
67
 
65
- Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
66
- texts.collect do |text|
67
- cats = nlp.(text).cats
68
- bar.tick
69
- cats.sort_by{|l,v| v.to_f }.last.first
70
- end
68
+ docs = nlp.pipe(texts)
69
+ RbbtPython.collect docs, :bar => bar do |d|
70
+ d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
71
71
  end
72
+ #nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
73
+ #Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
74
+ # texts.collect do |text|
75
+ # cats = nlp.(text).cats
76
+ # bar.tick
77
+ # cats.sort_by{|l,v| v.to_f }.last.first
78
+ # end
79
+ #end
72
80
  end
73
81
  end
74
82
  end
@@ -0,0 +1,12 @@
1
+ class VectorModel
2
+ attr_accessor :bar
3
+
4
+ def bar(max = nil, desc = nil)
5
+ desc, max = max, nil if desc.nil?
6
+ @bar ||= Log::ProgressBar.new max
7
+ @bar.desc = desc
8
+ @bar.max = max
9
+ @bar.init
10
+ @bar
11
+ end
12
+ end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/R'
2
+ require 'rbbt/vector/model/util'
2
3
 
3
4
  class VectorModel
4
5
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
@@ -96,14 +96,16 @@ class TestSpaCyModel < Test::Unit::TestCase
96
96
 
97
97
  model = SpaCyModel.new(
98
98
  dir,
99
- "gpu/textcat_accuracy.conf"
99
+ "cpu/textcat_efficiency.conf"
100
100
  )
101
101
 
102
102
 
103
+ Rbbt::Config.set 'gpu_id', nil, :spacy
103
104
  require 'rbbt/tsv/csv'
104
105
  url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
105
106
  tsv = TSV.csv(Open.open(url))
106
107
  tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
108
+ tsv = tsv.subset(tsv.keys.sample(100))
107
109
 
108
110
  good = tsv.select("Recommended IND" => '1')
109
111
  bad = tsv.select("Recommended IND" => '0')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.58
4
+ version: 1.1.59
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-19 00:00:00.000000000 Z
11
+ date: 2022-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -111,6 +111,7 @@ files:
111
111
  - lib/rbbt/vector/model/spaCy.rb
112
112
  - lib/rbbt/vector/model/svm.rb
113
113
  - lib/rbbt/vector/model/tensorflow.rb
114
+ - lib/rbbt/vector/model/util.rb
114
115
  - share/R/MA.R
115
116
  - share/R/barcode.R
116
117
  - share/R/heatmap.3.R