rbbt-dm 1.1.58 → 1.1.59
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/vector/model/spaCy.rb +15 -7
- data/lib/rbbt/vector/model/util.rb +12 -0
- data/lib/rbbt/vector/model.rb +1 -0
- data/test/rbbt/vector/model/test_spaCy.rb +3 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e001607266948a5221118c15d1fc95ed4266b0f8880b2fa628350d429ed3f7d
|
4
|
+
data.tar.gz: 1d56618e3039e1d99c8183aace2ae20e8cd3dafce0d574b5dbd49ce4f5a1ee14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8d324c664257cb142ae7363de776ea7b6e367cd14c22026018c00de335bc3e35be428d00dad6d84a61c3f0874057612d1379e6839b1cea6fc312ea5d8e9a699
|
7
|
+
data.tar.gz: b2e52024a63f3105ac88ca1b471df0b69fe91237a1e3fa70185fc519e0740421c58755eb3560003c9f4e4f60b6479bf449fca7596684e3badba46e4ec242feee
|
data/lib/rbbt/vector/model/spaCy.rb
CHANGED
@@ -34,10 +34,12 @@ class SpaCyModel < VectorModel
|
|
34
34
|
tmpconfig = File.join(file, 'config')
|
35
35
|
tmptrain = File.join(file, 'train.spacy')
|
36
36
|
SpaCy.config(@config, tmpconfig)
|
37
|
+
|
38
|
+
bar = bar(features.length, "Training documents into spacy format")
|
37
39
|
SpaCyModel.spacy do
|
38
40
|
nlp = SpaCy.nlp(lang)
|
39
41
|
docs = []
|
40
|
-
RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar =>
|
42
|
+
RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => bar do |doc,label|
|
41
43
|
unique_labels.each do |other_label|
|
42
44
|
next if other_label == label
|
43
45
|
doc.cats[other_label] = false
|
@@ -59,16 +61,22 @@ class SpaCyModel < VectorModel
|
|
59
61
|
texts = [texts] unless list
|
60
62
|
|
61
63
|
docs = []
|
64
|
+
bar = bar(features.length, "Evaluating model")
|
62
65
|
SpaCyModel.spacy do
|
63
66
|
nlp = spacy.load("#{file}/model-best")
|
64
67
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
bar.tick
|
69
|
-
cats.sort_by{|l,v| v.to_f }.last.first
|
70
|
-
end
|
68
|
+
docs = nlp.pipe(texts)
|
69
|
+
RbbtPython.collect docs, :bar => bar do |d|
|
70
|
+
d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
|
71
71
|
end
|
72
|
+
#nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
|
73
|
+
#Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
|
74
|
+
# texts.collect do |text|
|
75
|
+
# cats = nlp.(text).cats
|
76
|
+
# bar.tick
|
77
|
+
# cats.sort_by{|l,v| v.to_f }.last.first
|
78
|
+
# end
|
79
|
+
#end
|
72
80
|
end
|
73
81
|
end
|
74
82
|
end
|
data/lib/rbbt/vector/model.rb
CHANGED
data/test/rbbt/vector/model/test_spaCy.rb
CHANGED
@@ -96,14 +96,16 @@ class TestSpaCyModel < Test::Unit::TestCase
|
|
96
96
|
|
97
97
|
model = SpaCyModel.new(
|
98
98
|
dir,
|
99
|
-
"
|
99
|
+
"cpu/textcat_efficiency.conf"
|
100
100
|
)
|
101
101
|
|
102
102
|
|
103
|
+
Rbbt::Config.set 'gpu_id', nil, :spacy
|
103
104
|
require 'rbbt/tsv/csv'
|
104
105
|
url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
|
105
106
|
tsv = TSV.csv(Open.open(url))
|
106
107
|
tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
|
108
|
+
tsv = tsv.subset(tsv.keys.sample(100))
|
107
109
|
|
108
110
|
good = tsv.select("Recommended IND" => '1')
|
109
111
|
bad = tsv.select("Recommended IND" => '0')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.58
|
4
|
+
version: 1.1.59
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- lib/rbbt/vector/model/spaCy.rb
|
112
112
|
- lib/rbbt/vector/model/svm.rb
|
113
113
|
- lib/rbbt/vector/model/tensorflow.rb
|
114
|
+
- lib/rbbt/vector/model/util.rb
|
114
115
|
- share/R/MA.R
|
115
116
|
- share/R/barcode.R
|
116
117
|
- share/R/heatmap.3.R
|