rbbt-dm 1.1.58 → 1.1.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/vector/model/spaCy.rb +15 -7
- data/lib/rbbt/vector/model/util.rb +12 -0
- data/lib/rbbt/vector/model.rb +1 -0
- data/test/rbbt/vector/model/test_spaCy.rb +3 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e001607266948a5221118c15d1fc95ed4266b0f8880b2fa628350d429ed3f7d
|
4
|
+
data.tar.gz: 1d56618e3039e1d99c8183aace2ae20e8cd3dafce0d574b5dbd49ce4f5a1ee14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8d324c664257cb142ae7363de776ea7b6e367cd14c22026018c00de335bc3e35be428d00dad6d84a61c3f0874057612d1379e6839b1cea6fc312ea5d8e9a699
|
7
|
+
data.tar.gz: b2e52024a63f3105ac88ca1b471df0b69fe91237a1e3fa70185fc519e0740421c58755eb3560003c9f4e4f60b6479bf449fca7596684e3badba46e4ec242feee
|
@@ -34,10 +34,12 @@ class SpaCyModel < VectorModel
|
|
34
34
|
tmpconfig = File.join(file, 'config')
|
35
35
|
tmptrain = File.join(file, 'train.spacy')
|
36
36
|
SpaCy.config(@config, tmpconfig)
|
37
|
+
|
38
|
+
bar = bar(features.length, "Training documents into spacy format")
|
37
39
|
SpaCyModel.spacy do
|
38
40
|
nlp = SpaCy.nlp(lang)
|
39
41
|
docs = []
|
40
|
-
RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar =>
|
42
|
+
RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => bar do |doc,label|
|
41
43
|
unique_labels.each do |other_label|
|
42
44
|
next if other_label == label
|
43
45
|
doc.cats[other_label] = false
|
@@ -59,16 +61,22 @@ class SpaCyModel < VectorModel
|
|
59
61
|
texts = [texts] unless list
|
60
62
|
|
61
63
|
docs = []
|
64
|
+
bar = bar(features.length, "Evaluating model")
|
62
65
|
SpaCyModel.spacy do
|
63
66
|
nlp = spacy.load("#{file}/model-best")
|
64
67
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
bar.tick
|
69
|
-
cats.sort_by{|l,v| v.to_f }.last.first
|
70
|
-
end
|
68
|
+
docs = nlp.pipe(texts)
|
69
|
+
RbbtPython.collect docs, :bar => bar do |d|
|
70
|
+
d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
|
71
71
|
end
|
72
|
+
#nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
|
73
|
+
#Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
|
74
|
+
# texts.collect do |text|
|
75
|
+
# cats = nlp.(text).cats
|
76
|
+
# bar.tick
|
77
|
+
# cats.sort_by{|l,v| v.to_f }.last.first
|
78
|
+
# end
|
79
|
+
#end
|
72
80
|
end
|
73
81
|
end
|
74
82
|
end
|
data/lib/rbbt/vector/model.rb
CHANGED
@@ -96,14 +96,16 @@ class TestSpaCyModel < Test::Unit::TestCase
|
|
96
96
|
|
97
97
|
model = SpaCyModel.new(
|
98
98
|
dir,
|
99
|
-
"
|
99
|
+
"cpu/textcat_efficiency.conf"
|
100
100
|
)
|
101
101
|
|
102
102
|
|
103
|
+
Rbbt::Config.set 'gpu_id', nil, :spacy
|
103
104
|
require 'rbbt/tsv/csv'
|
104
105
|
url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
|
105
106
|
tsv = TSV.csv(Open.open(url))
|
106
107
|
tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
|
108
|
+
tsv = tsv.subset(tsv.keys.sample(100))
|
107
109
|
|
108
110
|
good = tsv.select("Recommended IND" => '1')
|
109
111
|
bad = tsv.select("Recommended IND" => '0')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.58
|
4
|
+
version: 1.1.59
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- lib/rbbt/vector/model/spaCy.rb
|
112
112
|
- lib/rbbt/vector/model/svm.rb
|
113
113
|
- lib/rbbt/vector/model/tensorflow.rb
|
114
|
+
- lib/rbbt/vector/model/util.rb
|
114
115
|
- share/R/MA.R
|
115
116
|
- share/R/barcode.R
|
116
117
|
- share/R/heatmap.3.R
|