rbbt-dm 1.1.57 → 1.1.58
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/lib/rbbt/vector/model/random_forest.rb +11 -1
- data/lib/rbbt/vector/model/spaCy.rb +8 -10
- data/lib/rbbt/vector/model/svm.rb +3 -3
- data/lib/rbbt/vector/model.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
|
4
|
+
data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
|
7
|
+
data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
|
data/LICENSE
CHANGED
data/lib/rbbt/vector/model/random_forest.rb
CHANGED
@@ -16,11 +16,21 @@ model = randomForest(as.factor(label) ~ ., data = features);
|
|
16
16
|
rbbt.require("randomForest");
|
17
17
|
pred = names(model$forest$xlevels)
|
18
18
|
for (p in pred) {
|
19
|
-
if (
|
19
|
+
if (is.factor(features[[p]])) {
|
20
20
|
features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
|
21
21
|
}
|
22
22
|
}
|
23
23
|
label = predict(model, features);
|
24
24
|
EOF
|
25
25
|
end
|
26
|
+
|
27
|
+
def importance
|
28
|
+
TmpFile.with_file do |tmp|
|
29
|
+
tsv = R.run <<-EOF
|
30
|
+
load(file="#{model_file}");
|
31
|
+
rbbt.tsv.write('#{tmp}', model$importance)
|
32
|
+
EOF
|
33
|
+
TSV.open(tmp)
|
34
|
+
end
|
35
|
+
end
|
26
36
|
end
|
data/lib/rbbt/vector/model/spaCy.rb
CHANGED
@@ -30,6 +30,7 @@ class SpaCyModel < VectorModel
|
|
30
30
|
@train_model = Proc.new do |file, features, labels|
|
31
31
|
texts = features
|
32
32
|
docs = []
|
33
|
+
unique_labels = labels.uniq
|
33
34
|
tmpconfig = File.join(file, 'config')
|
34
35
|
tmptrain = File.join(file, 'train.spacy')
|
35
36
|
SpaCy.config(@config, tmpconfig)
|
@@ -37,14 +38,11 @@ class SpaCyModel < VectorModel
|
|
37
38
|
nlp = SpaCy.nlp(lang)
|
38
39
|
docs = []
|
39
40
|
RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
# doc.cats["positive"] = 0
|
46
|
-
# doc.cats["negative"] = 1
|
47
|
-
#end
|
41
|
+
unique_labels.each do |other_label|
|
42
|
+
next if other_label == label
|
43
|
+
doc.cats[other_label] = false
|
44
|
+
end
|
45
|
+
doc.cats[label] = true
|
48
46
|
docs << doc
|
49
47
|
end
|
50
48
|
|
@@ -56,8 +54,9 @@ class SpaCyModel < VectorModel
|
|
56
54
|
CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}", "--gpu-id" => gpu)
|
57
55
|
end
|
58
56
|
|
59
|
-
@eval_model = Proc.new do |file, features|
|
57
|
+
@eval_model = Proc.new do |file, features,list|
|
60
58
|
texts = features
|
59
|
+
texts = [texts] unless list
|
61
60
|
|
62
61
|
docs = []
|
63
62
|
SpaCyModel.spacy do
|
@@ -68,7 +67,6 @@ class SpaCyModel < VectorModel
|
|
68
67
|
cats = nlp.(text).cats
|
69
68
|
bar.tick
|
70
69
|
cats.sort_by{|l,v| v.to_f }.last.first
|
71
|
-
#cats['positive'] > cats['negative'] ? 1 : 0
|
72
70
|
end
|
73
71
|
end
|
74
72
|
end
|
data/lib/rbbt/vector/model/svm.rb
CHANGED
@@ -3,16 +3,16 @@ class SVMModel < VectorModel
|
|
3
3
|
def initialize(dir)
|
4
4
|
super(dir)
|
5
5
|
|
6
|
-
@extract_features = Proc.new{|element|
|
6
|
+
@extract_features ||= Proc.new{|element|
|
7
7
|
element
|
8
8
|
}
|
9
9
|
|
10
|
-
@train_model =<<-EOF
|
10
|
+
@train_model ||=<<-EOF
|
11
11
|
rbbt.require('e1071');
|
12
12
|
model = svm(as.factor(label) ~ ., data = features);
|
13
13
|
EOF
|
14
14
|
|
15
|
-
@eval_model =<<-EOF
|
15
|
+
@eval_model ||=<<-EOF
|
16
16
|
rbbt.require('e1071');
|
17
17
|
label = predict(model, features);
|
18
18
|
EOF
|
data/lib/rbbt/vector/model.rb
CHANGED
@@ -53,6 +53,13 @@ features = cbind(features, label = labels);
|
|
53
53
|
"features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
|
54
54
|
end * "\n" if factor_levels }
|
55
55
|
#{code}
|
56
|
+
# Save used factor levels
|
57
|
+
factor_levels = c()
|
58
|
+
for (c in names(features)){
|
59
|
+
if (is.factor(features[[c]]))
|
60
|
+
factor_levels[c] = paste(levels(features[[c]]), collapse="\t")
|
61
|
+
}
|
62
|
+
rbbt.tsv.write("#{model_file}.factor_levels", factor_levels, names=c('Levels'), type='flat')
|
56
63
|
save(model, file='#{model_file}')
|
57
64
|
EOF
|
58
65
|
end
|
@@ -150,6 +157,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
150
157
|
if File.exists?(@levels_file)
|
151
158
|
@factor_levels = YAML.load(Open.read(@levels_file))
|
152
159
|
end
|
160
|
+
if File.exists?(@model_file + '.factor_levels')
|
161
|
+
@factor_levels = TSV.open(@model_file + '.factor_levels')
|
162
|
+
end
|
153
163
|
else
|
154
164
|
@factor_levels = factor_levels
|
155
165
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.57
|
4
|
+
version: 1.1.58
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|