rbbt-dm 1.1.57 → 1.1.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/lib/rbbt/vector/model/random_forest.rb +11 -1
- data/lib/rbbt/vector/model/spaCy.rb +8 -10
- data/lib/rbbt/vector/model/svm.rb +3 -3
- data/lib/rbbt/vector/model.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
|
4
|
+
data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
|
7
|
+
data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
|
data/LICENSE
CHANGED
data/lib/rbbt/vector/model/random_forest.rb
CHANGED
@@ -16,11 +16,21 @@ model = randomForest(as.factor(label) ~ ., data = features);
|
|
16
16
|
rbbt.require("randomForest");
|
17
17
|
pred = names(model$forest$xlevels)
|
18
18
|
for (p in pred) {
|
19
|
-
if (
|
19
|
+
if (is.factor(features[[p]])) {
|
20
20
|
features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
|
21
21
|
}
|
22
22
|
}
|
23
23
|
label = predict(model, features);
|
24
24
|
EOF
|
25
25
|
end
|
26
|
+
|
27
|
+
def importance
|
28
|
+
TmpFile.with_file do |tmp|
|
29
|
+
tsv = R.run <<-EOF
|
30
|
+
load(file="#{model_file}");
|
31
|
+
rbbt.tsv.write('#{tmp}', model$importance)
|
32
|
+
EOF
|
33
|
+
TSV.open(tmp)
|
34
|
+
end
|
35
|
+
end
|
26
36
|
end
|
data/lib/rbbt/vector/model/spaCy.rb
CHANGED
@@ -30,6 +30,7 @@ class SpaCyModel < VectorModel
|
|
30
30
|
@train_model = Proc.new do |file, features, labels|
|
31
31
|
texts = features
|
32
32
|
docs = []
|
33
|
+
unique_labels = labels.uniq
|
33
34
|
tmpconfig = File.join(file, 'config')
|
34
35
|
tmptrain = File.join(file, 'train.spacy')
|
35
36
|
SpaCy.config(@config, tmpconfig)
|
@@ -37,14 +38,11 @@ class SpaCyModel < VectorModel
|
|
37
38
|
nlp = SpaCy.nlp(lang)
|
38
39
|
docs = []
|
39
40
|
RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
# doc.cats["positive"] = 0
|
46
|
-
# doc.cats["negative"] = 1
|
47
|
-
#end
|
41
|
+
unique_labels.each do |other_label|
|
42
|
+
next if other_label == label
|
43
|
+
doc.cats[other_label] = false
|
44
|
+
end
|
45
|
+
doc.cats[label] = true
|
48
46
|
docs << doc
|
49
47
|
end
|
50
48
|
|
@@ -56,8 +54,9 @@ class SpaCyModel < VectorModel
|
|
56
54
|
CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}", "--gpu-id" => gpu)
|
57
55
|
end
|
58
56
|
|
59
|
-
@eval_model = Proc.new do |file, features|
|
57
|
+
@eval_model = Proc.new do |file, features,list|
|
60
58
|
texts = features
|
59
|
+
texts = [texts] unless list
|
61
60
|
|
62
61
|
docs = []
|
63
62
|
SpaCyModel.spacy do
|
@@ -68,7 +67,6 @@ class SpaCyModel < VectorModel
|
|
68
67
|
cats = nlp.(text).cats
|
69
68
|
bar.tick
|
70
69
|
cats.sort_by{|l,v| v.to_f }.last.first
|
71
|
-
#cats['positive'] > cats['negative'] ? 1 : 0
|
72
70
|
end
|
73
71
|
end
|
74
72
|
end
|
data/lib/rbbt/vector/model/svm.rb
CHANGED
@@ -3,16 +3,16 @@ class SVMModel < VectorModel
|
|
3
3
|
def initialize(dir)
|
4
4
|
super(dir)
|
5
5
|
|
6
|
-
@extract_features
|
6
|
+
@extract_features ||= Proc.new{|element|
|
7
7
|
element
|
8
8
|
}
|
9
9
|
|
10
|
-
@train_model
|
10
|
+
@train_model ||=<<-EOF
|
11
11
|
rbbt.require('e1071');
|
12
12
|
model = svm(as.factor(label) ~ ., data = features);
|
13
13
|
EOF
|
14
14
|
|
15
|
-
@eval_model
|
15
|
+
@eval_model ||=<<-EOF
|
16
16
|
rbbt.require('e1071');
|
17
17
|
label = predict(model, features);
|
18
18
|
EOF
|
data/lib/rbbt/vector/model.rb
CHANGED
@@ -53,6 +53,13 @@ features = cbind(features, label = labels);
|
|
53
53
|
"features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
|
54
54
|
end * "\n" if factor_levels }
|
55
55
|
#{code}
|
56
|
+
# Save used factor levels
|
57
|
+
factor_levels = c()
|
58
|
+
for (c in names(features)){
|
59
|
+
if (is.factor(features[[c]]))
|
60
|
+
factor_levels[c] = paste(levels(features[[c]]), collapse="\t")
|
61
|
+
}
|
62
|
+
rbbt.tsv.write("#{model_file}.factor_levels", factor_levels, names=c('Levels'), type='flat')
|
56
63
|
save(model, file='#{model_file}')
|
57
64
|
EOF
|
58
65
|
end
|
@@ -150,6 +157,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
|
|
150
157
|
if File.exists?(@levels_file)
|
151
158
|
@factor_levels = YAML.load(Open.read(@levels_file))
|
152
159
|
end
|
160
|
+
if File.exists?(@model_file + '.factor_levels')
|
161
|
+
@factor_levels = TSV.open(@model_file + '.factor_levels')
|
162
|
+
end
|
153
163
|
else
|
154
164
|
@factor_levels = factor_levels
|
155
165
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.57
|
4
|
+
version: 1.1.58
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|