rbbt-dm 1.1.57 → 1.1.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 072d57536b223931947dfd71d5e359961c6bfe44da0870cd2dbab440aa91ba6c
4
- data.tar.gz: 7c5d2a3fc627992231b621b6efe4d2160aa093b11c89f31f728ede5121d2edc9
3
+ metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
4
+ data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
5
5
  SHA512:
6
- metadata.gz: 5216b3179bc4a809829b79bc4c447159e88444dc7943da3d0c3643b728e3eb5a6da7c08a8538fb88db1e463f95e55e104976ac81f010aeaf729a6cb1c4ca1374
7
- data.tar.gz: 12baae07f04ca3770dfef81c3166385badb7944d25b7072364631e5f93b419e04c53d5b7157934b3d1522ef83e84acf0c4e2c5d39ad3604b7ef8f5f460d2f750
6
+ metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
7
+ data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2013 Miguel Vázquez García
1
+ Copyright (c) 2010-2022 Miguel Vázquez García
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -16,11 +16,21 @@ model = randomForest(as.factor(label) ~ ., data = features);
16
16
  rbbt.require("randomForest");
17
17
  pred = names(model$forest$xlevels)
18
18
  for (p in pred) {
19
- if (class(features[[p]]) == "factor") {
19
+ if (is.factor(features[[p]])) {
20
20
  features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
21
21
  }
22
22
  }
23
23
  label = predict(model, features);
24
24
  EOF
25
25
  end
26
+
27
+ def importance
28
+ TmpFile.with_file do |tmp|
29
+ tsv = R.run <<-EOF
30
+ load(file="#{model_file}");
31
+ rbbt.tsv.write('#{tmp}', model$importance)
32
+ EOF
33
+ TSV.open(tmp)
34
+ end
35
+ end
26
36
  end
@@ -30,6 +30,7 @@ class SpaCyModel < VectorModel
30
30
  @train_model = Proc.new do |file, features, labels|
31
31
  texts = features
32
32
  docs = []
33
+ unique_labels = labels.uniq
33
34
  tmpconfig = File.join(file, 'config')
34
35
  tmptrain = File.join(file, 'train.spacy')
35
36
  SpaCy.config(@config, tmpconfig)
@@ -37,14 +38,11 @@ class SpaCyModel < VectorModel
37
38
  nlp = SpaCy.nlp(lang)
38
39
  docs = []
39
40
  RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
40
- doc.cats[label] = 1
41
- #if %w(1 true pos).include?(label.to_s.downcase)
42
- # doc.cats["positive"] = 1
43
- # doc.cats["negative"] = 0
44
- #else
45
- # doc.cats["positive"] = 0
46
- # doc.cats["negative"] = 1
47
- #end
41
+ unique_labels.each do |other_label|
42
+ next if other_label == label
43
+ doc.cats[other_label] = false
44
+ end
45
+ doc.cats[label] = true
48
46
  docs << doc
49
47
  end
50
48
 
@@ -56,8 +54,9 @@ class SpaCyModel < VectorModel
56
54
  CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}", "--gpu-id" => gpu)
57
55
  end
58
56
 
59
- @eval_model = Proc.new do |file, features|
57
+ @eval_model = Proc.new do |file, features,list|
60
58
  texts = features
59
+ texts = [texts] unless list
61
60
 
62
61
  docs = []
63
62
  SpaCyModel.spacy do
@@ -68,7 +67,6 @@ class SpaCyModel < VectorModel
68
67
  cats = nlp.(text).cats
69
68
  bar.tick
70
69
  cats.sort_by{|l,v| v.to_f }.last.first
71
- #cats['positive'] > cats['negative'] ? 1 : 0
72
70
  end
73
71
  end
74
72
  end
@@ -3,16 +3,16 @@ class SVMModel < VectorModel
3
3
  def initialize(dir)
4
4
  super(dir)
5
5
 
6
- @extract_features = Proc.new{|element|
6
+ @extract_features ||= Proc.new{|element|
7
7
  element
8
8
  }
9
9
 
10
- @train_model =<<-EOF
10
+ @train_model ||=<<-EOF
11
11
  rbbt.require('e1071');
12
12
  model = svm(as.factor(label) ~ ., data = features);
13
13
  EOF
14
14
 
15
- @eval_model =<<-EOF
15
+ @eval_model ||=<<-EOF
16
16
  rbbt.require('e1071');
17
17
  label = predict(model, features);
18
18
  EOF
@@ -53,6 +53,13 @@ features = cbind(features, label = labels);
53
53
  "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
54
54
  end * "\n" if factor_levels }
55
55
  #{code}
56
+ # Save used factor levels
57
+ factor_levels = c()
58
+ for (c in names(features)){
59
+ if (is.factor(features[[c]]))
60
+ factor_levels[c] = paste(levels(features[[c]]), collapse="\t")
61
+ }
62
+ rbbt.tsv.write("#{model_file}.factor_levels", factor_levels, names=c('Levels'), type='flat')
56
63
  save(model, file='#{model_file}')
57
64
  EOF
58
65
  end
@@ -150,6 +157,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
150
157
  if File.exists?(@levels_file)
151
158
  @factor_levels = YAML.load(Open.read(@levels_file))
152
159
  end
160
+ if File.exists?(@model_file + '.factor_levels')
161
+ @factor_levels = TSV.open(@model_file + '.factor_levels')
162
+ end
153
163
  else
154
164
  @factor_levels = factor_levels
155
165
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.57
4
+ version: 1.1.58
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-11 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util