rbbt-dm 1.1.57 → 1.1.58

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 072d57536b223931947dfd71d5e359961c6bfe44da0870cd2dbab440aa91ba6c
4
- data.tar.gz: 7c5d2a3fc627992231b621b6efe4d2160aa093b11c89f31f728ede5121d2edc9
3
+ metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
4
+ data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
5
5
  SHA512:
6
- metadata.gz: 5216b3179bc4a809829b79bc4c447159e88444dc7943da3d0c3643b728e3eb5a6da7c08a8538fb88db1e463f95e55e104976ac81f010aeaf729a6cb1c4ca1374
7
- data.tar.gz: 12baae07f04ca3770dfef81c3166385badb7944d25b7072364631e5f93b419e04c53d5b7157934b3d1522ef83e84acf0c4e2c5d39ad3604b7ef8f5f460d2f750
6
+ metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
7
+ data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2013 Miguel Vázquez García
1
+ Copyright (c) 2010-2022 Miguel Vázquez García
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -16,11 +16,21 @@ model = randomForest(as.factor(label) ~ ., data = features);
16
16
  rbbt.require("randomForest");
17
17
  pred = names(model$forest$xlevels)
18
18
  for (p in pred) {
19
- if (class(features[[p]]) == "factor") {
19
+ if (is.factor(features[[p]])) {
20
20
  features[[p]] = factor(features[[p]], levels=model$forest$xlevels[[p]])
21
21
  }
22
22
  }
23
23
  label = predict(model, features);
24
24
  EOF
25
25
  end
26
+
27
+ def importance
28
+ TmpFile.with_file do |tmp|
29
+ tsv = R.run <<-EOF
30
+ load(file="#{model_file}");
31
+ rbbt.tsv.write('#{tmp}', model$importance)
32
+ EOF
33
+ TSV.open(tmp)
34
+ end
35
+ end
26
36
  end
@@ -30,6 +30,7 @@ class SpaCyModel < VectorModel
30
30
  @train_model = Proc.new do |file, features, labels|
31
31
  texts = features
32
32
  docs = []
33
+ unique_labels = labels.uniq
33
34
  tmpconfig = File.join(file, 'config')
34
35
  tmptrain = File.join(file, 'train.spacy')
35
36
  SpaCy.config(@config, tmpconfig)
@@ -37,14 +38,11 @@ class SpaCyModel < VectorModel
37
38
  nlp = SpaCy.nlp(lang)
38
39
  docs = []
39
40
  RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
40
- doc.cats[label] = 1
41
- #if %w(1 true pos).include?(label.to_s.downcase)
42
- # doc.cats["positive"] = 1
43
- # doc.cats["negative"] = 0
44
- #else
45
- # doc.cats["positive"] = 0
46
- # doc.cats["negative"] = 1
47
- #end
41
+ unique_labels.each do |other_label|
42
+ next if other_label == label
43
+ doc.cats[other_label] = false
44
+ end
45
+ doc.cats[label] = true
48
46
  docs << doc
49
47
  end
50
48
 
@@ -56,8 +54,9 @@ class SpaCyModel < VectorModel
56
54
  CMD.cmd_log(:spacy, "train #{tmpconfig} --output #{file} --paths.train #{tmptrain} --paths.dev #{tmptrain}", "--gpu-id" => gpu)
57
55
  end
58
56
 
59
- @eval_model = Proc.new do |file, features|
57
+ @eval_model = Proc.new do |file, features,list|
60
58
  texts = features
59
+ texts = [texts] unless list
61
60
 
62
61
  docs = []
63
62
  SpaCyModel.spacy do
@@ -68,7 +67,6 @@ class SpaCyModel < VectorModel
68
67
  cats = nlp.(text).cats
69
68
  bar.tick
70
69
  cats.sort_by{|l,v| v.to_f }.last.first
71
- #cats['positive'] > cats['negative'] ? 1 : 0
72
70
  end
73
71
  end
74
72
  end
@@ -3,16 +3,16 @@ class SVMModel < VectorModel
3
3
  def initialize(dir)
4
4
  super(dir)
5
5
 
6
- @extract_features = Proc.new{|element|
6
+ @extract_features ||= Proc.new{|element|
7
7
  element
8
8
  }
9
9
 
10
- @train_model =<<-EOF
10
+ @train_model ||=<<-EOF
11
11
  rbbt.require('e1071');
12
12
  model = svm(as.factor(label) ~ ., data = features);
13
13
  EOF
14
14
 
15
- @eval_model =<<-EOF
15
+ @eval_model ||=<<-EOF
16
16
  rbbt.require('e1071');
17
17
  label = predict(model, features);
18
18
  EOF
@@ -53,6 +53,13 @@ features = cbind(features, label = labels);
53
53
  "features[['#{name}']] = factor(features[['#{name}']], levels=#{R.ruby2R levels})"
54
54
  end * "\n" if factor_levels }
55
55
  #{code}
56
+ # Save used factor levels
57
+ factor_levels = c()
58
+ for (c in names(features)){
59
+ if (is.factor(features[[c]]))
60
+ factor_levels[c] = paste(levels(features[[c]]), collapse="\t")
61
+ }
62
+ rbbt.tsv.write("#{model_file}.factor_levels", factor_levels, names=c('Levels'), type='flat')
56
63
  save(model, file='#{model_file}')
57
64
  EOF
58
65
  end
@@ -150,6 +157,9 @@ cat(paste(label, sep="\\n", collapse="\\n"));
150
157
  if File.exists?(@levels_file)
151
158
  @factor_levels = YAML.load(Open.read(@levels_file))
152
159
  end
160
+ if File.exists?(@model_file + '.factor_levels')
161
+ @factor_levels = TSV.open(@model_file + '.factor_levels')
162
+ end
153
163
  else
154
164
  @factor_levels = factor_levels
155
165
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.57
4
+ version: 1.1.58
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-11 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util