rbbt-dm 1.1.58 → 1.1.60

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 286385d90b276d30cd5e1b21ae38c5e6a203e2ce3ac10673c434c19a2f45cfb1
4
- data.tar.gz: 7879d74a364886ea8cb507be51c4979cfb598bdb273f948c3c3930a5dce199e6
3
+ metadata.gz: c1f04f874e4f4d9e6b7b4dd19fb7f8e1c5c0926f6621cb246324294dab0c9664
4
+ data.tar.gz: ddc35ddc2c747a98405dd021e4268379f667859d3f62d0bf4785457dc8ab952c
5
5
  SHA512:
6
- metadata.gz: b82c77bd736c8422e49c4dc83b63d6a91da6e76857af4b5cf5aff0a9a58b4147bc50b49b1b1534e8b07ca1bce5f6a5a673c5d688fb8cd7856623370d19fd1bda
7
- data.tar.gz: 1b267a85ab600b878e99f414f725255cf086165a27f8cdec42ed83349b4f36bdb9e29615e0aaada9b30f098df8382e4778cebaf2b8649e17b8985e79d9b8bd23
6
+ metadata.gz: cac8f02f1d4a34658f8cf3fb8f226964b83ae56ebb79fb854f7485b6deb0364a80a7620e1f1b55113e9c18e2b48c43ec080de0eb046a3e1fb762896970a2332e
7
+ data.tar.gz: b72acf9908a04cbd0812772456cfe23f52385b830405231a520642a6e08b96773c01f8bb112758ca775e5a86d8ab923d1c325b56efe6561611d7102af7fcfcdf
@@ -9,7 +9,7 @@ source('#{Rbbt.share.R['barcode.R'].find}')
9
9
  rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
10
10
  EOF
11
11
 
12
- R.run(cmd)
12
+ R.run(cmd, :monitor => true)
13
13
  end
14
14
 
15
15
  def barcode_ruby(outfile, factor = 2)
@@ -55,7 +55,7 @@ source('#{Rbbt.share.R['barcode.R'].find}')
55
55
  rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R key_field}, #{R.ruby2R clusters})
56
56
  EOF
57
57
 
58
- R.run(cmd)
58
+ R.run(cmd, :monitor => true)
59
59
  end
60
60
 
61
61
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/R'
2
+ require 'rbbt/matrix'
2
3
 
3
4
  class RbbtMatrix
4
5
  def differential(main, contrast, path = nil)
@@ -31,9 +32,7 @@ class RbbtMatrix
31
32
  trend = false
32
33
  two_channel = false
33
34
  when 'fpkm'
34
- log2 = true
35
- trend = true
36
- two_channel = false
35
+ type = "DESeq"
37
36
  when 'log2 ratio', 'transformed count'
38
37
  log2 = false
39
38
  trend = false
@@ -53,6 +52,7 @@ class RbbtMatrix
53
52
  source('#{Rbbt.share.R["MA.R"].find(:lib)}')
54
53
 
55
54
  data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
55
+ type = #{R.ruby2R type},
56
56
  main = #{R.ruby2R(main_samples)},
57
57
  contrast = #{R.ruby2R(contrast_samples)},
58
58
  log2=#{ R.ruby2R log2 },
data/lib/rbbt/matrix.rb CHANGED
@@ -140,6 +140,21 @@ class RbbtMatrix
140
140
  [main_samples, contrast_samples]
141
141
  end
142
142
 
143
+ def transpose(id = nil)
144
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file
145
+
146
+ file = Persist.persist(data_file, :tsv, :prefix => "Transpose", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
147
+
148
+ data = data_file.tsv(:cast => :to_f, :type => :double).transpose(id)
149
+
150
+ data.to_list{|v| v.length > 1 ? Misc.mean(v) : v }
151
+ end
152
+ subsets = self.subsets
153
+ matrix = RbbtMatrix.new file, labels, value_type, key_field, organism
154
+ matrix.subsets = subsets
155
+ matrix
156
+ end
157
+
143
158
  def to_average(identifiers = nil)
144
159
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
145
160
 
@@ -3,8 +3,16 @@ require 'fc'
3
3
  module Paths
4
4
 
5
5
  def self.dijkstra(adjacency, start_node, end_node = nil, max_steps = nil)
6
+
6
7
  return nil unless adjacency.include? start_node
7
8
 
9
+ case end_node
10
+ when String
11
+ return nil unless adjacency.values.flatten.include? end_node
12
+ when Array
13
+ return nil unless (adjacency.values.flatten & end_node).any?
14
+ end
15
+
8
16
  active = FastContainers::PriorityQueue.new(:min)
9
17
  distances = Hash.new { 1.0 / 0.0 }
10
18
  parents = Hash.new
@@ -34,10 +34,12 @@ class SpaCyModel < VectorModel
34
34
  tmpconfig = File.join(file, 'config')
35
35
  tmptrain = File.join(file, 'train.spacy')
36
36
  SpaCy.config(@config, tmpconfig)
37
+
38
+ bar = bar(features.length, "Training documents into spacy format")
37
39
  SpaCyModel.spacy do
38
40
  nlp = SpaCy.nlp(lang)
39
41
  docs = []
40
- RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => "Training documents into spacy format" do |doc,label|
42
+ RbbtPython.iterate nlp.pipe(texts.zip(labels), as_tuples: true), :bar => bar do |doc,label|
41
43
  unique_labels.each do |other_label|
42
44
  next if other_label == label
43
45
  doc.cats[other_label] = false
@@ -59,16 +61,27 @@ class SpaCyModel < VectorModel
59
61
  texts = [texts] unless list
60
62
 
61
63
  docs = []
64
+ bar = bar(features.length, "Evaluating model")
62
65
  SpaCyModel.spacy do
66
+ gpu = Rbbt::Config.get('gpu_id', :spacy, :spacy_train, :default => 0)
67
+ gpu = gpu.to_i if gpu && gpu != ""
68
+ spacy.require_gpu(gpu) if gpu
63
69
  nlp = spacy.load("#{file}/model-best")
64
70
 
65
- Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
66
- texts.collect do |text|
67
- cats = nlp.(text).cats
68
- bar.tick
69
- cats.sort_by{|l,v| v.to_f }.last.first
71
+ docs = nlp.pipe(texts)
72
+ RbbtPython.collect docs, :bar => bar do |d|
73
+ Misc.timeout_insist(20) do
74
+ d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
70
75
  end
71
76
  end
77
+ #nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
78
+ #Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
79
+ # texts.collect do |text|
80
+ # cats = nlp.(text).cats
81
+ # bar.tick
82
+ # cats.sort_by{|l,v| v.to_f }.last.first
83
+ # end
84
+ #end
72
85
  end
73
86
  end
74
87
  end
@@ -0,0 +1,12 @@
1
+ class VectorModel
2
+ attr_accessor :bar
3
+
4
+ def bar(max = nil, desc = nil)
5
+ desc, max = max, nil if desc.nil?
6
+ @bar ||= Log::ProgressBar.new max
7
+ @bar.desc = desc
8
+ @bar.max = max
9
+ @bar.init
10
+ @bar
11
+ end
12
+ end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/R'
2
+ require 'rbbt/vector/model/util'
2
3
 
3
4
  class VectorModel
4
5
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
data/share/R/MA.R CHANGED
@@ -1,4 +1,4 @@
1
- rbbt.require('limma')
1
+ rbbt.require('edgeR')
2
2
 
3
3
  #########################################################################
4
4
  # Model processing
@@ -57,6 +57,33 @@ rbbt.dm.matrix.differential.limma.twoside <- function(expr, subset.main, subset.
57
57
  return(list(t= fit$t[,2], p.values= fit$p.value[,2]));
58
58
  }
59
59
 
60
+ rbbt.dm.matrix.differential.DESeq <- function(expr, subset.main, subset.contrast) {
61
+ rbbt.require('DESeq2')
62
+ rbbt.require('HTSFilter')
63
+ rbbt.require('apeglm')
64
+
65
+ #expr[expr == 0] = NA
66
+ good.rows = apply(is.na(expr),1,sum) == 0
67
+ expr = expr[good.rows,]
68
+
69
+ condition_values = rep(c("contrast"), length(subset.contrast))
70
+ condition_values = c(condition_values, rep(c("condition"), length(subset.main)))
71
+ names = c(subset.contrast, subset.main)
72
+ conditions = data.frame(condition = as.factor(condition_values))
73
+
74
+
75
+ expr = expr[,names]
76
+
77
+ dds <- DESeqDataSetFromMatrix(countData = round(expr), colData = conditions, design = ~ condition)
78
+ dds <- DESeq(dds)
79
+
80
+ filter <- HTSFilter(dds, s.len=25, plot=FALSE)$filteredData
81
+
82
+ res <- lfcShrink(filter, type="apeglm", coef="condition_contrast_vs_condition")
83
+
84
+ return(res)
85
+ }
86
+
60
87
 
61
88
  rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
62
89
  if (two.channel){
@@ -66,34 +93,24 @@ rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
66
93
  }
67
94
  }
68
95
 
69
- rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, log2 = FALSE, outfile = NULL, key.field = NULL, two.channel = NULL, namespace = NULL, eBayes.trend = FALSE){
70
- if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
71
- data = data.matrix(rbbt.tsv(file));
72
- dimnames = dimnames(data)
73
- original.dimnames = dimnames;
74
-
75
- dimnames[[1]] = make.names(dimnames[[1]])
76
- dimnames[[2]] = make.names(dimnames[[2]])
77
-
78
- dimnames(data) <- dimnames
79
- main <- make.names(main);
80
- contrast <- make.names(contrast);
81
-
82
- data[data == 0] = NA
83
- good.rows = apply(is.na(data),1,sum) != dim(data)[2]
84
- data = data[good.rows,]
85
-
86
- ids = rownames(data);
87
- if (is.null(key.field)){ key.field = "ID" }
88
-
96
+ rbbt.dm.matrix.differential.limma <- function(data, main, contrast=NULL, log2=NULL, two.channel=NULL, eBayes.trend=NULL){
89
97
  if (is.null(log2)){
90
98
  log2 = rbbt.dm.matrix.guess.log2(data, two.channel)
91
99
  }
92
100
 
93
101
  if (log2){
94
- data = log2(data);
102
+ cutoff <- 1
103
+ drop <- which(apply(data, 1, max) < cutoff)
95
104
  min = min(data[data != -Inf])
96
105
  data[data == -Inf] = min
106
+ data <- DGEList(data)
107
+ data <- calcNormFactors(data)
108
+ data = cpm(data, log=TRUE, prior.count=3)
109
+ data <- data[-drop,]
110
+ }else{
111
+ data[data == 0] = NA
112
+ good.rows = apply(is.na(data),1,sum) != dim(data)[2]
113
+ data = data[good.rows,]
97
114
  }
98
115
 
99
116
  if (is.null(contrast)){
@@ -135,20 +152,50 @@ rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, log2 = FALS
135
152
 
136
153
 
137
154
  if (! is.null(limma) && sum(is.na(limma$t)) != length(limma$t)){
155
+ ids = rownames(data)
138
156
  result = data.frame(ratio = ratio[ids], t.values = limma$t[ids], p.values = limma$p.values[ids])
139
157
  result["adjusted.p.values"] = p.adjust(abs(result$p.values), "fdr") * sign(result$p.values)
140
158
  }else{
141
159
  result = data.frame(ratio = ratio)
142
160
  }
143
161
 
144
- rownames(result) <- original.dimnames[[1]][good.rows]
162
+ rownames(result) <- rownames(data)
163
+ result = result[!is.na(result$ratio),]
164
+
165
+ return(result)
166
+ }
167
+
168
+ rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, type = 'limma', log2 = FALSE, outfile = NULL, key.field = NULL, two.channel = NULL, namespace = NULL, eBayes.trend = FALSE){
169
+ data = data.matrix(rbbt.tsv(file));
170
+ dimnames = dimnames(data)
171
+
172
+ original.dimnames = dimnames;
173
+
174
+ #dimnames[[1]] = make.names(dimnames[[1]])
175
+ dimnames[[2]] = make.names(dimnames[[2]])
176
+
177
+ dimnames(data) <- dimnames
178
+ main <- make.names(main);
179
+
180
+ if (! is.null(contrast)){
181
+ contrast <- make.names(contrast);
182
+ }
183
+
184
+ if (type == 'limma')
185
+ result = rbbt.dm.matrix.differential.limma(data, main, contrast, log2, two.channel, eBayes.trend)
186
+ else
187
+ result = rbbt.dm.matrix.differential.DESeq(data, main, contrast)
188
+
189
+ if (is.null(outfile)){
190
+ return(result);
191
+ }else{
192
+ if (is.null(key.field)){ key.field = "ID" }
193
+ if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
194
+
195
+ rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
196
+ return(NULL);
197
+ }
145
198
 
146
- if (is.null(outfile)){
147
- return(result);
148
- }else{
149
- rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
150
- return(NULL);
151
- }
152
199
  }
153
200
 
154
201
 
data/share/R/barcode.R CHANGED
@@ -66,17 +66,21 @@ rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.fi
66
66
  rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID", clusters = c(2,3)){
67
67
 
68
68
  rbbt.require('mclust')
69
+ rbbt.require('R.utils')
69
70
 
70
71
  data = rbbt.tsv.numeric(matrix_file)
71
72
 
72
- classes = apply(data,1,function(row){
73
+ classes = apply(data, 1, function(row){
73
74
  row.na = is.na(row)
74
75
  clust = rep(NA, length(row))
75
- if (sum(row.na) <= length(row) - 5){
76
+ rbbt.log(str(row))
77
+ if (sum(row.na) <= length(row) - 5 && length(unique(row[!row.na])) > 4){
76
78
  clust[!row.na] = densityMclust(row[!row.na], prior=priorControl(), G=clusters)$classification
79
+ rbbt.log(str(clust))
77
80
  }
78
81
  clust
79
82
  })
83
+ rbbt.log("DONE")
80
84
 
81
85
  classes = data.frame(t(classes))
82
86
 
@@ -21,6 +21,26 @@ N4 N5
21
21
 
22
22
  path = Paths.dijkstra(network, start_node, [end_node])
23
23
  assert_equal %w(N1 N2 N4 N5), path.reverse
24
+
25
+ path = Paths.dijkstra(network, start_node, end_node)
26
+ assert_equal %w(N1 N2 N4 N5), path.reverse
27
+ end
28
+
29
+ def test_dijsktra_missing
30
+ network_txt=<<-EOF
31
+ #: :sep=/\s/#:type=:flat
32
+ #Start End
33
+ N1 N2
34
+ N2 N3 N4
35
+ N4 N5
36
+ EOF
37
+ network = TSV.open(StringIO.new(network_txt))
38
+
39
+ start_node = "N1"
40
+ end_node = "M5"
41
+
42
+ path = Paths.dijkstra(network, start_node, [end_node])
43
+ assert_nil path
24
44
  end
25
45
 
26
46
  def test_weighted_dijsktra
@@ -96,14 +96,16 @@ class TestSpaCyModel < Test::Unit::TestCase
96
96
 
97
97
  model = SpaCyModel.new(
98
98
  dir,
99
- "gpu/textcat_accuracy.conf"
99
+ "cpu/textcat_efficiency.conf"
100
100
  )
101
101
 
102
102
 
103
+ Rbbt::Config.set 'gpu_id', nil, :spacy
103
104
  require 'rbbt/tsv/csv'
104
105
  url = "https://raw.githubusercontent.com/hanzhang0420/Women-Clothing-E-commerce/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
105
106
  tsv = TSV.csv(Open.open(url))
106
107
  tsv = tsv.reorder("Review Text", ["Recommended IND"]).to_single
108
+ tsv = tsv.subset(tsv.keys.sample(100))
107
109
 
108
110
  good = tsv.select("Recommended IND" => '1')
109
111
  bad = tsv.select("Recommended IND" => '0')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.58
4
+ version: 1.1.60
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-19 00:00:00.000000000 Z
11
+ date: 2022-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -111,6 +111,7 @@ files:
111
111
  - lib/rbbt/vector/model/spaCy.rb
112
112
  - lib/rbbt/vector/model/svm.rb
113
113
  - lib/rbbt/vector/model/tensorflow.rb
114
+ - lib/rbbt/vector/model/util.rb
114
115
  - share/R/MA.R
115
116
  - share/R/barcode.R
116
117
  - share/R/heatmap.3.R