rbbt-dm 1.1.59 → 1.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e001607266948a5221118c15d1fc95ed4266b0f8880b2fa628350d429ed3f7d
4
- data.tar.gz: 1d56618e3039e1d99c8183aace2ae20e8cd3dafce0d574b5dbd49ce4f5a1ee14
3
+ metadata.gz: c1f04f874e4f4d9e6b7b4dd19fb7f8e1c5c0926f6621cb246324294dab0c9664
4
+ data.tar.gz: ddc35ddc2c747a98405dd021e4268379f667859d3f62d0bf4785457dc8ab952c
5
5
  SHA512:
6
- metadata.gz: d8d324c664257cb142ae7363de776ea7b6e367cd14c22026018c00de335bc3e35be428d00dad6d84a61c3f0874057612d1379e6839b1cea6fc312ea5d8e9a699
7
- data.tar.gz: b2e52024a63f3105ac88ca1b471df0b69fe91237a1e3fa70185fc519e0740421c58755eb3560003c9f4e4f60b6479bf449fca7596684e3badba46e4ec242feee
6
+ metadata.gz: cac8f02f1d4a34658f8cf3fb8f226964b83ae56ebb79fb854f7485b6deb0364a80a7620e1f1b55113e9c18e2b48c43ec080de0eb046a3e1fb762896970a2332e
7
+ data.tar.gz: b72acf9908a04cbd0812772456cfe23f52385b830405231a520642a6e08b96773c01f8bb112758ca775e5a86d8ab923d1c325b56efe6561611d7102af7fcfcdf
@@ -9,7 +9,7 @@ source('#{Rbbt.share.R['barcode.R'].find}')
9
9
  rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
10
10
  EOF
11
11
 
12
- R.run(cmd)
12
+ R.run(cmd, :monitor => true)
13
13
  end
14
14
 
15
15
  def barcode_ruby(outfile, factor = 2)
@@ -55,7 +55,7 @@ source('#{Rbbt.share.R['barcode.R'].find}')
55
55
  rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R key_field}, #{R.ruby2R clusters})
56
56
  EOF
57
57
 
58
- R.run(cmd)
58
+ R.run(cmd, :monitor => true)
59
59
  end
60
60
 
61
61
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/R'
2
+ require 'rbbt/matrix'
2
3
 
3
4
  class RbbtMatrix
4
5
  def differential(main, contrast, path = nil)
@@ -31,9 +32,7 @@ class RbbtMatrix
31
32
  trend = false
32
33
  two_channel = false
33
34
  when 'fpkm'
34
- log2 = true
35
- trend = true
36
- two_channel = false
35
+ type = "DESeq"
37
36
  when 'log2 ratio', 'transformed count'
38
37
  log2 = false
39
38
  trend = false
@@ -53,6 +52,7 @@ class RbbtMatrix
53
52
  source('#{Rbbt.share.R["MA.R"].find(:lib)}')
54
53
 
55
54
  data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
55
+ type = #{R.ruby2R type},
56
56
  main = #{R.ruby2R(main_samples)},
57
57
  contrast = #{R.ruby2R(contrast_samples)},
58
58
  log2=#{ R.ruby2R log2 },
data/lib/rbbt/matrix.rb CHANGED
@@ -140,6 +140,21 @@ class RbbtMatrix
140
140
  [main_samples, contrast_samples]
141
141
  end
142
142
 
143
+ def transpose(id = nil)
144
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file
145
+
146
+ file = Persist.persist(data_file, :tsv, :prefix => "Transpose", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
147
+
148
+ data = data_file.tsv(:cast => :to_f, :type => :double).transpose(id)
149
+
150
+ data.to_list{|v| v.length > 1 ? Misc.mean(v) : v }
151
+ end
152
+ subsets = self.subsets
153
+ matrix = RbbtMatrix.new file, labels, value_type, key_field, organism
154
+ matrix.subsets = subsets
155
+ matrix
156
+ end
157
+
143
158
  def to_average(identifiers = nil)
144
159
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
145
160
 
@@ -3,8 +3,16 @@ require 'fc'
3
3
  module Paths
4
4
 
5
5
  def self.dijkstra(adjacency, start_node, end_node = nil, max_steps = nil)
6
+
6
7
  return nil unless adjacency.include? start_node
7
8
 
9
+ case end_node
10
+ when String
11
+ return nil unless adjacency.values.flatten.include? end_node
12
+ when Array
13
+ return nil unless (adjacency.values.flatten & end_node).any?
14
+ end
15
+
8
16
  active = FastContainers::PriorityQueue.new(:min)
9
17
  distances = Hash.new { 1.0 / 0.0 }
10
18
  parents = Hash.new
@@ -63,11 +63,16 @@ class SpaCyModel < VectorModel
63
63
  docs = []
64
64
  bar = bar(features.length, "Evaluating model")
65
65
  SpaCyModel.spacy do
66
+ gpu = Rbbt::Config.get('gpu_id', :spacy, :spacy_train, :default => 0)
67
+ gpu = gpu.to_i if gpu && gpu != ""
68
+ spacy.require_gpu(gpu) if gpu
66
69
  nlp = spacy.load("#{file}/model-best")
67
70
 
68
71
  docs = nlp.pipe(texts)
69
72
  RbbtPython.collect docs, :bar => bar do |d|
70
- d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
73
+ Misc.timeout_insist(20) do
74
+ d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
75
+ end
71
76
  end
72
77
  #nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
73
78
  #Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
data/share/R/MA.R CHANGED
@@ -1,4 +1,4 @@
1
- rbbt.require('limma')
1
+ rbbt.require('edgeR')
2
2
 
3
3
  #########################################################################
4
4
  # Model processing
@@ -57,6 +57,33 @@ rbbt.dm.matrix.differential.limma.twoside <- function(expr, subset.main, subset.
57
57
  return(list(t= fit$t[,2], p.values= fit$p.value[,2]));
58
58
  }
59
59
 
60
+ rbbt.dm.matrix.differential.DESeq <- function(expr, subset.main, subset.contrast) {
61
+ rbbt.require('DESeq2')
62
+ rbbt.require('HTSFilter')
63
+ rbbt.require('apeglm')
64
+
65
+ #expr[expr == 0] = NA
66
+ good.rows = apply(is.na(expr),1,sum) == 0
67
+ expr = expr[good.rows,]
68
+
69
+ condition_values = rep(c("contrast"), length(subset.contrast))
70
+ condition_values = c(condition_values, rep(c("condition"), length(subset.main)))
71
+ names = c(subset.contrast, subset.main)
72
+ conditions = data.frame(condition = as.factor(condition_values))
73
+
74
+
75
+ expr = expr[,names]
76
+
77
+ dds <- DESeqDataSetFromMatrix(countData = round(expr), colData = conditions, design = ~ condition)
78
+ dds <- DESeq(dds)
79
+
80
+ filter <- HTSFilter(dds, s.len=25, plot=FALSE)$filteredData
81
+
82
+ res <- lfcShrink(filter, type="apeglm", coef="condition_contrast_vs_condition")
83
+
84
+ return(res)
85
+ }
86
+
60
87
 
61
88
  rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
62
89
  if (two.channel){
@@ -66,34 +93,24 @@ rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
66
93
  }
67
94
  }
68
95
 
69
- rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, log2 = FALSE, outfile = NULL, key.field = NULL, two.channel = NULL, namespace = NULL, eBayes.trend = FALSE){
70
- if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
71
- data = data.matrix(rbbt.tsv(file));
72
- dimnames = dimnames(data)
73
- original.dimnames = dimnames;
74
-
75
- dimnames[[1]] = make.names(dimnames[[1]])
76
- dimnames[[2]] = make.names(dimnames[[2]])
77
-
78
- dimnames(data) <- dimnames
79
- main <- make.names(main);
80
- contrast <- make.names(contrast);
81
-
82
- data[data == 0] = NA
83
- good.rows = apply(is.na(data),1,sum) != dim(data)[2]
84
- data = data[good.rows,]
85
-
86
- ids = rownames(data);
87
- if (is.null(key.field)){ key.field = "ID" }
88
-
96
+ rbbt.dm.matrix.differential.limma <- function(data, main, contrast=NULL, log2=NULL, two.channel=NULL, eBayes.trend=NULL){
89
97
  if (is.null(log2)){
90
98
  log2 = rbbt.dm.matrix.guess.log2(data, two.channel)
91
99
  }
92
100
 
93
101
  if (log2){
94
- data = log2(data);
102
+ cutoff <- 1
103
+ drop <- which(apply(data, 1, max) < cutoff)
95
104
  min = min(data[data != -Inf])
96
105
  data[data == -Inf] = min
106
+ data <- DGEList(data)
107
+ data <- calcNormFactors(data)
108
+ data = cpm(data, log=TRUE, prior.count=3)
109
+ data <- data[-drop,]
110
+ }else{
111
+ data[data == 0] = NA
112
+ good.rows = apply(is.na(data),1,sum) != dim(data)[2]
113
+ data = data[good.rows,]
97
114
  }
98
115
 
99
116
  if (is.null(contrast)){
@@ -135,20 +152,50 @@ rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, log2 = FALS
135
152
 
136
153
 
137
154
  if (! is.null(limma) && sum(is.na(limma$t)) != length(limma$t)){
155
+ ids = rownames(data)
138
156
  result = data.frame(ratio = ratio[ids], t.values = limma$t[ids], p.values = limma$p.values[ids])
139
157
  result["adjusted.p.values"] = p.adjust(abs(result$p.values), "fdr") * sign(result$p.values)
140
158
  }else{
141
159
  result = data.frame(ratio = ratio)
142
160
  }
143
161
 
144
- rownames(result) <- original.dimnames[[1]][good.rows]
162
+ rownames(result) <- rownames(data)
163
+ result = result[!is.na(result$ratio),]
164
+
165
+ return(result)
166
+ }
167
+
168
+ rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, type = 'limma', log2 = FALSE, outfile = NULL, key.field = NULL, two.channel = NULL, namespace = NULL, eBayes.trend = FALSE){
169
+ data = data.matrix(rbbt.tsv(file));
170
+ dimnames = dimnames(data)
171
+
172
+ original.dimnames = dimnames;
173
+
174
+ #dimnames[[1]] = make.names(dimnames[[1]])
175
+ dimnames[[2]] = make.names(dimnames[[2]])
176
+
177
+ dimnames(data) <- dimnames
178
+ main <- make.names(main);
179
+
180
+ if (! is.null(contrast)){
181
+ contrast <- make.names(contrast);
182
+ }
183
+
184
+ if (type == 'limma')
185
+ result = rbbt.dm.matrix.differential.limma(data, main, contrast, log2, two.channel, eBayes.trend)
186
+ else
187
+ result = rbbt.dm.matrix.differential.DESeq(data, main, contrast)
188
+
189
+ if (is.null(outfile)){
190
+ return(result);
191
+ }else{
192
+ if (is.null(key.field)){ key.field = "ID" }
193
+ if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
194
+
195
+ rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
196
+ return(NULL);
197
+ }
145
198
 
146
- if (is.null(outfile)){
147
- return(result);
148
- }else{
149
- rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
150
- return(NULL);
151
- }
152
199
  }
153
200
 
154
201
 
data/share/R/barcode.R CHANGED
@@ -66,17 +66,21 @@ rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.fi
66
66
  rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID", clusters = c(2,3)){
67
67
 
68
68
  rbbt.require('mclust')
69
+ rbbt.require('R.utils')
69
70
 
70
71
  data = rbbt.tsv.numeric(matrix_file)
71
72
 
72
- classes = apply(data,1,function(row){
73
+ classes = apply(data, 1, function(row){
73
74
  row.na = is.na(row)
74
75
  clust = rep(NA, length(row))
75
- if (sum(row.na) <= length(row) - 5){
76
+ rbbt.log(str(row))
77
+ if (sum(row.na) <= length(row) - 5 && length(unique(row[!row.na])) > 4){
76
78
  clust[!row.na] = densityMclust(row[!row.na], prior=priorControl(), G=clusters)$classification
79
+ rbbt.log(str(clust))
77
80
  }
78
81
  clust
79
82
  })
83
+ rbbt.log("DONE")
80
84
 
81
85
  classes = data.frame(t(classes))
82
86
 
@@ -21,6 +21,26 @@ N4 N5
21
21
 
22
22
  path = Paths.dijkstra(network, start_node, [end_node])
23
23
  assert_equal %w(N1 N2 N4 N5), path.reverse
24
+
25
+ path = Paths.dijkstra(network, start_node, end_node)
26
+ assert_equal %w(N1 N2 N4 N5), path.reverse
27
+ end
28
+
29
+ def test_dijsktra_missing
30
+ network_txt=<<-EOF
31
+ #: :sep=/\s/#:type=:flat
32
+ #Start End
33
+ N1 N2
34
+ N2 N3 N4
35
+ N4 N5
36
+ EOF
37
+ network = TSV.open(StringIO.new(network_txt))
38
+
39
+ start_node = "N1"
40
+ end_node = "M5"
41
+
42
+ path = Paths.dijkstra(network, start_node, [end_node])
43
+ assert_nil path
24
44
  end
25
45
 
26
46
  def test_weighted_dijsktra
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.59
4
+ version: 1.1.60
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-29 00:00:00.000000000 Z
11
+ date: 2022-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util