rbbt-dm 1.1.59 → 1.1.60
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/matrix/barcode.rb +2 -2
- data/lib/rbbt/matrix/differential.rb +3 -3
- data/lib/rbbt/matrix.rb +15 -0
- data/lib/rbbt/network/paths.rb +8 -0
- data/lib/rbbt/vector/model/spaCy.rb +6 -1
- data/share/R/MA.R +76 -29
- data/share/R/barcode.R +6 -2
- data/test/rbbt/network/test_paths.rb +20 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1f04f874e4f4d9e6b7b4dd19fb7f8e1c5c0926f6621cb246324294dab0c9664
|
4
|
+
data.tar.gz: ddc35ddc2c747a98405dd021e4268379f667859d3f62d0bf4785457dc8ab952c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cac8f02f1d4a34658f8cf3fb8f226964b83ae56ebb79fb854f7485b6deb0364a80a7620e1f1b55113e9c18e2b48c43ec080de0eb046a3e1fb762896970a2332e
|
7
|
+
data.tar.gz: b72acf9908a04cbd0812772456cfe23f52385b830405231a520642a6e08b96773c01f8bb112758ca775e5a86d8ab923d1c325b56efe6561611d7102af7fcfcdf
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
@@ -9,7 +9,7 @@ source('#{Rbbt.share.R['barcode.R'].find}')
|
|
9
9
|
rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
10
10
|
EOF
|
11
11
|
|
12
|
-
R.run(cmd)
|
12
|
+
R.run(cmd, :monitor => true)
|
13
13
|
end
|
14
14
|
|
15
15
|
def barcode_ruby(outfile, factor = 2)
|
@@ -55,7 +55,7 @@ source('#{Rbbt.share.R['barcode.R'].find}')
|
|
55
55
|
rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R key_field}, #{R.ruby2R clusters})
|
56
56
|
EOF
|
57
57
|
|
58
|
-
R.run(cmd)
|
58
|
+
R.run(cmd, :monitor => true)
|
59
59
|
end
|
60
60
|
|
61
61
|
|
data/lib/rbbt/matrix/differential.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rbbt/util/R'
|
2
|
+
require 'rbbt/matrix'
|
2
3
|
|
3
4
|
class RbbtMatrix
|
4
5
|
def differential(main, contrast, path = nil)
|
@@ -31,9 +32,7 @@ class RbbtMatrix
|
|
31
32
|
trend = false
|
32
33
|
two_channel = false
|
33
34
|
when 'fpkm'
|
34
|
-
|
35
|
-
trend = true
|
36
|
-
two_channel = false
|
35
|
+
type = "DESeq"
|
37
36
|
when 'log2 ratio', 'transformed count'
|
38
37
|
log2 = false
|
39
38
|
trend = false
|
@@ -53,6 +52,7 @@ class RbbtMatrix
|
|
53
52
|
source('#{Rbbt.share.R["MA.R"].find(:lib)}')
|
54
53
|
|
55
54
|
data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
|
55
|
+
type = #{R.ruby2R type},
|
56
56
|
main = #{R.ruby2R(main_samples)},
|
57
57
|
contrast = #{R.ruby2R(contrast_samples)},
|
58
58
|
log2=#{ R.ruby2R log2 },
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -140,6 +140,21 @@ class RbbtMatrix
|
|
140
140
|
[main_samples, contrast_samples]
|
141
141
|
end
|
142
142
|
|
143
|
+
def transpose(id = nil)
|
144
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
145
|
+
|
146
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Transpose", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
|
147
|
+
|
148
|
+
data = data_file.tsv(:cast => :to_f, :type => :double).transpose(id)
|
149
|
+
|
150
|
+
data.to_list{|v| v.length > 1 ? Misc.mean(v) : v }
|
151
|
+
end
|
152
|
+
subsets = self.subsets
|
153
|
+
matrix = RbbtMatrix.new file, labels, value_type, key_field, organism
|
154
|
+
matrix.subsets = subsets
|
155
|
+
matrix
|
156
|
+
end
|
157
|
+
|
143
158
|
def to_average(identifiers = nil)
|
144
159
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
145
160
|
|
data/lib/rbbt/network/paths.rb
CHANGED
@@ -3,8 +3,16 @@ require 'fc'
|
|
3
3
|
module Paths
|
4
4
|
|
5
5
|
def self.dijkstra(adjacency, start_node, end_node = nil, max_steps = nil)
|
6
|
+
|
6
7
|
return nil unless adjacency.include? start_node
|
7
8
|
|
9
|
+
case end_node
|
10
|
+
when String
|
11
|
+
return nil unless adjacency.values.flatten.include? end_node
|
12
|
+
when Array
|
13
|
+
return nil unless (adjacency.values.flatten & end_node).any?
|
14
|
+
end
|
15
|
+
|
8
16
|
active = FastContainers::PriorityQueue.new(:min)
|
9
17
|
distances = Hash.new { 1.0 / 0.0 }
|
10
18
|
parents = Hash.new
|
data/lib/rbbt/vector/model/spaCy.rb
CHANGED
@@ -63,11 +63,16 @@ class SpaCyModel < VectorModel
|
|
63
63
|
docs = []
|
64
64
|
bar = bar(features.length, "Evaluating model")
|
65
65
|
SpaCyModel.spacy do
|
66
|
+
gpu = Rbbt::Config.get('gpu_id', :spacy, :spacy_train, :default => 0)
|
67
|
+
gpu = gpu.to_i if gpu && gpu != ""
|
68
|
+
spacy.require_gpu(gpu) if gpu
|
66
69
|
nlp = spacy.load("#{file}/model-best")
|
67
70
|
|
68
71
|
docs = nlp.pipe(texts)
|
69
72
|
RbbtPython.collect docs, :bar => bar do |d|
|
70
|
-
|
73
|
+
Misc.timeout_insist(20) do
|
74
|
+
d.cats.sort_by{|l,v| v.to_f || 0 }.last.first
|
75
|
+
end
|
71
76
|
end
|
72
77
|
#nlp.(docs).cats.collect{|cats| cats.sort_by{|l,v| v.to_f }.last.first }
|
73
78
|
#Log::ProgressBar.with_bar texts.length, :desc => "Evaluating documents" do |bar|
|
data/share/R/MA.R
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
rbbt.require('
|
1
|
+
rbbt.require('edgeR')
|
2
2
|
|
3
3
|
#########################################################################
|
4
4
|
# Model processing
|
@@ -57,6 +57,33 @@ rbbt.dm.matrix.differential.limma.twoside <- function(expr, subset.main, subset.
|
|
57
57
|
return(list(t= fit$t[,2], p.values= fit$p.value[,2]));
|
58
58
|
}
|
59
59
|
|
60
|
+
rbbt.dm.matrix.differential.DESeq <- function(expr, subset.main, subset.contrast) {
|
61
|
+
rbbt.require('DESeq2')
|
62
|
+
rbbt.require('HTSFilter')
|
63
|
+
rbbt.require('apeglm')
|
64
|
+
|
65
|
+
#expr[expr == 0] = NA
|
66
|
+
good.rows = apply(is.na(expr),1,sum) == 0
|
67
|
+
expr = expr[good.rows,]
|
68
|
+
|
69
|
+
condition_values = rep(c("contrast"), length(subset.contrast))
|
70
|
+
condition_values = c(condition_values, rep(c("condition"), length(subset.main)))
|
71
|
+
names = c(subset.contrast, subset.main)
|
72
|
+
conditions = data.frame(condition = as.factor(condition_values))
|
73
|
+
|
74
|
+
|
75
|
+
expr = expr[,names]
|
76
|
+
|
77
|
+
dds <- DESeqDataSetFromMatrix(countData = round(expr), colData = conditions, design = ~ condition)
|
78
|
+
dds <- DESeq(dds)
|
79
|
+
|
80
|
+
filter <- HTSFilter(dds, s.len=25, plot=FALSE)$filteredData
|
81
|
+
|
82
|
+
res <- lfcShrink(filter, type="apeglm", coef="condition_contrast_vs_condition")
|
83
|
+
|
84
|
+
return(res)
|
85
|
+
}
|
86
|
+
|
60
87
|
|
61
88
|
rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
|
62
89
|
if (two.channel){
|
@@ -66,34 +93,24 @@ rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
|
|
66
93
|
}
|
67
94
|
}
|
68
95
|
|
69
|
-
rbbt.dm.matrix.differential <- function(
|
70
|
-
if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
|
71
|
-
data = data.matrix(rbbt.tsv(file));
|
72
|
-
dimnames = dimnames(data)
|
73
|
-
original.dimnames = dimnames;
|
74
|
-
|
75
|
-
dimnames[[1]] = make.names(dimnames[[1]])
|
76
|
-
dimnames[[2]] = make.names(dimnames[[2]])
|
77
|
-
|
78
|
-
dimnames(data) <- dimnames
|
79
|
-
main <- make.names(main);
|
80
|
-
contrast <- make.names(contrast);
|
81
|
-
|
82
|
-
data[data == 0] = NA
|
83
|
-
good.rows = apply(is.na(data),1,sum) != dim(data)[2]
|
84
|
-
data = data[good.rows,]
|
85
|
-
|
86
|
-
ids = rownames(data);
|
87
|
-
if (is.null(key.field)){ key.field = "ID" }
|
88
|
-
|
96
|
+
rbbt.dm.matrix.differential.limma <- function(data, main, contrast=NULL, log2=NULL, two.channel=NULL, eBayes.trend=NULL){
|
89
97
|
if (is.null(log2)){
|
90
98
|
log2 = rbbt.dm.matrix.guess.log2(data, two.channel)
|
91
99
|
}
|
92
100
|
|
93
101
|
if (log2){
|
94
|
-
|
102
|
+
cutoff <- 1
|
103
|
+
drop <- which(apply(data, 1, max) < cutoff)
|
95
104
|
min = min(data[data != -Inf])
|
96
105
|
data[data == -Inf] = min
|
106
|
+
data <- DGEList(data)
|
107
|
+
data <- calcNormFactors(data)
|
108
|
+
data = cpm(data, log=TRUE, prior.count=3)
|
109
|
+
data <- data[-drop,]
|
110
|
+
}else{
|
111
|
+
data[data == 0] = NA
|
112
|
+
good.rows = apply(is.na(data),1,sum) != dim(data)[2]
|
113
|
+
data = data[good.rows,]
|
97
114
|
}
|
98
115
|
|
99
116
|
if (is.null(contrast)){
|
@@ -135,20 +152,50 @@ rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, log2 = FALS
|
|
135
152
|
|
136
153
|
|
137
154
|
if (! is.null(limma) && sum(is.na(limma$t)) != length(limma$t)){
|
155
|
+
ids = rownames(data)
|
138
156
|
result = data.frame(ratio = ratio[ids], t.values = limma$t[ids], p.values = limma$p.values[ids])
|
139
157
|
result["adjusted.p.values"] = p.adjust(abs(result$p.values), "fdr") * sign(result$p.values)
|
140
158
|
}else{
|
141
159
|
result = data.frame(ratio = ratio)
|
142
160
|
}
|
143
161
|
|
144
|
-
rownames(result) <-
|
162
|
+
rownames(result) <- rownames(data)
|
163
|
+
result = result[!is.na(result$ratio),]
|
164
|
+
|
165
|
+
return(result)
|
166
|
+
}
|
167
|
+
|
168
|
+
rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, type = 'limma', log2 = FALSE, outfile = NULL, key.field = NULL, two.channel = NULL, namespace = NULL, eBayes.trend = FALSE){
|
169
|
+
data = data.matrix(rbbt.tsv(file));
|
170
|
+
dimnames = dimnames(data)
|
171
|
+
|
172
|
+
original.dimnames = dimnames;
|
173
|
+
|
174
|
+
#dimnames[[1]] = make.names(dimnames[[1]])
|
175
|
+
dimnames[[2]] = make.names(dimnames[[2]])
|
176
|
+
|
177
|
+
dimnames(data) <- dimnames
|
178
|
+
main <- make.names(main);
|
179
|
+
|
180
|
+
if (! is.null(contrast)){
|
181
|
+
contrast <- make.names(contrast);
|
182
|
+
}
|
183
|
+
|
184
|
+
if (type == 'limma')
|
185
|
+
result = rbbt.dm.matrix.differential.limma(data, main, contrast, log2, two.channel, eBayes.trend)
|
186
|
+
else
|
187
|
+
result = rbbt.dm.matrix.differential.DESeq(data, main, contrast)
|
188
|
+
|
189
|
+
if (is.null(outfile)){
|
190
|
+
return(result);
|
191
|
+
}else{
|
192
|
+
if (is.null(key.field)){ key.field = "ID" }
|
193
|
+
if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
|
194
|
+
|
195
|
+
rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
|
196
|
+
return(NULL);
|
197
|
+
}
|
145
198
|
|
146
|
-
if (is.null(outfile)){
|
147
|
-
return(result);
|
148
|
-
}else{
|
149
|
-
rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
|
150
|
-
return(NULL);
|
151
|
-
}
|
152
199
|
}
|
153
200
|
|
154
201
|
|
data/share/R/barcode.R
CHANGED
@@ -66,17 +66,21 @@ rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.fi
|
|
66
66
|
rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID", clusters = c(2,3)){
|
67
67
|
|
68
68
|
rbbt.require('mclust')
|
69
|
+
rbbt.require('R.utils')
|
69
70
|
|
70
71
|
data = rbbt.tsv.numeric(matrix_file)
|
71
72
|
|
72
|
-
classes = apply(data,1,function(row){
|
73
|
+
classes = apply(data, 1, function(row){
|
73
74
|
row.na = is.na(row)
|
74
75
|
clust = rep(NA, length(row))
|
75
|
-
|
76
|
+
rbbt.log(str(row))
|
77
|
+
if (sum(row.na) <= length(row) - 5 && length(unique(row[!row.na])) > 4){
|
76
78
|
clust[!row.na] = densityMclust(row[!row.na], prior=priorControl(), G=clusters)$classification
|
79
|
+
rbbt.log(str(clust))
|
77
80
|
}
|
78
81
|
clust
|
79
82
|
})
|
83
|
+
rbbt.log("DONE")
|
80
84
|
|
81
85
|
classes = data.frame(t(classes))
|
82
86
|
|
data/test/rbbt/network/test_paths.rb
CHANGED
@@ -21,6 +21,26 @@ N4 N5
|
|
21
21
|
|
22
22
|
path = Paths.dijkstra(network, start_node, [end_node])
|
23
23
|
assert_equal %w(N1 N2 N4 N5), path.reverse
|
24
|
+
|
25
|
+
path = Paths.dijkstra(network, start_node, end_node)
|
26
|
+
assert_equal %w(N1 N2 N4 N5), path.reverse
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_dijsktra_missing
|
30
|
+
network_txt=<<-EOF
|
31
|
+
#: :sep=/\s/#:type=:flat
|
32
|
+
#Start End
|
33
|
+
N1 N2
|
34
|
+
N2 N3 N4
|
35
|
+
N4 N5
|
36
|
+
EOF
|
37
|
+
network = TSV.open(StringIO.new(network_txt))
|
38
|
+
|
39
|
+
start_node = "N1"
|
40
|
+
end_node = "M5"
|
41
|
+
|
42
|
+
path = Paths.dijkstra(network, start_node, [end_node])
|
43
|
+
assert_nil path
|
24
44
|
end
|
25
45
|
|
26
46
|
def test_weighted_dijsktra
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.59
|
4
|
+
version: 1.1.60
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|