miga-base 1.3.13.9 → 1.3.14.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/browse.rb +1 -1
- data/lib/miga/project/result.rb +6 -1
- data/lib/miga/remote_dataset/base.rb +9 -0
- data/lib/miga/remote_dataset/download.rb +6 -3
- data/lib/miga/remote_dataset.rb +10 -0
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +10 -9
- data/scripts/ani_distances.bash +10 -9
- data/utils/find-medoid.R +12 -7
- data/utils/subclade/pipeline.rb +3 -2
- data/utils/subclades.R +18 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2459aa0f06af71628701bc0a71c1830bd21fc24d5c3e0999c8f59b6bce8b6cf6
|
4
|
+
data.tar.gz: 1590b03b5dfbe42241dd943a61388ff2e22a4956c6e9169d321a9bb857b9713f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf4da3f0519c77fd290e92650a6a53b871d678516eda891ef15272e0fa5a8b4aa518a4531211b0ee9d78ca2e42798f6421b41a3980c3f6e64a159f78bf105e9a
|
7
|
+
data.tar.gz: e771646155d87a7f8e7896b5cfa121f94a60dbce78cd5be0231faf42e7f4aa2b75de4c3fdf043dadb33d07a67bca180d12a12871d1066bff73a454eecdf565a5
|
@@ -179,7 +179,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
|
|
179
179
|
str
|
180
180
|
.to_s.unmiga_name
|
181
181
|
.sub(/^./, &:upcase)
|
182
|
-
.gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|ani95|aai90| db$| ssu )/, &:upcase)
|
182
|
+
.gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|Rda|ani95|aai90| db$| ssu )/, &:upcase)
|
183
183
|
.sub(/Haai/, 'hAAI')
|
184
184
|
.sub(/Mytaxa/, 'MyTaxa')
|
185
185
|
.sub(/ pvalue$/, ' p-value')
|
data/lib/miga/project/result.rb
CHANGED
@@ -55,10 +55,13 @@ module MiGA::Project::Result
|
|
55
55
|
##
|
56
56
|
# Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
|
57
57
|
def add_result_distances(base, _opts)
|
58
|
-
return nil unless result_files_exist?(base,
|
58
|
+
return nil unless result_files_exist?(base, ['.txt']) &&
|
59
|
+
(result_files_exist?(base, ['.rds']) ||
|
60
|
+
result_files_exist?(base, ['.rda']))
|
59
61
|
|
60
62
|
r = MiGA::Result.new("#{base}.json")
|
61
63
|
r.add_file(:rds, 'miga-project.rds')
|
64
|
+
r.add_file(:rda, 'miga-project.rda')
|
62
65
|
r.add_file(:rdata, 'miga-project.Rdata') # Legacy file
|
63
66
|
r.add_file(:matrix, 'miga-project.txt')
|
64
67
|
r.add_file(:log, 'miga-project.log') # Legacy file
|
@@ -84,6 +87,7 @@ module MiGA::Project::Result
|
|
84
87
|
|
85
88
|
r = add_result_iter_clades(base)
|
86
89
|
r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
|
90
|
+
r.add_file(:aai_dist_rda, 'miga-project.dist.rda')
|
87
91
|
r.add_file(:aai_tree, 'miga-project.aai.nwk')
|
88
92
|
r.add_file(:proposal, 'miga-project.proposed-clades')
|
89
93
|
r.add_file(:clades_aai90, 'miga-project.aai90-clades')
|
@@ -108,6 +112,7 @@ module MiGA::Project::Result
|
|
108
112
|
r = add_result_iter_clades(base)
|
109
113
|
r.add_file(:ani_tree, 'miga-project.ani.nwk')
|
110
114
|
r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
|
115
|
+
r.add_file(:ani_dist_rda, 'miga-project.dist.rda')
|
111
116
|
r
|
112
117
|
end
|
113
118
|
|
@@ -134,6 +134,15 @@ module MiGA::RemoteDataset::Base
|
|
134
134
|
end,
|
135
135
|
method: :get
|
136
136
|
},
|
137
|
+
ncbi_fetch: {
|
138
|
+
dbs: { nuccore: { stage: :metadata, format: :gb } },
|
139
|
+
uri: lambda do |opts|
|
140
|
+
@@_EUTILS_BUILD[:efetch,
|
141
|
+
db: opts[:db], id: opts[:ids], rettype: opts[:format], retmode: :text
|
142
|
+
]
|
143
|
+
end,
|
144
|
+
method: :get
|
145
|
+
},
|
137
146
|
ncbi_search: {
|
138
147
|
dbs: {
|
139
148
|
assembly: { stage: :metadata, format: :json },
|
@@ -107,9 +107,12 @@ class MiGA::RemoteDataset
|
|
107
107
|
|
108
108
|
MiGA::MiGA.DEBUG 'Empty sequence, attempting download as WGS records'
|
109
109
|
a, b = opts[:obj].metadata[:ncbi_wgs].split('-', 2)
|
110
|
-
|
111
|
-
|
112
|
-
|
110
|
+
ids = [a]
|
111
|
+
unless b.nil?
|
112
|
+
pref = longest_common_prefix([a, b])
|
113
|
+
rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
|
114
|
+
ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
|
115
|
+
end
|
113
116
|
download_rest(opts.merge(universe: :ncbi, db: :nuccore, ids: ids))
|
114
117
|
end
|
115
118
|
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -300,6 +300,16 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
300
300
|
metadata[:web_assembly_gz] ||=
|
301
301
|
'%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
|
302
302
|
end
|
303
|
+
|
304
|
+
# If all conditions are right, try getting the WGS range
|
305
|
+
if @_ncbi_asm_json_doc['wgs'] && !@_ncbi_asm_json_doc['wgs'].empty? &&
|
306
|
+
metadata[:ncbi_nuccore] && !metadata[:ncbi_wgs]
|
307
|
+
doc = self.class.download(:ncbi_fetch, :nuccore, metadata[:ncbi_nuccore], :gb).split(/\n/)
|
308
|
+
ln = doc.grep(/^WGS\s+\S+-\S+/).first
|
309
|
+
wgs = ln&.gsub(/^WGS\s+(\S+-\S+).*/, '\1')
|
310
|
+
metadata[:ncbi_wgs] = wgs if wgs
|
311
|
+
end
|
312
|
+
|
303
313
|
@_ncbi_asm_json_doc
|
304
314
|
end
|
305
315
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3,
|
15
|
+
VERSION = [1.3, 14, 1].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024,
|
23
|
+
VERSION_DATE = Date.new(2024, 4, 1)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/aai_distances.bash
CHANGED
@@ -40,15 +40,16 @@ rm "miga-project.txt.lno"
|
|
40
40
|
# R-ify
|
41
41
|
cat <<R | R --vanilla
|
42
42
|
file <- gzfile("miga-project.txt.gz")
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
)
|
49
|
-
|
50
|
-
|
51
|
-
|
43
|
+
text <- readLines(file, n = $LNO + 1, ok = FALSE)
|
44
|
+
list <- strsplit(text[-1], "\t", fixed = TRUE)
|
45
|
+
a <- sapply(list, function(x) x[1])
|
46
|
+
b <- sapply(list, function(x) x[2])
|
47
|
+
d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
|
48
|
+
save(a, b, d, file = "miga-project.rda")
|
49
|
+
|
50
|
+
non_self <- a != b
|
51
|
+
if(sum(non_self) > 0) {
|
52
|
+
h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
|
52
53
|
len <- length(h[["breaks"]])
|
53
54
|
write.table(
|
54
55
|
cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
|
data/scripts/ani_distances.bash
CHANGED
@@ -34,15 +34,16 @@ rm "miga-project.txt.lno"
|
|
34
34
|
# R-ify
|
35
35
|
cat <<R | R --vanilla
|
36
36
|
file <- gzfile("miga-project.txt.gz")
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
)
|
43
|
-
|
44
|
-
|
45
|
-
|
37
|
+
text <- readLines(file, n = $LNO + 1, ok = FALSE)
|
38
|
+
list <- strsplit(text[-1], "\t", fixed = TRUE)
|
39
|
+
a <- sapply(list, function(x) x[1])
|
40
|
+
b <- sapply(list, function(x) x[2])
|
41
|
+
d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
|
42
|
+
save(a, b, d, file = "miga-project.rda")
|
43
|
+
|
44
|
+
non_self <- a != b
|
45
|
+
if(sum(non_self) > 0) {
|
46
|
+
h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
|
46
47
|
len <- length(h[["breaks"]])
|
47
48
|
write.table(
|
48
49
|
cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
|
data/utils/find-medoid.R
CHANGED
@@ -16,15 +16,14 @@ if(Sys.getenv("MIGA") == ""){
|
|
16
16
|
))
|
17
17
|
}
|
18
18
|
|
19
|
-
find_medoids <- function (
|
20
|
-
if(
|
21
|
-
|
22
|
-
dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
|
19
|
+
find_medoids <- function (a, b, d, out, clades) {
|
20
|
+
if (length(d) == 0) return(NULL)
|
21
|
+
dist <- enve.df2dist(cbind(a, b, d), "a", "b", "d", default.d = max(d) * 1.2)
|
23
22
|
dist <- as.matrix(dist)
|
24
23
|
cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
|
25
24
|
cl.s <- c()
|
26
25
|
medoids <- c()
|
27
|
-
for(i in cl){
|
26
|
+
for (i in cl) {
|
28
27
|
lab <- strsplit(i, ",")[[1]]
|
29
28
|
if(length(lab) == 1) {
|
30
29
|
lab.s <- lab
|
@@ -44,6 +43,12 @@ find_medoids <- function (ani.df, out, clades) {
|
|
44
43
|
|
45
44
|
#= Main
|
46
45
|
cat("Finding Medoids\n")
|
47
|
-
|
48
|
-
|
46
|
+
if (grepl("\\.rds$", argv[1])) {
|
47
|
+
ani <- readRDS(argv[1])
|
48
|
+
find_medoids(ani$a, ani$b, 1 - (ani$value / 100),
|
49
|
+
out = argv[2], clades = argv[3])
|
50
|
+
} else {
|
51
|
+
load(argv[1]) # assume .rda
|
52
|
+
find_medoids(a, b, d, out = argv[2], clades = argv[3])
|
53
|
+
}
|
49
54
|
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -48,9 +48,10 @@ module MiGA::SubcladeRunner::Pipeline
|
|
48
48
|
# Find genomospecies medoids
|
49
49
|
src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
|
50
50
|
dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
|
51
|
+
dat = "../../09.distances/#{dir}/miga-project.rda"
|
52
|
+
dat = "../../09.distances/#{dir}/miga-project.rds" unless File.exist?(dat)
|
51
53
|
run_cmd([
|
52
|
-
'Rscript', src,
|
53
|
-
'miga-project.gsp-medoids', 'miga-project.gsp-clades'
|
54
|
+
'Rscript', src, dat, 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
|
54
55
|
])
|
55
56
|
if File.exist? 'miga-project.gsp-clades.sorted'
|
56
57
|
File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
|
data/utils/subclades.R
CHANGED
@@ -338,18 +338,25 @@ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) {
|
|
338
338
|
}
|
339
339
|
|
340
340
|
ani_distance <- function (ani_file, sel) {
|
341
|
-
# Try to locate rds, otherwise read gzipped table
|
342
|
-
|
343
|
-
if (file.exists(
|
344
|
-
|
341
|
+
# Try to locate rda, then rds, and otherwise read gzipped table
|
342
|
+
rda <- gsub("\\.txt\\.gz$", ".rda", ani_file)
|
343
|
+
if (file.exists(rda)) {
|
344
|
+
load(rda) # Should already contain `a`, `b`, and `d` as vectors
|
345
345
|
} else {
|
346
|
-
|
347
|
-
|
346
|
+
rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
|
347
|
+
if (file.exists(rds)) {
|
348
|
+
sim <- readRDS(rds)
|
349
|
+
} else {
|
350
|
+
sim <- read.table(
|
351
|
+
gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE
|
352
|
+
)
|
353
|
+
}
|
348
354
|
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
355
|
+
# Extract individual variables to deal with very large matrices
|
356
|
+
a <- sim$a
|
357
|
+
b <- sim$b
|
358
|
+
d <- 1 - (sim$value / 100)
|
359
|
+
}
|
353
360
|
|
354
361
|
# If there is no data, end process
|
355
362
|
if (length(a) == 0) return(NULL)
|
@@ -359,7 +366,7 @@ ani_distance <- function (ani_file, sel) {
|
|
359
366
|
if (!is.na(sel) && file.exists(sel)) {
|
360
367
|
say("Filter selection")
|
361
368
|
ids <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
|
362
|
-
sel.idx <- which(
|
369
|
+
sel.idx <- which(a %in% ids & b %in% ids)
|
363
370
|
a <- a[sel.idx]
|
364
371
|
b <- b[sel.idx]
|
365
372
|
d <- d[sel.idx]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.14.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-04-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|