miga-base 1.3.13.10 → 1.3.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/browse.rb +1 -1
- data/lib/miga/cli/action/find.rb +1 -1
- data/lib/miga/project/result.rb +6 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +10 -9
- data/scripts/ani_distances.bash +10 -9
- data/utils/find-medoid.R +12 -7
- data/utils/subclade/pipeline.rb +3 -2
- data/utils/subclades.R +18 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ca873c0a599eec97f55a5226b4b0ffc524bf3ffb577ac36b951df0dc36f4ef2
|
4
|
+
data.tar.gz: 11bac7b7c287ff99e019f4c29c5af1949e0039a3d706aa26e2e60cf3845d170d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0d13451f7d4a15e64755ef6c09cde833112f1299e0ee924470bc51c0f02dea6040947d40f837ab0e3d91496423cfe8e5f2f219973afabb4cccb1979f9b3e211
|
7
|
+
data.tar.gz: a4f98424ab210e01bb549755ded82e0343d4080c1e15daa1545d25051bca40402d44801f14ea8eb9f978470c5892d7b96016b9a226bedaea8917012a4b854169
|
@@ -179,7 +179,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
|
|
179
179
|
str
|
180
180
|
.to_s.unmiga_name
|
181
181
|
.sub(/^./, &:upcase)
|
182
|
-
.gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|ani95|aai90| db$| ssu )/, &:upcase)
|
182
|
+
.gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|Rda|ani95|aai90| db$| ssu )/, &:upcase)
|
183
183
|
.sub(/Haai/, 'hAAI')
|
184
184
|
.sub(/Mytaxa/, 'MyTaxa')
|
185
185
|
.sub(/ pvalue$/, ' p-value')
|
data/lib/miga/cli/action/find.rb
CHANGED
data/lib/miga/project/result.rb
CHANGED
@@ -55,10 +55,13 @@ module MiGA::Project::Result
|
|
55
55
|
##
|
56
56
|
# Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
|
57
57
|
def add_result_distances(base, _opts)
|
58
|
-
return nil unless result_files_exist?(base,
|
58
|
+
return nil unless result_files_exist?(base, ['.txt']) &&
|
59
|
+
(result_files_exist?(base, ['.rds']) ||
|
60
|
+
result_files_exist?(base, ['.rda']))
|
59
61
|
|
60
62
|
r = MiGA::Result.new("#{base}.json")
|
61
63
|
r.add_file(:rds, 'miga-project.rds')
|
64
|
+
r.add_file(:rda, 'miga-project.rda')
|
62
65
|
r.add_file(:rdata, 'miga-project.Rdata') # Legacy file
|
63
66
|
r.add_file(:matrix, 'miga-project.txt')
|
64
67
|
r.add_file(:log, 'miga-project.log') # Legacy file
|
@@ -84,6 +87,7 @@ module MiGA::Project::Result
|
|
84
87
|
|
85
88
|
r = add_result_iter_clades(base)
|
86
89
|
r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
|
90
|
+
r.add_file(:aai_dist_rda, 'miga-project.dist.rda')
|
87
91
|
r.add_file(:aai_tree, 'miga-project.aai.nwk')
|
88
92
|
r.add_file(:proposal, 'miga-project.proposed-clades')
|
89
93
|
r.add_file(:clades_aai90, 'miga-project.aai90-clades')
|
@@ -108,6 +112,7 @@ module MiGA::Project::Result
|
|
108
112
|
r = add_result_iter_clades(base)
|
109
113
|
r.add_file(:ani_tree, 'miga-project.ani.nwk')
|
110
114
|
r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
|
115
|
+
r.add_file(:ani_dist_rda, 'miga-project.dist.rda')
|
111
116
|
r
|
112
117
|
end
|
113
118
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 13, 10].freeze
|
15
|
+
VERSION = [1.3, 14, 2].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 4,
|
23
|
+
VERSION_DATE = Date.new(2024, 4, 12)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/aai_distances.bash
CHANGED
@@ -40,15 +40,16 @@ rm "miga-project.txt.lno"
|
|
40
40
|
# R-ify
|
41
41
|
cat <<R | R --vanilla
|
42
42
|
file <- gzfile("miga-project.txt.gz")
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
)
|
49
|
-
|
50
|
-
|
51
|
-
|
43
|
+
text <- readLines(file, n = $LNO + 1, ok = FALSE)
|
44
|
+
list <- strsplit(text[-1], "\t", fixed = TRUE)
|
45
|
+
a <- sapply(list, function(x) x[1])
|
46
|
+
b <- sapply(list, function(x) x[2])
|
47
|
+
d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
|
48
|
+
save(a, b, d, file = "miga-project.rda")
|
49
|
+
|
50
|
+
non_self <- a != b
|
51
|
+
if(sum(non_self) > 0) {
|
52
|
+
h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
|
52
53
|
len <- length(h[["breaks"]])
|
53
54
|
write.table(
|
54
55
|
cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
|
data/scripts/ani_distances.bash
CHANGED
@@ -34,15 +34,16 @@ rm "miga-project.txt.lno"
|
|
34
34
|
# R-ify
|
35
35
|
cat <<R | R --vanilla
|
36
36
|
file <- gzfile("miga-project.txt.gz")
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
)
|
43
|
-
|
44
|
-
|
45
|
-
|
37
|
+
text <- readLines(file, n = $LNO + 1, ok = FALSE)
|
38
|
+
list <- strsplit(text[-1], "\t", fixed = TRUE)
|
39
|
+
a <- sapply(list, function(x) x[1])
|
40
|
+
b <- sapply(list, function(x) x[2])
|
41
|
+
d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
|
42
|
+
save(a, b, d, file = "miga-project.rda")
|
43
|
+
|
44
|
+
non_self <- a != b
|
45
|
+
if(sum(non_self) > 0) {
|
46
|
+
h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
|
46
47
|
len <- length(h[["breaks"]])
|
47
48
|
write.table(
|
48
49
|
cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
|
data/utils/find-medoid.R
CHANGED
@@ -16,15 +16,14 @@ if(Sys.getenv("MIGA") == ""){
|
|
16
16
|
))
|
17
17
|
}
|
18
18
|
|
19
|
-
find_medoids <- function (
|
20
|
-
if(
|
21
|
-
|
22
|
-
dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
|
19
|
+
find_medoids <- function (a, b, d, out, clades) {
|
20
|
+
if (length(d) == 0) return(NULL)
|
21
|
+
dist <- enve.df2dist(cbind(a, b, d), "a", "b", "d", default.d = max(d) * 1.2)
|
23
22
|
dist <- as.matrix(dist)
|
24
23
|
cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
|
25
24
|
cl.s <- c()
|
26
25
|
medoids <- c()
|
27
|
-
for(i in cl){
|
26
|
+
for (i in cl) {
|
28
27
|
lab <- strsplit(i, ",")[[1]]
|
29
28
|
if(length(lab) == 1) {
|
30
29
|
lab.s <- lab
|
@@ -44,6 +43,12 @@ find_medoids <- function (ani.df, out, clades) {
|
|
44
43
|
|
45
44
|
#= Main
|
46
45
|
cat("Finding Medoids\n")
|
47
|
-
|
48
|
-
|
46
|
+
if (grepl("\\.rds$", argv[1])) {
|
47
|
+
ani <- readRDS(argv[1])
|
48
|
+
find_medoids(ani$a, ani$b, 1 - (ani$value / 100),
|
49
|
+
out = argv[2], clades = argv[3])
|
50
|
+
} else {
|
51
|
+
load(argv[1]) # assume .rda
|
52
|
+
find_medoids(a, b, d, out = argv[2], clades = argv[3])
|
53
|
+
}
|
49
54
|
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -48,9 +48,10 @@ module MiGA::SubcladeRunner::Pipeline
|
|
48
48
|
# Find genomospecies medoids
|
49
49
|
src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
|
50
50
|
dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
|
51
|
+
dat = "../../09.distances/#{dir}/miga-project.rda"
|
52
|
+
dat = "../../09.distances/#{dir}/miga-project.rds" unless File.exist?(dat)
|
51
53
|
run_cmd([
|
52
|
-
'Rscript', src,
|
53
|
-
'miga-project.gsp-medoids', 'miga-project.gsp-clades'
|
54
|
+
'Rscript', src, dat, 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
|
54
55
|
])
|
55
56
|
if File.exist? 'miga-project.gsp-clades.sorted'
|
56
57
|
File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
|
data/utils/subclades.R
CHANGED
@@ -338,18 +338,25 @@ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) {
|
|
338
338
|
}
|
339
339
|
|
340
340
|
ani_distance <- function (ani_file, sel) {
|
341
|
-
# Try to locate rds, otherwise read gzipped table
|
342
|
-
|
343
|
-
if (file.exists(
|
344
|
-
|
341
|
+
# Try to locate rda, then rds, and otherwise read gzipped table
|
342
|
+
rda <- gsub("\\.txt\\.gz$", ".rda", ani_file)
|
343
|
+
if (file.exists(rda)) {
|
344
|
+
load(rda) # Should already contain `a`, `b`, and `d` as vectors
|
345
345
|
} else {
|
346
|
-
|
347
|
-
|
346
|
+
rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
|
347
|
+
if (file.exists(rds)) {
|
348
|
+
sim <- readRDS(rds)
|
349
|
+
} else {
|
350
|
+
sim <- read.table(
|
351
|
+
gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE
|
352
|
+
)
|
353
|
+
}
|
348
354
|
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
355
|
+
# Extract individual variables to deal with very large matrices
|
356
|
+
a <- sim$a
|
357
|
+
b <- sim$b
|
358
|
+
d <- 1 - (sim$value / 100)
|
359
|
+
}
|
353
360
|
|
354
361
|
# If there is no data, end process
|
355
362
|
if (length(a) == 0) return(NULL)
|
@@ -359,7 +366,7 @@ ani_distance <- function (ani_file, sel) {
|
|
359
366
|
if (!is.na(sel) && file.exists(sel)) {
|
360
367
|
say("Filter selection")
|
361
368
|
ids <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
|
362
|
-
sel.idx <- which(
|
369
|
+
sel.idx <- which(a %in% ids & b %in% ids)
|
363
370
|
a <- a[sel.idx]
|
364
371
|
b <- b[sel.idx]
|
365
372
|
d <- d[sel.idx]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.13.10
|
4
|
+
version: 1.3.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-04-
|
11
|
+
date: 2024-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|