miga-base 1.3.13.10 → 1.3.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/browse.rb +1 -1
- data/lib/miga/cli/action/find.rb +1 -1
- data/lib/miga/project/result.rb +6 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +10 -9
- data/scripts/ani_distances.bash +10 -9
- data/utils/find-medoid.R +12 -7
- data/utils/subclade/pipeline.rb +3 -2
- data/utils/subclades.R +18 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ca873c0a599eec97f55a5226b4b0ffc524bf3ffb577ac36b951df0dc36f4ef2
|
4
|
+
data.tar.gz: 11bac7b7c287ff99e019f4c29c5af1949e0039a3d706aa26e2e60cf3845d170d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0d13451f7d4a15e64755ef6c09cde833112f1299e0ee924470bc51c0f02dea6040947d40f837ab0e3d91496423cfe8e5f2f219973afabb4cccb1979f9b3e211
|
7
|
+
data.tar.gz: a4f98424ab210e01bb549755ded82e0343d4080c1e15daa1545d25051bca40402d44801f14ea8eb9f978470c5892d7b96016b9a226bedaea8917012a4b854169
|
@@ -179,7 +179,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
|
|
179
179
|
str
|
180
180
|
.to_s.unmiga_name
|
181
181
|
.sub(/^./, &:upcase)
|
182
|
-
.gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|ani95|aai90| db$| ssu )/, &:upcase)
|
182
|
+
.gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|Rda|ani95|aai90| db$| ssu )/, &:upcase)
|
183
183
|
.sub(/Haai/, 'hAAI')
|
184
184
|
.sub(/Mytaxa/, 'MyTaxa')
|
185
185
|
.sub(/ pvalue$/, ' p-value')
|
data/lib/miga/cli/action/find.rb
CHANGED
data/lib/miga/project/result.rb
CHANGED
@@ -55,10 +55,13 @@ module MiGA::Project::Result
|
|
55
55
|
##
|
56
56
|
# Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
|
57
57
|
def add_result_distances(base, _opts)
|
58
|
-
return nil unless result_files_exist?(base,
|
58
|
+
return nil unless result_files_exist?(base, ['.txt']) &&
|
59
|
+
(result_files_exist?(base, ['.rds']) ||
|
60
|
+
result_files_exist?(base, ['.rda']))
|
59
61
|
|
60
62
|
r = MiGA::Result.new("#{base}.json")
|
61
63
|
r.add_file(:rds, 'miga-project.rds')
|
64
|
+
r.add_file(:rda, 'miga-project.rda')
|
62
65
|
r.add_file(:rdata, 'miga-project.Rdata') # Legacy file
|
63
66
|
r.add_file(:matrix, 'miga-project.txt')
|
64
67
|
r.add_file(:log, 'miga-project.log') # Legacy file
|
@@ -84,6 +87,7 @@ module MiGA::Project::Result
|
|
84
87
|
|
85
88
|
r = add_result_iter_clades(base)
|
86
89
|
r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
|
90
|
+
r.add_file(:aai_dist_rda, 'miga-project.dist.rda')
|
87
91
|
r.add_file(:aai_tree, 'miga-project.aai.nwk')
|
88
92
|
r.add_file(:proposal, 'miga-project.proposed-clades')
|
89
93
|
r.add_file(:clades_aai90, 'miga-project.aai90-clades')
|
@@ -108,6 +112,7 @@ module MiGA::Project::Result
|
|
108
112
|
r = add_result_iter_clades(base)
|
109
113
|
r.add_file(:ani_tree, 'miga-project.ani.nwk')
|
110
114
|
r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
|
115
|
+
r.add_file(:ani_dist_rda, 'miga-project.dist.rda')
|
111
116
|
r
|
112
117
|
end
|
113
118
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3,
|
15
|
+
VERSION = [1.3, 14, 2].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 4,
|
23
|
+
VERSION_DATE = Date.new(2024, 4, 12)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/aai_distances.bash
CHANGED
@@ -40,15 +40,16 @@ rm "miga-project.txt.lno"
|
|
40
40
|
# R-ify
|
41
41
|
cat <<R | R --vanilla
|
42
42
|
file <- gzfile("miga-project.txt.gz")
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
)
|
49
|
-
|
50
|
-
|
51
|
-
|
43
|
+
text <- readLines(file, n = $LNO + 1, ok = FALSE)
|
44
|
+
list <- strsplit(text[-1], "\t", fixed = TRUE)
|
45
|
+
a <- sapply(list, function(x) x[1])
|
46
|
+
b <- sapply(list, function(x) x[2])
|
47
|
+
d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
|
48
|
+
save(a, b, d, file = "miga-project.rda")
|
49
|
+
|
50
|
+
non_self <- a != b
|
51
|
+
if(sum(non_self) > 0) {
|
52
|
+
h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
|
52
53
|
len <- length(h[["breaks"]])
|
53
54
|
write.table(
|
54
55
|
cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
|
data/scripts/ani_distances.bash
CHANGED
@@ -34,15 +34,16 @@ rm "miga-project.txt.lno"
|
|
34
34
|
# R-ify
|
35
35
|
cat <<R | R --vanilla
|
36
36
|
file <- gzfile("miga-project.txt.gz")
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
)
|
43
|
-
|
44
|
-
|
45
|
-
|
37
|
+
text <- readLines(file, n = $LNO + 1, ok = FALSE)
|
38
|
+
list <- strsplit(text[-1], "\t", fixed = TRUE)
|
39
|
+
a <- sapply(list, function(x) x[1])
|
40
|
+
b <- sapply(list, function(x) x[2])
|
41
|
+
d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
|
42
|
+
save(a, b, d, file = "miga-project.rda")
|
43
|
+
|
44
|
+
non_self <- a != b
|
45
|
+
if(sum(non_self) > 0) {
|
46
|
+
h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
|
46
47
|
len <- length(h[["breaks"]])
|
47
48
|
write.table(
|
48
49
|
cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
|
data/utils/find-medoid.R
CHANGED
@@ -16,15 +16,14 @@ if(Sys.getenv("MIGA") == ""){
|
|
16
16
|
))
|
17
17
|
}
|
18
18
|
|
19
|
-
find_medoids <- function (
|
20
|
-
if(
|
21
|
-
|
22
|
-
dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
|
19
|
+
find_medoids <- function (a, b, d, out, clades) {
|
20
|
+
if (length(d) == 0) return(NULL)
|
21
|
+
dist <- enve.df2dist(cbind(a, b, d), "a", "b", "d", default.d = max(d) * 1.2)
|
23
22
|
dist <- as.matrix(dist)
|
24
23
|
cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
|
25
24
|
cl.s <- c()
|
26
25
|
medoids <- c()
|
27
|
-
for(i in cl){
|
26
|
+
for (i in cl) {
|
28
27
|
lab <- strsplit(i, ",")[[1]]
|
29
28
|
if(length(lab) == 1) {
|
30
29
|
lab.s <- lab
|
@@ -44,6 +43,12 @@ find_medoids <- function (ani.df, out, clades) {
|
|
44
43
|
|
45
44
|
#= Main
|
46
45
|
cat("Finding Medoids\n")
|
47
|
-
|
48
|
-
|
46
|
+
if (grepl("\\.rds$", argv[1])) {
|
47
|
+
ani <- readRDS(argv[1])
|
48
|
+
find_medoids(ani$a, ani$b, 1 - (ani$value / 100),
|
49
|
+
out = argv[2], clades = argv[3])
|
50
|
+
} else {
|
51
|
+
load(argv[1]) # assume .rda
|
52
|
+
find_medoids(a, b, d, out = argv[2], clades = argv[3])
|
53
|
+
}
|
49
54
|
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -48,9 +48,10 @@ module MiGA::SubcladeRunner::Pipeline
|
|
48
48
|
# Find genomospecies medoids
|
49
49
|
src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
|
50
50
|
dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
|
51
|
+
dat = "../../09.distances/#{dir}/miga-project.rda"
|
52
|
+
dat = "../../09.distances/#{dir}/miga-project.rds" unless File.exist?(dat)
|
51
53
|
run_cmd([
|
52
|
-
'Rscript', src,
|
53
|
-
'miga-project.gsp-medoids', 'miga-project.gsp-clades'
|
54
|
+
'Rscript', src, dat, 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
|
54
55
|
])
|
55
56
|
if File.exist? 'miga-project.gsp-clades.sorted'
|
56
57
|
File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
|
data/utils/subclades.R
CHANGED
@@ -338,18 +338,25 @@ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) {
|
|
338
338
|
}
|
339
339
|
|
340
340
|
ani_distance <- function (ani_file, sel) {
|
341
|
-
# Try to locate rds, otherwise read gzipped table
|
342
|
-
|
343
|
-
if (file.exists(
|
344
|
-
|
341
|
+
# Try to locate rda, then rds, and otherwise read gzipped table
|
342
|
+
rda <- gsub("\\.txt\\.gz$", ".rda", ani_file)
|
343
|
+
if (file.exists(rda)) {
|
344
|
+
load(rda) # Should already contain `a`, `b`, and `d` as vectors
|
345
345
|
} else {
|
346
|
-
|
347
|
-
|
346
|
+
rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
|
347
|
+
if (file.exists(rds)) {
|
348
|
+
sim <- readRDS(rds)
|
349
|
+
} else {
|
350
|
+
sim <- read.table(
|
351
|
+
gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE
|
352
|
+
)
|
353
|
+
}
|
348
354
|
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
355
|
+
# Extract individual variables to deal with very large matrices
|
356
|
+
a <- sim$a
|
357
|
+
b <- sim$b
|
358
|
+
d <- 1 - (sim$value / 100)
|
359
|
+
}
|
353
360
|
|
354
361
|
# If there is no data, end process
|
355
362
|
if (length(a) == 0) return(NULL)
|
@@ -359,7 +366,7 @@ ani_distance <- function (ani_file, sel) {
|
|
359
366
|
if (!is.na(sel) && file.exists(sel)) {
|
360
367
|
say("Filter selection")
|
361
368
|
ids <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
|
362
|
-
sel.idx <- which(
|
369
|
+
sel.idx <- which(a %in% ids & b %in% ids)
|
363
370
|
a <- a[sel.idx]
|
364
371
|
b <- b[sel.idx]
|
365
372
|
d <- d[sel.idx]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-04-
|
11
|
+
date: 2024-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|