miga-base 1.3.13.9 → 1.3.14.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 843f35d4fa4d3543cc030db30b32ba87e40e21c967c4ecf3832973857e58dc05
4
- data.tar.gz: e6f121106bce201cba4d6da053c1a07d977b12f5ee4d02d84c63ca204e4afa29
3
+ metadata.gz: 2459aa0f06af71628701bc0a71c1830bd21fc24d5c3e0999c8f59b6bce8b6cf6
4
+ data.tar.gz: 1590b03b5dfbe42241dd943a61388ff2e22a4956c6e9169d321a9bb857b9713f
5
5
  SHA512:
6
- metadata.gz: de88c20be5deffa734c0fc692bde8ecc7f87c1bea571c125915b032232e1c9a54e41eb37f3c666f54a2adfd1be64adfb12ec61000553a3b1ace9ba70c7a3d048
7
- data.tar.gz: d2ff4a70266159ccca8c1cff3dbaf0c52316e5608096014c04523e309f6ce768e1daa4f779275ef0293cb83d14af982d9c8a5412e1d433f25425f76adcac8261
6
+ metadata.gz: cf4da3f0519c77fd290e92650a6a53b871d678516eda891ef15272e0fa5a8b4aa518a4531211b0ee9d78ca2e42798f6421b41a3980c3f6e64a159f78bf105e9a
7
+ data.tar.gz: e771646155d87a7f8e7896b5cfa121f94a60dbce78cd5be0231faf42e7f4aa2b75de4c3fdf043dadb33d07a67bca180d12a12871d1066bff73a454eecdf565a5
@@ -179,7 +179,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
179
179
  str
180
180
  .to_s.unmiga_name
181
181
  .sub(/^./, &:upcase)
182
- .gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|ani95|aai90| db$| ssu )/, &:upcase)
182
+ .gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|Rda|ani95|aai90| db$| ssu )/, &:upcase)
183
183
  .sub(/Haai/, 'hAAI')
184
184
  .sub(/Mytaxa/, 'MyTaxa')
185
185
  .sub(/ pvalue$/, ' p-value')
@@ -55,10 +55,13 @@ module MiGA::Project::Result
55
55
  ##
56
56
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
57
57
  def add_result_distances(base, _opts)
58
- return nil unless result_files_exist?(base, %w[.rds .txt])
58
+ return nil unless result_files_exist?(base, ['.txt']) &&
59
+ (result_files_exist?(base, ['.rds']) ||
60
+ result_files_exist?(base, ['.rda']))
59
61
 
60
62
  r = MiGA::Result.new("#{base}.json")
61
63
  r.add_file(:rds, 'miga-project.rds')
64
+ r.add_file(:rda, 'miga-project.rda')
62
65
  r.add_file(:rdata, 'miga-project.Rdata') # Legacy file
63
66
  r.add_file(:matrix, 'miga-project.txt')
64
67
  r.add_file(:log, 'miga-project.log') # Legacy file
@@ -84,6 +87,7 @@ module MiGA::Project::Result
84
87
 
85
88
  r = add_result_iter_clades(base)
86
89
  r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
90
+ r.add_file(:aai_dist_rda, 'miga-project.dist.rda')
87
91
  r.add_file(:aai_tree, 'miga-project.aai.nwk')
88
92
  r.add_file(:proposal, 'miga-project.proposed-clades')
89
93
  r.add_file(:clades_aai90, 'miga-project.aai90-clades')
@@ -108,6 +112,7 @@ module MiGA::Project::Result
108
112
  r = add_result_iter_clades(base)
109
113
  r.add_file(:ani_tree, 'miga-project.ani.nwk')
110
114
  r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
115
+ r.add_file(:ani_dist_rda, 'miga-project.dist.rda')
111
116
  r
112
117
  end
113
118
 
@@ -134,6 +134,15 @@ module MiGA::RemoteDataset::Base
134
134
  end,
135
135
  method: :get
136
136
  },
137
+ ncbi_fetch: {
138
+ dbs: { nuccore: { stage: :metadata, format: :gb } },
139
+ uri: lambda do |opts|
140
+ @@_EUTILS_BUILD[:efetch,
141
+ db: opts[:db], id: opts[:ids], rettype: opts[:format], retmode: :text
142
+ ]
143
+ end,
144
+ method: :get
145
+ },
137
146
  ncbi_search: {
138
147
  dbs: {
139
148
  assembly: { stage: :metadata, format: :json },
@@ -107,9 +107,12 @@ class MiGA::RemoteDataset
107
107
 
108
108
  MiGA::MiGA.DEBUG 'Empty sequence, attempting download as WGS records'
109
109
  a, b = opts[:obj].metadata[:ncbi_wgs].split('-', 2)
110
- pref = longest_common_prefix([a, b])
111
- rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
112
- ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
110
+ ids = [a]
111
+ unless b.nil?
112
+ pref = longest_common_prefix([a, b])
113
+ rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
114
+ ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
115
+ end
113
116
  download_rest(opts.merge(universe: :ncbi, db: :nuccore, ids: ids))
114
117
  end
115
118
 
@@ -300,6 +300,16 @@ class MiGA::RemoteDataset < MiGA::MiGA
300
300
  metadata[:web_assembly_gz] ||=
301
301
  '%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
302
302
  end
303
+
304
+ # If all conditions are right, try getting the WGS range
305
+ if @_ncbi_asm_json_doc['wgs'] && !@_ncbi_asm_json_doc['wgs'].empty? &&
306
+ metadata[:ncbi_nuccore] && !metadata[:ncbi_wgs]
307
+ doc = self.class.download(:ncbi_fetch, :nuccore, metadata[:ncbi_nuccore], :gb).split(/\n/)
308
+ ln = doc.grep(/^WGS\s+\S+-\S+/).first
309
+ wgs = ln&.gsub(/^WGS\s+(\S+-\S+).*/, '\1')
310
+ metadata[:ncbi_wgs] = wgs if wgs
311
+ end
312
+
303
313
  @_ncbi_asm_json_doc
304
314
  end
305
315
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 13, 9].freeze
15
+ VERSION = [1.3, 14, 1].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 3, 31)
23
+ VERSION_DATE = Date.new(2024, 4, 1)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -40,15 +40,16 @@ rm "miga-project.txt.lno"
40
40
  # R-ify
41
41
  cat <<R | R --vanilla
42
42
  file <- gzfile("miga-project.txt.gz")
43
- aai <- read.table(
44
- file, sep = "\t", header = TRUE, as.is = TRUE, quote = "",
45
- stringsAsFactors = FALSE, comment.char = "", nrows = $LNO,
46
- colClasses = c("character", "character",
47
- "numeric", "numeric", "integer", "integer")
48
- )
49
- saveRDS(aai, file = "miga-project.rds")
50
- if(sum(aai[, "a"] != aai[, "b"]) > 0) {
51
- h <- hist(aai[aai[, "a"] != aai[, "b"], "value"], breaks = 100, plot = FALSE)
43
+ text <- readLines(file, n = $LNO + 1, ok = FALSE)
44
+ list <- strsplit(text[-1], "\t", fixed = TRUE)
45
+ a <- sapply(list, function(x) x[1])
46
+ b <- sapply(list, function(x) x[2])
47
+ d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
48
+ save(a, b, d, file = "miga-project.rda")
49
+
50
+ non_self <- a != b
51
+ if(sum(non_self) > 0) {
52
+ h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
52
53
  len <- length(h[["breaks"]])
53
54
  write.table(
54
55
  cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
@@ -34,15 +34,16 @@ rm "miga-project.txt.lno"
34
34
  # R-ify
35
35
  cat <<R | R --vanilla
36
36
  file <- gzfile("miga-project.txt.gz")
37
- ani <- read.table(
38
- file, sep = "\t", header = TRUE, as.is = TRUE, quote = "",
39
- stringsAsFactors = FALSE, comment.char = "", nrows = $LNO,
40
- colClasses = c("character", "character",
41
- "numeric", "numeric", "integer", "integer")
42
- )
43
- saveRDS(ani, file = "miga-project.rds")
44
- if(sum(ani[, "a"] != ani[, "b"]) > 0) {
45
- h <- hist(ani[ani[, "a"] != ani[, "b"], "value"], breaks = 100, plot = FALSE)
37
+ text <- readLines(file, n = $LNO + 1, ok = FALSE)
38
+ list <- strsplit(text[-1], "\t", fixed = TRUE)
39
+ a <- sapply(list, function(x) x[1])
40
+ b <- sapply(list, function(x) x[2])
41
+ d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
42
+ save(a, b, d, file = "miga-project.rda")
43
+
44
+ non_self <- a != b
45
+ if(sum(non_self) > 0) {
46
+ h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
46
47
  len <- length(h[["breaks"]])
47
48
  write.table(
48
49
  cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
data/utils/find-medoid.R CHANGED
@@ -16,15 +16,14 @@ if(Sys.getenv("MIGA") == ""){
16
16
  ))
17
17
  }
18
18
 
19
- find_medoids <- function (ani.df, out, clades) {
20
- if(nrow(ani.df) == 0) return(NULL)
21
- ani.df$d <- 1 - (ani.df$value/100)
22
- dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
19
+ find_medoids <- function (a, b, d, out, clades) {
20
+ if (length(d) == 0) return(NULL)
21
+ dist <- enve.df2dist(cbind(a, b, d), "a", "b", "d", default.d = max(d) * 1.2)
23
22
  dist <- as.matrix(dist)
24
23
  cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
25
24
  cl.s <- c()
26
25
  medoids <- c()
27
- for(i in cl){
26
+ for (i in cl) {
28
27
  lab <- strsplit(i, ",")[[1]]
29
28
  if(length(lab) == 1) {
30
29
  lab.s <- lab
@@ -44,6 +43,12 @@ find_medoids <- function (ani.df, out, clades) {
44
43
 
45
44
  #= Main
46
45
  cat("Finding Medoids\n")
47
- ani <- readRDS(argv[1])
48
- find_medoids(ani.df = ani, out = argv[2], clades = argv[3])
46
+ if (grepl("\\.rds$", argv[1])) {
47
+ ani <- readRDS(argv[1])
48
+ find_medoids(ani$a, ani$b, 1 - (ani$value / 100),
49
+ out = argv[2], clades = argv[3])
50
+ } else {
51
+ load(argv[1]) # assume .rda
52
+ find_medoids(a, b, d, out = argv[2], clades = argv[3])
53
+ }
49
54
 
@@ -48,9 +48,10 @@ module MiGA::SubcladeRunner::Pipeline
48
48
  # Find genomospecies medoids
49
49
  src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
50
50
  dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
51
+ dat = "../../09.distances/#{dir}/miga-project.rda"
52
+ dat = "../../09.distances/#{dir}/miga-project.rds" unless File.exist?(dat)
51
53
  run_cmd([
52
- 'Rscript', src, "../../09.distances/#{dir}/miga-project.rds",
53
- 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
54
+ 'Rscript', src, dat, 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
54
55
  ])
55
56
  if File.exist? 'miga-project.gsp-clades.sorted'
56
57
  File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
data/utils/subclades.R CHANGED
@@ -338,18 +338,25 @@ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) {
338
338
  }
339
339
 
340
340
  ani_distance <- function (ani_file, sel) {
341
- # Try to locate rds, otherwise read gzipped table
342
- rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
343
- if (file.exists(rds)) {
344
- sim <- readRDS(rds)
341
+ # Try to locate rda, then rds, and otherwise read gzipped table
342
+ rda <- gsub("\\.txt\\.gz$", ".rda", ani_file)
343
+ if (file.exists(rda)) {
344
+ load(rda) # Should already contain `a`, `b`, and `d` as vectors
345
345
  } else {
346
- sim <- read.table(gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE)
347
- }
346
+ rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
347
+ if (file.exists(rds)) {
348
+ sim <- readRDS(rds)
349
+ } else {
350
+ sim <- read.table(
351
+ gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE
352
+ )
353
+ }
348
354
 
349
- # Extract individual variables to deal with very large matrices
350
- a <- sim$a
351
- b <- sim$b
352
- d <- 1 - (sim$value / 100)
355
+ # Extract individual variables to deal with very large matrices
356
+ a <- sim$a
357
+ b <- sim$b
358
+ d <- 1 - (sim$value / 100)
359
+ }
353
360
 
354
361
  # If there is no data, end process
355
362
  if (length(a) == 0) return(NULL)
@@ -359,7 +366,7 @@ ani_distance <- function (ani_file, sel) {
359
366
  if (!is.na(sel) && file.exists(sel)) {
360
367
  say("Filter selection")
361
368
  ids <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
362
- sel.idx <- which(sim$a %in% ids & sim$b %in% ids)
369
+ sel.idx <- which(a %in% ids & b %in% ids)
363
370
  a <- a[sel.idx]
364
371
  b <- b[sel.idx]
365
372
  d <- d[sel.idx]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.13.9
4
+ version: 1.3.14.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-31 00:00:00.000000000 Z
11
+ date: 2024-04-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons