miga-base 1.3.13.9 → 1.3.14.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 843f35d4fa4d3543cc030db30b32ba87e40e21c967c4ecf3832973857e58dc05
4
- data.tar.gz: e6f121106bce201cba4d6da053c1a07d977b12f5ee4d02d84c63ca204e4afa29
3
+ metadata.gz: 2459aa0f06af71628701bc0a71c1830bd21fc24d5c3e0999c8f59b6bce8b6cf6
4
+ data.tar.gz: 1590b03b5dfbe42241dd943a61388ff2e22a4956c6e9169d321a9bb857b9713f
5
5
  SHA512:
6
- metadata.gz: de88c20be5deffa734c0fc692bde8ecc7f87c1bea571c125915b032232e1c9a54e41eb37f3c666f54a2adfd1be64adfb12ec61000553a3b1ace9ba70c7a3d048
7
- data.tar.gz: d2ff4a70266159ccca8c1cff3dbaf0c52316e5608096014c04523e309f6ce768e1daa4f779275ef0293cb83d14af982d9c8a5412e1d433f25425f76adcac8261
6
+ metadata.gz: cf4da3f0519c77fd290e92650a6a53b871d678516eda891ef15272e0fa5a8b4aa518a4531211b0ee9d78ca2e42798f6421b41a3980c3f6e64a159f78bf105e9a
7
+ data.tar.gz: e771646155d87a7f8e7896b5cfa121f94a60dbce78cd5be0231faf42e7f4aa2b75de4c3fdf043dadb33d07a67bca180d12a12871d1066bff73a454eecdf565a5
@@ -179,7 +179,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
179
179
  str
180
180
  .to_s.unmiga_name
181
181
  .sub(/^./, &:upcase)
182
- .gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|ani95|aai90| db$| ssu )/, &:upcase)
182
+ .gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|Rda|ani95|aai90| db$| ssu )/, &:upcase)
183
183
  .sub(/Haai/, 'hAAI')
184
184
  .sub(/Mytaxa/, 'MyTaxa')
185
185
  .sub(/ pvalue$/, ' p-value')
@@ -55,10 +55,13 @@ module MiGA::Project::Result
55
55
  ##
56
56
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
57
57
  def add_result_distances(base, _opts)
58
- return nil unless result_files_exist?(base, %w[.rds .txt])
58
+ return nil unless result_files_exist?(base, ['.txt']) &&
59
+ (result_files_exist?(base, ['.rds']) ||
60
+ result_files_exist?(base, ['.rda']))
59
61
 
60
62
  r = MiGA::Result.new("#{base}.json")
61
63
  r.add_file(:rds, 'miga-project.rds')
64
+ r.add_file(:rda, 'miga-project.rda')
62
65
  r.add_file(:rdata, 'miga-project.Rdata') # Legacy file
63
66
  r.add_file(:matrix, 'miga-project.txt')
64
67
  r.add_file(:log, 'miga-project.log') # Legacy file
@@ -84,6 +87,7 @@ module MiGA::Project::Result
84
87
 
85
88
  r = add_result_iter_clades(base)
86
89
  r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
90
+ r.add_file(:aai_dist_rda, 'miga-project.dist.rda')
87
91
  r.add_file(:aai_tree, 'miga-project.aai.nwk')
88
92
  r.add_file(:proposal, 'miga-project.proposed-clades')
89
93
  r.add_file(:clades_aai90, 'miga-project.aai90-clades')
@@ -108,6 +112,7 @@ module MiGA::Project::Result
108
112
  r = add_result_iter_clades(base)
109
113
  r.add_file(:ani_tree, 'miga-project.ani.nwk')
110
114
  r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
115
+ r.add_file(:ani_dist_rda, 'miga-project.dist.rda')
111
116
  r
112
117
  end
113
118
 
@@ -134,6 +134,15 @@ module MiGA::RemoteDataset::Base
134
134
  end,
135
135
  method: :get
136
136
  },
137
+ ncbi_fetch: {
138
+ dbs: { nuccore: { stage: :metadata, format: :gb } },
139
+ uri: lambda do |opts|
140
+ @@_EUTILS_BUILD[:efetch,
141
+ db: opts[:db], id: opts[:ids], rettype: opts[:format], retmode: :text
142
+ ]
143
+ end,
144
+ method: :get
145
+ },
137
146
  ncbi_search: {
138
147
  dbs: {
139
148
  assembly: { stage: :metadata, format: :json },
@@ -107,9 +107,12 @@ class MiGA::RemoteDataset
107
107
 
108
108
  MiGA::MiGA.DEBUG 'Empty sequence, attempting download as WGS records'
109
109
  a, b = opts[:obj].metadata[:ncbi_wgs].split('-', 2)
110
- pref = longest_common_prefix([a, b])
111
- rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
112
- ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
110
+ ids = [a]
111
+ unless b.nil?
112
+ pref = longest_common_prefix([a, b])
113
+ rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
114
+ ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
115
+ end
113
116
  download_rest(opts.merge(universe: :ncbi, db: :nuccore, ids: ids))
114
117
  end
115
118
 
@@ -300,6 +300,16 @@ class MiGA::RemoteDataset < MiGA::MiGA
300
300
  metadata[:web_assembly_gz] ||=
301
301
  '%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
302
302
  end
303
+
304
+ # If all conditions are right, try getting the WGS range
305
+ if @_ncbi_asm_json_doc['wgs'] && !@_ncbi_asm_json_doc['wgs'].empty? &&
306
+ metadata[:ncbi_nuccore] && !metadata[:ncbi_wgs]
307
+ doc = self.class.download(:ncbi_fetch, :nuccore, metadata[:ncbi_nuccore], :gb).split(/\n/)
308
+ ln = doc.grep(/^WGS\s+\S+-\S+/).first
309
+ wgs = ln&.gsub(/^WGS\s+(\S+-\S+).*/, '\1')
310
+ metadata[:ncbi_wgs] = wgs if wgs
311
+ end
312
+
303
313
  @_ncbi_asm_json_doc
304
314
  end
305
315
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 13, 9].freeze
15
+ VERSION = [1.3, 14, 1].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 3, 31)
23
+ VERSION_DATE = Date.new(2024, 4, 1)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -40,15 +40,16 @@ rm "miga-project.txt.lno"
40
40
  # R-ify
41
41
  cat <<R | R --vanilla
42
42
  file <- gzfile("miga-project.txt.gz")
43
- aai <- read.table(
44
- file, sep = "\t", header = TRUE, as.is = TRUE, quote = "",
45
- stringsAsFactors = FALSE, comment.char = "", nrows = $LNO,
46
- colClasses = c("character", "character",
47
- "numeric", "numeric", "integer", "integer")
48
- )
49
- saveRDS(aai, file = "miga-project.rds")
50
- if(sum(aai[, "a"] != aai[, "b"]) > 0) {
51
- h <- hist(aai[aai[, "a"] != aai[, "b"], "value"], breaks = 100, plot = FALSE)
43
+ text <- readLines(file, n = $LNO + 1, ok = FALSE)
44
+ list <- strsplit(text[-1], "\t", fixed = TRUE)
45
+ a <- sapply(list, function(x) x[1])
46
+ b <- sapply(list, function(x) x[2])
47
+ d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
48
+ save(a, b, d, file = "miga-project.rda")
49
+
50
+ non_self <- a != b
51
+ if(sum(non_self) > 0) {
52
+ h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
52
53
  len <- length(h[["breaks"]])
53
54
  write.table(
54
55
  cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
@@ -34,15 +34,16 @@ rm "miga-project.txt.lno"
34
34
  # R-ify
35
35
  cat <<R | R --vanilla
36
36
  file <- gzfile("miga-project.txt.gz")
37
- ani <- read.table(
38
- file, sep = "\t", header = TRUE, as.is = TRUE, quote = "",
39
- stringsAsFactors = FALSE, comment.char = "", nrows = $LNO,
40
- colClasses = c("character", "character",
41
- "numeric", "numeric", "integer", "integer")
42
- )
43
- saveRDS(ani, file = "miga-project.rds")
44
- if(sum(ani[, "a"] != ani[, "b"]) > 0) {
45
- h <- hist(ani[ani[, "a"] != ani[, "b"], "value"], breaks = 100, plot = FALSE)
37
+ text <- readLines(file, n = $LNO + 1, ok = FALSE)
38
+ list <- strsplit(text[-1], "\t", fixed = TRUE)
39
+ a <- sapply(list, function(x) x[1])
40
+ b <- sapply(list, function(x) x[2])
41
+ d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
42
+ save(a, b, d, file = "miga-project.rda")
43
+
44
+ non_self <- a != b
45
+ if(sum(non_self) > 0) {
46
+ h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
46
47
  len <- length(h[["breaks"]])
47
48
  write.table(
48
49
  cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
data/utils/find-medoid.R CHANGED
@@ -16,15 +16,14 @@ if(Sys.getenv("MIGA") == ""){
16
16
  ))
17
17
  }
18
18
 
19
- find_medoids <- function (ani.df, out, clades) {
20
- if(nrow(ani.df) == 0) return(NULL)
21
- ani.df$d <- 1 - (ani.df$value/100)
22
- dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
19
+ find_medoids <- function (a, b, d, out, clades) {
20
+ if (length(d) == 0) return(NULL)
21
+ dist <- enve.df2dist(cbind(a, b, d), "a", "b", "d", default.d = max(d) * 1.2)
23
22
  dist <- as.matrix(dist)
24
23
  cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
25
24
  cl.s <- c()
26
25
  medoids <- c()
27
- for(i in cl){
26
+ for (i in cl) {
28
27
  lab <- strsplit(i, ",")[[1]]
29
28
  if(length(lab) == 1) {
30
29
  lab.s <- lab
@@ -44,6 +43,12 @@ find_medoids <- function (ani.df, out, clades) {
44
43
 
45
44
  #= Main
46
45
  cat("Finding Medoids\n")
47
- ani <- readRDS(argv[1])
48
- find_medoids(ani.df = ani, out = argv[2], clades = argv[3])
46
+ if (grepl("\\.rds$", argv[1])) {
47
+ ani <- readRDS(argv[1])
48
+ find_medoids(ani$a, ani$b, 1 - (ani$value / 100),
49
+ out = argv[2], clades = argv[3])
50
+ } else {
51
+ load(argv[1]) # assume .rda
52
+ find_medoids(a, b, d, out = argv[2], clades = argv[3])
53
+ }
49
54
 
@@ -48,9 +48,10 @@ module MiGA::SubcladeRunner::Pipeline
48
48
  # Find genomospecies medoids
49
49
  src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
50
50
  dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
51
+ dat = "../../09.distances/#{dir}/miga-project.rda"
52
+ dat = "../../09.distances/#{dir}/miga-project.rds" unless File.exist?(dat)
51
53
  run_cmd([
52
- 'Rscript', src, "../../09.distances/#{dir}/miga-project.rds",
53
- 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
54
+ 'Rscript', src, dat, 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
54
55
  ])
55
56
  if File.exist? 'miga-project.gsp-clades.sorted'
56
57
  File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
data/utils/subclades.R CHANGED
@@ -338,18 +338,25 @@ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) {
338
338
  }
339
339
 
340
340
  ani_distance <- function (ani_file, sel) {
341
- # Try to locate rds, otherwise read gzipped table
342
- rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
343
- if (file.exists(rds)) {
344
- sim <- readRDS(rds)
341
+ # Try to locate rda, then rds, and otherwise read gzipped table
342
+ rda <- gsub("\\.txt\\.gz$", ".rda", ani_file)
343
+ if (file.exists(rda)) {
344
+ load(rda) # Should already contain `a`, `b`, and `d` as vectors
345
345
  } else {
346
- sim <- read.table(gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE)
347
- }
346
+ rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
347
+ if (file.exists(rds)) {
348
+ sim <- readRDS(rds)
349
+ } else {
350
+ sim <- read.table(
351
+ gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE
352
+ )
353
+ }
348
354
 
349
- # Extract individual variables to deal with very large matrices
350
- a <- sim$a
351
- b <- sim$b
352
- d <- 1 - (sim$value / 100)
355
+ # Extract individual variables to deal with very large matrices
356
+ a <- sim$a
357
+ b <- sim$b
358
+ d <- 1 - (sim$value / 100)
359
+ }
353
360
 
354
361
  # If there is no data, end process
355
362
  if (length(a) == 0) return(NULL)
@@ -359,7 +366,7 @@ ani_distance <- function (ani_file, sel) {
359
366
  if (!is.na(sel) && file.exists(sel)) {
360
367
  say("Filter selection")
361
368
  ids <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
362
- sel.idx <- which(sim$a %in% ids & sim$b %in% ids)
369
+ sel.idx <- which(a %in% ids & b %in% ids)
363
370
  a <- a[sel.idx]
364
371
  b <- b[sel.idx]
365
372
  d <- d[sel.idx]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.13.9
4
+ version: 1.3.14.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-31 00:00:00.000000000 Z
11
+ date: 2024-04-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons