miga-base 1.1.2.1 → 1.1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 70955c93dc93a73a0ac28d3aaa8f75b4d0f2e0e5dc8797d1fc2bf57e969a8fbe
4
- data.tar.gz: 89ba5a42a7a12a104a12f9116f46b0e107bc82b22731622403d09c2e2b7459c7
3
+ metadata.gz: 73de682930481bd837b829588081e2c9e70a87054e9e1d91b7d40bf319030349
4
+ data.tar.gz: b0ed9f7f1acf8fb2530fde84803938e8f3d7fac3400a629c3db88779dd9a679f
5
5
  SHA512:
6
- metadata.gz: d57d59d5cb439119dda8f4c8ff90cd3d2d253ff631117c3005b60f321014e61d584c04655f6861de29807cfa2a9c569f2e0e35fde6931cc7140982512a35f13c
7
- data.tar.gz: a9c1090876f87c32376b477363f95410786a2d2da73f72be3d81a2fcfe28c6f1dc3829b741b1ad4986ded59bdc06999ad775739d8ecbd7694b4c6e40077905ec
6
+ metadata.gz: 4477253800d6a04f3b8e612ed8f5af8ccdb47e5f6aff6efdf2539d252daa7cbc70d009f28ec75fcada925420590614589effce76e465a93d9cb2a7ce093d79ff
7
+ data.tar.gz: bbb998c715274dc6b000fa3fc4b9f1f550867b78707b4b5eaa43c24b692939769c2958c3beef0bc10f6dbfd9f38a63b143b2617e44208550b19aa837c45711aa
@@ -5,7 +5,7 @@
5
5
  module MiGA::Cli::Action::Init::DaemonHelper
6
6
  def configure_daemon
7
7
  cli.puts 'Default daemon configuration:'
8
- daemon_f = File.expand_path('.miga_daemon.json', ENV['HOME'])
8
+ daemon_f = File.expand_path('.miga_daemon.json', ENV['MIGA_HOME'])
9
9
  unless File.exist?(daemon_f) and cli.ask_user(
10
10
  'A template daemon already exists, do you want to preserve it?',
11
11
  'yes', %w(yes no)
@@ -4,7 +4,7 @@
4
4
  # Helper module with files configuration functions for MiGA::Cli::Action::Init
5
5
  module MiGA::Cli::Action::Init::FilesHelper
6
6
  def open_rc_file
7
- rc_path = File.expand_path('.miga_rc', ENV['HOME'])
7
+ rc_path = File.expand_path('.miga_rc', ENV['MIGA_HOME'])
8
8
  if File.exist? rc_path
9
9
  if cli.ask_user(
10
10
  'I found a previous configuration. Do you want to continue?',
data/lib/miga/dataset.rb CHANGED
@@ -107,7 +107,7 @@ class MiGA::Dataset < MiGA::MiGA
107
107
  metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
108
108
  metadata[:inactive] = true
109
109
  metadata.save
110
- project.recalculate_tasks('Reference dataset inactivated') if ref?
110
+ project.recalculate_tasks("Reference dataset inactivated: #{name}") if ref?
111
111
  pull_hook :on_inactivate
112
112
  end
113
113
 
@@ -117,7 +117,7 @@ class MiGA::Dataset < MiGA::MiGA
117
117
  metadata[:inactive] = nil
118
118
  metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
119
119
  metadata.save
120
- project.recalculate_tasks('Reference dataset activated') if ref?
120
+ project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
121
121
  pull_hook :on_activate
122
122
  end
123
123
 
@@ -52,7 +52,9 @@ module MiGA::Project::Dataset
52
52
  @metadata[:datasets] << name
53
53
  @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
54
54
  save
55
- recalculate_tasks('New reference dataset added') if d.ref? && d.active?
55
+ if d.ref? && d.active?
56
+ recalculate_tasks("Reference dataset added: #{d.name}")
57
+ end
56
58
  pull_hook(:on_add_dataset, name)
57
59
  end
58
60
  dataset(name)
@@ -66,7 +68,9 @@ module MiGA::Project::Dataset
66
68
 
67
69
  self.metadata[:datasets].delete(name)
68
70
  save
69
- recalculate_tasks('Reference dataset unlinked') if d.ref? && d.active?
71
+ if d.ref? && d.active?
72
+ recalculate_tasks("Reference dataset unlinked: #{d.name}")
73
+ end
70
74
  pull_hook(:on_unlink_dataset, name)
71
75
  d
72
76
  end
@@ -55,10 +55,11 @@ module MiGA::Project::Result
55
55
  ##
56
56
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
57
57
  def add_result_distances(base, _opts)
58
- return nil unless result_files_exist?(base, %w[.Rdata .txt])
58
+ return nil unless result_files_exist?(base, %w[.rds .txt])
59
59
 
60
60
  r = MiGA::Result.new("#{base}.json")
61
- r.add_file(:rdata, 'miga-project.Rdata')
61
+ r.add_file(:rds, 'miga-project.rds')
62
+ r.add_file(:rdata, 'miga-project.Rdata') # Legacy file
62
63
  r.add_file(:matrix, 'miga-project.txt')
63
64
  r.add_file(:log, 'miga-project.log') # Legacy file
64
65
  r.add_file(:hist, 'miga-project.hist')
@@ -82,12 +83,13 @@ module MiGA::Project::Result
82
83
  end
83
84
 
84
85
  r = add_result_iter_clades(base)
85
- r.add_file(:aai_tree, 'miga-project.aai.nwk')
86
- r.add_file(:proposal, 'miga-project.proposed-clades')
87
- r.add_file(:clades_aai90, 'miga-project.aai90-clades')
88
- r.add_file(:clades_ani95, 'miga-project.ani95-clades')
89
- r.add_file(:clades_gsp, 'miga-project.gsp-clades')
90
- r.add_file(:medoids_gsp, 'miga-project.gsp-medoids')
86
+ r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
87
+ r.add_file(:aai_tree, 'miga-project.aai.nwk')
88
+ r.add_file(:proposal, 'miga-project.proposed-clades')
89
+ r.add_file(:clades_aai90, 'miga-project.aai90-clades')
90
+ r.add_file(:clades_ani95, 'miga-project.ani95-clades')
91
+ r.add_file(:clades_gsp, 'miga-project.gsp-clades')
92
+ r.add_file(:medoids_gsp, 'miga-project.gsp-medoids')
91
93
  r
92
94
  end
93
95
 
@@ -105,6 +107,7 @@ module MiGA::Project::Result
105
107
 
106
108
  r = add_result_iter_clades(base)
107
109
  r.add_file(:ani_tree, 'miga-project.ani.nwk')
110
+ r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
108
111
  r
109
112
  end
110
113
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 2, 1].freeze
15
+ VERSION = [1.1, 3, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem relese.
23
- VERSION_DATE = Date.new(2021, 11, 7)
23
+ VERSION_DATE = Date.new(2021, 11, 21)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -22,15 +22,15 @@ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
22
22
 
23
23
  # R-ify
24
24
  cat <<R | R --vanilla
25
- file <- gzfile('miga-project.txt.gz')
26
- aai <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
27
- save(aai, file = 'miga-project.Rdata')
28
- if(sum(aai[, 'a'] != aai[, 'b']) > 0) {
29
- h <- hist(aai[aai[, 'a'] != aai[, 'b'], 'value'], breaks = 100, plot = FALSE)
30
- len <- length(h[['breaks']])
25
+ file <- gzfile("miga-project.txt.gz")
26
+ aai <- read.table(file, sep = "\t", header = TRUE, as.is = TRUE)
27
+ saveRDS(aai, file = "miga-project.rds")
28
+ if(sum(aai[, "a"] != aai[, "b"]) > 0) {
29
+ h <- hist(aai[aai[, "a"] != aai[, "b"], "value"], breaks = 100, plot = FALSE)
30
+ len <- length(h[["breaks"]])
31
31
  write.table(
32
- cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
33
- file = 'miga-project.hist', quote = FALSE, sep = '\t',
32
+ cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
33
+ file = "miga-project.hist", quote = FALSE, sep = "\t",
34
34
  col.names = FALSE, row.names = FALSE
35
35
  )
36
36
  }
@@ -22,15 +22,15 @@ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
22
22
 
23
23
  # R-ify
24
24
  cat <<R | R --vanilla
25
- file <- gzfile('miga-project.txt.gz')
26
- ani <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
27
- save(ani, file = 'miga-project.Rdata')
28
- if(sum(ani[, 'a'] != ani[, 'b']) > 0) {
29
- h <- hist(ani[ani[, 'a'] != ani[, 'b'], 'value'], breaks = 100, plot = FALSE)
30
- len <- length(h[['breaks']])
25
+ file <- gzfile("miga-project.txt.gz")
26
+ ani <- read.table(file, sep = "\t", header = TRUE, as.is = TRUE)
27
+ saveRDS(ani, file = "miga-project.rds")
28
+ if(sum(ani[, "a"] != ani[, "b"]) > 0) {
29
+ h <- hist(ani[ani[, "a"] != ani[, "b"], "value"], breaks = 100, plot = FALSE)
30
+ len <- length(h[["breaks"]])
31
31
  write.table(
32
- cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
33
- file = 'miga-project.hist', quote = FALSE, sep = '\t',
32
+ cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
33
+ file = "miga-project.hist", quote = FALSE, sep = "\t",
34
34
  col.names = FALSE, row.names = FALSE
35
35
  )
36
36
  }
@@ -15,7 +15,7 @@ ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
15
15
  # No real need for hAAI distributions at all
16
16
  echo -n "" > miga-project.log
17
17
  echo -n "" > miga-project.txt
18
- echo "aai <- NULL; save(aai, file = 'miga-project.Rdata')" | R --vanilla
18
+ echo 'aai <- NULL; saveRDS(aai, file = "miga-project.rds")' | R --vanilla
19
19
 
20
20
  # Finalize
21
21
  miga_end_project_step "$DIR"
data/test/project_test.rb CHANGED
@@ -82,7 +82,7 @@ class ProjectTest < Test::Unit::TestCase
82
82
  def test_add_result
83
83
  p1 = project
84
84
  assert_nil(p1.add_result(:doom))
85
- %w[.Rdata .log .txt .done].each do |x|
85
+ %w[.rds .log .txt .done].each do |x|
86
86
  assert_nil(p1.add_result(:haai_distances))
87
87
  FileUtils.touch(
88
88
  File.join(
@@ -117,11 +117,12 @@ class ProjectTest < Test::Unit::TestCase
117
117
  # Project tasks
118
118
  expected_files = {
119
119
  project_stats: %w[.taxonomy.json .metadata.db],
120
- haai_distances: %w[.Rdata .log .txt],
121
- aai_distances: %w[.Rdata .log .txt],
122
- ani_distances: %w[.Rdata .log .txt],
123
- clade_finding: %w[.pdf .classif .medoids
124
- .class.tsv .class.nwk .proposed-clades],
120
+ haai_distances: %w[.rds .log .txt],
121
+ aai_distances: %w[.rds .log .txt],
122
+ ani_distances: %w[.rds .log .txt],
123
+ clade_finding: %w[
124
+ .pdf .classif .medoids .class.tsv .class.nwk .proposed-clades
125
+ ],
125
126
  subclades: %w[.pdf .classif .medoids .class.tsv .class.nwk],
126
127
  ogs: %w[.ogs .stats]
127
128
  }
data/utils/find-medoid.R CHANGED
@@ -5,26 +5,28 @@
5
5
  #
6
6
 
7
7
  #= Load stuff
8
- argv <- commandArgs(trailingOnly = T)
8
+ argv <- commandArgs(trailingOnly = TRUE)
9
9
  suppressPackageStartupMessages(library(ape))
10
- if(Sys.getenv('MIGA') == ''){
10
+ if(Sys.getenv("MIGA") == ""){
11
11
  suppressPackageStartupMessages(library(enveomics.R))
12
12
  }else{
13
- source(file.path(Sys.getenv('MIGA'),
14
- 'utils', 'enveomics', 'enveomics.R', 'R', 'df2dist.R'))
13
+ source(file.path(
14
+ Sys.getenv("MIGA"),
15
+ "utils", "enveomics", "enveomics.R", "R", "df2dist.R"
16
+ ))
15
17
  }
16
18
 
17
- find_medoids <- function(ani.df, out, clades) {
19
+ find_medoids <- function (ani.df, out, clades) {
18
20
  if(nrow(ani.df) == 0) return(NULL)
19
21
  ani.df$d <- 1 - (ani.df$value/100)
20
- dist <- enve.df2dist(ani.df, 'a', 'b', 'd', default.d = max(ani.df$d)*1.2)
22
+ dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
21
23
  dist <- as.matrix(dist)
22
- cl <- read.table(clades, header = FALSE, sep = '\t', as.is = TRUE)[,1]
24
+ cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
23
25
  cl.s <- c()
24
26
  medoids <- c()
25
27
  for(i in cl){
26
- lab <- strsplit(i, ',')[[1]]
27
- cat('Clade of:', lab[1], '\n')
28
+ lab <- strsplit(i, ",")[[1]]
29
+ cat("Clade of:", lab[1], "\n")
28
30
  if(length(lab) == 1) {
29
31
  lab.s <- lab
30
32
  } else {
@@ -32,15 +34,17 @@ find_medoids <- function(ani.df, out, clades) {
32
34
  }
33
35
  med <- lab.s[1]
34
36
  medoids <- c(medoids, med)
35
- cl.s <- c(cl.s, paste(lab.s, collapse = ','))
37
+ cl.s <- c(cl.s, paste(lab.s, collapse = ","))
36
38
  }
37
39
  write.table(medoids, out, quote = FALSE, row.names = FALSE, col.names = FALSE)
38
- write.table(cl.s, paste(clades, '.sorted', sep = ''), quote = FALSE,
39
- row.names = FALSE, col.names = FALSE)
40
+ write.table(
41
+ cl.s, paste(clades, ".sorted", sep = ""), quote = FALSE,
42
+ row.names = FALSE, col.names = FALSE
43
+ )
40
44
  }
41
45
 
42
46
  #= Main
43
- load(argv[1])
44
- if(! exists('ani')) ani <- aai
47
+ cat("Finding Medoids")
48
+ ani <- readRDS(argv[1])
45
49
  find_medoids(ani.df = ani, out = argv[2], clades = argv[3])
46
50
 
@@ -44,7 +44,7 @@ module MiGA::SubcladeRunner::Pipeline
44
44
  # Find genomospecies medoids
45
45
  src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
46
46
  dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
47
- `Rscript '#{src}' ../../09.distances/#{dir}/miga-project.Rdata \
47
+ `Rscript '#{src}' '../../09.distances/#{dir}/miga-project.rds' \
48
48
  miga-project.gsp-medoids miga-project.gsp-clades`
49
49
  if File.exist? 'miga-project.gsp-clades.sorted'
50
50
  File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
@@ -54,8 +54,6 @@ module MiGA::SubcladeRunner::Pipeline
54
54
  ofh = File.open('miga-project.proposed-clades', 'w')
55
55
  File.open('miga-project.gsp-clades', 'r') do |ifh|
56
56
  ifh.each_line do |ln|
57
- next if $. == 1
58
-
59
57
  r = ln.chomp.split(',')
60
58
  ofh.puts r.join("\t") if r.size >= 5
61
59
  end
data/utils/subclades.R CHANGED
@@ -10,56 +10,51 @@ suppressPackageStartupMessages(library(ape))
10
10
  suppressPackageStartupMessages(library(vegan))
11
11
  suppressPackageStartupMessages(library(cluster))
12
12
  suppressPackageStartupMessages(library(parallel))
13
- if(Sys.getenv('MIGA') == ''){
13
+ if(Sys.getenv("MIGA") == ""){
14
14
  suppressPackageStartupMessages(library(enveomics.R))
15
15
  }else{
16
- source(file.path(Sys.getenv('MIGA'),
17
- 'utils', 'enveomics', 'enveomics.R', 'R', 'df2dist.R'))
16
+ source(file.path(
17
+ Sys.getenv("MIGA"),
18
+ "utils", "enveomics", "enveomics.R", "R", "df2dist.R"
19
+ ))
18
20
  }
19
21
 
20
22
  #= Main function
21
23
  subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
22
- say('==> Out base:', out_base, '<==')
24
+ say("==> Out base:", out_base, "<==")
23
25
 
24
26
  # Normalize input matrix
25
- dist_rdata = paste(out_base, 'dist.rdata', sep = '.')
26
- if(!missing(ani_file)){
27
- if(length(ani.d) == 0 && !file.exists(dist_rdata)){
27
+ dist_rds <- paste(out_base, "dist.rds", sep = ".")
28
+ if (!missing(ani_file)) {
29
+ if(length(ani.d) == 0 && !file.exists(dist_rds)){
28
30
  # Read from ani_file
29
- a <- read.table(gzfile(ani_file), sep = '\t', header = TRUE, as.is = TRUE)
30
- if(nrow(a) == 0){
31
+ ani.d <- ani_distance(ani_file, sel)
32
+ if (is.null(ani.d)) {
31
33
  generate_empty_files(out_base)
32
34
  return(NULL)
35
+ } else {
36
+ saveRDS(ani.d, dist_rds)
33
37
  }
34
- if(!is.na(sel) && file.exists(sel)){
35
- say('Filter selection')
36
- lab <- read.table(sel, sep='\t', head=FALSE, as.is=TRUE)[,1]
37
- a <- a[a$a %in% lab & a$b %in% lab, ]
38
- }
39
- say('Distances')
40
- a$d <- 1 - (a$value/100)
41
- ani.d <- enve.df2dist(a, 'a', 'b', 'd', default.d = max(a$d)*1.2)
42
- save(ani.d, file = dist_rdata)
43
38
  }
44
39
  }
45
40
 
46
41
  # Read result if the subclade is ready, run it otherwise
47
- if(file.exists(paste(out_base, 'classif', sep = '.'))){
42
+ if (file.exists(paste(out_base, "classif", sep = "."))) {
48
43
  say("Loading")
49
44
  ani.medoids <- read.table(paste(out_base, "medoids", sep = "."),
50
- sep = ' ', as.is = TRUE)[,1]
45
+ sep = " ", as.is = TRUE)[,1]
51
46
  a <- read.table(paste(out_base, "classif", sep="."),
52
- sep = '\t', as.is = TRUE)
47
+ sep = "\t", as.is = TRUE)
53
48
  ani.types <- a[,2]
54
49
  names(ani.types) <- a[,1]
55
- if(length(ani.d) == 0) load(dist_rdata)
56
- }else if(length(labels(ani.d)) > 8L){
57
- res <- subclade_clustering(out_base, thr, ani.d, dist_rdata)
58
- if(length(res) == 0) return(NULL)
59
- ani.medoids <- res[['ani.medoids']]
60
- ani.types <- res[['ani.types']]
61
- ani.d <- res[['ani.d']]
62
- }else{
50
+ if(length(ani.d) == 0) ani.d <- readRDS(dist_rds)
51
+ } else if (length(labels(ani.d)) > 8L) {
52
+ res <- subclade_clustering(out_base, thr, ani.d, dist_rds)
53
+ if (length(res) == 0) return(NULL)
54
+ ani.medoids <- res[["ani.medoids"]]
55
+ ani.types <- res[["ani.types"]]
56
+ ani.d <- res[["ani.d"]]
57
+ } else {
63
58
  ani.medoids <- labels(ani.d)[which.min(colSums(as.matrix(ani.d)))]
64
59
  ani.types <- rep(1, length(labels(ani.d)))
65
60
  names(ani.types) <- labels(ani.d)
@@ -69,66 +64,80 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
69
64
 
70
65
  # Recursive search
71
66
  say("Recursive search")
72
- for(i in 1:length(ani.medoids)){
67
+ for (i in 1:length(ani.medoids)) {
73
68
  medoid <- ani.medoids[i]
74
69
  ds_f <- names(ani.types)[ ani.types==i ]
75
70
  say("Analyzing subclade", i, "with medoid:", medoid)
76
71
  dir_f <- paste(out_base, ".sc-", i, sep="")
77
- if(!dir.exists(dir_f)) dir.create(dir_f)
72
+ if (!dir.exists(dir_f)) dir.create(dir_f)
78
73
  write.table(ds_f,
79
74
  paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
80
75
  quote=FALSE, col.names=FALSE, row.names=FALSE)
81
- if(length(ds_f) > 8L){
76
+ if (length(ds_f) > 8L) {
82
77
  ani_subset <- as.dist(as.matrix(ani.d)[ds_f, ds_f])
83
- subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
84
- thr=thr, ani.d=ani_subset)
78
+ subclades(
79
+ out_base = paste(out_base, ".sc-", i, "/miga-project", sep=""),
80
+ thr = thr,
81
+ ani.d = ani_subset
82
+ )
85
83
  }
86
84
  }
87
85
 
88
86
  # Declare recursion up-to-here complete
89
- write.table(date(), paste(out_base, 'ready', sep='.'),
90
- quote=FALSE, row.names=FALSE, col.names=FALSE)
87
+ write.table(
88
+ date(), paste(out_base, "ready", sep = "."),
89
+ quote = FALSE, row.names = FALSE, col.names = FALSE
90
+ )
91
91
  }
92
92
 
93
93
  #= Heavy-lifter
94
- subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
94
+ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
95
95
  # Get ANI distances
96
- if(length(ani.d) > 0){
97
- # Just use ani.d (and save in dist_rdata_
98
- save(ani.d, file=dist_rdata)
99
- }else if(file.exists(dist_rdata)){
100
- # Read from dist_rdata
101
- load(dist_rdata)
102
- }else{
96
+ if (length(ani.d) > 0) {
97
+ # Just use ani.d (and save in dist_rds)
98
+ if (!file.exists(dist_rds)) saveRDS(ani.d, dist_rds)
99
+ } else if (file.exists(dist_rds)) {
100
+ # Read from dist_rds
101
+ ani.d <- readRDS(dist_rds)
102
+ } else {
103
103
  stop("Cannot find input matrix", out_base)
104
104
  }
105
- if(length(labels(ani.d)) <= 8L) return(list())
105
+ if (length(labels(ani.d)) <= 8L) return(list())
106
106
 
107
107
  # Build tree
108
108
  say("Tree")
109
109
  ani.ph <- bionj(ani.d)
110
- express.ori <- options('expressions')$expressions
111
- if(express.ori < ani.ph$Nnode*4){
112
- options(expressions=min(c(5e7,ani.ph$Nnode*4)))
110
+ say("- Write")
111
+ express.ori <- options("expressions")$expressions
112
+ if(express.ori < ani.ph$Nnode * 4){
113
+ options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
113
114
  }
114
- write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
115
+ write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
115
116
  options(expressions=express.ori)
116
117
 
117
118
  # Silhouette
118
119
  say("Silhouette")
119
120
  nn <- length(labels(ani.d))
120
121
  k <- min(max(floor(0.005 * nn), 2), 20):min(nn-1, 100)
122
+ say("- Make cluster")
121
123
  cl <- makeCluster(thr)
122
- s <- parSapply(cl, k, function(x) {
124
+ say("- Launch parallel jobs")
125
+ s <- parSapply(
126
+ cl, k,
127
+ function(x) {
123
128
  library(cluster)
124
- s <- pam(ani.d, x, do.swap=FALSE, pamonce=1)$silinfo
125
- c(s$avg.width, -sum(ifelse(s$widths[,3]>0,0,s$widths[,3])))
126
- })
129
+ s <- pam(ani.d, x, do.swap = FALSE, pamonce = 1)$silinfo
130
+ c(s$avg.width, -sum(ifelse(s$widths[,3] > 0, 0, s$widths[,3])))
131
+ }
132
+ )
133
+ say("- Stop cluster")
127
134
  stopCluster(cl)
128
- s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
129
- s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
130
- ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
131
- if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
135
+ say("- Calculate custom criteria")
136
+ s.avg.z <- (s[1,] - mean(s[1,])) / (sd(s[1,]) + 0.0001)
137
+ s.neg.z <- (s[2,] - mean(s[2,])) / (sd(s[2,]) + 0.01)
138
+ ds <- s.avg.z - s.neg.z - 2 / (1:length(k)) - (1:length(k)) / 50
139
+ if(mean(s[1,] < 0) < 0.75)
140
+ ds[s[1,] < 0] <- mean(ds) # <- k's with negative average
132
141
  top.n <- k[which.max(ds)]
133
142
 
134
143
  # Classify genomes
@@ -139,8 +148,8 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
139
148
 
140
149
  # Generate graphic report
141
150
  say("Graphic report")
142
- pdf(paste(out_base, ".pdf", sep=""), 7, 12)
143
- layout(matrix(c(1,1,2,2,3,3,4,5),byrow=TRUE, ncol=2))
151
+ pdf(paste(out_base, ".pdf", sep = ""), 7, 12)
152
+ layout(matrix(c(rep(1:3, each = 2), 4:5), byrow = TRUE, ncol = 2))
144
153
  plot_distances(ani.d)
145
154
  plot_silhouette(k, s[1,], s[2,], ds, top.n)
146
155
  plot_clustering(ani.cl, ani.d, ani.types)
@@ -153,112 +162,170 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
153
162
  # Return data
154
163
  say("Cluster ready")
155
164
  return(list(
156
- ani.medoids=ani.medoids,
157
- ani.types=ani.types,
158
- ani.d=ani.d
165
+ ani.medoids = ani.medoids,
166
+ ani.types = ani.types,
167
+ ani.d = ani.d
159
168
  ))
160
169
  }
161
170
 
162
171
  #= Helper functions
163
- say <- function(...) { message(paste("[",date(),"]",...,"\n"),appendLF=FALSE) }
172
+ say <- function (...) {
173
+ message(paste("[", date(), "]", ..., "\n"), appendLF = FALSE)
174
+ }
164
175
 
165
- generate_empty_files <- function(out_base) {
166
- pdf(paste(out_base, ".pdf", sep=""), 7, 12)
167
- plot(1, t="n", axes=F)
168
- legend("center", "No data", bty="n")
176
+ generate_empty_files <- function (out_base) {
177
+ pdf(paste(out_base, ".pdf", sep = ""), 7, 12)
178
+ plot(1, t = "n", axes = F)
179
+ legend("center", "No data", bty = "n")
169
180
  dev.off()
170
- file.create(paste(out_base,".1.classif",sep=""))
171
- file.create(paste(out_base,".1.medoids",sep=""))
181
+ file.create(paste(out_base, ".1.classif", sep = ""))
182
+ file.create(paste(out_base, ".1.medoids", sep = ""))
172
183
  }
173
184
 
174
- write_text_report <- function(out_base, ani.d, ani.medoids, ani.types){
175
- say('Text report')
176
- write.table(ani.medoids, paste(out_base, 'medoids', sep='.'),
177
- quote=FALSE, col.names=FALSE, row.names=FALSE)
178
- classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
179
- ani.d.m <- 100 - as.matrix(ani.d)*100
180
- for(j in 1:nrow(classif)){
185
+ write_text_report <- function (out_base, ani.d, ani.medoids, ani.types) {
186
+ say("Text report")
187
+ write.table(
188
+ ani.medoids, paste(out_base, "medoids", sep = "."),
189
+ quote = FALSE, col.names = FALSE, row.names = FALSE
190
+ )
191
+ classif <- cbind(names(ani.types), ani.types, ani.medoids[ani.types], NA)
192
+ ani.d.m <- 100 - as.matrix(ani.d) * 100
193
+ for (j in 1:nrow(classif)) {
181
194
  classif[j,4] <- ani.d.m[classif[j,1], classif[j,3]]
182
195
  }
183
- write.table(classif, paste(out_base,"classif",sep="."),
184
- quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
196
+ write.table(
197
+ classif, paste(out_base, "classif", sep="."),
198
+ quote = FALSE, col.names = FALSE, row.names = FALSE, sep = "\t"
199
+ )
185
200
  }
186
201
 
187
- plot_silhouette <- function(k, s, ns, ds, top.n) {
202
+ plot_silhouette <- function (k, s, ns, ds, top.n) {
188
203
  # s
189
- par(mar=c(4,5,1,5)+0.1)
190
- plot(1, t="n", xlab="k (clusters)", ylab="", xlim=range(c(0,k)),
191
- ylim=range(s), bty="n", xaxs="i", yaxt="n")
192
- polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
193
- axis(2, fg="grey60", col.axis="grey60")
194
- mtext("Mean silhouette", side=2, line=3, col="grey60")
204
+ par(mar = c(4,5,1,5)+0.1)
205
+ plot(
206
+ 1, t = "n", xlab = "k (clusters)", ylab = "", xlim = range(c(0,k)),
207
+ ylim = range(s), bty = "n", xaxs = "i", yaxt = "n"
208
+ )
209
+ polygon(c(k[1], k, k[length(k)]), c(0,s,0), border = NA, col = "grey80")
210
+ axis(2, fg = "grey60", col.axis = "grey60")
211
+ mtext("Mean silhouette", side = 2, line = 3, col = "grey60")
212
+
195
213
  # ns
196
- par(new=TRUE)
197
- plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
198
- ylim=range(ns), bty="n", xaxs="i")
199
- points(k, ns, type="o", pch=16, col=rgb(1/2,0,0,3/4))
200
- axis(4, fg="darkred", col.axis="darkred")
201
- mtext("Negative silhouette area", side=4, line=3, col="darkred")
214
+ par(new = TRUE)
215
+ plot(
216
+ 1, t = "n", bty = "n",
217
+ xlab = "", ylab = "", xaxt = "n", yaxt = "n", xaxs = "i",
218
+ xlim = range(c(0,k)), ylim = range(ns)
219
+ )
220
+ points(k, ns, type = "o", pch = 16, col = rgb(1/2,0,0,3/4))
221
+ axis(4, fg = "darkred", col.axis = "darkred")
222
+ mtext("Negative silhouette area", side = 4, line = 3, col = "darkred")
223
+
202
224
  # ds
203
- par(new=TRUE)
204
- plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
205
- ylim=range(ds), bty="n", xaxs="i")
225
+ par(new = TRUE)
226
+ plot(
227
+ 1, t = "n", bty = "n",
228
+ xlab = "", ylab = "", xaxt = "n", yaxt = "n", xaxs = "i",
229
+ xlim = range(c(0,k)), ylim = range(ds)
230
+ )
206
231
  lines(k, ds)
207
- abline(v=top.n, lty=2)
232
+ abline(v = top.n, lty = 2)
208
233
  }
209
234
 
210
- plot_distances <- function(dist) {
211
- par(mar=c(5,4,1,2)+0.1)
212
- hist(dist, border=NA, col="grey60", breaks=50, xlab="Distances", main="")
235
+ plot_distances <- function (dist) {
236
+ par(mar = c(5,4,1,2) + 0.1)
237
+ hist(
238
+ dist, border = NA, col = "grey60", breaks = 50,
239
+ xlab = "Distances", main = ""
240
+ )
213
241
  }
214
242
 
215
- plot_clustering <- function(cl, dist, types) {
216
- par(mar=c(5,4,4,2)+0.1)
243
+ plot_clustering <- function (cl, dist, types) {
244
+ par(mar = c(5,4,4,2) + 0.1)
217
245
  top.n <- length(cl$medoids)
218
246
  col <- ggplotColours(top.n)
219
- plot(silhouette(cl), col=col)
220
- if(length(labels(dist))<=15){
221
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
222
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
223
- }else{
224
- ani.mds <- cmdscale(dist, k=4)
225
- if(ncol(ani.mds)==4){
226
- plot(ani.mds[,1], ani.mds[,2], col=col[types], cex=1/2,
227
- xlab='Component 1', ylab='Component 2')
228
- plot(ani.mds[,3], ani.mds[,4], col=col[types], cex=1/2,
229
- xlab='Component 3', ylab='Component 4')
247
+ plot(silhouette(cl), col = col)
248
+ if (length(labels(dist)) <= 15) {
249
+ plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", bty = "n")
250
+ plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", bty = "n")
251
+ } else {
252
+ ani.mds <- cmdscale(dist, k = 4)
253
+ if (ncol(ani.mds) == 4) {
254
+ plot(
255
+ ani.mds[,1], ani.mds[,2], col = col[types], cex = 1/2,
256
+ xlab = "Component 1", ylab = "Component 2"
257
+ )
258
+ plot(
259
+ ani.mds[,3], ani.mds[,4], col = col[types], cex = 1/2,
260
+ xlab = "Component 3", ylab="Component 4"
261
+ )
230
262
  }else{
231
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
232
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
263
+ for (i in 1:2)
264
+ plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", bty = "n")
233
265
  }
234
266
  }
235
267
  }
236
268
 
237
- plot_tree <- function(phy, types, medoids){
269
+ plot_tree <- function (phy, types, medoids) {
238
270
  layout(1)
239
271
  top.n <- length(unique(types))
240
272
  col <- ggplotColours(top.n)
241
273
  is.medoid <- phy$tip.label %in% medoids
242
- phy$tip.label[is.medoid] <- paste(phy$tip.label[is.medoid],
243
- " [", types[phy$tip.label[is.medoid]], "]", sep='')
244
- plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
245
- font=ifelse(is.medoid, 2, 1),
246
- tip.color=col[types[phy$tip.label]])
274
+ phy$tip.label[is.medoid] <- paste(
275
+ phy$tip.label[is.medoid],
276
+ " [", types[phy$tip.label[is.medoid]], "]",
277
+ sep = ""
278
+ )
279
+ plot(
280
+ phy, cex = ifelse(is.medoid, 1/3, 1/6),
281
+ font = ifelse(is.medoid, 2, 1),
282
+ tip.color = col[types[phy$tip.label]]
283
+ )
284
+ }
285
+
286
+ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) {
287
+ if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360 / n
288
+ hcl(h = seq(h[1], h[2], length = n), c = 100, l = 65, alpha = alpha)
247
289
  }
248
290
 
249
- ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
250
- if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
251
- hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
291
+ ani_distance <- function (ani_file, sel) {
292
+ # Try to locate rds, otherwise read gzipped table
293
+ rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
294
+ if (file.exists(rds)) {
295
+ sim <- readRDS(rds)
296
+ } else {
297
+ sim <- read.table(gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE)
298
+ }
299
+
300
+ # If there is not data end process
301
+ if (nrow(sim) == 0) return(NULL)
302
+
303
+ # Apply filter (if requested)
304
+ if (!is.na(sel) && file.exists(sel)) {
305
+ say("Filter selection")
306
+ lab <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
307
+ sim <- sim[sim$a %in% lab & sim$b %in% lab, ]
308
+ }
309
+
310
+ # Transform to distances
311
+ say("Distances")
312
+ sim$d <- 1 - (sim$value / 100)
313
+ return(enve.df2dist(sim, "a", "b", "d", default.d = max(sim$d) * 1.2))
252
314
  }
253
315
 
254
316
  #= Main
255
317
  options(warn = 1)
256
- if(length(argv) >= 5 & argv[5] == 'empty'){
318
+ if (length(argv) >= 5 & argv[5] == "empty") {
257
319
  generate_empty_files(argv[2])
258
- write.table(NULL, paste(argv[2], "medoids", sep="."))
259
- write.table(NULL, paste(argv[2], "classif", sep="."))
260
- write.table(date(), paste(argv[2], "ready", sep="."))
320
+ write.table(NULL, paste(argv[2], "medoids", sep = "."))
321
+ write.table(NULL, paste(argv[2], "classif", sep = "."))
322
+ write.table(date(), paste(argv[2], "ready", sep = "."))
261
323
  }else{
262
- subclades(ani_file = argv[1], out_base = argv[2],
263
- thr = ifelse(is.na(argv[3]), 1, as.numeric(argv[3])), sel = argv[4])
324
+ subclades(
325
+ ani_file = argv[1],
326
+ out_base = argv[2],
327
+ thr = ifelse(is.na(argv[3]), 1, as.numeric(argv[3])),
328
+ sel = argv[4]
329
+ )
264
330
  }
331
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2.1
4
+ version: 1.1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-07 00:00:00.000000000 Z
11
+ date: 2021-11-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons