miga-base 0.7.26.2 → 1.0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/env.rb +26 -0
  11. data/lib/miga/cli/action/init.rb +11 -7
  12. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  13. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  14. data/lib/miga/cli/action/tax_dist.rb +2 -2
  15. data/lib/miga/cli/action/wf.rb +5 -4
  16. data/lib/miga/cli/base.rb +1 -0
  17. data/lib/miga/common.rb +1 -0
  18. data/lib/miga/daemon.rb +11 -4
  19. data/lib/miga/dataset/result.rb +10 -6
  20. data/lib/miga/json.rb +5 -4
  21. data/lib/miga/metadata.rb +5 -1
  22. data/lib/miga/parallel.rb +36 -0
  23. data/lib/miga/project.rb +8 -8
  24. data/lib/miga/project/base.rb +4 -4
  25. data/lib/miga/project/result.rb +2 -2
  26. data/lib/miga/sqlite.rb +10 -2
  27. data/lib/miga/version.rb +23 -9
  28. data/scripts/aai_distances.bash +16 -18
  29. data/scripts/ani_distances.bash +16 -17
  30. data/scripts/assembly.bash +31 -16
  31. data/scripts/haai_distances.bash +3 -27
  32. data/scripts/miga.bash +12 -8
  33. data/scripts/p.bash +1 -1
  34. data/scripts/read_quality.bash +9 -18
  35. data/scripts/trimmed_fasta.bash +14 -30
  36. data/scripts/trimmed_reads.bash +36 -36
  37. data/test/parallel_test.rb +31 -0
  38. data/test/project_test.rb +2 -1
  39. data/test/remote_dataset_test.rb +1 -1
  40. data/utils/distance/commands.rb +1 -0
  41. data/utils/distance/database.rb +0 -1
  42. data/utils/distance/runner.rb +2 -4
  43. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  44. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  45. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  46. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  47. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  48. data/utils/enveomics/Manifest/categories.json +13 -4
  49. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  50. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  51. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  52. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  53. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  54. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  55. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  56. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  57. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  58. data/utils/enveomics/Scripts/aai.rb +3 -2
  59. data/utils/enveomics/Scripts/anir.rb +137 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  64. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  65. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  66. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  67. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  68. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  69. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  70. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  71. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  72. data/utils/enveomics/Scripts/rbm.rb +87 -133
  73. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  74. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  75. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  76. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  77. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  78. data/utils/enveomics/enveomics.R/README.md +1 -0
  79. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  80. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  81. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  82. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  83. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  84. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  85. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  86. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  88. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  89. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  90. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  100. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  101. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  102. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  103. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  104. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  105. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  106. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  107. data/utils/multitrim/README.md +67 -0
  108. data/utils/multitrim/multitrim.py +1555 -0
  109. data/utils/multitrim/multitrim.yml +13 -0
  110. data/utils/requirements.txt +4 -3
  111. data/utils/subclade/pipeline.rb +2 -2
  112. metadata +33 -4
  113. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
data/lib/miga/json.rb CHANGED
@@ -1,5 +1,4 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
3
2
 
4
3
  require 'json'
5
4
 
@@ -45,8 +44,10 @@ class MiGA::Json < MiGA::MiGA
45
44
  raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
45
 
47
46
  # Parse JSON
48
- params = { symbolize_names: opts[:symbolize],
49
- create_additions: opts[:additions] }
47
+ params = {
48
+ symbolize_names: opts[:symbolize],
49
+ create_additions: opts[:additions]
50
+ }
50
51
  y = JSON.parse(cont, params)
51
52
 
52
53
  # Add defaults
data/lib/miga/metadata.rb CHANGED
@@ -105,7 +105,11 @@ class MiGA::Metadata < MiGA::MiGA
105
105
  ##
106
106
  # Return the value of +k+ in #data
107
107
  def [](k)
108
- data[k.to_sym]
108
+ if k.to_s =~ /(.+):(.+)/
109
+ data[$1.to_sym]&.fetch($2)
110
+ else
111
+ data[k.to_sym]
112
+ end
109
113
  end
110
114
 
111
115
  ##
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Parallel execution in MiGA.
5
+ class MiGA::Parallel < MiGA::MiGA
6
+ class << self
7
+ ##
8
+ # Executes the passed block with the thread number as argument (0-numbered)
9
+ # in +threads+ processes
10
+ def process(threads)
11
+ threads.times do |i|
12
+ Process.fork { yield(i) }
13
+ end
14
+ Process.waitall
15
+ end
16
+
17
+ ##
18
+ # Distributes +enum+ across +threads+ and calls the passed block with args:
19
+ # 1. Unitary object from +enum+
20
+ # 2. Index of the unitary object
21
+ # 3. Index of the acting thread
22
+ def distribute(enum, threads, &blk)
23
+ process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
24
+ end
25
+
26
+ ##
27
+ # Enum through +enum+ executing the passed block only for thread with index
28
+ # +thr+, one of +threads+ threads. The passed block has the same arguments
29
+ # as the one in +#distribute+
30
+ def thread_enum(enum, threads, thr)
31
+ enum.each_with_index do |obj, idx|
32
+ yield(obj, idx, thr) if idx % threads == thr
33
+ end
34
+ end
35
+ end
36
+ end
data/lib/miga/project.rb CHANGED
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
42
42
  # Create an empty project
43
43
  def create
44
44
  unless MiGA::MiGA.initialized?
45
- raise 'Impossible to create project in uninitialized MiGA.'
45
+ warn 'Projects cannot be processed yet, first run: miga init'
46
46
  end
47
47
 
48
- dirs = [path] + @@FOLDERS.map { |d| "#{path}/#{d}" } +
49
- @@DATA_FOLDERS.map { |d| "#{path}/data/#{d}" }
50
- dirs.each { |d| Dir.mkdir(d) unless Dir.exist? d }
48
+ dirs = @@FOLDERS.map { |d| File.join(path, d) }
49
+ dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
50
+ dirs.each { |d| FileUtils.mkdir_p(d) }
51
51
  @metadata = MiGA::Metadata.new(
52
- File.expand_path('miga.project.json', path),
53
- { datasets: [], name: File.basename(path) }
52
+ File.join(path, 'miga.project.json'),
53
+ datasets: [], name: File.basename(path)
54
54
  )
55
- d_path = File.expand_path('daemon/daemon.json', path)
56
- File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
55
+ d_path = File.join(path, 'daemon', 'daemon.json')
56
+ File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
57
57
  pull_hook :on_create
58
58
  self.load
59
59
  end
@@ -131,15 +131,15 @@ module MiGA::Project::Base
131
131
  },
132
132
  haai_p: {
133
133
  desc: 'Value of aai.rb -p on hAAI', type: String,
134
- default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
- in: %w[fastaai blast+ blast blat diamond no]
134
+ default: proc { |project| project.clade? ? 'no' : 'fastaai' },
135
+ in: %w[blast+ blast blat diamond fastaai no]
136
136
  },
137
137
  aai_p: {
138
- desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
138
+ desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
139
139
  in: %w[blast+ blast blat diamond]
140
140
  },
141
141
  ani_p: {
142
- desc: 'Value of ani.rb -p on ANI', default: 'blast+', type: String,
142
+ desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
143
143
  in: %w[blast+ blast blat fastani]
144
144
  },
145
145
  max_try: {
@@ -55,12 +55,12 @@ module MiGA::Project::Result
55
55
  ##
56
56
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
57
57
  def add_result_distances(base, _opts)
58
- return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
58
+ return nil unless result_files_exist?(base, %w[.Rdata .txt])
59
59
 
60
60
  r = MiGA::Result.new("#{base}.json")
61
61
  r.add_file(:rdata, 'miga-project.Rdata')
62
62
  r.add_file(:matrix, 'miga-project.txt')
63
- r.add_file(:log, 'miga-project.log')
63
+ r.add_file(:log, 'miga-project.log') # Legacy file
64
64
  r.add_file(:hist, 'miga-project.hist')
65
65
  r
66
66
  end
data/lib/miga/sqlite.rb CHANGED
@@ -37,12 +37,20 @@ class MiGA::SQLite < MiGA::MiGA
37
37
  # Executes +cmd+ and returns the result
38
38
  def run(*cmd)
39
39
  busy_attempts ||= 0
40
- conn = SQLite3::Database.new(path)
41
- conn.execute(*cmd)
40
+ io_attempts ||= 0
41
+ y = nil
42
+ SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
43
+ y
42
44
  rescue SQLite3::BusyException => e
43
45
  busy_attempts += 1
44
46
  raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
45
47
 
48
+ sleep(1)
49
+ retry
50
+ rescue SQLite3::IOException => e
51
+ io_attempts += 1
52
+ raise "Database I/O error #{path}: #{e.message}" if io_attempts >= 3
53
+
46
54
  sleep(1)
47
55
  retry
48
56
  end
data/lib/miga/version.rb CHANGED
@@ -9,23 +9,33 @@ module MiGA
9
9
  # Current version of MiGA. An Array with three values:
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
- # - Integer representing minor changes that require new version number.
13
- VERSION = [0.7, 26, 2].freeze
12
+ # - String indicating release status:
13
+ # - rc* release candidate, not released as gem
14
+ # - [0-9]+ stable release, released as gem
15
+ VERSION = [1.0, 3, 0].freeze
14
16
 
15
17
  ##
16
18
  # Nickname for the current major.minor version.
17
- VERSION_NAME = 'lithograph'
19
+ VERSION_NAME = 'prima'
18
20
 
19
21
  ##
20
22
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2021, 3, 1)
23
+ VERSION_DATE = Date.new(2021, 6, 4)
22
24
 
23
25
  ##
24
- # Reference of MiGA.
25
- CITATION = 'Rodriguez-R et al (2018). ' \
26
- 'The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene ' \
27
- 'diversity analysis of Archaea and Bacteria at the whole genome level. ' \
28
- 'Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467.'
26
+ # References of MiGA
27
+ CITATION = []
28
+ CITATION << <<~REF
29
+ Rodriguez-R et al (2018). The Microbial Genomes Atlas (MiGA) webserver:
30
+ taxonomic and gene diversity analysis of Archaea and Bacteria at the whole
31
+ genome level. Nucleic Acids Research 46(W1):W282-W288.
32
+ doi:10.1093/nar/gky467.
33
+ REF
34
+ CITATION << <<~REF
35
+ Rodriguez-R et al (2020). Classifying prokaryotic genomes using the
36
+ Microbial Genomes Atlas (MiGA) webserver. Bergey's Manual of Systematics
37
+ of Archaea and Bacteria.
38
+ REF
29
39
  end
30
40
 
31
41
  class MiGA::MiGA
@@ -58,6 +68,10 @@ class MiGA::MiGA
58
68
  ##
59
69
  # Reference of MiGA
60
70
  def self.CITATION
71
+ CITATION.map { |i| "- #{i}" }.join
72
+ end
73
+
74
+ def self.CITATION_ARRAY
61
75
  CITATION
62
76
  end
63
77
  end
@@ -9,34 +9,32 @@ DIR="$PROJECT/data/09.distances/02.aai"
9
9
  # Initialize
10
10
  miga_start_project_step "$DIR"
11
11
 
12
- echo -n "" > miga-project.log
13
- DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
-
15
12
  # Extract values
16
13
  rm -f miga-project.txt
14
+ SQL="SELECT seq1, seq2, aai, sd, n, omega from aai;"
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
17
16
  (
18
- echo "metric a b value sd n omega" | tr " " "\\t"
17
+ echo "a b value sd n omega" | tr " " "\\t"
19
18
  for i in $DS ; do
20
- echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
21
- " seq1, seq2, aai, sd, n, omega from aai;" \
22
- | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
23
- echo "$i" >> miga-project.log
19
+ echo "$SQL" | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
24
20
  done
25
21
  ) | gzip -9c > miga-project.txt.gz
26
22
 
27
23
  # R-ify
28
- echo "
29
- aai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
30
- save(aai, file='miga-project.Rdata');
31
- if(sum(aai[,'a'] != aai[,'b']) > 0){
32
- h <- hist(aai[aai[,'a'] != aai[,'b'], 'value'], breaks=100, plot=FALSE);
24
+ cat <<R | R --vanilla
25
+ file <- gzfile('miga-project.txt.gz')
26
+ aai <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
27
+ save(aai, file = 'miga-project.Rdata')
28
+ if(sum(aai[, 'a'] != aai[, 'b']) > 0) {
29
+ h <- hist(aai[aai[, 'a'] != aai[, 'b'], 'value'], breaks = 100, plot = FALSE)
30
+ len <- length(h[['breaks']])
33
31
  write.table(
34
- cbind(h[['breaks']][-length(h[['breaks']])],
35
- h[['breaks']][-1], h[['counts']]),
36
- file='miga-project.hist', quote=FALSE, sep='\\t',
37
- col.names=FALSE, row.names=FALSE);
32
+ cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
33
+ file = 'miga-project.hist', quote = FALSE, sep = '\t',
34
+ col.names = FALSE, row.names = FALSE
35
+ )
38
36
  }
39
- " | R --vanilla
37
+ R
40
38
 
41
39
  # Finalize
42
40
  miga_end_project_step "$DIR"
@@ -9,33 +9,32 @@ DIR="$PROJECT/data/09.distances/03.ani"
9
9
  # Initialize
10
10
  miga_start_project_step "$DIR"
11
11
 
12
- echo -n "" > miga-project.log
13
- DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
-
15
12
  # Extract values
16
13
  rm -f miga-project.txt
14
+ SQL="SELECT seq1, seq2, ani, sd, n, omega from ani;"
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
17
16
  (
18
- echo "metric a b value sd n omega" | tr " " "\\t"
17
+ echo "a b value sd n omega" | tr " " "\\t"
19
18
  for i in $DS ; do
20
- echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
21
- | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
22
- echo "$i" >> miga-project.log
19
+ echo "$SQL" | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
23
20
  done
24
21
  ) | gzip -9c > miga-project.txt.gz
25
22
 
26
23
  # R-ify
27
- echo "
28
- ani <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
29
- save(ani, file='miga-project.Rdata');
30
- if(sum(ani[,'a'] != ani[,'b']) > 0){
31
- h <- hist(ani[ani[,'a'] != ani[,'b'], 'value'], breaks=100, plot=FALSE);
24
+ cat <<R | R --vanilla
25
+ file <- gzfile('miga-project.txt.gz')
26
+ ani <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
27
+ save(ani, file = 'miga-project.Rdata')
28
+ if(sum(ani[, 'a'] != ani[, 'b']) > 0) {
29
+ h <- hist(ani[ani[, 'a'] != ani[, 'b'], 'value'], breaks = 100, plot = FALSE)
30
+ len <- length(h[['breaks']])
32
31
  write.table(
33
- cbind(h[['breaks']][-length(h[['breaks']])],
34
- h[['breaks']][-1], h[['counts']]),
35
- file='miga-project.hist', quote=FALSE, sep='\\t',
36
- col.names=FALSE, row.names=FALSE);
32
+ cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
33
+ file = 'miga-project.hist', quote = FALSE, sep = '\t',
34
+ col.names = FALSE, row.names = FALSE
35
+ )
37
36
  }
38
- " | R --vanilla
37
+ R
39
38
 
40
39
  # Finalize
41
40
  miga_end_project_step "$DIR"
@@ -11,30 +11,44 @@ miga date > "$DATASET.start"
11
11
 
12
12
  # Interpose (if needed)
13
13
  TF="../04.trimmed_fasta"
14
- if [[ -s "$TF/$DATASET.1.fasta" \
15
- && -s "$TF/$DATASET.2.fasta" \
16
- && ! -s "$TF/$DATASET.CoupledReads.fa" ]] ; then
17
- FastA.interpose.pl "$TF/$DATASET.CoupledReads.fa" "$TF/$DATASET".[12].fasta
18
- gzip -9 -f "$TF/$DATASET.1.fasta"
19
- gzip -9 -f "$TF/$DATASET.2.fasta"
20
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
14
+ b=$DATASET
15
+ if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
16
+ cr="$TF/${b}.CoupledReads.fa"
17
+ if [[ ! -s "$cr" && ! -s "${cr}.gz" ]] ; then
18
+ for s in 1 2 ; do
19
+ if [[ -s "$TF/${b}.${s}.fasta" ]] ; then
20
+ ln -s "$TF/${b}.${s}.fasta" "${b}.${s}.tmp"
21
+ else
22
+ gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp"
23
+ fi
24
+ done
25
+ FastA.interpose.pl "$cr" "$b".[12].tmp
26
+ rm "$b".[12].tmp
27
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
28
+ fi
21
29
  fi
22
30
 
31
+ # Gzip (if needed)
32
+ for i in SingleReads CoupledReads ; do
33
+ base="$TF/${DATASET}.${i}.fa"
34
+ if [[ -e "$base" && ! -s "${base}.gz" ]] ; then
35
+ gzip -9f "$base"
36
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
37
+ fi
38
+ done
39
+
23
40
  # Assemble
24
- FA="$TF/$DATASET.CoupledReads.fa"
25
- [[ -e "$FA" ]] || FA="$FA.gz"
26
- [[ -e "$FA" ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
27
- [[ -e "$FA" ]] || FA="$FA.gz"
41
+ FA="$TF/${DATASET}.CoupledReads.fa.gz"
42
+ [[ -e "$FA" ]] || FA="$TF/${DATASET}.SingleReads.fa.gz"
28
43
  RD="r"
29
44
  [[ $FA == *.SingleReads.fa* ]] && RD="l"
30
- idba_ud --pre_correction -$RD "$FA" -o "$DATASET" --num_threads "$CORES" || true
45
+ gzip -cd "$FA" \
46
+ | idba_ud --pre_correction -$RD /dev/stdin \
47
+ -o "$DATASET" --num_threads "$CORES" || true
31
48
  [[ -s "$DATASET/contig.fa" ]] || exit 1
32
49
 
33
50
  # Clean
34
- (
35
- cd "$DATASET"
36
- rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa
37
- )
51
+ ( cd "$DATASET" && rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa )
38
52
 
39
53
  # Extract
40
54
  if [[ -s "$DATASET/scaffold.fa" ]] ; then
@@ -49,3 +63,4 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
49
63
  # Finalize
50
64
  miga date > "$DATASET.done"
51
65
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
66
+
@@ -12,34 +12,10 @@ miga_start_project_step "$DIR"
12
12
  # Cleanup databases
13
13
  ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
14
14
 
15
- # Run hAAI
15
+ # No real need for hAAI distributions at all
16
16
  echo -n "" > miga-project.log
17
- DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
18
-
19
- # Extract values
20
- rm -f miga-project.txt
21
- (
22
- echo "metric a b value sd n omega" | tr " " "\\t"
23
- for i in $DS ; do
24
- echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
25
- | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
26
- echo "$i" >> miga-project.log
27
- done
28
- ) | gzip -9c > miga-project.txt.gz
29
-
30
- # R-ify
31
- echo "
32
- haai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
33
- save(haai, file='miga-project.Rdata');
34
- if(sum(haai[,'a'] != haai[,'b']) > 0){
35
- h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
36
- write.table(
37
- cbind(h[['breaks']][-length(h[['breaks']])],
38
- h[['breaks']][-1], h[['counts']]),
39
- file='miga-project.hist', quote=FALSE, sep='\\t',
40
- col.names=FALSE, row.names=FALSE);
41
- }
42
- " | R --vanilla
17
+ echo -n "" > miga-project.txt
18
+ echo "aai <- NULL; save(aai, file = 'miga-project.Rdata')" | R --vanilla
43
19
 
44
20
  # Finalize
45
21
  miga_end_project_step "$DIR"
data/scripts/miga.bash CHANGED
@@ -1,19 +1,21 @@
1
1
  #!/bin/bash
2
2
 
3
+ ###
3
4
  # Setup environment
4
5
  set -e
5
- MIGA_HOME=${MIGA_HOME:-"$HOME"}
6
+ eval "$("$MIGA/bin/miga" env)"
6
7
  SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
7
- # shellcheck source=/dev/null
8
- . "$MIGA_HOME/.miga_rc"
9
-
10
- # Ensure submodules are first in PATH
11
- export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
12
- export PATH="$MIGA/utils/FastAAI/FastAAI:$PATH"
13
8
 
9
+ ###
14
10
  # Ancillary functions
11
+
12
+ # Evaluates if the first passed argument is an existing file
15
13
  function exists { [[ -e "$1" ]] ; }
14
+
15
+ # Evaluates if the first passed argument is a function
16
16
  function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
17
+
18
+ # Initiate a project-wide run
17
19
  function miga_start_project_step {
18
20
  local dir="$1"
19
21
  local dir_r="${dir}.running"
@@ -22,6 +24,8 @@ function miga_start_project_step {
22
24
  cd "$dir_r"
23
25
  miga date > "miga-project.start"
24
26
  }
27
+
28
+ # Finalize a project-wide run
25
29
  function miga_end_project_step {
26
30
  local dir="$1"
27
31
  local dir_r="${dir}.running"
@@ -38,7 +42,7 @@ if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
38
42
  echo ""
39
43
  echo "######[ $SCRIPT ]######"
40
44
  echo "# Date: $(miga date)"
41
- echo "# Host: $(hostname)"
45
+ echo "# Host: $(hostname) [$CORES]"
42
46
  echo "# MiGA: $MIGA"
43
47
  echo "# Project: $PROJECT"
44
48
  if [[ -n $DATASET ]] ; then