miga-base 0.7.26.2 → 1.0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/env.rb +26 -0
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +12 -8
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- data/utils/subclade/pipeline.rb +2 -2
- metadata +33 -4
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
data/lib/miga/json.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
#
|
2
|
-
# @license Artistic-2.0
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
3
|
require 'json'
|
5
4
|
|
@@ -45,8 +44,10 @@ class MiGA::Json < MiGA::MiGA
|
|
45
44
|
raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
|
46
45
|
|
47
46
|
# Parse JSON
|
48
|
-
params = {
|
49
|
-
|
47
|
+
params = {
|
48
|
+
symbolize_names: opts[:symbolize],
|
49
|
+
create_additions: opts[:additions]
|
50
|
+
}
|
50
51
|
y = JSON.parse(cont, params)
|
51
52
|
|
52
53
|
# Add defaults
|
data/lib/miga/metadata.rb
CHANGED
data/lib/miga/parallel.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
##
|
4
|
+
# Parallel execution in MiGA.
|
5
|
+
class MiGA::Parallel < MiGA::MiGA
|
6
|
+
class << self
|
7
|
+
##
|
8
|
+
# Executes the passed block with the thread number as argument (0-numbered)
|
9
|
+
# in +threads+ processes
|
10
|
+
def process(threads)
|
11
|
+
threads.times do |i|
|
12
|
+
Process.fork { yield(i) }
|
13
|
+
end
|
14
|
+
Process.waitall
|
15
|
+
end
|
16
|
+
|
17
|
+
##
|
18
|
+
# Distributes +enum+ across +threads+ and calls the passed block with args:
|
19
|
+
# 1. Unitary object from +enum+
|
20
|
+
# 2. Index of the unitary object
|
21
|
+
# 3. Index of the acting thread
|
22
|
+
def distribute(enum, threads, &blk)
|
23
|
+
process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Enum through +enum+ executing the passed block only for thread with index
|
28
|
+
# +thr+, one of +threads+ threads. The passed block has the same arguments
|
29
|
+
# as the one in +#distribute+
|
30
|
+
def thread_enum(enum, threads, thr)
|
31
|
+
enum.each_with_index do |obj, idx|
|
32
|
+
yield(obj, idx, thr) if idx % threads == thr
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/miga/project.rb
CHANGED
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
|
|
42
42
|
# Create an empty project
|
43
43
|
def create
|
44
44
|
unless MiGA::MiGA.initialized?
|
45
|
-
|
45
|
+
warn 'Projects cannot be processed yet, first run: miga init'
|
46
46
|
end
|
47
47
|
|
48
|
-
dirs =
|
49
|
-
|
50
|
-
dirs.each { |d|
|
48
|
+
dirs = @@FOLDERS.map { |d| File.join(path, d) }
|
49
|
+
dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
|
50
|
+
dirs.each { |d| FileUtils.mkdir_p(d) }
|
51
51
|
@metadata = MiGA::Metadata.new(
|
52
|
-
File.
|
53
|
-
|
52
|
+
File.join(path, 'miga.project.json'),
|
53
|
+
datasets: [], name: File.basename(path)
|
54
54
|
)
|
55
|
-
d_path = File.
|
56
|
-
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?
|
55
|
+
d_path = File.join(path, 'daemon', 'daemon.json')
|
56
|
+
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
|
57
57
|
pull_hook :on_create
|
58
58
|
self.load
|
59
59
|
end
|
data/lib/miga/project/base.rb
CHANGED
@@ -131,15 +131,15 @@ module MiGA::Project::Base
|
|
131
131
|
},
|
132
132
|
haai_p: {
|
133
133
|
desc: 'Value of aai.rb -p on hAAI', type: String,
|
134
|
-
default: proc { |project| project.clade? ? 'no' : '
|
135
|
-
in: %w[
|
134
|
+
default: proc { |project| project.clade? ? 'no' : 'fastaai' },
|
135
|
+
in: %w[blast+ blast blat diamond fastaai no]
|
136
136
|
},
|
137
137
|
aai_p: {
|
138
|
-
desc: 'Value of aai.rb -p on AAI', default: '
|
138
|
+
desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
|
139
139
|
in: %w[blast+ blast blat diamond]
|
140
140
|
},
|
141
141
|
ani_p: {
|
142
|
-
desc: 'Value of ani.rb -p on ANI', default: '
|
142
|
+
desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
|
143
143
|
in: %w[blast+ blast blat fastani]
|
144
144
|
},
|
145
145
|
max_try: {
|
data/lib/miga/project/result.rb
CHANGED
@@ -55,12 +55,12 @@ module MiGA::Project::Result
|
|
55
55
|
##
|
56
56
|
# Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
|
57
57
|
def add_result_distances(base, _opts)
|
58
|
-
return nil unless result_files_exist?(base, %w[.Rdata .
|
58
|
+
return nil unless result_files_exist?(base, %w[.Rdata .txt])
|
59
59
|
|
60
60
|
r = MiGA::Result.new("#{base}.json")
|
61
61
|
r.add_file(:rdata, 'miga-project.Rdata')
|
62
62
|
r.add_file(:matrix, 'miga-project.txt')
|
63
|
-
r.add_file(:log, 'miga-project.log')
|
63
|
+
r.add_file(:log, 'miga-project.log') # Legacy file
|
64
64
|
r.add_file(:hist, 'miga-project.hist')
|
65
65
|
r
|
66
66
|
end
|
data/lib/miga/sqlite.rb
CHANGED
@@ -37,12 +37,20 @@ class MiGA::SQLite < MiGA::MiGA
|
|
37
37
|
# Executes +cmd+ and returns the result
|
38
38
|
def run(*cmd)
|
39
39
|
busy_attempts ||= 0
|
40
|
-
|
41
|
-
|
40
|
+
io_attempts ||= 0
|
41
|
+
y = nil
|
42
|
+
SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
|
43
|
+
y
|
42
44
|
rescue SQLite3::BusyException => e
|
43
45
|
busy_attempts += 1
|
44
46
|
raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
|
45
47
|
|
48
|
+
sleep(1)
|
49
|
+
retry
|
50
|
+
rescue SQLite3::IOException => e
|
51
|
+
io_attempts += 1
|
52
|
+
raise "Database I/O error #{path}: #{e.message}" if io_attempts >= 3
|
53
|
+
|
46
54
|
sleep(1)
|
47
55
|
retry
|
48
56
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -9,23 +9,33 @@ module MiGA
|
|
9
9
|
# Current version of MiGA. An Array with three values:
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
|
-
# -
|
13
|
-
|
12
|
+
# - String indicating release status:
|
13
|
+
# - rc* release candidate, not released as gem
|
14
|
+
# - [0-9]+ stable release, released as gem
|
15
|
+
VERSION = [1.0, 3, 0].freeze
|
14
16
|
|
15
17
|
##
|
16
18
|
# Nickname for the current major.minor version.
|
17
|
-
VERSION_NAME = '
|
19
|
+
VERSION_NAME = 'prima'
|
18
20
|
|
19
21
|
##
|
20
22
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2021,
|
23
|
+
VERSION_DATE = Date.new(2021, 6, 4)
|
22
24
|
|
23
25
|
##
|
24
|
-
#
|
25
|
-
CITATION =
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
# References of MiGA
|
27
|
+
CITATION = []
|
28
|
+
CITATION << <<~REF
|
29
|
+
Rodriguez-R et al (2018). The Microbial Genomes Atlas (MiGA) webserver:
|
30
|
+
taxonomic and gene diversity analysis of Archaea and Bacteria at the whole
|
31
|
+
genome level. Nucleic Acids Research 46(W1):W282-W288.
|
32
|
+
doi:10.1093/nar/gky467.
|
33
|
+
REF
|
34
|
+
CITATION << <<~REF
|
35
|
+
Rodriguez-R et al (2020). Classifying prokaryotic genomes using the
|
36
|
+
Microbial Genomes Atlas (MiGA) webserver. Bergey's Manual of Systematics
|
37
|
+
of Archaea and Bacteria.
|
38
|
+
REF
|
29
39
|
end
|
30
40
|
|
31
41
|
class MiGA::MiGA
|
@@ -58,6 +68,10 @@ class MiGA::MiGA
|
|
58
68
|
##
|
59
69
|
# Reference of MiGA
|
60
70
|
def self.CITATION
|
71
|
+
CITATION.map { |i| "- #{i}" }.join
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.CITATION_ARRAY
|
61
75
|
CITATION
|
62
76
|
end
|
63
77
|
end
|
data/scripts/aai_distances.bash
CHANGED
@@ -9,34 +9,32 @@ DIR="$PROJECT/data/09.distances/02.aai"
|
|
9
9
|
# Initialize
|
10
10
|
miga_start_project_step "$DIR"
|
11
11
|
|
12
|
-
echo -n "" > miga-project.log
|
13
|
-
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
14
|
-
|
15
12
|
# Extract values
|
16
13
|
rm -f miga-project.txt
|
14
|
+
SQL="SELECT seq1, seq2, aai, sd, n, omega from aai;"
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
17
16
|
(
|
18
|
-
echo "
|
17
|
+
echo "a b value sd n omega" | tr " " "\\t"
|
19
18
|
for i in $DS ; do
|
20
|
-
echo "
|
21
|
-
" seq1, seq2, aai, sd, n, omega from aai;" \
|
22
|
-
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
23
|
-
echo "$i" >> miga-project.log
|
19
|
+
echo "$SQL" | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
24
20
|
done
|
25
21
|
) | gzip -9c > miga-project.txt.gz
|
26
22
|
|
27
23
|
# R-ify
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
24
|
+
cat <<R | R --vanilla
|
25
|
+
file <- gzfile('miga-project.txt.gz')
|
26
|
+
aai <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
|
27
|
+
save(aai, file = 'miga-project.Rdata')
|
28
|
+
if(sum(aai[, 'a'] != aai[, 'b']) > 0) {
|
29
|
+
h <- hist(aai[aai[, 'a'] != aai[, 'b'], 'value'], breaks = 100, plot = FALSE)
|
30
|
+
len <- length(h[['breaks']])
|
33
31
|
write.table(
|
34
|
-
cbind(h[['breaks']][-
|
35
|
-
|
36
|
-
|
37
|
-
|
32
|
+
cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
|
33
|
+
file = 'miga-project.hist', quote = FALSE, sep = '\t',
|
34
|
+
col.names = FALSE, row.names = FALSE
|
35
|
+
)
|
38
36
|
}
|
39
|
-
|
37
|
+
R
|
40
38
|
|
41
39
|
# Finalize
|
42
40
|
miga_end_project_step "$DIR"
|
data/scripts/ani_distances.bash
CHANGED
@@ -9,33 +9,32 @@ DIR="$PROJECT/data/09.distances/03.ani"
|
|
9
9
|
# Initialize
|
10
10
|
miga_start_project_step "$DIR"
|
11
11
|
|
12
|
-
echo -n "" > miga-project.log
|
13
|
-
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
14
|
-
|
15
12
|
# Extract values
|
16
13
|
rm -f miga-project.txt
|
14
|
+
SQL="SELECT seq1, seq2, ani, sd, n, omega from ani;"
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
17
16
|
(
|
18
|
-
echo "
|
17
|
+
echo "a b value sd n omega" | tr " " "\\t"
|
19
18
|
for i in $DS ; do
|
20
|
-
echo "
|
21
|
-
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
22
|
-
echo "$i" >> miga-project.log
|
19
|
+
echo "$SQL" | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
23
20
|
done
|
24
21
|
) | gzip -9c > miga-project.txt.gz
|
25
22
|
|
26
23
|
# R-ify
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
cat <<R | R --vanilla
|
25
|
+
file <- gzfile('miga-project.txt.gz')
|
26
|
+
ani <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
|
27
|
+
save(ani, file = 'miga-project.Rdata')
|
28
|
+
if(sum(ani[, 'a'] != ani[, 'b']) > 0) {
|
29
|
+
h <- hist(ani[ani[, 'a'] != ani[, 'b'], 'value'], breaks = 100, plot = FALSE)
|
30
|
+
len <- length(h[['breaks']])
|
32
31
|
write.table(
|
33
|
-
cbind(h[['breaks']][-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
|
33
|
+
file = 'miga-project.hist', quote = FALSE, sep = '\t',
|
34
|
+
col.names = FALSE, row.names = FALSE
|
35
|
+
)
|
37
36
|
}
|
38
|
-
|
37
|
+
R
|
39
38
|
|
40
39
|
# Finalize
|
41
40
|
miga_end_project_step "$DIR"
|
data/scripts/assembly.bash
CHANGED
@@ -11,30 +11,44 @@ miga date > "$DATASET.start"
|
|
11
11
|
|
12
12
|
# Interpose (if needed)
|
13
13
|
TF="../04.trimmed_fasta"
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
b=$DATASET
|
15
|
+
if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
|
16
|
+
cr="$TF/${b}.CoupledReads.fa"
|
17
|
+
if [[ ! -s "$cr" && ! -s "${cr}.gz" ]] ; then
|
18
|
+
for s in 1 2 ; do
|
19
|
+
if [[ -s "$TF/${b}.${s}.fasta" ]] ; then
|
20
|
+
ln -s "$TF/${b}.${s}.fasta" "${b}.${s}.tmp"
|
21
|
+
else
|
22
|
+
gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp"
|
23
|
+
fi
|
24
|
+
done
|
25
|
+
FastA.interpose.pl "$cr" "$b".[12].tmp
|
26
|
+
rm "$b".[12].tmp
|
27
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
|
28
|
+
fi
|
21
29
|
fi
|
22
30
|
|
31
|
+
# Gzip (if needed)
|
32
|
+
for i in SingleReads CoupledReads ; do
|
33
|
+
base="$TF/${DATASET}.${i}.fa"
|
34
|
+
if [[ -e "$base" && ! -s "${base}.gz" ]] ; then
|
35
|
+
gzip -9f "$base"
|
36
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
|
37
|
+
fi
|
38
|
+
done
|
39
|
+
|
23
40
|
# Assemble
|
24
|
-
FA="$TF/$DATASET.CoupledReads.fa"
|
25
|
-
[[ -e "$FA" ]] || FA="$
|
26
|
-
[[ -e "$FA" ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
|
27
|
-
[[ -e "$FA" ]] || FA="$FA.gz"
|
41
|
+
FA="$TF/${DATASET}.CoupledReads.fa.gz"
|
42
|
+
[[ -e "$FA" ]] || FA="$TF/${DATASET}.SingleReads.fa.gz"
|
28
43
|
RD="r"
|
29
44
|
[[ $FA == *.SingleReads.fa* ]] && RD="l"
|
30
|
-
|
45
|
+
gzip -cd "$FA" \
|
46
|
+
| idba_ud --pre_correction -$RD /dev/stdin \
|
47
|
+
-o "$DATASET" --num_threads "$CORES" || true
|
31
48
|
[[ -s "$DATASET/contig.fa" ]] || exit 1
|
32
49
|
|
33
50
|
# Clean
|
34
|
-
(
|
35
|
-
cd "$DATASET"
|
36
|
-
rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa
|
37
|
-
)
|
51
|
+
( cd "$DATASET" && rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa )
|
38
52
|
|
39
53
|
# Extract
|
40
54
|
if [[ -s "$DATASET/scaffold.fa" ]] ; then
|
@@ -49,3 +63,4 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
|
49
63
|
# Finalize
|
50
64
|
miga date > "$DATASET.done"
|
51
65
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
|
66
|
+
|
data/scripts/haai_distances.bash
CHANGED
@@ -12,34 +12,10 @@ miga_start_project_step "$DIR"
|
|
12
12
|
# Cleanup databases
|
13
13
|
ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
|
14
14
|
|
15
|
-
#
|
15
|
+
# No real need for hAAI distributions at all
|
16
16
|
echo -n "" > miga-project.log
|
17
|
-
|
18
|
-
|
19
|
-
# Extract values
|
20
|
-
rm -f miga-project.txt
|
21
|
-
(
|
22
|
-
echo "metric a b value sd n omega" | tr " " "\\t"
|
23
|
-
for i in $DS ; do
|
24
|
-
echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
|
25
|
-
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
26
|
-
echo "$i" >> miga-project.log
|
27
|
-
done
|
28
|
-
) | gzip -9c > miga-project.txt.gz
|
29
|
-
|
30
|
-
# R-ify
|
31
|
-
echo "
|
32
|
-
haai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
|
33
|
-
save(haai, file='miga-project.Rdata');
|
34
|
-
if(sum(haai[,'a'] != haai[,'b']) > 0){
|
35
|
-
h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
|
36
|
-
write.table(
|
37
|
-
cbind(h[['breaks']][-length(h[['breaks']])],
|
38
|
-
h[['breaks']][-1], h[['counts']]),
|
39
|
-
file='miga-project.hist', quote=FALSE, sep='\\t',
|
40
|
-
col.names=FALSE, row.names=FALSE);
|
41
|
-
}
|
42
|
-
" | R --vanilla
|
17
|
+
echo -n "" > miga-project.txt
|
18
|
+
echo "aai <- NULL; save(aai, file = 'miga-project.Rdata')" | R --vanilla
|
43
19
|
|
44
20
|
# Finalize
|
45
21
|
miga_end_project_step "$DIR"
|
data/scripts/miga.bash
CHANGED
@@ -1,19 +1,21 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
|
3
|
+
###
|
3
4
|
# Setup environment
|
4
5
|
set -e
|
5
|
-
|
6
|
+
eval "$("$MIGA/bin/miga" env)"
|
6
7
|
SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
|
7
|
-
# shellcheck source=/dev/null
|
8
|
-
. "$MIGA_HOME/.miga_rc"
|
9
|
-
|
10
|
-
# Ensure submodules are first in PATH
|
11
|
-
export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
|
12
|
-
export PATH="$MIGA/utils/FastAAI/FastAAI:$PATH"
|
13
8
|
|
9
|
+
###
|
14
10
|
# Ancillary functions
|
11
|
+
|
12
|
+
# Evaluates if the first passed argument is an existing file
|
15
13
|
function exists { [[ -e "$1" ]] ; }
|
14
|
+
|
15
|
+
# Evaluates if the first passed argument is a function
|
16
16
|
function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
|
17
|
+
|
18
|
+
# Initiate a project-wide run
|
17
19
|
function miga_start_project_step {
|
18
20
|
local dir="$1"
|
19
21
|
local dir_r="${dir}.running"
|
@@ -22,6 +24,8 @@ function miga_start_project_step {
|
|
22
24
|
cd "$dir_r"
|
23
25
|
miga date > "miga-project.start"
|
24
26
|
}
|
27
|
+
|
28
|
+
# Finalize a project-wide run
|
25
29
|
function miga_end_project_step {
|
26
30
|
local dir="$1"
|
27
31
|
local dir_r="${dir}.running"
|
@@ -38,7 +42,7 @@ if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
|
|
38
42
|
echo ""
|
39
43
|
echo "######[ $SCRIPT ]######"
|
40
44
|
echo "# Date: $(miga date)"
|
41
|
-
echo "# Host: $(hostname)"
|
45
|
+
echo "# Host: $(hostname) [$CORES]"
|
42
46
|
echo "# MiGA: $MIGA"
|
43
47
|
echo "# Project: $PROJECT"
|
44
48
|
if [[ -n $DATASET ]] ; then
|