miga-base 0.7.26.2 → 1.0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/env.rb +26 -0
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +12 -8
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- data/utils/subclade/pipeline.rb +2 -2
- metadata +33 -4
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3fdcceb4a8c5751e23241e0db5390e8d10f4aa2d62077b3bc73508d9da15bd75
|
4
|
+
data.tar.gz: 9c221524ad7f8e7240a9f7b02c9fe2275a3728a7290d22cfedce26972c7d7db8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f05075e290329eb9a4b9a889c0fae6c379dca8e949a0beefb84de2e7d37f3bdfe04e7ccad63c2dd6258cacf2e3ce47e4b2a39bdd1abbb796eea6338e54d0fb6
|
7
|
+
data.tar.gz: d6358abaa5041631b3b3bd9e6a74656aae7739d14b031b2218382084f3d7384fb679bff7423ac0c31c673339fe80ab02a173df7042814fedd8dfb325ec6c0005
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
50
50
|
# Input data
|
51
51
|
ref_db = reference_db
|
52
52
|
norun = %w[
|
53
|
-
|
53
|
+
haai_distances aai_distances ani_distances clade_finding
|
54
54
|
]
|
55
55
|
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
56
56
|
p = create_project(
|
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
59
59
|
run_ssu: false, run_mytaxa_scan: false, run_distances: false
|
60
60
|
)
|
61
61
|
p.set_option(:ref_project, ref_db.path)
|
62
|
-
p.set_option(:tax_pvalue, cli[:pvalue]
|
62
|
+
p.set_option(:tax_pvalue, cli[:pvalue])
|
63
63
|
# Run
|
64
64
|
run_daemon
|
65
65
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
52
52
|
# Input data
|
53
53
|
p = create_project(
|
54
54
|
:assembly,
|
55
|
-
{
|
55
|
+
{ run_clades: false },
|
56
56
|
{ run_mytaxa_scan: false, run_ssu: false }
|
57
57
|
)
|
58
58
|
p.set_option(:gsp_metric, cli[:metric].to_s)
|
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
93
93
|
k += 1
|
94
94
|
cli.advance('Datasets:', k, n, false) if i == 0
|
95
95
|
next unless k % cli[:threads] == i
|
96
|
-
each_database_file(d) do |db_file, metric, result|
|
96
|
+
each_database_file(d) do |db_file, metric, result, _rank|
|
97
97
|
check_sqlite3_database(db_file, metric) do
|
98
98
|
cli.say(
|
99
99
|
" > Removing malformed database from #{d.name}:#{result} "
|
@@ -116,27 +116,70 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
116
116
|
# Perform bidirectional operation with MiGA::Cli +cli+
|
117
117
|
def check_bidir(cli)
|
118
118
|
cli.say 'Checking if reference distances are bidirectional'
|
119
|
-
|
119
|
+
project = cli.load_project
|
120
|
+
ref_ds = project.each_dataset.select(&:ref?)
|
120
121
|
ref_names = ref_ds.map(&:name)
|
121
122
|
n = ref_ds.size
|
122
|
-
(0 .. cli[:threads] - 1).map do |i|
|
123
|
-
Process.fork do
|
124
|
-
k = 0
|
125
|
-
ref_ds.each do |d|
|
126
|
-
k += 1
|
127
|
-
cli.advance('Datasets:', k, n, false) if i == 0
|
128
|
-
next unless k % cli[:threads] == i
|
129
123
|
|
130
|
-
|
131
|
-
|
124
|
+
# Read data first (threaded)
|
125
|
+
tmp = File.join(project.path, 'doctor-bidirectional.tmp')
|
126
|
+
FileUtils.mkdir_p(tmp)
|
127
|
+
MiGA::Parallel.process(cli[:threads]) do |thr|
|
128
|
+
file = File.join(tmp, "#{thr}.json")
|
129
|
+
fh = File.open(file, 'w')
|
130
|
+
[:aai, :ani].each do |metric|
|
131
|
+
fh.puts "# #{metric}"
|
132
|
+
ref_ds.each_with_index do |ds, idx|
|
133
|
+
if idx % cli[:threads] == thr
|
134
|
+
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
135
|
+
row = read_bidirectional(ds, metric)
|
136
|
+
fh.puts "#{ds.name} #{JSON.fast_generate(row)}" unless row.empty?
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
fh.puts '# end'
|
141
|
+
fh.flush # necessary for large threaded runs
|
142
|
+
fh.close
|
143
|
+
if thr == 0
|
144
|
+
cli.advance('Reading:', n, n, false)
|
145
|
+
cli.say
|
146
|
+
end
|
147
|
+
end
|
132
148
|
|
133
|
-
|
134
|
-
|
149
|
+
# Merge pieces per thread
|
150
|
+
dist = { aai: {}, ani: {} }
|
151
|
+
cli[:threads].times do |i|
|
152
|
+
cli.advance('Merging:', i + 1, cli[:threads], false)
|
153
|
+
file = File.join(tmp, "#{i}.json")
|
154
|
+
File.open(file, 'r') do |fh|
|
155
|
+
metric = nil
|
156
|
+
fh.each do |ln|
|
157
|
+
qry, row = ln.chomp.split(' ', 2)
|
158
|
+
if qry == '#'
|
159
|
+
metric = row.to_sym
|
160
|
+
else
|
161
|
+
raise "Unrecognized metric: #{metric}" unless dist[metric]
|
162
|
+
JSON.parse(row).each do |sbj, val|
|
163
|
+
dist[metric][qry] ||= {}
|
164
|
+
if dist[metric][sbj]&.include?(qry)
|
165
|
+
dist[metric][sbj].delete(qry) # Already bidirectional
|
166
|
+
else
|
167
|
+
dist[metric][qry][sbj] = val
|
168
|
+
end
|
169
|
+
end
|
135
170
|
end
|
136
171
|
end
|
172
|
+
raise "Incomplete thread dump: #{file}" unless metric == :end
|
137
173
|
end
|
138
174
|
end
|
139
|
-
|
175
|
+
cli.say
|
176
|
+
FileUtils.rm_rf(tmp)
|
177
|
+
|
178
|
+
# Write missing values (threaded)
|
179
|
+
MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
|
180
|
+
cli.advance('Datasets:', idx + 1, n, false) if thr == 0
|
181
|
+
save_bidirectional(ds, dist)
|
182
|
+
end
|
140
183
|
cli.say
|
141
184
|
end
|
142
185
|
|
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def each_database_file(dataset, &blk)
|
19
|
-
ref_db = {
|
20
|
-
|
19
|
+
ref_db = {
|
20
|
+
haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
|
21
|
+
}
|
22
|
+
qry_db = {
|
23
|
+
haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
|
24
|
+
}
|
21
25
|
base = File.join(dataset.project.path, 'data', '09.distances')
|
22
26
|
result = :distances
|
23
27
|
if dataset.ref?
|
24
28
|
file_db = "#{dataset.name}.db"
|
25
|
-
ref_db.each do |
|
29
|
+
ref_db.each do |rank, v|
|
30
|
+
dir, metric = *v
|
26
31
|
file = File.join(base, dir, file_db)
|
27
|
-
blk[file, metric, result] if File.exist? file
|
32
|
+
blk[file, metric, result, rank] if File.exist? file
|
28
33
|
end
|
29
34
|
base = File.join(base, '05.taxonomy')
|
30
35
|
result = :taxonomy
|
31
36
|
end
|
32
|
-
qry_db.each do |
|
37
|
+
qry_db.each do |rank, v|
|
38
|
+
ext, metric = *v
|
33
39
|
file = File.join(base, "#{dataset.name}#{ext}")
|
34
|
-
blk[file, metric, result] if File.exist? file
|
40
|
+
blk[file, metric, result, rank] if File.exist? file
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
@@ -109,25 +115,43 @@ module MiGA::Cli::Action::Doctor::Base
|
|
109
115
|
end
|
110
116
|
|
111
117
|
##
|
112
|
-
#
|
113
|
-
#
|
114
|
-
def
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
118
|
+
# Reads all the distance estimates in +a+ -> * for +metric+ and
|
119
|
+
# returns them as a hash +{"b_name" => [val, sd, ...], ...}+
|
120
|
+
def read_bidirectional(a, metric)
|
121
|
+
db_file = a.result(:distances)&.file_path("#{metric}_db") or return {}
|
122
|
+
sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
|
123
|
+
data = MiGA::SQLite.new(db_file).run(sql) || []
|
124
|
+
Hash[
|
125
|
+
data.map do |row|
|
126
|
+
k, v = row.shift(2)
|
127
|
+
[k, row.all?(&:zero?) ? v : [v] + row]
|
128
|
+
end
|
129
|
+
]
|
130
|
+
end
|
122
131
|
|
123
|
-
|
124
|
-
|
132
|
+
##
|
133
|
+
# Saves all the distance estimates in * -> +a+ into the +a+ databases
|
134
|
+
# (as +a+ -> *), where +a+ is a MiGA::Dataset object, with currently
|
135
|
+
# saved values read from the hash +dist+
|
136
|
+
def save_bidirectional(a, dist)
|
137
|
+
each_database_file(a) do |db_file, metric, result, rank|
|
138
|
+
next if rank == :haai # No need for hAAI to be bidirectional
|
125
139
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
140
|
+
b2a = dist[rank].map { |b_name, v| b_name if v[a.name] }.compact
|
141
|
+
a2b = dist[rank][a.name]&.keys || []
|
142
|
+
SQLite3::Database.new(db_file) do |db|
|
143
|
+
sql = <<~SQL
|
144
|
+
insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
|
145
|
+
values(?, ?, ?, ?, ?, ?);
|
146
|
+
SQL
|
147
|
+
db.execute('BEGIN TRANSACTION;')
|
148
|
+
(b2a - a2b).each do |b_name|
|
149
|
+
val = dist[rank][b_name][a.name]
|
150
|
+
val = [val, 0, 0, 0] unless val.is_a?(Array)
|
151
|
+
db.execute(sql, [a.name, b_name] + val)
|
152
|
+
end
|
153
|
+
db.execute('COMMIT;')
|
154
|
+
end
|
131
155
|
end
|
132
156
|
end
|
133
157
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::Env < MiGA::Cli::Action
|
7
|
+
def parse_cli
|
8
|
+
cli.parse { |_| }
|
9
|
+
end
|
10
|
+
|
11
|
+
def perform
|
12
|
+
puts <<~BASH
|
13
|
+
MIGA="#{MiGA::MiGA.root_path}"
|
14
|
+
MIGA_HOME=${MIGA_HOME:-"$HOME"}
|
15
|
+
. "$MIGA_HOME/.miga_rc"
|
16
|
+
# Ensure MiGA & submodules are first in PATH
|
17
|
+
export PATH="$MIGA/bin:$PATH"
|
18
|
+
for util in enveomics/Scripts FastAAI/FastAAI multitrim ; do
|
19
|
+
export PATH="$MIGA/utils/$util:$PATH"
|
20
|
+
done
|
21
|
+
BASH
|
22
|
+
end
|
23
|
+
|
24
|
+
def empty_action
|
25
|
+
end
|
26
|
+
end
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -112,12 +112,15 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
112
112
|
|
113
113
|
def check_software_requirements(rc_fh)
|
114
114
|
cli.puts 'Looking for requirements:'
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
115
|
+
opt_groups = {
|
116
|
+
mytaxa: 'MyTaxa',
|
117
|
+
rdp: 'RDP classifier',
|
118
|
+
reads: 'read processing'
|
119
|
+
}
|
120
|
+
opt_groups.each do |k, v|
|
121
|
+
ask_for_optional(k, v)
|
122
|
+
rc_fh.puts "export MIGA_#{k.to_s.upcase}='#{cli[k] ? 'yes' : 'no'}'"
|
123
|
+
end
|
121
124
|
paths = {}
|
122
125
|
rc_fh.puts 'MIGA_PATH=""'
|
123
126
|
req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
|
@@ -196,8 +199,9 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
196
199
|
cli.puts 'yes'
|
197
200
|
else
|
198
201
|
cli.puts 'no, installing'
|
199
|
-
|
202
|
+
out = install_library(cli, paths, language, library)
|
200
203
|
unless test_library(cli, paths, language, library)
|
204
|
+
cli.puts out
|
201
205
|
raise "Cannot install #{language.to_s.capitalize} library: #{library}"
|
202
206
|
end
|
203
207
|
end
|
@@ -18,7 +18,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
18
18
|
cli.opt_object(opt, [:project])
|
19
19
|
opt.on(
|
20
20
|
'-T', '--taxon STRING',
|
21
|
-
'(Mandatory
|
21
|
+
'(Mandatory) Taxon name (e.g., a species binomial)'
|
22
22
|
) { |v| cli[:taxon] = v }
|
23
23
|
opt.on(
|
24
24
|
'-m', '--metadata STRING',
|
@@ -137,7 +137,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
137
137
|
end
|
138
138
|
|
139
139
|
def sanitize_cli
|
140
|
-
cli.ensure_par(taxon: '-T')
|
140
|
+
cli.ensure_par(taxon: '-T')
|
141
141
|
tasks = %w[reference complete chromosome scaffold contig]
|
142
142
|
unless tasks.any? { |i| cli[i.to_sym] }
|
143
143
|
raise 'No action requested: pick at least one type of genome'
|
@@ -204,7 +204,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
204
204
|
'from(GenomeAssemblies).' \
|
205
205
|
'usingschema(/schema/GenomeAssemblies).' \
|
206
206
|
'matching(tab==["Prokaryotes"] and q=="' \
|
207
|
-
"#{cli[:taxon]
|
207
|
+
"#{cli[:taxon]&.tr('"', "'")}\"",
|
208
208
|
fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
|
209
209
|
'level|level,ftp_path_genbank|ftp_path_genbank,' \
|
210
210
|
'release_date|release_date,strain|strain',
|
@@ -99,13 +99,13 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
|
|
99
99
|
ds_name = []
|
100
100
|
File.open(tab, 'r') do |fh|
|
101
101
|
fh.each_line do |ln|
|
102
|
-
if ln =~ /^ {
|
102
|
+
if ln =~ /^ {0,#{(rank_i - 1) * 2}}\S+:\S+:/
|
103
103
|
in_rank = nil
|
104
104
|
ds_name = []
|
105
105
|
elsif ln =~ /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
|
106
106
|
in_rank = $2 == '?' ? nil : $1
|
107
107
|
ds_name = []
|
108
|
-
elsif ln =~ /^ *# (\S+)/
|
108
|
+
elsif ln =~ /^ *# (\S+)/ && !in_rank.nil?
|
109
109
|
ds_i = $1
|
110
110
|
ds_name << ds_i
|
111
111
|
ds_name.each do |ds_j|
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -81,21 +81,22 @@ module MiGA::Cli::Action::Wf
|
|
81
81
|
cli[:aai_p] = 'blast+'
|
82
82
|
cli[:ani_p] = 'blast+'
|
83
83
|
end
|
84
|
-
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
84
|
+
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani (default)') do
|
85
85
|
cli[:aai_p] = 'diamond'
|
86
86
|
cli[:ani_p] = 'fastani'
|
87
87
|
end
|
88
88
|
opt.on(
|
89
89
|
'--haai-p STRING',
|
90
|
-
'hAAI search engine. One of: blast
|
90
|
+
'hAAI search engine. One of: blast+, fastaai, blat, diamond, fastaai, no',
|
91
|
+
'The default is "no" for clade projects and "fastaai" otherwise'
|
91
92
|
) { |v| cli[:haai_p] = v }
|
92
93
|
opt.on(
|
93
94
|
'--aai-p STRING',
|
94
|
-
'AAI search engine. One of: blast
|
95
|
+
'AAI search engine. One of: blast+, blat, diamond (default)'
|
95
96
|
) { |v| cli[:aai_p] = v }
|
96
97
|
opt.on(
|
97
98
|
'--ani-p STRING',
|
98
|
-
'ANI search engine. One of: blast
|
99
|
+
'ANI search engine. One of: blast+, blat, fastani (default)'
|
99
100
|
) { |v| cli[:ani_p] = v }
|
100
101
|
end
|
101
102
|
|
data/lib/miga/cli/base.rb
CHANGED
@@ -41,6 +41,7 @@ module MiGA::Cli::Base
|
|
41
41
|
lair: 'Control groups of daemons for several MiGA projects',
|
42
42
|
date: 'Return the current date in standard MiGA format',
|
43
43
|
console: 'Open an IRB console with MiGA',
|
44
|
+
env: 'Shell code to load MiGA environment',
|
44
45
|
# Taxonomy
|
45
46
|
tax_set: 'Register taxonomic information for datasets',
|
46
47
|
tax_test: 'Return test of taxonomic distributions for query datasets',
|
data/lib/miga/common.rb
CHANGED
data/lib/miga/daemon.rb
CHANGED
@@ -73,10 +73,10 @@ class MiGA::Daemon < MiGA::MiGA
|
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
75
|
miga_say "Saving log to: #{output_file}" unless show_log?
|
76
|
-
queue_maintenance
|
77
|
-
load_status
|
78
76
|
say 'Configuration options:'
|
79
77
|
say @runopts.to_s
|
78
|
+
load_status
|
79
|
+
queue_maintenance(true)
|
80
80
|
end
|
81
81
|
|
82
82
|
##
|
@@ -87,6 +87,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
87
87
|
check_datasets or check_project
|
88
88
|
if shutdown_when_done? && (jobs_running.size + jobs_to_run.size).zero?
|
89
89
|
say 'Nothing else to do, shutting down'
|
90
|
+
exit_cleanup
|
90
91
|
return false
|
91
92
|
end
|
92
93
|
flush!
|
@@ -102,13 +103,19 @@ class MiGA::Daemon < MiGA::MiGA
|
|
102
103
|
|
103
104
|
##
|
104
105
|
# Queue maintenance tasks as an analysis job
|
105
|
-
def queue_maintenance
|
106
|
-
return if bypass_maintenance? || shutdown_when_done?
|
106
|
+
def queue_maintenance(force = false)
|
107
|
+
return if bypass_maintenance? || (!force && shutdown_when_done?)
|
107
108
|
|
108
109
|
say 'Queueing maintenance tasks'
|
109
110
|
queue_job(:maintenance)
|
110
111
|
end
|
111
112
|
|
113
|
+
##
|
114
|
+
# Remove temporary files on completion
|
115
|
+
def exit_cleanup
|
116
|
+
FileUtils.rm_f(File.join(daemon_home, 'status.json'))
|
117
|
+
end
|
118
|
+
|
112
119
|
##
|
113
120
|
# Send +msg+ to +say+ as long as +level+ is at most +verbosity+
|
114
121
|
def l_say(level, *msg)
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -181,26 +181,30 @@ module MiGA::Dataset::Result
|
|
181
181
|
add_files_to_ds_result(
|
182
182
|
MiGA::Result.new("#{base}.json"), name,
|
183
183
|
if result_files_exist?(base, '.2.clipped.fastq')
|
184
|
-
{
|
185
|
-
pair1: '.1.clipped.fastq',
|
186
|
-
pair2: '.2.clipped.fastq',
|
187
|
-
single: '.1.clipped.single.fastq'
|
188
|
-
}
|
184
|
+
{ pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
|
189
185
|
else
|
190
186
|
{ single: '.1.clipped.fastq' }
|
191
187
|
end
|
192
188
|
).tap do |r|
|
189
|
+
# Legacy files
|
193
190
|
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
191
|
+
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
194
192
|
end
|
195
193
|
end
|
196
194
|
|
197
195
|
##
|
198
196
|
# Add result type +:read_quality+ at +base+ (no +_opts+ supported)
|
199
197
|
def add_result_read_quality(base, _opts)
|
200
|
-
return nil unless
|
198
|
+
return nil unless
|
199
|
+
result_files_exist?(base, %w[.post.1.html]) ||
|
200
|
+
result_files_exist?(base, %w[.solexaqa .fastqc])
|
201
201
|
|
202
202
|
add_files_to_ds_result(
|
203
203
|
MiGA::Result.new("#{base}.json"), name,
|
204
|
+
pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
|
205
|
+
post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
|
206
|
+
adapter_detection: '.adapters.txt',
|
207
|
+
# Legacy files
|
204
208
|
solexaqa: '.solexaqa', fastqc: '.fastqc'
|
205
209
|
)
|
206
210
|
end
|