miga-base 0.7.26.3 → 1.0.0.sr1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/doctor.rb +50 -19
- data/lib/miga/cli/action/doctor/base.rb +20 -18
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +1 -2
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +11 -6
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +7 -0
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +33 -6
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a599c1e0c51a62f7303cbd1bbf9f8568649dbbeae768b518ad67250b1c3217a4
|
|
4
|
+
data.tar.gz: f8300cb5c44209d8a3639338319b50cdbce01cf14362006a9f61147854625bd9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4bf676ee04f650e8f2388b0f0e732e9da941ca13ceb6b02d986800d353adf2de261ab61a56dde496210f2965255ca0ff9df2cbf5927f8643ddd706736511ef07
|
|
7
|
+
data.tar.gz: b5cffa3afdb384db2a7fd2d86d11f062491d3df403569cae8bb2209df9119012e539179453efc492227835553bade1ff03805d8b26be4b5988890a7bc9684475
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -116,38 +116,69 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
|
116
116
|
# Perform bidirectional operation with MiGA::Cli +cli+
|
|
117
117
|
def check_bidir(cli)
|
|
118
118
|
cli.say 'Checking if reference distances are bidirectional'
|
|
119
|
-
|
|
119
|
+
project = cli.load_project
|
|
120
|
+
ref_ds = project.each_dataset.select(&:ref?)
|
|
120
121
|
ref_names = ref_ds.map(&:name)
|
|
121
122
|
n = ref_ds.size
|
|
122
123
|
|
|
123
124
|
# Read data first (threaded)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
125
|
+
tmp = File.join(project.path, 'doctor-bidirectional.tmp')
|
|
126
|
+
FileUtils.mkdir_p(tmp)
|
|
127
|
+
MiGA::Parallel.process(cli[:threads]) do |thr|
|
|
128
|
+
file = File.join(tmp, "#{thr}.json")
|
|
129
|
+
fh = File.open(file, 'w')
|
|
130
|
+
[:aai, :ani].each do |metric|
|
|
131
|
+
fh.puts "# #{metric}"
|
|
132
|
+
ref_ds.each_with_index do |ds, idx|
|
|
133
|
+
if idx % cli[:threads] == thr
|
|
134
|
+
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
|
135
|
+
row = read_bidirectional(ds, metric)
|
|
136
|
+
fh.puts "#{ds.name} #{JSON.fast_generate(row)}" unless row.empty?
|
|
137
|
+
end
|
|
135
138
|
end
|
|
136
139
|
end
|
|
137
|
-
|
|
140
|
+
fh.puts '# end'
|
|
141
|
+
fh.flush # necessary for large threaded runs
|
|
142
|
+
fh.close
|
|
143
|
+
if thr == 0
|
|
144
|
+
cli.advance('Reading:', n, n, false)
|
|
145
|
+
cli.say
|
|
146
|
+
end
|
|
147
|
+
end
|
|
138
148
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
149
|
+
# Merge pieces per thread
|
|
150
|
+
dist = { aai: {}, ani: {} }
|
|
151
|
+
cli[:threads].times do |i|
|
|
152
|
+
cli.advance('Merging:', i + 1, cli[:threads], false)
|
|
153
|
+
file = File.join(tmp, "#{i}.json")
|
|
154
|
+
File.open(file, 'r') do |fh|
|
|
155
|
+
metric = nil
|
|
156
|
+
fh.each do |ln|
|
|
157
|
+
qry, row = ln.chomp.split(' ', 2)
|
|
158
|
+
if qry == '#'
|
|
159
|
+
metric = row.to_sym
|
|
160
|
+
else
|
|
161
|
+
raise "Unrecognized metric: #{metric}" unless dist[metric]
|
|
162
|
+
JSON.parse(row).each do |sbj, val|
|
|
163
|
+
dist[metric][qry] ||= {}
|
|
164
|
+
if dist[metric][sbj]&.include?(qry)
|
|
165
|
+
dist[metric][sbj].delete(qry) # Already bidirectional
|
|
166
|
+
else
|
|
167
|
+
dist[metric][qry][sbj] = val
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
raise "Incomplete thread dump: #{file}" unless metric == :end
|
|
143
173
|
end
|
|
144
|
-
cli.say
|
|
145
174
|
end
|
|
175
|
+
cli.say
|
|
176
|
+
FileUtils.rm_rf(tmp)
|
|
146
177
|
|
|
147
178
|
# Write missing values (threaded)
|
|
148
179
|
MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
|
|
149
180
|
cli.advance('Datasets:', idx + 1, n, false) if thr == 0
|
|
150
|
-
save_bidirectional(ds)
|
|
181
|
+
save_bidirectional(ds, dist)
|
|
151
182
|
end
|
|
152
183
|
cli.say
|
|
153
184
|
end
|
|
@@ -115,30 +115,30 @@ module MiGA::Cli::Action::Doctor::Base
|
|
|
115
115
|
end
|
|
116
116
|
|
|
117
117
|
##
|
|
118
|
-
# Reads all the distance estimates in +a+ ->
|
|
119
|
-
#
|
|
120
|
-
def read_bidirectional(a)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
data
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
end
|
|
118
|
+
# Reads all the distance estimates in +a+ -> * for +metric+ and
|
|
119
|
+
# returns them as a hash +{"b_name" => [val, sd, ...], ...}+
|
|
120
|
+
def read_bidirectional(a, metric)
|
|
121
|
+
db_file = a.result(:distances)&.file_path("#{metric}_db") or return {}
|
|
122
|
+
sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
|
|
123
|
+
data = MiGA::SQLite.new(db_file).run(sql) || []
|
|
124
|
+
Hash[
|
|
125
|
+
data.map do |row|
|
|
126
|
+
k, v = row.shift(2)
|
|
127
|
+
[k, row.all?(&:zero?) ? v : [v] + row]
|
|
128
|
+
end
|
|
129
|
+
]
|
|
131
130
|
end
|
|
132
131
|
|
|
133
132
|
##
|
|
134
133
|
# Saves all the distance estimates in * -> +a+ into the +a+ databases
|
|
135
|
-
# (as +a+ -> *), where +a+ is a MiGA::Dataset object
|
|
136
|
-
|
|
134
|
+
# (as +a+ -> *), where +a+ is a MiGA::Dataset object, with currently
|
|
135
|
+
# saved values read from the hash +dist+
|
|
136
|
+
def save_bidirectional(a, dist)
|
|
137
137
|
each_database_file(a) do |db_file, metric, result, rank|
|
|
138
138
|
next if rank == :haai # No need for hAAI to be bidirectional
|
|
139
139
|
|
|
140
|
-
b2a =
|
|
141
|
-
a2b =
|
|
140
|
+
b2a = dist[rank].map { |b_name, v| b_name if v[a.name] }.compact
|
|
141
|
+
a2b = dist[rank][a.name]&.keys || []
|
|
142
142
|
SQLite3::Database.new(db_file) do |db|
|
|
143
143
|
sql = <<~SQL
|
|
144
144
|
insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
|
|
@@ -146,7 +146,9 @@ module MiGA::Cli::Action::Doctor::Base
|
|
|
146
146
|
SQL
|
|
147
147
|
db.execute('BEGIN TRANSACTION;')
|
|
148
148
|
(b2a - a2b).each do |b_name|
|
|
149
|
-
|
|
149
|
+
val = dist[rank][b_name][a.name]
|
|
150
|
+
val = [val, 0, 0, 0] unless val.is_a?(Array)
|
|
151
|
+
db.execute(sql, [a.name, b_name] + val)
|
|
150
152
|
end
|
|
151
153
|
db.execute('COMMIT;')
|
|
152
154
|
end
|
data/lib/miga/cli/action/init.rb
CHANGED
|
@@ -112,12 +112,15 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
|
112
112
|
|
|
113
113
|
def check_software_requirements(rc_fh)
|
|
114
114
|
cli.puts 'Looking for requirements:'
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
115
|
+
opt_groups = {
|
|
116
|
+
mytaxa: 'MyTaxa',
|
|
117
|
+
rdp: 'RDP classifier',
|
|
118
|
+
reads: 'read processing'
|
|
119
|
+
}
|
|
120
|
+
opt_groups.each do |k, v|
|
|
121
|
+
ask_for_optional(k, v)
|
|
122
|
+
rc_fh.puts "export MIGA_#{k.to_s.upcase}='#{cli[k] ? 'yes' : 'no'}'"
|
|
123
|
+
end
|
|
121
124
|
paths = {}
|
|
122
125
|
rc_fh.puts 'MIGA_PATH=""'
|
|
123
126
|
req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
|
|
@@ -196,8 +199,9 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
|
196
199
|
cli.puts 'yes'
|
|
197
200
|
else
|
|
198
201
|
cli.puts 'no, installing'
|
|
199
|
-
|
|
202
|
+
out = install_library(cli, paths, language, library)
|
|
200
203
|
unless test_library(cli, paths, language, library)
|
|
204
|
+
cli.puts out
|
|
201
205
|
raise "Cannot install #{language.to_s.capitalize} library: #{library}"
|
|
202
206
|
end
|
|
203
207
|
end
|
|
@@ -18,7 +18,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
|
18
18
|
cli.opt_object(opt, [:project])
|
|
19
19
|
opt.on(
|
|
20
20
|
'-T', '--taxon STRING',
|
|
21
|
-
'(Mandatory
|
|
21
|
+
'(Mandatory) Taxon name (e.g., a species binomial)'
|
|
22
22
|
) { |v| cli[:taxon] = v }
|
|
23
23
|
opt.on(
|
|
24
24
|
'-m', '--metadata STRING',
|
|
@@ -137,7 +137,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
|
137
137
|
end
|
|
138
138
|
|
|
139
139
|
def sanitize_cli
|
|
140
|
-
cli.ensure_par(taxon: '-T')
|
|
140
|
+
cli.ensure_par(taxon: '-T')
|
|
141
141
|
tasks = %w[reference complete chromosome scaffold contig]
|
|
142
142
|
unless tasks.any? { |i| cli[i.to_sym] }
|
|
143
143
|
raise 'No action requested: pick at least one type of genome'
|
|
@@ -204,7 +204,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
|
204
204
|
'from(GenomeAssemblies).' \
|
|
205
205
|
'usingschema(/schema/GenomeAssemblies).' \
|
|
206
206
|
'matching(tab==["Prokaryotes"] and q=="' \
|
|
207
|
-
"#{cli[:taxon]
|
|
207
|
+
"#{cli[:taxon]&.tr('"', "'")}\"",
|
|
208
208
|
fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
|
|
209
209
|
'level|level,ftp_path_genbank|ftp_path_genbank,' \
|
|
210
210
|
'release_date|release_date,strain|strain',
|
|
@@ -99,13 +99,13 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
|
|
|
99
99
|
ds_name = []
|
|
100
100
|
File.open(tab, 'r') do |fh|
|
|
101
101
|
fh.each_line do |ln|
|
|
102
|
-
if ln =~ /^ {
|
|
102
|
+
if ln =~ /^ {0,#{(rank_i - 1) * 2}}\S+:\S+:/
|
|
103
103
|
in_rank = nil
|
|
104
104
|
ds_name = []
|
|
105
105
|
elsif ln =~ /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
|
|
106
106
|
in_rank = $2 == '?' ? nil : $1
|
|
107
107
|
ds_name = []
|
|
108
|
-
elsif ln =~ /^ *# (\S+)/
|
|
108
|
+
elsif ln =~ /^ *# (\S+)/ && !in_rank.nil?
|
|
109
109
|
ds_i = $1
|
|
110
110
|
ds_name << ds_i
|
|
111
111
|
ds_name.each do |ds_j|
|
data/lib/miga/cli/action/wf.rb
CHANGED
|
@@ -81,21 +81,22 @@ module MiGA::Cli::Action::Wf
|
|
|
81
81
|
cli[:aai_p] = 'blast+'
|
|
82
82
|
cli[:ani_p] = 'blast+'
|
|
83
83
|
end
|
|
84
|
-
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
|
84
|
+
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani (default)') do
|
|
85
85
|
cli[:aai_p] = 'diamond'
|
|
86
86
|
cli[:ani_p] = 'fastani'
|
|
87
87
|
end
|
|
88
88
|
opt.on(
|
|
89
89
|
'--haai-p STRING',
|
|
90
|
-
'hAAI search engine. One of: blast
|
|
90
|
+
'hAAI search engine. One of: blast+, fastaai, blat, diamond, fastaai, no',
|
|
91
|
+
'The default is "no" for clade projects and "fastaai" otherwise'
|
|
91
92
|
) { |v| cli[:haai_p] = v }
|
|
92
93
|
opt.on(
|
|
93
94
|
'--aai-p STRING',
|
|
94
|
-
'AAI search engine. One of: blast
|
|
95
|
+
'AAI search engine. One of: blast+, blat, diamond (default)'
|
|
95
96
|
) { |v| cli[:aai_p] = v }
|
|
96
97
|
opt.on(
|
|
97
98
|
'--ani-p STRING',
|
|
98
|
-
'ANI search engine. One of: blast
|
|
99
|
+
'ANI search engine. One of: blast+, blat, fastani (default)'
|
|
99
100
|
) { |v| cli[:ani_p] = v }
|
|
100
101
|
end
|
|
101
102
|
|
data/lib/miga/daemon.rb
CHANGED
|
@@ -73,10 +73,10 @@ class MiGA::Daemon < MiGA::MiGA
|
|
|
73
73
|
say 'MiGA:%s launched' % project.name
|
|
74
74
|
say '-----------------------------------'
|
|
75
75
|
miga_say "Saving log to: #{output_file}" unless show_log?
|
|
76
|
-
queue_maintenance
|
|
77
|
-
load_status
|
|
78
76
|
say 'Configuration options:'
|
|
79
77
|
say @runopts.to_s
|
|
78
|
+
load_status
|
|
79
|
+
queue_maintenance(true)
|
|
80
80
|
end
|
|
81
81
|
|
|
82
82
|
##
|
|
@@ -87,6 +87,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
|
87
87
|
check_datasets or check_project
|
|
88
88
|
if shutdown_when_done? && (jobs_running.size + jobs_to_run.size).zero?
|
|
89
89
|
say 'Nothing else to do, shutting down'
|
|
90
|
+
exit_cleanup
|
|
90
91
|
return false
|
|
91
92
|
end
|
|
92
93
|
flush!
|
|
@@ -102,13 +103,19 @@ class MiGA::Daemon < MiGA::MiGA
|
|
|
102
103
|
|
|
103
104
|
##
|
|
104
105
|
# Queue maintenance tasks as an analysis job
|
|
105
|
-
def queue_maintenance
|
|
106
|
-
return if bypass_maintenance? || shutdown_when_done?
|
|
106
|
+
def queue_maintenance(force = false)
|
|
107
|
+
return if bypass_maintenance? || (!force && shutdown_when_done?)
|
|
107
108
|
|
|
108
109
|
say 'Queueing maintenance tasks'
|
|
109
110
|
queue_job(:maintenance)
|
|
110
111
|
end
|
|
111
112
|
|
|
113
|
+
##
|
|
114
|
+
# Remove temporary files on completion
|
|
115
|
+
def exit_cleanup
|
|
116
|
+
FileUtils.rm_f(File.join(daemon_home, 'status.json'))
|
|
117
|
+
end
|
|
118
|
+
|
|
112
119
|
##
|
|
113
120
|
# Send +msg+ to +say+ as long as +level+ is at most +verbosity+
|
|
114
121
|
def l_say(level, *msg)
|
data/lib/miga/dataset/result.rb
CHANGED
|
@@ -181,26 +181,30 @@ module MiGA::Dataset::Result
|
|
|
181
181
|
add_files_to_ds_result(
|
|
182
182
|
MiGA::Result.new("#{base}.json"), name,
|
|
183
183
|
if result_files_exist?(base, '.2.clipped.fastq')
|
|
184
|
-
{
|
|
185
|
-
pair1: '.1.clipped.fastq',
|
|
186
|
-
pair2: '.2.clipped.fastq',
|
|
187
|
-
single: '.1.clipped.single.fastq'
|
|
188
|
-
}
|
|
184
|
+
{ pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
|
|
189
185
|
else
|
|
190
186
|
{ single: '.1.clipped.fastq' }
|
|
191
187
|
end
|
|
192
188
|
).tap do |r|
|
|
189
|
+
# Legacy files
|
|
193
190
|
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
|
191
|
+
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
|
194
192
|
end
|
|
195
193
|
end
|
|
196
194
|
|
|
197
195
|
##
|
|
198
196
|
# Add result type +:read_quality+ at +base+ (no +_opts+ supported)
|
|
199
197
|
def add_result_read_quality(base, _opts)
|
|
200
|
-
return nil unless
|
|
198
|
+
return nil unless
|
|
199
|
+
result_files_exist?(base, %w[.post.1.html]) ||
|
|
200
|
+
result_files_exist?(base, %w[.solexaqa .fastqc])
|
|
201
201
|
|
|
202
202
|
add_files_to_ds_result(
|
|
203
203
|
MiGA::Result.new("#{base}.json"), name,
|
|
204
|
+
pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
|
|
205
|
+
post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
|
|
206
|
+
adapter_detection: '.adapters.txt',
|
|
207
|
+
# Legacy files
|
|
204
208
|
solexaqa: '.solexaqa', fastqc: '.fastqc'
|
|
205
209
|
)
|
|
206
210
|
end
|
data/lib/miga/json.rb
CHANGED
data/lib/miga/metadata.rb
CHANGED
data/lib/miga/parallel.rb
CHANGED
|
@@ -19,13 +19,18 @@ class MiGA::Parallel < MiGA::MiGA
|
|
|
19
19
|
# 1. Unitary object from +enum+
|
|
20
20
|
# 2. Index of the unitary object
|
|
21
21
|
# 3. Index of the acting thread
|
|
22
|
-
def distribute(enum, threads)
|
|
23
|
-
process(threads)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
def distribute(enum, threads, &blk)
|
|
23
|
+
process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
##
|
|
27
|
+
# Enum through +enum+ executing the passed block only for thread with index
|
|
28
|
+
# +thr+, one of +threads+ threads. The passed block has the same arguments
|
|
29
|
+
# as the one in +#distribute+
|
|
30
|
+
def thread_enum(enum, threads, thr)
|
|
31
|
+
enum.each_with_index do |obj, idx|
|
|
32
|
+
yield(obj, idx, thr) if idx % threads == thr
|
|
27
33
|
end
|
|
28
34
|
end
|
|
29
35
|
end
|
|
30
36
|
end
|
|
31
|
-
|
data/lib/miga/project.rb
CHANGED
|
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
|
|
|
42
42
|
# Create an empty project
|
|
43
43
|
def create
|
|
44
44
|
unless MiGA::MiGA.initialized?
|
|
45
|
-
|
|
45
|
+
warn 'Projects cannot be processed yet, first run: miga init'
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
-
dirs =
|
|
49
|
-
|
|
50
|
-
dirs.each { |d|
|
|
48
|
+
dirs = @@FOLDERS.map { |d| File.join(path, d) }
|
|
49
|
+
dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
|
|
50
|
+
dirs.each { |d| FileUtils.mkdir_p(d) }
|
|
51
51
|
@metadata = MiGA::Metadata.new(
|
|
52
|
-
File.
|
|
53
|
-
|
|
52
|
+
File.join(path, 'miga.project.json'),
|
|
53
|
+
datasets: [], name: File.basename(path)
|
|
54
54
|
)
|
|
55
|
-
d_path = File.
|
|
56
|
-
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?
|
|
55
|
+
d_path = File.join(path, 'daemon', 'daemon.json')
|
|
56
|
+
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
|
|
57
57
|
pull_hook :on_create
|
|
58
58
|
self.load
|
|
59
59
|
end
|
data/lib/miga/project/base.rb
CHANGED
|
@@ -131,15 +131,15 @@ module MiGA::Project::Base
|
|
|
131
131
|
},
|
|
132
132
|
haai_p: {
|
|
133
133
|
desc: 'Value of aai.rb -p on hAAI', type: String,
|
|
134
|
-
default: proc { |project| project.clade? ? 'no' : '
|
|
135
|
-
in: %w[
|
|
134
|
+
default: proc { |project| project.clade? ? 'no' : 'fastaai' },
|
|
135
|
+
in: %w[blast+ blast blat diamond fastaai no]
|
|
136
136
|
},
|
|
137
137
|
aai_p: {
|
|
138
|
-
desc: 'Value of aai.rb -p on AAI', default: '
|
|
138
|
+
desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
|
|
139
139
|
in: %w[blast+ blast blat diamond]
|
|
140
140
|
},
|
|
141
141
|
ani_p: {
|
|
142
|
-
desc: 'Value of ani.rb -p on ANI', default: '
|
|
142
|
+
desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
|
|
143
143
|
in: %w[blast+ blast blat fastani]
|
|
144
144
|
},
|
|
145
145
|
max_try: {
|