miga-base 0.7.26.3 → 1.0.0.sr1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/doctor.rb +50 -19
- data/lib/miga/cli/action/doctor/base.rb +20 -18
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +1 -2
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +11 -6
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +7 -0
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +33 -6
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a599c1e0c51a62f7303cbd1bbf9f8568649dbbeae768b518ad67250b1c3217a4
|
4
|
+
data.tar.gz: f8300cb5c44209d8a3639338319b50cdbce01cf14362006a9f61147854625bd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4bf676ee04f650e8f2388b0f0e732e9da941ca13ceb6b02d986800d353adf2de261ab61a56dde496210f2965255ca0ff9df2cbf5927f8643ddd706736511ef07
|
7
|
+
data.tar.gz: b5cffa3afdb384db2a7fd2d86d11f062491d3df403569cae8bb2209df9119012e539179453efc492227835553bade1ff03805d8b26be4b5988890a7bc9684475
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -116,38 +116,69 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
116
116
|
# Perform bidirectional operation with MiGA::Cli +cli+
|
117
117
|
def check_bidir(cli)
|
118
118
|
cli.say 'Checking if reference distances are bidirectional'
|
119
|
-
|
119
|
+
project = cli.load_project
|
120
|
+
ref_ds = project.each_dataset.select(&:ref?)
|
120
121
|
ref_names = ref_ds.map(&:name)
|
121
122
|
n = ref_ds.size
|
122
123
|
|
123
124
|
# Read data first (threaded)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
125
|
+
tmp = File.join(project.path, 'doctor-bidirectional.tmp')
|
126
|
+
FileUtils.mkdir_p(tmp)
|
127
|
+
MiGA::Parallel.process(cli[:threads]) do |thr|
|
128
|
+
file = File.join(tmp, "#{thr}.json")
|
129
|
+
fh = File.open(file, 'w')
|
130
|
+
[:aai, :ani].each do |metric|
|
131
|
+
fh.puts "# #{metric}"
|
132
|
+
ref_ds.each_with_index do |ds, idx|
|
133
|
+
if idx % cli[:threads] == thr
|
134
|
+
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
135
|
+
row = read_bidirectional(ds, metric)
|
136
|
+
fh.puts "#{ds.name} #{JSON.fast_generate(row)}" unless row.empty?
|
137
|
+
end
|
135
138
|
end
|
136
139
|
end
|
137
|
-
|
140
|
+
fh.puts '# end'
|
141
|
+
fh.flush # necessary for large threaded runs
|
142
|
+
fh.close
|
143
|
+
if thr == 0
|
144
|
+
cli.advance('Reading:', n, n, false)
|
145
|
+
cli.say
|
146
|
+
end
|
147
|
+
end
|
138
148
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
149
|
+
# Merge pieces per thread
|
150
|
+
dist = { aai: {}, ani: {} }
|
151
|
+
cli[:threads].times do |i|
|
152
|
+
cli.advance('Merging:', i + 1, cli[:threads], false)
|
153
|
+
file = File.join(tmp, "#{i}.json")
|
154
|
+
File.open(file, 'r') do |fh|
|
155
|
+
metric = nil
|
156
|
+
fh.each do |ln|
|
157
|
+
qry, row = ln.chomp.split(' ', 2)
|
158
|
+
if qry == '#'
|
159
|
+
metric = row.to_sym
|
160
|
+
else
|
161
|
+
raise "Unrecognized metric: #{metric}" unless dist[metric]
|
162
|
+
JSON.parse(row).each do |sbj, val|
|
163
|
+
dist[metric][qry] ||= {}
|
164
|
+
if dist[metric][sbj]&.include?(qry)
|
165
|
+
dist[metric][sbj].delete(qry) # Already bidirectional
|
166
|
+
else
|
167
|
+
dist[metric][qry][sbj] = val
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
raise "Incomplete thread dump: #{file}" unless metric == :end
|
143
173
|
end
|
144
|
-
cli.say
|
145
174
|
end
|
175
|
+
cli.say
|
176
|
+
FileUtils.rm_rf(tmp)
|
146
177
|
|
147
178
|
# Write missing values (threaded)
|
148
179
|
MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
|
149
180
|
cli.advance('Datasets:', idx + 1, n, false) if thr == 0
|
150
|
-
save_bidirectional(ds)
|
181
|
+
save_bidirectional(ds, dist)
|
151
182
|
end
|
152
183
|
cli.say
|
153
184
|
end
|
@@ -115,30 +115,30 @@ module MiGA::Cli::Action::Doctor::Base
|
|
115
115
|
end
|
116
116
|
|
117
117
|
##
|
118
|
-
# Reads all the distance estimates in +a+ ->
|
119
|
-
#
|
120
|
-
def read_bidirectional(a)
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
data
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
118
|
+
# Reads all the distance estimates in +a+ -> * for +metric+ and
|
119
|
+
# returns them as a hash +{"b_name" => [val, sd, ...], ...}+
|
120
|
+
def read_bidirectional(a, metric)
|
121
|
+
db_file = a.result(:distances)&.file_path("#{metric}_db") or return {}
|
122
|
+
sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
|
123
|
+
data = MiGA::SQLite.new(db_file).run(sql) || []
|
124
|
+
Hash[
|
125
|
+
data.map do |row|
|
126
|
+
k, v = row.shift(2)
|
127
|
+
[k, row.all?(&:zero?) ? v : [v] + row]
|
128
|
+
end
|
129
|
+
]
|
131
130
|
end
|
132
131
|
|
133
132
|
##
|
134
133
|
# Saves all the distance estimates in * -> +a+ into the +a+ databases
|
135
|
-
# (as +a+ -> *), where +a+ is a MiGA::Dataset object
|
136
|
-
|
134
|
+
# (as +a+ -> *), where +a+ is a MiGA::Dataset object, with currently
|
135
|
+
# saved values read from the hash +dist+
|
136
|
+
def save_bidirectional(a, dist)
|
137
137
|
each_database_file(a) do |db_file, metric, result, rank|
|
138
138
|
next if rank == :haai # No need for hAAI to be bidirectional
|
139
139
|
|
140
|
-
b2a =
|
141
|
-
a2b =
|
140
|
+
b2a = dist[rank].map { |b_name, v| b_name if v[a.name] }.compact
|
141
|
+
a2b = dist[rank][a.name]&.keys || []
|
142
142
|
SQLite3::Database.new(db_file) do |db|
|
143
143
|
sql = <<~SQL
|
144
144
|
insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
|
@@ -146,7 +146,9 @@ module MiGA::Cli::Action::Doctor::Base
|
|
146
146
|
SQL
|
147
147
|
db.execute('BEGIN TRANSACTION;')
|
148
148
|
(b2a - a2b).each do |b_name|
|
149
|
-
|
149
|
+
val = dist[rank][b_name][a.name]
|
150
|
+
val = [val, 0, 0, 0] unless val.is_a?(Array)
|
151
|
+
db.execute(sql, [a.name, b_name] + val)
|
150
152
|
end
|
151
153
|
db.execute('COMMIT;')
|
152
154
|
end
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -112,12 +112,15 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
112
112
|
|
113
113
|
def check_software_requirements(rc_fh)
|
114
114
|
cli.puts 'Looking for requirements:'
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
115
|
+
opt_groups = {
|
116
|
+
mytaxa: 'MyTaxa',
|
117
|
+
rdp: 'RDP classifier',
|
118
|
+
reads: 'read processing'
|
119
|
+
}
|
120
|
+
opt_groups.each do |k, v|
|
121
|
+
ask_for_optional(k, v)
|
122
|
+
rc_fh.puts "export MIGA_#{k.to_s.upcase}='#{cli[k] ? 'yes' : 'no'}'"
|
123
|
+
end
|
121
124
|
paths = {}
|
122
125
|
rc_fh.puts 'MIGA_PATH=""'
|
123
126
|
req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
|
@@ -196,8 +199,9 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
196
199
|
cli.puts 'yes'
|
197
200
|
else
|
198
201
|
cli.puts 'no, installing'
|
199
|
-
|
202
|
+
out = install_library(cli, paths, language, library)
|
200
203
|
unless test_library(cli, paths, language, library)
|
204
|
+
cli.puts out
|
201
205
|
raise "Cannot install #{language.to_s.capitalize} library: #{library}"
|
202
206
|
end
|
203
207
|
end
|
@@ -18,7 +18,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
18
18
|
cli.opt_object(opt, [:project])
|
19
19
|
opt.on(
|
20
20
|
'-T', '--taxon STRING',
|
21
|
-
'(Mandatory
|
21
|
+
'(Mandatory) Taxon name (e.g., a species binomial)'
|
22
22
|
) { |v| cli[:taxon] = v }
|
23
23
|
opt.on(
|
24
24
|
'-m', '--metadata STRING',
|
@@ -137,7 +137,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
137
137
|
end
|
138
138
|
|
139
139
|
def sanitize_cli
|
140
|
-
cli.ensure_par(taxon: '-T')
|
140
|
+
cli.ensure_par(taxon: '-T')
|
141
141
|
tasks = %w[reference complete chromosome scaffold contig]
|
142
142
|
unless tasks.any? { |i| cli[i.to_sym] }
|
143
143
|
raise 'No action requested: pick at least one type of genome'
|
@@ -204,7 +204,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
204
204
|
'from(GenomeAssemblies).' \
|
205
205
|
'usingschema(/schema/GenomeAssemblies).' \
|
206
206
|
'matching(tab==["Prokaryotes"] and q=="' \
|
207
|
-
"#{cli[:taxon]
|
207
|
+
"#{cli[:taxon]&.tr('"', "'")}\"",
|
208
208
|
fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
|
209
209
|
'level|level,ftp_path_genbank|ftp_path_genbank,' \
|
210
210
|
'release_date|release_date,strain|strain',
|
@@ -99,13 +99,13 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
|
|
99
99
|
ds_name = []
|
100
100
|
File.open(tab, 'r') do |fh|
|
101
101
|
fh.each_line do |ln|
|
102
|
-
if ln =~ /^ {
|
102
|
+
if ln =~ /^ {0,#{(rank_i - 1) * 2}}\S+:\S+:/
|
103
103
|
in_rank = nil
|
104
104
|
ds_name = []
|
105
105
|
elsif ln =~ /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
|
106
106
|
in_rank = $2 == '?' ? nil : $1
|
107
107
|
ds_name = []
|
108
|
-
elsif ln =~ /^ *# (\S+)/
|
108
|
+
elsif ln =~ /^ *# (\S+)/ && !in_rank.nil?
|
109
109
|
ds_i = $1
|
110
110
|
ds_name << ds_i
|
111
111
|
ds_name.each do |ds_j|
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -81,21 +81,22 @@ module MiGA::Cli::Action::Wf
|
|
81
81
|
cli[:aai_p] = 'blast+'
|
82
82
|
cli[:ani_p] = 'blast+'
|
83
83
|
end
|
84
|
-
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
84
|
+
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani (default)') do
|
85
85
|
cli[:aai_p] = 'diamond'
|
86
86
|
cli[:ani_p] = 'fastani'
|
87
87
|
end
|
88
88
|
opt.on(
|
89
89
|
'--haai-p STRING',
|
90
|
-
'hAAI search engine. One of: blast
|
90
|
+
'hAAI search engine. One of: blast+, fastaai, blat, diamond, fastaai, no',
|
91
|
+
'The default is "no" for clade projects and "fastaai" otherwise'
|
91
92
|
) { |v| cli[:haai_p] = v }
|
92
93
|
opt.on(
|
93
94
|
'--aai-p STRING',
|
94
|
-
'AAI search engine. One of: blast
|
95
|
+
'AAI search engine. One of: blast+, blat, diamond (default)'
|
95
96
|
) { |v| cli[:aai_p] = v }
|
96
97
|
opt.on(
|
97
98
|
'--ani-p STRING',
|
98
|
-
'ANI search engine. One of: blast
|
99
|
+
'ANI search engine. One of: blast+, blat, fastani (default)'
|
99
100
|
) { |v| cli[:ani_p] = v }
|
100
101
|
end
|
101
102
|
|
data/lib/miga/daemon.rb
CHANGED
@@ -73,10 +73,10 @@ class MiGA::Daemon < MiGA::MiGA
|
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
75
|
miga_say "Saving log to: #{output_file}" unless show_log?
|
76
|
-
queue_maintenance
|
77
|
-
load_status
|
78
76
|
say 'Configuration options:'
|
79
77
|
say @runopts.to_s
|
78
|
+
load_status
|
79
|
+
queue_maintenance(true)
|
80
80
|
end
|
81
81
|
|
82
82
|
##
|
@@ -87,6 +87,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
87
87
|
check_datasets or check_project
|
88
88
|
if shutdown_when_done? && (jobs_running.size + jobs_to_run.size).zero?
|
89
89
|
say 'Nothing else to do, shutting down'
|
90
|
+
exit_cleanup
|
90
91
|
return false
|
91
92
|
end
|
92
93
|
flush!
|
@@ -102,13 +103,19 @@ class MiGA::Daemon < MiGA::MiGA
|
|
102
103
|
|
103
104
|
##
|
104
105
|
# Queue maintenance tasks as an analysis job
|
105
|
-
def queue_maintenance
|
106
|
-
return if bypass_maintenance? || shutdown_when_done?
|
106
|
+
def queue_maintenance(force = false)
|
107
|
+
return if bypass_maintenance? || (!force && shutdown_when_done?)
|
107
108
|
|
108
109
|
say 'Queueing maintenance tasks'
|
109
110
|
queue_job(:maintenance)
|
110
111
|
end
|
111
112
|
|
113
|
+
##
|
114
|
+
# Remove temporary files on completion
|
115
|
+
def exit_cleanup
|
116
|
+
FileUtils.rm_f(File.join(daemon_home, 'status.json'))
|
117
|
+
end
|
118
|
+
|
112
119
|
##
|
113
120
|
# Send +msg+ to +say+ as long as +level+ is at most +verbosity+
|
114
121
|
def l_say(level, *msg)
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -181,26 +181,30 @@ module MiGA::Dataset::Result
|
|
181
181
|
add_files_to_ds_result(
|
182
182
|
MiGA::Result.new("#{base}.json"), name,
|
183
183
|
if result_files_exist?(base, '.2.clipped.fastq')
|
184
|
-
{
|
185
|
-
pair1: '.1.clipped.fastq',
|
186
|
-
pair2: '.2.clipped.fastq',
|
187
|
-
single: '.1.clipped.single.fastq'
|
188
|
-
}
|
184
|
+
{ pair1: '.1.clipped.fastq', pair2: '.2.clipped.fastq' }
|
189
185
|
else
|
190
186
|
{ single: '.1.clipped.fastq' }
|
191
187
|
end
|
192
188
|
).tap do |r|
|
189
|
+
# Legacy files
|
193
190
|
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
191
|
+
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
194
192
|
end
|
195
193
|
end
|
196
194
|
|
197
195
|
##
|
198
196
|
# Add result type +:read_quality+ at +base+ (no +_opts+ supported)
|
199
197
|
def add_result_read_quality(base, _opts)
|
200
|
-
return nil unless
|
198
|
+
return nil unless
|
199
|
+
result_files_exist?(base, %w[.post.1.html]) ||
|
200
|
+
result_files_exist?(base, %w[.solexaqa .fastqc])
|
201
201
|
|
202
202
|
add_files_to_ds_result(
|
203
203
|
MiGA::Result.new("#{base}.json"), name,
|
204
|
+
pre_qc_1: '.pre.1.html', pre_qc_2: '.pre.2.html',
|
205
|
+
post_qc_1: '.post.1.html', post_qc_2: '.post.2.html',
|
206
|
+
adapter_detection: '.adapters.txt',
|
207
|
+
# Legacy files
|
204
208
|
solexaqa: '.solexaqa', fastqc: '.fastqc'
|
205
209
|
)
|
206
210
|
end
|
data/lib/miga/json.rb
CHANGED
data/lib/miga/metadata.rb
CHANGED
data/lib/miga/parallel.rb
CHANGED
@@ -19,13 +19,18 @@ class MiGA::Parallel < MiGA::MiGA
|
|
19
19
|
# 1. Unitary object from +enum+
|
20
20
|
# 2. Index of the unitary object
|
21
21
|
# 3. Index of the acting thread
|
22
|
-
def distribute(enum, threads)
|
23
|
-
process(threads)
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
def distribute(enum, threads, &blk)
|
23
|
+
process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Enum through +enum+ executing the passed block only for thread with index
|
28
|
+
# +thr+, one of +threads+ threads. The passed block has the same arguments
|
29
|
+
# as the one in +#distribute+
|
30
|
+
def thread_enum(enum, threads, thr)
|
31
|
+
enum.each_with_index do |obj, idx|
|
32
|
+
yield(obj, idx, thr) if idx % threads == thr
|
27
33
|
end
|
28
34
|
end
|
29
35
|
end
|
30
36
|
end
|
31
|
-
|
data/lib/miga/project.rb
CHANGED
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
|
|
42
42
|
# Create an empty project
|
43
43
|
def create
|
44
44
|
unless MiGA::MiGA.initialized?
|
45
|
-
|
45
|
+
warn 'Projects cannot be processed yet, first run: miga init'
|
46
46
|
end
|
47
47
|
|
48
|
-
dirs =
|
49
|
-
|
50
|
-
dirs.each { |d|
|
48
|
+
dirs = @@FOLDERS.map { |d| File.join(path, d) }
|
49
|
+
dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
|
50
|
+
dirs.each { |d| FileUtils.mkdir_p(d) }
|
51
51
|
@metadata = MiGA::Metadata.new(
|
52
|
-
File.
|
53
|
-
|
52
|
+
File.join(path, 'miga.project.json'),
|
53
|
+
datasets: [], name: File.basename(path)
|
54
54
|
)
|
55
|
-
d_path = File.
|
56
|
-
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?
|
55
|
+
d_path = File.join(path, 'daemon', 'daemon.json')
|
56
|
+
File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
|
57
57
|
pull_hook :on_create
|
58
58
|
self.load
|
59
59
|
end
|
data/lib/miga/project/base.rb
CHANGED
@@ -131,15 +131,15 @@ module MiGA::Project::Base
|
|
131
131
|
},
|
132
132
|
haai_p: {
|
133
133
|
desc: 'Value of aai.rb -p on hAAI', type: String,
|
134
|
-
default: proc { |project| project.clade? ? 'no' : '
|
135
|
-
in: %w[
|
134
|
+
default: proc { |project| project.clade? ? 'no' : 'fastaai' },
|
135
|
+
in: %w[blast+ blast blat diamond fastaai no]
|
136
136
|
},
|
137
137
|
aai_p: {
|
138
|
-
desc: 'Value of aai.rb -p on AAI', default: '
|
138
|
+
desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
|
139
139
|
in: %w[blast+ blast blat diamond]
|
140
140
|
},
|
141
141
|
ani_p: {
|
142
|
-
desc: 'Value of ani.rb -p on ANI', default: '
|
142
|
+
desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
|
143
143
|
in: %w[blast+ blast blat fastani]
|
144
144
|
},
|
145
145
|
max_try: {
|