miga-base 0.4.1.0 → 0.4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/miga +2 -244
- data/lib/miga/cli/action/about.rb +44 -0
- data/lib/miga/cli/action/add.rb +139 -0
- data/lib/miga/cli/action/add_result.rb +26 -0
- data/lib/miga/cli/action/console.rb +19 -0
- data/lib/miga/cli/action/daemon.rb +74 -0
- data/lib/miga/cli/action/date.rb +18 -0
- data/lib/miga/cli/action/doctor.rb +210 -0
- data/lib/miga/cli/action/edit.rb +24 -0
- data/lib/miga/cli/action/files.rb +31 -0
- data/lib/miga/cli/action/find.rb +48 -0
- data/lib/miga/cli/action/generic.rb +44 -0
- data/lib/miga/cli/action/get.rb +132 -0
- data/lib/miga/cli/action/init.rb +343 -0
- data/lib/miga/cli/action/ln.rb +42 -0
- data/lib/miga/cli/action/ls.rb +55 -0
- data/lib/miga/cli/action/ncbi_get.rb +218 -0
- data/lib/miga/cli/action/new.rb +45 -0
- data/lib/miga/cli/action/next_step.rb +27 -0
- data/lib/miga/cli/action/plugins.rb +28 -0
- data/lib/miga/cli/action/rm.rb +25 -0
- data/lib/miga/cli/action/run.rb +39 -0
- data/lib/miga/cli/action/stats.rb +140 -0
- data/lib/miga/cli/action/summary.rb +49 -0
- data/lib/miga/cli/action/tax_dist.rb +102 -0
- data/lib/miga/cli/action/tax_index.rb +47 -0
- data/lib/miga/cli/action/tax_set.rb +59 -0
- data/lib/miga/cli/action/tax_test.rb +77 -0
- data/lib/miga/cli/action.rb +66 -0
- data/lib/miga/cli/base.rb +90 -0
- data/lib/miga/cli.rb +426 -0
- data/lib/miga/project/result.rb +14 -6
- data/lib/miga/remote_dataset.rb +1 -1
- data/lib/miga/tax_index.rb +5 -4
- data/lib/miga/taxonomy/base.rb +63 -0
- data/lib/miga/taxonomy.rb +87 -92
- data/lib/miga/version.rb +6 -6
- data/test/taxonomy_test.rb +49 -9
- data/utils/distance/commands.rb +11 -11
- data/utils/distance/pipeline.rb +5 -5
- metadata +43 -49
- data/actions/about.rb +0 -43
- data/actions/add.rb +0 -129
- data/actions/add_result.rb +0 -30
- data/actions/daemon.rb +0 -55
- data/actions/date.rb +0 -14
- data/actions/doctor.rb +0 -201
- data/actions/edit.rb +0 -33
- data/actions/files.rb +0 -43
- data/actions/find.rb +0 -41
- data/actions/get.rb +0 -105
- data/actions/init.rb +0 -301
- data/actions/ln.rb +0 -47
- data/actions/ls.rb +0 -61
- data/actions/ncbi_get.rb +0 -192
- data/actions/new.rb +0 -44
- data/actions/next_step.rb +0 -33
- data/actions/plugins.rb +0 -25
- data/actions/rm.rb +0 -29
- data/actions/run.rb +0 -45
- data/actions/stats.rb +0 -149
- data/actions/summary.rb +0 -57
- data/actions/tax_dist.rb +0 -106
- data/actions/tax_index.rb +0 -46
- data/actions/tax_set.rb +0 -63
- data/actions/tax_test.rb +0 -80
data/actions/ncbi_get.rb
DELETED
@@ -1,192 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require 'miga/remote_dataset'
|
7
|
-
require 'csv'
|
8
|
-
|
9
|
-
o = {q: true, query: false, unlink: false,
|
10
|
-
reference: false, legacy_name: false,
|
11
|
-
complete: false, chromosome: false,
|
12
|
-
scaffold: false, contig: false, add_version: true, dry: false,
|
13
|
-
get_md: false, only_md: false, save_every: 1}
|
14
|
-
OptionParser.new do |opt|
|
15
|
-
opt_banner(opt)
|
16
|
-
opt_object(opt, o, [:project])
|
17
|
-
opt.on('-T', '--taxon STRING',
|
18
|
-
'(Mandatory unless --reference) Taxon name (e.g., a species binomial).'
|
19
|
-
){ |v| o[:taxon]=v }
|
20
|
-
opt.on('--reference',
|
21
|
-
'Download all reference genomes (ignores any other status).'
|
22
|
-
){ |v| o[:reference]=v }
|
23
|
-
opt.on('--complete', 'Download complete genomes.'){ |v| o[:complete]=v }
|
24
|
-
opt.on('--chromosome',
|
25
|
-
'Download complete chromosomes.'){ |v| o[:chromosome]=v }
|
26
|
-
opt.on('--scaffold', 'Download genomes in scaffolds.'){ |v| o[:scaffold]=v }
|
27
|
-
opt.on('--contig', 'Download genomes in contigs.'){ |v| o[:contig]=v }
|
28
|
-
opt.on('--all', 'Download all genomes (in any status).') do
|
29
|
-
o[:complete] = true
|
30
|
-
o[:chromosome] = true
|
31
|
-
o[:scaffold] = true
|
32
|
-
o[:contig] = true
|
33
|
-
end
|
34
|
-
opt.on('--no-version-name',
|
35
|
-
'Do not add sequence version to the dataset name.',
|
36
|
-
'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
|
37
|
-
opt.on('--legacy-name',
|
38
|
-
'Use dataset names based on chromosome entries instead of assembly.'
|
39
|
-
){ |v| o[:legacy_name] = v }
|
40
|
-
opt.on('--blacklist PATH',
|
41
|
-
'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
|
42
|
-
opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
|
43
|
-
opt.on('--get-metadata',
|
44
|
-
'Only download and update metadata for existing datasets'
|
45
|
-
){ |v| o[:get_md] = v }
|
46
|
-
opt.on('--only-metadata',
|
47
|
-
'Create datasets without input data but retrieve all metadata.'
|
48
|
-
){ |v| o[:only_md] = v }
|
49
|
-
opt.on('--save-every INT',
|
50
|
-
'Save project every this many downloaded datasets.',
|
51
|
-
'If zero, it saves the project only once upon completion.',
|
52
|
-
'By default: 1.'){ |v| o[:save_every] = v.to_i }
|
53
|
-
opt.on('-q', '--query',
|
54
|
-
'Register the datasets as queries, not reference datasets.'
|
55
|
-
){ |v| o[:query]=v }
|
56
|
-
opt.on('-u', '--unlink',
|
57
|
-
'Unlink all datasets in the project missing from the download list.'
|
58
|
-
){ |v| o[:unlink]=v }
|
59
|
-
opt.on('-R', '--remote-list PATH',
|
60
|
-
'Path to an output file with the list of all datasets listed remotely.'
|
61
|
-
){ |v| o[:remote_list]=v }
|
62
|
-
opt.on('--api-key STRING', 'NCBI API key.'){ |v| ENV['NCBI_API_KEY'] = v }
|
63
|
-
opt_common(opt, o)
|
64
|
-
end.parse!
|
65
|
-
|
66
|
-
opt_require(o, project: '-P')
|
67
|
-
opt_require(o, taxon: '-T') unless o[:reference]
|
68
|
-
unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
|
69
|
-
raise 'No action requested. Pick at least one type of genome.'
|
70
|
-
end
|
71
|
-
o[:save_every] = 1 if o[:dry]
|
72
|
-
|
73
|
-
##=> Main <=
|
74
|
-
$stderr.puts "Loading project." unless o[:q]
|
75
|
-
p = MiGA::Project.load(o[:project])
|
76
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
77
|
-
d = []
|
78
|
-
ds = {}
|
79
|
-
downloaded = 0
|
80
|
-
|
81
|
-
url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
|
82
|
-
url_param = {
|
83
|
-
q: '[display()].' +
|
84
|
-
'from(GenomeAssemblies).' +
|
85
|
-
'usingschema(/schema/GenomeAssemblies).' +
|
86
|
-
'matching(tab==["Prokaryotes"] and q=="' + o[:taxon].tr('"',"'") + '"',
|
87
|
-
fields: 'organism|organism,assembly|assembly,replicons|replicons,' +
|
88
|
-
'level|level,ftp_path_genbank|ftp_path_genbank,release_date|release_date,' +
|
89
|
-
'strain|strain',
|
90
|
-
nolimit: 'on',
|
91
|
-
}
|
92
|
-
if o[:reference]
|
93
|
-
url_param[:q] += ' and refseq_category==["representative"]'
|
94
|
-
else
|
95
|
-
status = {
|
96
|
-
complete: 'Complete',
|
97
|
-
chromosome: ' Chromosome', # <- The leading space is *VERY* important!
|
98
|
-
scaffold: 'Scaffold',
|
99
|
-
contig: 'Contig'
|
100
|
-
}.map { |k, v| '"' + v + '"' if o[k] }.compact.join(',')
|
101
|
-
url_param[:q] += ' and level==[' + status + ']'
|
102
|
-
end
|
103
|
-
url_param[:q] += ')'
|
104
|
-
url = url_base + URI.encode_www_form(url_param)
|
105
|
-
$stderr.puts 'Downloading genome list' unless o[:q]
|
106
|
-
lineno = 0
|
107
|
-
doc = MiGA::RemoteDataset.download_url(url)
|
108
|
-
CSV.parse(doc, headers: true).each do |r|
|
109
|
-
asm = r['assembly']
|
110
|
-
next if asm.nil? or asm.empty? or asm == '-'
|
111
|
-
next unless r['ftp_path_genbank']
|
112
|
-
|
113
|
-
# Get replicons
|
114
|
-
rep = r['replicons'].nil? ? nil : r['replicons'].
|
115
|
-
split('; ').map{ |i| i.gsub(/.*:/,'') }.map{ |i| i.gsub(/\/.*/, '') }
|
116
|
-
|
117
|
-
# Set name
|
118
|
-
if o[:legacy_name] and o[:reference]
|
119
|
-
n = r['#organism'].miga_name
|
120
|
-
else
|
121
|
-
if o[:legacy_name] and ['Complete',' Chromosome'].include? r['level']
|
122
|
-
acc = rep.nil? ? '' : rep.first
|
123
|
-
else
|
124
|
-
acc = asm
|
125
|
-
end
|
126
|
-
acc.gsub!(/\.\d+\Z/, '') unless o[:add_version]
|
127
|
-
n = "#{r['#organism']}_#{acc}".miga_name
|
128
|
-
end
|
129
|
-
|
130
|
-
# Register for download
|
131
|
-
fna_url = r['ftp_path_genbank'] + '/' +
|
132
|
-
File.basename(r['ftp_path_genbank']) + '_genomic.fna.gz'
|
133
|
-
ds[n] = {
|
134
|
-
ids: [fna_url], db: :assembly_gz, universe: :web,
|
135
|
-
md: {
|
136
|
-
type: :genome, ncbi_asm: asm, strain: r['strain']
|
137
|
-
}
|
138
|
-
}
|
139
|
-
ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
|
140
|
-
ds[n][:md][:release_date] =
|
141
|
-
Time.parse(r['release_date']).to_s unless r['release_date'].nil?
|
142
|
-
end
|
143
|
-
|
144
|
-
# Discard blacklisted
|
145
|
-
unless o[:blacklist].nil?
|
146
|
-
$stderr.puts "Discarding datasets in #{o[:blacklist]}." unless o[:q]
|
147
|
-
File.readlines(o[:blacklist]).
|
148
|
-
select{ |i| i !~ /^#/ }.map(&:chomp).each{ |i| ds.delete i }
|
149
|
-
end
|
150
|
-
|
151
|
-
# Download entries
|
152
|
-
$stderr.puts "Downloading #{ds.size} " +
|
153
|
-
(ds.size == 1 ? 'entry' : 'entries') unless o[:q]
|
154
|
-
p.do_not_save = true if o[:save_every] != 1
|
155
|
-
ds.each do |name, body|
|
156
|
-
d << name
|
157
|
-
puts name
|
158
|
-
next if p.dataset(name).nil? == o[:get_md]
|
159
|
-
downloaded += 1
|
160
|
-
next if o[:dry]
|
161
|
-
$stderr.puts ' Locating remote dataset.' unless o[:q]
|
162
|
-
body[:md][:metadata_only] = true if o[:only_md]
|
163
|
-
rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
|
164
|
-
if o[:get_md]
|
165
|
-
$stderr.puts ' Updating dataset.' unless o[:q]
|
166
|
-
rd.update_metadata(p.dataset(name), body[:md])
|
167
|
-
else
|
168
|
-
$stderr.puts ' Creating dataset.' unless o[:q]
|
169
|
-
rd.save_to(p, name, !o[:query], body[:md])
|
170
|
-
p.add_dataset(name)
|
171
|
-
end
|
172
|
-
p.save! if o[:save_every] > 1 and (downloaded % o[:save_every]) == 0
|
173
|
-
end
|
174
|
-
|
175
|
-
p.do_not_save = false
|
176
|
-
p.save! if o[:save_every] != 1
|
177
|
-
|
178
|
-
# Finalize
|
179
|
-
$stderr.puts "Datasets listed: #{d.size}" unless o[:q]
|
180
|
-
$stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
|
181
|
-
downloaded.to_s unless o[:q]
|
182
|
-
unless o[:remote_list].nil?
|
183
|
-
File.open(o[:remote_list], 'w') do |fh|
|
184
|
-
d.each { |i| fh.puts i }
|
185
|
-
end
|
186
|
-
end
|
187
|
-
if o[:unlink]
|
188
|
-
unlink = p.dataset_names - d
|
189
|
-
unlink.each { |i| p.unlink_dataset(i).remove! }
|
190
|
-
$stderr.puts "Datasets unlinked: #{unlink.size}" unless o[:q]
|
191
|
-
end
|
192
|
-
|
data/actions/new.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true, update: false}
|
7
|
-
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :project_type_req])
|
10
|
-
opt.on('-n', '--name STRING', 'Name of the project.'){ |v| o[:name] = v }
|
11
|
-
opt.on('-d', '--description STRING',
|
12
|
-
'Description of the project.'){ |v| o[:description] = v }
|
13
|
-
opt.on('-c', '--comments STRING',
|
14
|
-
'Comments on the project.'){ |v| o[:comments] = v }
|
15
|
-
opt.on('-m', '--metadata STRING',
|
16
|
-
'Metadata as key-value pairs separated by = and delimited by comma.',
|
17
|
-
'Values are saved as strings except for booleans (true / false) or nil.'
|
18
|
-
){ |v| o[:metadata] = v }
|
19
|
-
opt.on('--update', 'Updates the project if it already exists.',
|
20
|
-
'Same as "miga edit".'){ o[:update] = true }
|
21
|
-
opt_common(opt, o)
|
22
|
-
end.parse!
|
23
|
-
|
24
|
-
##=> Main <=
|
25
|
-
opt_require(o, project: '-P')
|
26
|
-
opt_require_type(o, MiGA::Project) unless o[:update]
|
27
|
-
|
28
|
-
unless File.exist? "#{ENV["HOME"]}/.miga_rc" and
|
29
|
-
File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
|
30
|
-
raise "You must initialize MiGA before creating the first project.\n" +
|
31
|
-
'Please use "miga init".'
|
32
|
-
end
|
33
|
-
|
34
|
-
$stderr.puts 'Creating project.' unless o[:q]
|
35
|
-
raise 'Project already exists, aborting.' unless
|
36
|
-
o[:update] or not MiGA::Project.exist? o[:project]
|
37
|
-
p = MiGA::Project.new(o[:project], o[:update])
|
38
|
-
# The following check is redundant with MiGA::Project#create,
|
39
|
-
# but allows upgrading projects from (very) early code versions
|
40
|
-
o[:name] = File.basename(p.path) if o[:update] and o[:name].nil?
|
41
|
-
p = add_metadata(o, p)
|
42
|
-
p.save
|
43
|
-
|
44
|
-
$stderr.puts 'Done.' unless o[:q]
|
data/actions/next_step.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true}
|
7
|
-
opts = OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt])
|
10
|
-
opt_common(opt, o)
|
11
|
-
end.parse!
|
12
|
-
|
13
|
-
##=> Main <=
|
14
|
-
opts.parse!
|
15
|
-
opt_require(o, project: '-P')
|
16
|
-
|
17
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
18
|
-
p = MiGA::Project.load(o[:project])
|
19
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
20
|
-
|
21
|
-
n = nil
|
22
|
-
if not o[:dataset].nil?
|
23
|
-
$stderr.puts 'Loading dataset.' unless o[:q]
|
24
|
-
d = p.dataset o[:dataset]
|
25
|
-
raise "Impossible to load dataset: #{o[:dataset]}" if d.nil?
|
26
|
-
n = d.next_preprocessing if d.is_active?
|
27
|
-
else
|
28
|
-
n = p.next_distances(false)
|
29
|
-
n ||= p.next_inclade(false)
|
30
|
-
end
|
31
|
-
n ||= '?'
|
32
|
-
puts n
|
33
|
-
|
data/actions/plugins.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q:true, update:false}
|
7
|
-
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project])
|
10
|
-
opt.on("--install PATH",
|
11
|
-
"Installs the specified plugin in the project."){ |v| o[:install]=v }
|
12
|
-
opt.on("--uninstall PATH",
|
13
|
-
"Uninstalls the specified plugin from the project."){ |v| o[:uninstall]=v }
|
14
|
-
opt_common(opt, o)
|
15
|
-
end.parse!
|
16
|
-
|
17
|
-
##=> Main <=
|
18
|
-
opt_require(o, project:"-P")
|
19
|
-
|
20
|
-
p = MiGA::Project.new(o[:project], true)
|
21
|
-
p.install_plugin(o[:install]) unless o[:install].nil?
|
22
|
-
p.uninstall_plugin(o[:uninstall]) unless o[:uninstall].nil?
|
23
|
-
p.plugins.each { |i| puts i }
|
24
|
-
|
25
|
-
$stderr.puts "Done." unless o[:q]
|
data/actions/rm.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true, remove: false}
|
7
|
-
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o)
|
10
|
-
opt.on('-r', '--remove', 'Also remove all associated files.',
|
11
|
-
'By default, only unlinks from metadata.'){ o[:remove] = true }
|
12
|
-
opt_common(opt, o)
|
13
|
-
end.parse!
|
14
|
-
|
15
|
-
##=> Main <=
|
16
|
-
opt_require(o)
|
17
|
-
|
18
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
19
|
-
p = MiGA::Project.load(o[:project])
|
20
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
21
|
-
|
22
|
-
$stderr.puts 'Unlinking dataset.' unless o[:q]
|
23
|
-
raise 'Dataset doesn\'t exist, aborting.' unless
|
24
|
-
MiGA::Dataset.exist?(p, o[:dataset])
|
25
|
-
d = p.unlink_dataset(o[:dataset])
|
26
|
-
d.remove! if o[:remove]
|
27
|
-
|
28
|
-
$stderr.puts 'Done.' unless o[:q]
|
29
|
-
|
data/actions/run.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require 'shellwords'
|
7
|
-
|
8
|
-
o = {q: true, try_load: false, thr: 1}
|
9
|
-
OptionParser.new do |opt|
|
10
|
-
opt_banner(opt)
|
11
|
-
opt_object(opt, o, [:project, :dataset_opt, :result])
|
12
|
-
opt.on('-t', '--threads INT',
|
13
|
-
"Threads to use in the local run (by default: #{o[:thr]})."
|
14
|
-
){ |v| o[:thr] = v.to_i }
|
15
|
-
opt_common(opt, o)
|
16
|
-
end.parse!
|
17
|
-
|
18
|
-
##=> Main <=
|
19
|
-
opt_require(o, project: '-P', name: '-r')
|
20
|
-
|
21
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
22
|
-
p = MiGA::Project.load(o[:project])
|
23
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
24
|
-
|
25
|
-
virtual_task = false
|
26
|
-
miga = MiGA::MiGA.root_path
|
27
|
-
cmd = ["PROJECT=#{p.path.shellescape}", 'RUNTYPE=bash',
|
28
|
-
"MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
|
29
|
-
if o[:dataset].nil?
|
30
|
-
type = MiGA::Project
|
31
|
-
virtual_task = true if o[:name] == :p
|
32
|
-
else
|
33
|
-
d = p.dataset(o[:dataset])
|
34
|
-
raise 'Cannot load dataset.' if d.nil?
|
35
|
-
cmd << "DATASET=#{d.name.shellescape}"
|
36
|
-
type = MiGA::Dataset
|
37
|
-
virtual_task = true if o[:name] == :d
|
38
|
-
end
|
39
|
-
raise "Unsupported #{type.to_s.gsub(/.*::/, '')} result: #{o[:name]}." if
|
40
|
-
type.RESULT_DIRS[o[:name].to_sym].nil? and not virtual_task
|
41
|
-
cmd << MiGA::MiGA.script_path(o[:name], miga: miga, project: p).shellescape
|
42
|
-
pid = spawn cmd.join(' ')
|
43
|
-
Process.wait pid
|
44
|
-
|
45
|
-
$stderr.puts 'Done.' unless o[:q]
|
data/actions/stats.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q:true, try_load:false}
|
7
|
-
opts = OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt, :result])
|
10
|
-
opt.on("--key STRING",
|
11
|
-
"Returns only the value of the requested key."){ |v| o[:key] = v }
|
12
|
-
opt.on("--compute-and-save",
|
13
|
-
"Computes and saves the statistics."){ |v| o[:compute] = v }
|
14
|
-
opt.on("--try-load",
|
15
|
-
"Checks if stat exists instead of computing on --compute-and-save."
|
16
|
-
){ |v| o[:try_load] = v }
|
17
|
-
opt_common(opt, o)
|
18
|
-
end.parse!
|
19
|
-
|
20
|
-
##=> Main <=
|
21
|
-
opts.parse!
|
22
|
-
opt_require(o, project:"-P", name:"-r")
|
23
|
-
|
24
|
-
$stderr.puts "Loading project." unless o[:q]
|
25
|
-
p = MiGA::Project.load(o[:project])
|
26
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
27
|
-
|
28
|
-
$stderr.puts "Loading result." unless o[:q]
|
29
|
-
d = nil
|
30
|
-
if o[:dataset].nil?
|
31
|
-
r = p.add_result(o[:name], false)
|
32
|
-
else
|
33
|
-
d = p.dataset(o[:dataset])
|
34
|
-
r = d.add_result(o[:name], false)
|
35
|
-
end
|
36
|
-
raise "Cannot load result." if r.nil?
|
37
|
-
|
38
|
-
o[:compute] = false if o[:try_load] and
|
39
|
-
(not r[:stats].nil?) and (not r[:stats].empty?)
|
40
|
-
|
41
|
-
if o[:compute]
|
42
|
-
$stderr.puts "Computing statistics." unless o[:q]
|
43
|
-
stats = {}
|
44
|
-
case o[:name]
|
45
|
-
when :raw_reads
|
46
|
-
if r[:files][:pair1].nil?
|
47
|
-
s = MiGA::MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
|
48
|
-
stats = {
|
49
|
-
reads: s[:n],
|
50
|
-
length_average: [s[:avg], "bp"],
|
51
|
-
length_standard_deviation: [s[:sd], "bp"],
|
52
|
-
g_c_content: [s[:gc], "%"]}
|
53
|
-
else
|
54
|
-
s1 = MiGA::MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
|
55
|
-
s2 = MiGA::MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
|
56
|
-
stats = {
|
57
|
-
read_pairs: s1[:n],
|
58
|
-
forward_length_average: [s1[:avg], "bp"],
|
59
|
-
forward_length_standard_deviation: [s1[:sd], "bp"],
|
60
|
-
forward_g_c_content: [s1[:gc], "%"],
|
61
|
-
reverse_length_average: [s2[:avg], "bp"],
|
62
|
-
reverse_length_standard_deviation: [s2[:sd], "bp"],
|
63
|
-
reverse_g_c_content: [s2[:gc], "%"]}
|
64
|
-
end
|
65
|
-
when :trimmed_fasta
|
66
|
-
f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
|
67
|
-
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
|
68
|
-
stats = {
|
69
|
-
reads: s[:n],
|
70
|
-
length_average: [s[:avg], "bp"],
|
71
|
-
length_standard_deviation: [s[:sd], "bp"],
|
72
|
-
g_c_content: [s[:gc], "%"]}
|
73
|
-
when :assembly
|
74
|
-
s = MiGA::MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
|
75
|
-
n50: true, gc: true)
|
76
|
-
stats = {
|
77
|
-
contigs: s[:n],
|
78
|
-
n50: [s[:n50], "bp"],
|
79
|
-
total_length: [s[:tot], "bp"],
|
80
|
-
g_c_content: [s[:gc], "%"]}
|
81
|
-
when :cds
|
82
|
-
s = MiGA::MiGA.seqs_length(r.file_path(:proteins), :fasta)
|
83
|
-
stats = {
|
84
|
-
predicted_proteins: s[:n],
|
85
|
-
average_length: [s[:avg], "aa"]}
|
86
|
-
asm = d.add_result(:assembly, false)
|
87
|
-
unless asm.nil? or asm[:stats][:total_length].nil?
|
88
|
-
stats[:coding_density] =
|
89
|
-
[300.0 * s[:tot] / asm[:stats][:total_length][0], "%"]
|
90
|
-
end
|
91
|
-
when :essential_genes
|
92
|
-
if d.is_multi?
|
93
|
-
stats = {median_copies:0, mean_copies:0}
|
94
|
-
File.open(r.file_path(:report), "r") do |fh|
|
95
|
-
fh.each_line do |ln|
|
96
|
-
if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
|
97
|
-
stats["#{$1.downcase}_copies".to_sym] = $2.to_f
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
else
|
102
|
-
# Fix estimate for Archaea
|
103
|
-
if not d.metadata[:tax].nil? and
|
104
|
-
d.metadata[:tax].is_in? MiGA::Taxonomy.new("d:Archaea") and
|
105
|
-
r.file_path(:bac_report).nil?
|
106
|
-
scr = "#{MiGA::MiGA.root_path}/utils/arch-ess-genes.rb"
|
107
|
-
rep = r.file_path(:report)
|
108
|
-
$stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
|
109
|
-
r.add_file(:bac_report, "#{d.name}.ess/log")
|
110
|
-
r.add_file(:report, "#{d.name}.ess/log.archaea")
|
111
|
-
end
|
112
|
-
# Extract/compute quality values
|
113
|
-
stats = {completeness: [0.0,"%"], contamination: [0.0,"%"]}
|
114
|
-
File.open(r.file_path(:report), "r") do |fh|
|
115
|
-
fh.each_line do |ln|
|
116
|
-
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
117
|
-
stats[$1.downcase.to_sym][0] = $2.to_f
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
122
|
-
d.metadata[:quality] = case stats[:quality]
|
123
|
-
when 80..100 ; :excellent
|
124
|
-
when 50..80 ; :high
|
125
|
-
when 20..50 ; :intermediate
|
126
|
-
else ; :low
|
127
|
-
end
|
128
|
-
d.save
|
129
|
-
end
|
130
|
-
else
|
131
|
-
stats = nil
|
132
|
-
end
|
133
|
-
unless stats.nil?
|
134
|
-
r[:stats] = stats
|
135
|
-
r.save
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
if o[:key].nil?
|
140
|
-
r[:stats].each do |k,v|
|
141
|
-
puts "#{k==:g_c_content ? "G+C content" : k.to_s.unmiga_name.capitalize}: #{
|
142
|
-
v.is_a?(Array) ? v.join(" ") : v}."
|
143
|
-
end
|
144
|
-
else
|
145
|
-
v = r[:stats][o[:key].downcase.miga_name.to_sym]
|
146
|
-
puts v.is_a?(Array) ? v.first : v
|
147
|
-
end
|
148
|
-
|
149
|
-
$stderr.puts "Done." unless o[:q]
|
data/actions/summary.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true, units: false, tabular: false}
|
7
|
-
opts = OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt])
|
10
|
-
opt_filter_datasets(opt, o)
|
11
|
-
opt_object(opt, o, [:result_dataset])
|
12
|
-
opt.on("--tab",
|
13
|
-
"Returns a tab-delimited table."){ |v| o[:tabular] = v }
|
14
|
-
opt.on("--key STRING",
|
15
|
-
"Returns only the value of the requested key."){ |v| o[:key_md] = v }
|
16
|
-
opt.on("--with-units",
|
17
|
-
"Includes units in each cell."){ |v| o[:units] = v }
|
18
|
-
opt_common(opt, o)
|
19
|
-
end.parse!
|
20
|
-
|
21
|
-
##=> Main <=
|
22
|
-
opts.parse!
|
23
|
-
opt_require(o, project:"-P", name:"-r")
|
24
|
-
|
25
|
-
$stderr.puts "Loading project." unless o[:q]
|
26
|
-
p = MiGA::Project.load(o[:project])
|
27
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
28
|
-
|
29
|
-
$stderr.puts "Listing datasets." unless o[:q]
|
30
|
-
if o[:dataset].nil?
|
31
|
-
ds = p.datasets
|
32
|
-
elsif MiGA::Dataset.exist? p, o[:dataset]
|
33
|
-
ds = [p.dataset(o[:dataset])]
|
34
|
-
else
|
35
|
-
ds = []
|
36
|
-
end
|
37
|
-
ds = filter_datasets!(ds, o)
|
38
|
-
|
39
|
-
$stderr.puts "Loading results." unless o[:q]
|
40
|
-
stats = ds.map do |d|
|
41
|
-
r = d.add_result(o[:name].to_sym, false)
|
42
|
-
s = r.nil? ? {} : r[:stats]
|
43
|
-
s.tap{ |i| i[:dataset] = d.name }
|
44
|
-
end
|
45
|
-
keys = o[:key_md].nil? ? stats.map(&:keys).flatten.uniq :
|
46
|
-
[:dataset, o[:key_md].downcase.miga_name.to_sym]
|
47
|
-
keys.delete :dataset
|
48
|
-
keys.unshift :dataset
|
49
|
-
|
50
|
-
table = o[:units] ?
|
51
|
-
stats.map{ |s| keys.map{ |k|
|
52
|
-
s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
|
53
|
-
stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
|
54
|
-
puts MiGA::MiGA.tabulate(keys, table, o[:tabular])
|
55
|
-
|
56
|
-
$stderr.puts "Done." unless o[:q]
|
57
|
-
|
data/actions/tax_dist.rb
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require 'miga/tax_index'
|
7
|
-
require 'zlib'
|
8
|
-
require 'tmpdir'
|
9
|
-
|
10
|
-
o = {q: true, format: :json}
|
11
|
-
OptionParser.new do |opt|
|
12
|
-
opt_banner(opt)
|
13
|
-
opt_object(opt, o, [:project])
|
14
|
-
opt_filter_datasets(opt, o)
|
15
|
-
opt.on('-i', '--index FILE',
|
16
|
-
'Pre-calculated tax-index (in tabular format) to be used.',
|
17
|
-
'If passed, dataset filtering arguments are ignored.'
|
18
|
-
){ |v| o[:index] = v }
|
19
|
-
opt_common(opt, o)
|
20
|
-
end.parse!
|
21
|
-
|
22
|
-
##=> Functions <=
|
23
|
-
# Returns the _canonical_ (order-independent) ID for the pair of strings +a+ and +b+.
|
24
|
-
def cannid(a, b) ; a > b ? "#{b}-#{a}" : "#{a}-#{b}" ; end # smaller string first, so (a, b) and (b, a) share one key
|
25
|
-
|
26
|
-
##=> Main <=
|
27
|
-
opt_require(o, project: '-P')
|
28
|
-
|
29
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
30
|
-
p = MiGA::Project.load(o[:project])
|
31
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
32
|
-
|
33
|
-
metric = p.is_clade? ? 'ani' : 'aai'
|
34
|
-
res_n = "#{metric}_distances"
|
35
|
-
$stderr.puts "Reading distances (1-#{metric.upcase})." unless o[:q]
|
36
|
-
res = p.result res_n
|
37
|
-
raise "#{res_n} not yet calculated." if res.nil?
|
38
|
-
matrix = res.file_path(:matrix)
|
39
|
-
raise "#{res_n} has no matrix." if matrix.nil?
|
40
|
-
dist = {}
|
41
|
-
mfh = matrix =~ /\.gz$/ ? Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
|
42
|
-
mfh.each_line do |ln|
|
43
|
-
next if mfh.lineno==1
|
44
|
-
row = ln.chomp.split("\t")
|
45
|
-
dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
|
46
|
-
$stderr.print(" Ln:#{mfh.lineno} \r") if !o[:q] and (mfh.lineno % 1_000) == 0
|
47
|
-
end
|
48
|
-
$stderr.puts " Lines: #{mfh.lineno}" unless o[:q]
|
49
|
-
mfh.close
|
50
|
-
|
51
|
-
Dir.mktmpdir do |dir|
|
52
|
-
if o[:index].nil?
|
53
|
-
$stderr.puts 'Loading datasets.' unless o[:q]
|
54
|
-
ds = p.datasets
|
55
|
-
ds.select!{ |d| not d.metadata[:tax].nil? }
|
56
|
-
ds = filter_datasets!(ds, o)
|
57
|
-
|
58
|
-
$stderr.puts 'Indexing taxonomy.' unless o[:q]
|
59
|
-
tax_index = MiGA::TaxIndex.new
|
60
|
-
ds.each { |d| tax_index << d }
|
61
|
-
tab = File.expand_path('index.tab', dir)
|
62
|
-
File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
|
63
|
-
else
|
64
|
-
tab = o[:index]
|
65
|
-
end
|
66
|
-
|
67
|
-
$stderr.puts 'Traversing taxonomy.' unless o[:q]
|
68
|
-
rank_i = 0
|
69
|
-
MiGA::Taxonomy.KNOWN_RANKS.each do |rank|
|
70
|
-
$stderr.print "o #{rank}: " unless o[:q]
|
71
|
-
rank_n = 0
|
72
|
-
rank_i += 1
|
73
|
-
in_rank = nil
|
74
|
-
ds_name = []
|
75
|
-
File.open(tab, 'r') do |fh|
|
76
|
-
fh.each_line do |ln|
|
77
|
-
if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
|
78
|
-
in_rank = nil
|
79
|
-
ds_name = []
|
80
|
-
elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
|
81
|
-
in_rank = $2 == '?' ? nil : $1
|
82
|
-
ds_name = []
|
83
|
-
elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
|
84
|
-
ds_i = $1
|
85
|
-
ds_name << ds_i
|
86
|
-
ds_name.each do |ds_j|
|
87
|
-
k = cannid(ds_i, ds_j)
|
88
|
-
next if dist[k].nil?
|
89
|
-
rank_n += 1
|
90
|
-
dist[k][3] = rank_i
|
91
|
-
dist[k][4].unshift in_rank
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
$stderr.puts "#{rank_n} pairs of datasets." unless o[:q]
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
$stderr.puts 'Generating report.' unless o[:q]
|
101
|
-
dist.keys.each do |k|
|
102
|
-
dist[k][5] = dist[k][4].reverse.join(' ')
|
103
|
-
dist[k][4] = dist[k][4].first
|
104
|
-
puts (k.split('-') + dist[k]).join("\t")
|
105
|
-
end
|
106
|
-
|