miga-base 0.4.1.0 → 0.4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/miga +2 -244
- data/lib/miga/cli/action/about.rb +44 -0
- data/lib/miga/cli/action/add.rb +139 -0
- data/lib/miga/cli/action/add_result.rb +26 -0
- data/lib/miga/cli/action/console.rb +19 -0
- data/lib/miga/cli/action/daemon.rb +74 -0
- data/lib/miga/cli/action/date.rb +18 -0
- data/lib/miga/cli/action/doctor.rb +210 -0
- data/lib/miga/cli/action/edit.rb +24 -0
- data/lib/miga/cli/action/files.rb +31 -0
- data/lib/miga/cli/action/find.rb +48 -0
- data/lib/miga/cli/action/generic.rb +44 -0
- data/lib/miga/cli/action/get.rb +132 -0
- data/lib/miga/cli/action/init.rb +343 -0
- data/lib/miga/cli/action/ln.rb +42 -0
- data/lib/miga/cli/action/ls.rb +55 -0
- data/lib/miga/cli/action/ncbi_get.rb +218 -0
- data/lib/miga/cli/action/new.rb +45 -0
- data/lib/miga/cli/action/next_step.rb +27 -0
- data/lib/miga/cli/action/plugins.rb +28 -0
- data/lib/miga/cli/action/rm.rb +25 -0
- data/lib/miga/cli/action/run.rb +39 -0
- data/lib/miga/cli/action/stats.rb +140 -0
- data/lib/miga/cli/action/summary.rb +49 -0
- data/lib/miga/cli/action/tax_dist.rb +102 -0
- data/lib/miga/cli/action/tax_index.rb +47 -0
- data/lib/miga/cli/action/tax_set.rb +59 -0
- data/lib/miga/cli/action/tax_test.rb +77 -0
- data/lib/miga/cli/action.rb +66 -0
- data/lib/miga/cli/base.rb +90 -0
- data/lib/miga/cli.rb +426 -0
- data/lib/miga/project/result.rb +14 -6
- data/lib/miga/remote_dataset.rb +1 -1
- data/lib/miga/tax_index.rb +5 -4
- data/lib/miga/taxonomy/base.rb +63 -0
- data/lib/miga/taxonomy.rb +87 -92
- data/lib/miga/version.rb +6 -6
- data/test/taxonomy_test.rb +49 -9
- data/utils/distance/commands.rb +11 -11
- data/utils/distance/pipeline.rb +5 -5
- metadata +43 -49
- data/actions/about.rb +0 -43
- data/actions/add.rb +0 -129
- data/actions/add_result.rb +0 -30
- data/actions/daemon.rb +0 -55
- data/actions/date.rb +0 -14
- data/actions/doctor.rb +0 -201
- data/actions/edit.rb +0 -33
- data/actions/files.rb +0 -43
- data/actions/find.rb +0 -41
- data/actions/get.rb +0 -105
- data/actions/init.rb +0 -301
- data/actions/ln.rb +0 -47
- data/actions/ls.rb +0 -61
- data/actions/ncbi_get.rb +0 -192
- data/actions/new.rb +0 -44
- data/actions/next_step.rb +0 -33
- data/actions/plugins.rb +0 -25
- data/actions/rm.rb +0 -29
- data/actions/run.rb +0 -45
- data/actions/stats.rb +0 -149
- data/actions/summary.rb +0 -57
- data/actions/tax_dist.rb +0 -106
- data/actions/tax_index.rb +0 -46
- data/actions/tax_set.rb +0 -63
- data/actions/tax_test.rb +0 -80
data/actions/ncbi_get.rb
DELETED
@@ -1,192 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require 'miga/remote_dataset'
|
7
|
-
require 'csv'
|
8
|
-
|
9
|
-
o = {q: true, query: false, unlink: false,
|
10
|
-
reference: false, legacy_name: false,
|
11
|
-
complete: false, chromosome: false,
|
12
|
-
scaffold: false, contig: false, add_version: true, dry: false,
|
13
|
-
get_md: false, only_md: false, save_every: 1}
|
14
|
-
OptionParser.new do |opt|
|
15
|
-
opt_banner(opt)
|
16
|
-
opt_object(opt, o, [:project])
|
17
|
-
opt.on('-T', '--taxon STRING',
|
18
|
-
'(Mandatory unless --reference) Taxon name (e.g., a species binomial).'
|
19
|
-
){ |v| o[:taxon]=v }
|
20
|
-
opt.on('--reference',
|
21
|
-
'Download all reference genomes (ignores any other status).'
|
22
|
-
){ |v| o[:reference]=v }
|
23
|
-
opt.on('--complete', 'Download complete genomes.'){ |v| o[:complete]=v }
|
24
|
-
opt.on('--chromosome',
|
25
|
-
'Download complete chromosomes.'){ |v| o[:chromosome]=v }
|
26
|
-
opt.on('--scaffold', 'Download genomes in scaffolds.'){ |v| o[:scaffold]=v }
|
27
|
-
opt.on('--contig', 'Download genomes in contigs.'){ |v| o[:contig]=v }
|
28
|
-
opt.on('--all', 'Download all genomes (in any status).') do
|
29
|
-
o[:complete] = true
|
30
|
-
o[:chromosome] = true
|
31
|
-
o[:scaffold] = true
|
32
|
-
o[:contig] = true
|
33
|
-
end
|
34
|
-
opt.on('--no-version-name',
|
35
|
-
'Do not add sequence version to the dataset name.',
|
36
|
-
'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
|
37
|
-
opt.on('--legacy-name',
|
38
|
-
'Use dataset names based on chromosome entries instead of assembly.'
|
39
|
-
){ |v| o[:legacy_name] = v }
|
40
|
-
opt.on('--blacklist PATH',
|
41
|
-
'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
|
42
|
-
opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
|
43
|
-
opt.on('--get-metadata',
|
44
|
-
'Only download and update metadata for existing datasets'
|
45
|
-
){ |v| o[:get_md] = v }
|
46
|
-
opt.on('--only-metadata',
|
47
|
-
'Create datasets without input data but retrieve all metadata.'
|
48
|
-
){ |v| o[:only_md] = v }
|
49
|
-
opt.on('--save-every INT',
|
50
|
-
'Save project every this many downloaded datasets.',
|
51
|
-
'If zero, it saves the project only once upon completion.',
|
52
|
-
'By default: 1.'){ |v| o[:save_every] = v.to_i }
|
53
|
-
opt.on('-q', '--query',
|
54
|
-
'Register the datasets as queries, not reference datasets.'
|
55
|
-
){ |v| o[:query]=v }
|
56
|
-
opt.on('-u', '--unlink',
|
57
|
-
'Unlink all datasets in the project missing from the download list.'
|
58
|
-
){ |v| o[:unlink]=v }
|
59
|
-
opt.on('-R', '--remote-list PATH',
|
60
|
-
'Path to an output file with the list of all datasets listed remotely.'
|
61
|
-
){ |v| o[:remote_list]=v }
|
62
|
-
opt.on('--api-key STRING', 'NCBI API key.'){ |v| ENV['NCBI_API_KEY'] = v }
|
63
|
-
opt_common(opt, o)
|
64
|
-
end.parse!
|
65
|
-
|
66
|
-
opt_require(o, project: '-P')
|
67
|
-
opt_require(o, taxon: '-T') unless o[:reference]
|
68
|
-
unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
|
69
|
-
raise 'No action requested. Pick at least one type of genome.'
|
70
|
-
end
|
71
|
-
o[:save_every] = 1 if o[:dry]
|
72
|
-
|
73
|
-
##=> Main <=
|
74
|
-
$stderr.puts "Loading project." unless o[:q]
|
75
|
-
p = MiGA::Project.load(o[:project])
|
76
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
77
|
-
d = []
|
78
|
-
ds = {}
|
79
|
-
downloaded = 0
|
80
|
-
|
81
|
-
url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
|
82
|
-
url_param = {
|
83
|
-
q: '[display()].' +
|
84
|
-
'from(GenomeAssemblies).' +
|
85
|
-
'usingschema(/schema/GenomeAssemblies).' +
|
86
|
-
'matching(tab==["Prokaryotes"] and q=="' + o[:taxon].tr('"',"'") + '"',
|
87
|
-
fields: 'organism|organism,assembly|assembly,replicons|replicons,' +
|
88
|
-
'level|level,ftp_path_genbank|ftp_path_genbank,release_date|release_date,' +
|
89
|
-
'strain|strain',
|
90
|
-
nolimit: 'on',
|
91
|
-
}
|
92
|
-
if o[:reference]
|
93
|
-
url_param[:q] += ' and refseq_category==["representative"]'
|
94
|
-
else
|
95
|
-
status = {
|
96
|
-
complete: 'Complete',
|
97
|
-
chromosome: ' Chromosome', # <- The leading space is *VERY* important!
|
98
|
-
scaffold: 'Scaffold',
|
99
|
-
contig: 'Contig'
|
100
|
-
}.map { |k, v| '"' + v + '"' if o[k] }.compact.join(',')
|
101
|
-
url_param[:q] += ' and level==[' + status + ']'
|
102
|
-
end
|
103
|
-
url_param[:q] += ')'
|
104
|
-
url = url_base + URI.encode_www_form(url_param)
|
105
|
-
$stderr.puts 'Downloading genome list' unless o[:q]
|
106
|
-
lineno = 0
|
107
|
-
doc = MiGA::RemoteDataset.download_url(url)
|
108
|
-
CSV.parse(doc, headers: true).each do |r|
|
109
|
-
asm = r['assembly']
|
110
|
-
next if asm.nil? or asm.empty? or asm == '-'
|
111
|
-
next unless r['ftp_path_genbank']
|
112
|
-
|
113
|
-
# Get replicons
|
114
|
-
rep = r['replicons'].nil? ? nil : r['replicons'].
|
115
|
-
split('; ').map{ |i| i.gsub(/.*:/,'') }.map{ |i| i.gsub(/\/.*/, '') }
|
116
|
-
|
117
|
-
# Set name
|
118
|
-
if o[:legacy_name] and o[:reference]
|
119
|
-
n = r['#organism'].miga_name
|
120
|
-
else
|
121
|
-
if o[:legacy_name] and ['Complete',' Chromosome'].include? r['level']
|
122
|
-
acc = rep.nil? ? '' : rep.first
|
123
|
-
else
|
124
|
-
acc = asm
|
125
|
-
end
|
126
|
-
acc.gsub!(/\.\d+\Z/, '') unless o[:add_version]
|
127
|
-
n = "#{r['#organism']}_#{acc}".miga_name
|
128
|
-
end
|
129
|
-
|
130
|
-
# Register for download
|
131
|
-
fna_url = r['ftp_path_genbank'] + '/' +
|
132
|
-
File.basename(r['ftp_path_genbank']) + '_genomic.fna.gz'
|
133
|
-
ds[n] = {
|
134
|
-
ids: [fna_url], db: :assembly_gz, universe: :web,
|
135
|
-
md: {
|
136
|
-
type: :genome, ncbi_asm: asm, strain: r['strain']
|
137
|
-
}
|
138
|
-
}
|
139
|
-
ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
|
140
|
-
ds[n][:md][:release_date] =
|
141
|
-
Time.parse(r['release_date']).to_s unless r['release_date'].nil?
|
142
|
-
end
|
143
|
-
|
144
|
-
# Discard blacklisted
|
145
|
-
unless o[:blacklist].nil?
|
146
|
-
$stderr.puts "Discarding datasets in #{o[:blacklist]}." unless o[:q]
|
147
|
-
File.readlines(o[:blacklist]).
|
148
|
-
select{ |i| i !~ /^#/ }.map(&:chomp).each{ |i| ds.delete i }
|
149
|
-
end
|
150
|
-
|
151
|
-
# Download entries
|
152
|
-
$stderr.puts "Downloading #{ds.size} " +
|
153
|
-
(ds.size == 1 ? 'entry' : 'entries') unless o[:q]
|
154
|
-
p.do_not_save = true if o[:save_every] != 1
|
155
|
-
ds.each do |name, body|
|
156
|
-
d << name
|
157
|
-
puts name
|
158
|
-
next if p.dataset(name).nil? == o[:get_md]
|
159
|
-
downloaded += 1
|
160
|
-
next if o[:dry]
|
161
|
-
$stderr.puts ' Locating remote dataset.' unless o[:q]
|
162
|
-
body[:md][:metadata_only] = true if o[:only_md]
|
163
|
-
rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
|
164
|
-
if o[:get_md]
|
165
|
-
$stderr.puts ' Updating dataset.' unless o[:q]
|
166
|
-
rd.update_metadata(p.dataset(name), body[:md])
|
167
|
-
else
|
168
|
-
$stderr.puts ' Creating dataset.' unless o[:q]
|
169
|
-
rd.save_to(p, name, !o[:query], body[:md])
|
170
|
-
p.add_dataset(name)
|
171
|
-
end
|
172
|
-
p.save! if o[:save_every] > 1 and (downloaded % o[:save_every]) == 0
|
173
|
-
end
|
174
|
-
|
175
|
-
p.do_not_save = false
|
176
|
-
p.save! if o[:save_every] != 1
|
177
|
-
|
178
|
-
# Finalize
|
179
|
-
$stderr.puts "Datasets listed: #{d.size}" unless o[:q]
|
180
|
-
$stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
|
181
|
-
downloaded.to_s unless o[:q]
|
182
|
-
unless o[:remote_list].nil?
|
183
|
-
File.open(o[:remote_list], 'w') do |fh|
|
184
|
-
d.each { |i| fh.puts i }
|
185
|
-
end
|
186
|
-
end
|
187
|
-
if o[:unlink]
|
188
|
-
unlink = p.dataset_names - d
|
189
|
-
unlink.each { |i| p.unlink_dataset(i).remove! }
|
190
|
-
$stderr.puts "Datasets unlinked: #{unlink.size}" unless o[:q]
|
191
|
-
end
|
192
|
-
|
data/actions/new.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true, update: false}
|
7
|
-
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :project_type_req])
|
10
|
-
opt.on('-n', '--name STRING', 'Name of the project.'){ |v| o[:name] = v }
|
11
|
-
opt.on('-d', '--description STRING',
|
12
|
-
'Description of the project.'){ |v| o[:description] = v }
|
13
|
-
opt.on('-c', '--comments STRING',
|
14
|
-
'Comments on the project.'){ |v| o[:comments] = v }
|
15
|
-
opt.on('-m', '--metadata STRING',
|
16
|
-
'Metadata as key-value pairs separated by = and delimited by comma.',
|
17
|
-
'Values are saved as strings except for booleans (true / false) or nil.'
|
18
|
-
){ |v| o[:metadata] = v }
|
19
|
-
opt.on('--update', 'Updates the project if it already exists.',
|
20
|
-
'Same as "miga edit".'){ o[:update] = true }
|
21
|
-
opt_common(opt, o)
|
22
|
-
end.parse!
|
23
|
-
|
24
|
-
##=> Main <=
|
25
|
-
opt_require(o, project: '-P')
|
26
|
-
opt_require_type(o, MiGA::Project) unless o[:update]
|
27
|
-
|
28
|
-
unless File.exist? "#{ENV["HOME"]}/.miga_rc" and
|
29
|
-
File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
|
30
|
-
raise "You must initialize MiGA before creating the first project.\n" +
|
31
|
-
'Please use "miga init".'
|
32
|
-
end
|
33
|
-
|
34
|
-
$stderr.puts 'Creating project.' unless o[:q]
|
35
|
-
raise 'Project already exists, aborting.' unless
|
36
|
-
o[:update] or not MiGA::Project.exist? o[:project]
|
37
|
-
p = MiGA::Project.new(o[:project], o[:update])
|
38
|
-
# The following check is redundant with MiGA::Project#create,
|
39
|
-
# but allows upgrading projects from (very) early code versions
|
40
|
-
o[:name] = File.basename(p.path) if o[:update] and o[:name].nil?
|
41
|
-
p = add_metadata(o, p)
|
42
|
-
p.save
|
43
|
-
|
44
|
-
$stderr.puts 'Done.' unless o[:q]
|
data/actions/next_step.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true}
|
7
|
-
opts = OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt])
|
10
|
-
opt_common(opt, o)
|
11
|
-
end.parse!
|
12
|
-
|
13
|
-
##=> Main <=
|
14
|
-
opts.parse!
|
15
|
-
opt_require(o, project: '-P')
|
16
|
-
|
17
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
18
|
-
p = MiGA::Project.load(o[:project])
|
19
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
20
|
-
|
21
|
-
n = nil
|
22
|
-
if not o[:dataset].nil?
|
23
|
-
$stderr.puts 'Loading dataset.' unless o[:q]
|
24
|
-
d = p.dataset o[:dataset]
|
25
|
-
raise "Impossible to load dataset: #{o[:dataset]}" if d.nil?
|
26
|
-
n = d.next_preprocessing if d.is_active?
|
27
|
-
else
|
28
|
-
n = p.next_distances(false)
|
29
|
-
n ||= p.next_inclade(false)
|
30
|
-
end
|
31
|
-
n ||= '?'
|
32
|
-
puts n
|
33
|
-
|
data/actions/plugins.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q:true, update:false}
|
7
|
-
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project])
|
10
|
-
opt.on("--install PATH",
|
11
|
-
"Installs the specified plugin in the project."){ |v| o[:install]=v }
|
12
|
-
opt.on("--uninstall PATH",
|
13
|
-
"Uninstalls the specified plugin from the project."){ |v| o[:uninstall]=v }
|
14
|
-
opt_common(opt, o)
|
15
|
-
end.parse!
|
16
|
-
|
17
|
-
##=> Main <=
|
18
|
-
opt_require(o, project:"-P")
|
19
|
-
|
20
|
-
p = MiGA::Project.new(o[:project], true)
|
21
|
-
p.install_plugin(o[:install]) unless o[:install].nil?
|
22
|
-
p.uninstall_plugin(o[:uninstall]) unless o[:uninstall].nil?
|
23
|
-
p.plugins.each { |i| puts i }
|
24
|
-
|
25
|
-
$stderr.puts "Done." unless o[:q]
|
data/actions/rm.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true, remove: false}
|
7
|
-
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o)
|
10
|
-
opt.on('-r', '--remove', 'Also remove all associated files.',
|
11
|
-
'By default, only unlinks from metadata.'){ o[:remove] = true }
|
12
|
-
opt_common(opt, o)
|
13
|
-
end.parse!
|
14
|
-
|
15
|
-
##=> Main <=
|
16
|
-
opt_require(o)
|
17
|
-
|
18
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
19
|
-
p = MiGA::Project.load(o[:project])
|
20
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
21
|
-
|
22
|
-
$stderr.puts 'Unlinking dataset.' unless o[:q]
|
23
|
-
raise 'Dataset doesn\'t exist, aborting.' unless
|
24
|
-
MiGA::Dataset.exist?(p, o[:dataset])
|
25
|
-
d = p.unlink_dataset(o[:dataset])
|
26
|
-
d.remove! if o[:remove]
|
27
|
-
|
28
|
-
$stderr.puts 'Done.' unless o[:q]
|
29
|
-
|
data/actions/run.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require 'shellwords'
|
7
|
-
|
8
|
-
o = {q: true, try_load: false, thr: 1}
|
9
|
-
OptionParser.new do |opt|
|
10
|
-
opt_banner(opt)
|
11
|
-
opt_object(opt, o, [:project, :dataset_opt, :result])
|
12
|
-
opt.on('-t', '--threads INT',
|
13
|
-
"Threads to use in the local run (by default: #{o[:thr]})."
|
14
|
-
){ |v| o[:thr] = v.to_i }
|
15
|
-
opt_common(opt, o)
|
16
|
-
end.parse!
|
17
|
-
|
18
|
-
##=> Main <=
|
19
|
-
opt_require(o, project: '-P', name: '-r')
|
20
|
-
|
21
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
22
|
-
p = MiGA::Project.load(o[:project])
|
23
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
24
|
-
|
25
|
-
virtual_task = false
|
26
|
-
miga = MiGA::MiGA.root_path
|
27
|
-
cmd = ["PROJECT=#{p.path.shellescape}", 'RUNTYPE=bash',
|
28
|
-
"MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
|
29
|
-
if o[:dataset].nil?
|
30
|
-
type = MiGA::Project
|
31
|
-
virtual_task = true if o[:name] == :p
|
32
|
-
else
|
33
|
-
d = p.dataset(o[:dataset])
|
34
|
-
raise 'Cannot load dataset.' if d.nil?
|
35
|
-
cmd << "DATASET=#{d.name.shellescape}"
|
36
|
-
type = MiGA::Dataset
|
37
|
-
virtual_task = true if o[:name] == :d
|
38
|
-
end
|
39
|
-
raise "Unsupported #{type.to_s.gsub(/.*::/, '')} result: #{o[:name]}." if
|
40
|
-
type.RESULT_DIRS[o[:name].to_sym].nil? and not virtual_task
|
41
|
-
cmd << MiGA::MiGA.script_path(o[:name], miga: miga, project: p).shellescape
|
42
|
-
pid = spawn cmd.join(' ')
|
43
|
-
Process.wait pid
|
44
|
-
|
45
|
-
$stderr.puts 'Done.' unless o[:q]
|
data/actions/stats.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q:true, try_load:false}
|
7
|
-
opts = OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt, :result])
|
10
|
-
opt.on("--key STRING",
|
11
|
-
"Returns only the value of the requested key."){ |v| o[:key] = v }
|
12
|
-
opt.on("--compute-and-save",
|
13
|
-
"Computes and saves the statistics."){ |v| o[:compute] = v }
|
14
|
-
opt.on("--try-load",
|
15
|
-
"Checks if stat exists instead of computing on --compute-and-save."
|
16
|
-
){ |v| o[:try_load] = v }
|
17
|
-
opt_common(opt, o)
|
18
|
-
end.parse!
|
19
|
-
|
20
|
-
##=> Main <=
|
21
|
-
opts.parse!
|
22
|
-
opt_require(o, project:"-P", name:"-r")
|
23
|
-
|
24
|
-
$stderr.puts "Loading project." unless o[:q]
|
25
|
-
p = MiGA::Project.load(o[:project])
|
26
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
27
|
-
|
28
|
-
$stderr.puts "Loading result." unless o[:q]
|
29
|
-
d = nil
|
30
|
-
if o[:dataset].nil?
|
31
|
-
r = p.add_result(o[:name], false)
|
32
|
-
else
|
33
|
-
d = p.dataset(o[:dataset])
|
34
|
-
r = d.add_result(o[:name], false)
|
35
|
-
end
|
36
|
-
raise "Cannot load result." if r.nil?
|
37
|
-
|
38
|
-
o[:compute] = false if o[:try_load] and
|
39
|
-
(not r[:stats].nil?) and (not r[:stats].empty?)
|
40
|
-
|
41
|
-
if o[:compute]
|
42
|
-
$stderr.puts "Computing statistics." unless o[:q]
|
43
|
-
stats = {}
|
44
|
-
case o[:name]
|
45
|
-
when :raw_reads
|
46
|
-
if r[:files][:pair1].nil?
|
47
|
-
s = MiGA::MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
|
48
|
-
stats = {
|
49
|
-
reads: s[:n],
|
50
|
-
length_average: [s[:avg], "bp"],
|
51
|
-
length_standard_deviation: [s[:sd], "bp"],
|
52
|
-
g_c_content: [s[:gc], "%"]}
|
53
|
-
else
|
54
|
-
s1 = MiGA::MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
|
55
|
-
s2 = MiGA::MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
|
56
|
-
stats = {
|
57
|
-
read_pairs: s1[:n],
|
58
|
-
forward_length_average: [s1[:avg], "bp"],
|
59
|
-
forward_length_standard_deviation: [s1[:sd], "bp"],
|
60
|
-
forward_g_c_content: [s1[:gc], "%"],
|
61
|
-
reverse_length_average: [s2[:avg], "bp"],
|
62
|
-
reverse_length_standard_deviation: [s2[:sd], "bp"],
|
63
|
-
reverse_g_c_content: [s2[:gc], "%"]}
|
64
|
-
end
|
65
|
-
when :trimmed_fasta
|
66
|
-
f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
|
67
|
-
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
|
68
|
-
stats = {
|
69
|
-
reads: s[:n],
|
70
|
-
length_average: [s[:avg], "bp"],
|
71
|
-
length_standard_deviation: [s[:sd], "bp"],
|
72
|
-
g_c_content: [s[:gc], "%"]}
|
73
|
-
when :assembly
|
74
|
-
s = MiGA::MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
|
75
|
-
n50: true, gc: true)
|
76
|
-
stats = {
|
77
|
-
contigs: s[:n],
|
78
|
-
n50: [s[:n50], "bp"],
|
79
|
-
total_length: [s[:tot], "bp"],
|
80
|
-
g_c_content: [s[:gc], "%"]}
|
81
|
-
when :cds
|
82
|
-
s = MiGA::MiGA.seqs_length(r.file_path(:proteins), :fasta)
|
83
|
-
stats = {
|
84
|
-
predicted_proteins: s[:n],
|
85
|
-
average_length: [s[:avg], "aa"]}
|
86
|
-
asm = d.add_result(:assembly, false)
|
87
|
-
unless asm.nil? or asm[:stats][:total_length].nil?
|
88
|
-
stats[:coding_density] =
|
89
|
-
[300.0 * s[:tot] / asm[:stats][:total_length][0], "%"]
|
90
|
-
end
|
91
|
-
when :essential_genes
|
92
|
-
if d.is_multi?
|
93
|
-
stats = {median_copies:0, mean_copies:0}
|
94
|
-
File.open(r.file_path(:report), "r") do |fh|
|
95
|
-
fh.each_line do |ln|
|
96
|
-
if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
|
97
|
-
stats["#{$1.downcase}_copies".to_sym] = $2.to_f
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
else
|
102
|
-
# Fix estimate for Archaea
|
103
|
-
if not d.metadata[:tax].nil? and
|
104
|
-
d.metadata[:tax].is_in? MiGA::Taxonomy.new("d:Archaea") and
|
105
|
-
r.file_path(:bac_report).nil?
|
106
|
-
scr = "#{MiGA::MiGA.root_path}/utils/arch-ess-genes.rb"
|
107
|
-
rep = r.file_path(:report)
|
108
|
-
$stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
|
109
|
-
r.add_file(:bac_report, "#{d.name}.ess/log")
|
110
|
-
r.add_file(:report, "#{d.name}.ess/log.archaea")
|
111
|
-
end
|
112
|
-
# Extract/compute quality values
|
113
|
-
stats = {completeness: [0.0,"%"], contamination: [0.0,"%"]}
|
114
|
-
File.open(r.file_path(:report), "r") do |fh|
|
115
|
-
fh.each_line do |ln|
|
116
|
-
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
117
|
-
stats[$1.downcase.to_sym][0] = $2.to_f
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
122
|
-
d.metadata[:quality] = case stats[:quality]
|
123
|
-
when 80..100 ; :excellent
|
124
|
-
when 50..80 ; :high
|
125
|
-
when 20..50 ; :intermediate
|
126
|
-
else ; :low
|
127
|
-
end
|
128
|
-
d.save
|
129
|
-
end
|
130
|
-
else
|
131
|
-
stats = nil
|
132
|
-
end
|
133
|
-
unless stats.nil?
|
134
|
-
r[:stats] = stats
|
135
|
-
r.save
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
if o[:key].nil?
|
140
|
-
r[:stats].each do |k,v|
|
141
|
-
puts "#{k==:g_c_content ? "G+C content" : k.to_s.unmiga_name.capitalize}: #{
|
142
|
-
v.is_a?(Array) ? v.join(" ") : v}."
|
143
|
-
end
|
144
|
-
else
|
145
|
-
v = r[:stats][o[:key].downcase.miga_name.to_sym]
|
146
|
-
puts v.is_a?(Array) ? v.first : v
|
147
|
-
end
|
148
|
-
|
149
|
-
$stderr.puts "Done." unless o[:q]
|
data/actions/summary.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
o = {q: true, units: false, tabular: false}
|
7
|
-
opts = OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt])
|
10
|
-
opt_filter_datasets(opt, o)
|
11
|
-
opt_object(opt, o, [:result_dataset])
|
12
|
-
opt.on("--tab",
|
13
|
-
"Returns a tab-delimited table."){ |v| o[:tabular] = v }
|
14
|
-
opt.on("--key STRING",
|
15
|
-
"Returns only the value of the requested key."){ |v| o[:key_md] = v }
|
16
|
-
opt.on("--with-units",
|
17
|
-
"Includes units in each cell."){ |v| o[:units] = v }
|
18
|
-
opt_common(opt, o)
|
19
|
-
end.parse!
|
20
|
-
|
21
|
-
##=> Main <=
|
22
|
-
opts.parse!
|
23
|
-
opt_require(o, project:"-P", name:"-r")
|
24
|
-
|
25
|
-
$stderr.puts "Loading project." unless o[:q]
|
26
|
-
p = MiGA::Project.load(o[:project])
|
27
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
28
|
-
|
29
|
-
$stderr.puts "Listing datasets." unless o[:q]
|
30
|
-
if o[:dataset].nil?
|
31
|
-
ds = p.datasets
|
32
|
-
elsif MiGA::Dataset.exist? p, o[:dataset]
|
33
|
-
ds = [p.dataset(o[:dataset])]
|
34
|
-
else
|
35
|
-
ds = []
|
36
|
-
end
|
37
|
-
ds = filter_datasets!(ds, o)
|
38
|
-
|
39
|
-
$stderr.puts "Loading results." unless o[:q]
|
40
|
-
stats = ds.map do |d|
|
41
|
-
r = d.add_result(o[:name].to_sym, false)
|
42
|
-
s = r.nil? ? {} : r[:stats]
|
43
|
-
s.tap{ |i| i[:dataset] = d.name }
|
44
|
-
end
|
45
|
-
keys = o[:key_md].nil? ? stats.map(&:keys).flatten.uniq :
|
46
|
-
[:dataset, o[:key_md].downcase.miga_name.to_sym]
|
47
|
-
keys.delete :dataset
|
48
|
-
keys.unshift :dataset
|
49
|
-
|
50
|
-
table = o[:units] ?
|
51
|
-
stats.map{ |s| keys.map{ |k|
|
52
|
-
s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
|
53
|
-
stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
|
54
|
-
puts MiGA::MiGA.tabulate(keys, table, o[:tabular])
|
55
|
-
|
56
|
-
$stderr.puts "Done." unless o[:q]
|
57
|
-
|
data/actions/tax_dist.rb
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @package MiGA
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require 'miga/tax_index'
|
7
|
-
require 'zlib'
|
8
|
-
require 'tmpdir'
|
9
|
-
|
10
|
-
o = {q: true, format: :json}
|
11
|
-
OptionParser.new do |opt|
|
12
|
-
opt_banner(opt)
|
13
|
-
opt_object(opt, o, [:project])
|
14
|
-
opt_filter_datasets(opt, o)
|
15
|
-
opt.on('-i', '--index FILE',
|
16
|
-
'Pre-calculated tax-index (in tabular format) to be used.',
|
17
|
-
'If passed, dataset filtering arguments are ignored.'
|
18
|
-
){ |v| o[:index] = v }
|
19
|
-
opt_common(opt, o)
|
20
|
-
end.parse!
|
21
|
-
|
22
|
-
##=> Functions <=
|
23
|
-
# Returns the _cannonical_ ID between strings +a+ and +b+.
|
24
|
-
def cannid(a, b) ; (a > b ? [b, a] : [a, b]).join('-') ; end
|
25
|
-
|
26
|
-
##=> Main <=
|
27
|
-
opt_require(o, project: '-P')
|
28
|
-
|
29
|
-
$stderr.puts 'Loading project.' unless o[:q]
|
30
|
-
p = MiGA::Project.load(o[:project])
|
31
|
-
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
32
|
-
|
33
|
-
metric = p.is_clade? ? 'ani' : 'aai'
|
34
|
-
res_n = "#{metric}_distances"
|
35
|
-
$stderr.puts "Reading distances (1-#{metric.upcase})." unless o[:q]
|
36
|
-
res = p.result res_n
|
37
|
-
raise "#{res_n} not yet calculated." if res.nil?
|
38
|
-
matrix = res.file_path(:matrix)
|
39
|
-
raise "#{res_n} has no matrix." if matrix.nil?
|
40
|
-
dist = {}
|
41
|
-
mfh = matrix =~ /\.gz$/ ? Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
|
42
|
-
mfh.each_line do |ln|
|
43
|
-
next if mfh.lineno==1
|
44
|
-
row = ln.chomp.split("\t")
|
45
|
-
dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
|
46
|
-
$stderr.print(" Ln:#{mfh.lineno} \r") if !o[:q] and (mfh.lineno % 1_000) == 0
|
47
|
-
end
|
48
|
-
$stderr.puts " Lines: #{mfh.lineno}" unless o[:q]
|
49
|
-
mfh.close
|
50
|
-
|
51
|
-
Dir.mktmpdir do |dir|
|
52
|
-
if o[:index].nil?
|
53
|
-
$stderr.puts 'Loading datasets.' unless o[:q]
|
54
|
-
ds = p.datasets
|
55
|
-
ds.select!{ |d| not d.metadata[:tax].nil? }
|
56
|
-
ds = filter_datasets!(ds, o)
|
57
|
-
|
58
|
-
$stderr.puts 'Indexing taxonomy.' unless o[:q]
|
59
|
-
tax_index = MiGA::TaxIndex.new
|
60
|
-
ds.each { |d| tax_index << d }
|
61
|
-
tab = File.expand_path('index.tab', dir)
|
62
|
-
File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
|
63
|
-
else
|
64
|
-
tab = o[:index]
|
65
|
-
end
|
66
|
-
|
67
|
-
$stderr.puts 'Traversing taxonomy.' unless o[:q]
|
68
|
-
rank_i = 0
|
69
|
-
MiGA::Taxonomy.KNOWN_RANKS.each do |rank|
|
70
|
-
$stderr.print "o #{rank}: " unless o[:q]
|
71
|
-
rank_n = 0
|
72
|
-
rank_i += 1
|
73
|
-
in_rank = nil
|
74
|
-
ds_name = []
|
75
|
-
File.open(tab, 'r') do |fh|
|
76
|
-
fh.each_line do |ln|
|
77
|
-
if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
|
78
|
-
in_rank = nil
|
79
|
-
ds_name = []
|
80
|
-
elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
|
81
|
-
in_rank = $2 == '?' ? nil : $1
|
82
|
-
ds_name = []
|
83
|
-
elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
|
84
|
-
ds_i = $1
|
85
|
-
ds_name << ds_i
|
86
|
-
ds_name.each do |ds_j|
|
87
|
-
k = cannid(ds_i, ds_j)
|
88
|
-
next if dist[k].nil?
|
89
|
-
rank_n += 1
|
90
|
-
dist[k][3] = rank_i
|
91
|
-
dist[k][4].unshift in_rank
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
$stderr.puts "#{rank_n} pairs of datasets." unless o[:q]
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
$stderr.puts 'Generating report.' unless o[:q]
|
101
|
-
dist.keys.each do |k|
|
102
|
-
dist[k][5] = dist[k][4].reverse.join(' ')
|
103
|
-
dist[k][4] = dist[k][4].first
|
104
|
-
puts (k.split('-') + dist[k]).join("\t")
|
105
|
-
end
|
106
|
-
|