miga-base 0.4.1.0 → 0.4.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/bin/miga +2 -244
  3. data/lib/miga/cli/action/about.rb +44 -0
  4. data/lib/miga/cli/action/add.rb +139 -0
  5. data/lib/miga/cli/action/add_result.rb +26 -0
  6. data/lib/miga/cli/action/console.rb +19 -0
  7. data/lib/miga/cli/action/daemon.rb +74 -0
  8. data/lib/miga/cli/action/date.rb +18 -0
  9. data/lib/miga/cli/action/doctor.rb +210 -0
  10. data/lib/miga/cli/action/edit.rb +24 -0
  11. data/lib/miga/cli/action/files.rb +31 -0
  12. data/lib/miga/cli/action/find.rb +48 -0
  13. data/lib/miga/cli/action/generic.rb +44 -0
  14. data/lib/miga/cli/action/get.rb +132 -0
  15. data/lib/miga/cli/action/init.rb +343 -0
  16. data/lib/miga/cli/action/ln.rb +42 -0
  17. data/lib/miga/cli/action/ls.rb +55 -0
  18. data/lib/miga/cli/action/ncbi_get.rb +218 -0
  19. data/lib/miga/cli/action/new.rb +45 -0
  20. data/lib/miga/cli/action/next_step.rb +27 -0
  21. data/lib/miga/cli/action/plugins.rb +28 -0
  22. data/lib/miga/cli/action/rm.rb +25 -0
  23. data/lib/miga/cli/action/run.rb +39 -0
  24. data/lib/miga/cli/action/stats.rb +140 -0
  25. data/lib/miga/cli/action/summary.rb +49 -0
  26. data/lib/miga/cli/action/tax_dist.rb +102 -0
  27. data/lib/miga/cli/action/tax_index.rb +47 -0
  28. data/lib/miga/cli/action/tax_set.rb +59 -0
  29. data/lib/miga/cli/action/tax_test.rb +77 -0
  30. data/lib/miga/cli/action.rb +66 -0
  31. data/lib/miga/cli/base.rb +90 -0
  32. data/lib/miga/cli.rb +426 -0
  33. data/lib/miga/project/result.rb +14 -6
  34. data/lib/miga/remote_dataset.rb +1 -1
  35. data/lib/miga/tax_index.rb +5 -4
  36. data/lib/miga/taxonomy/base.rb +63 -0
  37. data/lib/miga/taxonomy.rb +87 -92
  38. data/lib/miga/version.rb +6 -6
  39. data/test/taxonomy_test.rb +49 -9
  40. data/utils/distance/commands.rb +11 -11
  41. data/utils/distance/pipeline.rb +5 -5
  42. metadata +43 -49
  43. data/actions/about.rb +0 -43
  44. data/actions/add.rb +0 -129
  45. data/actions/add_result.rb +0 -30
  46. data/actions/daemon.rb +0 -55
  47. data/actions/date.rb +0 -14
  48. data/actions/doctor.rb +0 -201
  49. data/actions/edit.rb +0 -33
  50. data/actions/files.rb +0 -43
  51. data/actions/find.rb +0 -41
  52. data/actions/get.rb +0 -105
  53. data/actions/init.rb +0 -301
  54. data/actions/ln.rb +0 -47
  55. data/actions/ls.rb +0 -61
  56. data/actions/ncbi_get.rb +0 -192
  57. data/actions/new.rb +0 -44
  58. data/actions/next_step.rb +0 -33
  59. data/actions/plugins.rb +0 -25
  60. data/actions/rm.rb +0 -29
  61. data/actions/run.rb +0 -45
  62. data/actions/stats.rb +0 -149
  63. data/actions/summary.rb +0 -57
  64. data/actions/tax_dist.rb +0 -106
  65. data/actions/tax_index.rb +0 -46
  66. data/actions/tax_set.rb +0 -63
  67. data/actions/tax_test.rb +0 -80
data/actions/ncbi_get.rb DELETED
@@ -1,192 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'miga/remote_dataset'
7
- require 'csv'
8
-
9
- o = {q: true, query: false, unlink: false,
10
- reference: false, legacy_name: false,
11
- complete: false, chromosome: false,
12
- scaffold: false, contig: false, add_version: true, dry: false,
13
- get_md: false, only_md: false, save_every: 1}
14
- OptionParser.new do |opt|
15
- opt_banner(opt)
16
- opt_object(opt, o, [:project])
17
- opt.on('-T', '--taxon STRING',
18
- '(Mandatory unless --reference) Taxon name (e.g., a species binomial).'
19
- ){ |v| o[:taxon]=v }
20
- opt.on('--reference',
21
- 'Download all reference genomes (ignores any other status).'
22
- ){ |v| o[:reference]=v }
23
- opt.on('--complete', 'Download complete genomes.'){ |v| o[:complete]=v }
24
- opt.on('--chromosome',
25
- 'Download complete chromosomes.'){ |v| o[:chromosome]=v }
26
- opt.on('--scaffold', 'Download genomes in scaffolds.'){ |v| o[:scaffold]=v }
27
- opt.on('--contig', 'Download genomes in contigs.'){ |v| o[:contig]=v }
28
- opt.on('--all', 'Download all genomes (in any status).') do
29
- o[:complete] = true
30
- o[:chromosome] = true
31
- o[:scaffold] = true
32
- o[:contig] = true
33
- end
34
- opt.on('--no-version-name',
35
- 'Do not add sequence version to the dataset name.',
36
- 'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
37
- opt.on('--legacy-name',
38
- 'Use dataset names based on chromosome entries instead of assembly.'
39
- ){ |v| o[:legacy_name] = v }
40
- opt.on('--blacklist PATH',
41
- 'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
42
- opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
43
- opt.on('--get-metadata',
44
- 'Only download and update metadata for existing datasets'
45
- ){ |v| o[:get_md] = v }
46
- opt.on('--only-metadata',
47
- 'Create datasets without input data but retrieve all metadata.'
48
- ){ |v| o[:only_md] = v }
49
- opt.on('--save-every INT',
50
- 'Save project every this many downloaded datasets.',
51
- 'If zero, it saves the project only once upon completion.',
52
- 'By default: 1.'){ |v| o[:save_every] = v.to_i }
53
- opt.on('-q', '--query',
54
- 'Register the datasets as queries, not reference datasets.'
55
- ){ |v| o[:query]=v }
56
- opt.on('-u', '--unlink',
57
- 'Unlink all datasets in the project missing from the download list.'
58
- ){ |v| o[:unlink]=v }
59
- opt.on('-R', '--remote-list PATH',
60
- 'Path to an output file with the list of all datasets listed remotely.'
61
- ){ |v| o[:remote_list]=v }
62
- opt.on('--api-key STRING', 'NCBI API key.'){ |v| ENV['NCBI_API_KEY'] = v }
63
- opt_common(opt, o)
64
- end.parse!
65
-
66
- opt_require(o, project: '-P')
67
- opt_require(o, taxon: '-T') unless o[:reference]
68
- unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
69
- raise 'No action requested. Pick at least one type of genome.'
70
- end
71
- o[:save_every] = 1 if o[:dry]
72
-
73
- ##=> Main <=
74
- $stderr.puts "Loading project." unless o[:q]
75
- p = MiGA::Project.load(o[:project])
76
- raise "Impossible to load project: #{o[:project]}" if p.nil?
77
- d = []
78
- ds = {}
79
- downloaded = 0
80
-
81
- url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
82
- url_param = {
83
- q: '[display()].' +
84
- 'from(GenomeAssemblies).' +
85
- 'usingschema(/schema/GenomeAssemblies).' +
86
- 'matching(tab==["Prokaryotes"] and q=="' + o[:taxon].tr('"',"'") + '"',
87
- fields: 'organism|organism,assembly|assembly,replicons|replicons,' +
88
- 'level|level,ftp_path_genbank|ftp_path_genbank,release_date|release_date,' +
89
- 'strain|strain',
90
- nolimit: 'on',
91
- }
92
- if o[:reference]
93
- url_param[:q] += ' and refseq_category==["representative"]'
94
- else
95
- status = {
96
- complete: 'Complete',
97
- chromosome: ' Chromosome', # <- The leading space is *VERY* important!
98
- scaffold: 'Scaffold',
99
- contig: 'Contig'
100
- }.map { |k, v| '"' + v + '"' if o[k] }.compact.join(',')
101
- url_param[:q] += ' and level==[' + status + ']'
102
- end
103
- url_param[:q] += ')'
104
- url = url_base + URI.encode_www_form(url_param)
105
- $stderr.puts 'Downloading genome list' unless o[:q]
106
- lineno = 0
107
- doc = MiGA::RemoteDataset.download_url(url)
108
- CSV.parse(doc, headers: true).each do |r|
109
- asm = r['assembly']
110
- next if asm.nil? or asm.empty? or asm == '-'
111
- next unless r['ftp_path_genbank']
112
-
113
- # Get replicons
114
- rep = r['replicons'].nil? ? nil : r['replicons'].
115
- split('; ').map{ |i| i.gsub(/.*:/,'') }.map{ |i| i.gsub(/\/.*/, '') }
116
-
117
- # Set name
118
- if o[:legacy_name] and o[:reference]
119
- n = r['#organism'].miga_name
120
- else
121
- if o[:legacy_name] and ['Complete',' Chromosome'].include? r['level']
122
- acc = rep.nil? ? '' : rep.first
123
- else
124
- acc = asm
125
- end
126
- acc.gsub!(/\.\d+\Z/, '') unless o[:add_version]
127
- n = "#{r['#organism']}_#{acc}".miga_name
128
- end
129
-
130
- # Register for download
131
- fna_url = r['ftp_path_genbank'] + '/' +
132
- File.basename(r['ftp_path_genbank']) + '_genomic.fna.gz'
133
- ds[n] = {
134
- ids: [fna_url], db: :assembly_gz, universe: :web,
135
- md: {
136
- type: :genome, ncbi_asm: asm, strain: r['strain']
137
- }
138
- }
139
- ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
140
- ds[n][:md][:release_date] =
141
- Time.parse(r['release_date']).to_s unless r['release_date'].nil?
142
- end
143
-
144
- # Discard blacklisted
145
- unless o[:blacklist].nil?
146
- $stderr.puts "Discarding datasets in #{o[:blacklist]}." unless o[:q]
147
- File.readlines(o[:blacklist]).
148
- select{ |i| i !~ /^#/ }.map(&:chomp).each{ |i| ds.delete i }
149
- end
150
-
151
- # Download entries
152
- $stderr.puts "Downloading #{ds.size} " +
153
- (ds.size == 1 ? 'entry' : 'entries') unless o[:q]
154
- p.do_not_save = true if o[:save_every] != 1
155
- ds.each do |name, body|
156
- d << name
157
- puts name
158
- next if p.dataset(name).nil? == o[:get_md]
159
- downloaded += 1
160
- next if o[:dry]
161
- $stderr.puts ' Locating remote dataset.' unless o[:q]
162
- body[:md][:metadata_only] = true if o[:only_md]
163
- rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
164
- if o[:get_md]
165
- $stderr.puts ' Updating dataset.' unless o[:q]
166
- rd.update_metadata(p.dataset(name), body[:md])
167
- else
168
- $stderr.puts ' Creating dataset.' unless o[:q]
169
- rd.save_to(p, name, !o[:query], body[:md])
170
- p.add_dataset(name)
171
- end
172
- p.save! if o[:save_every] > 1 and (downloaded % o[:save_every]) == 0
173
- end
174
-
175
- p.do_not_save = false
176
- p.save! if o[:save_every] != 1
177
-
178
- # Finalize
179
- $stderr.puts "Datasets listed: #{d.size}" unless o[:q]
180
- $stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
181
- downloaded.to_s unless o[:q]
182
- unless o[:remote_list].nil?
183
- File.open(o[:remote_list], 'w') do |fh|
184
- d.each { |i| fh.puts i }
185
- end
186
- end
187
- if o[:unlink]
188
- unlink = p.dataset_names - d
189
- unlink.each { |i| p.unlink_dataset(i).remove! }
190
- $stderr.puts "Datasets unlinked: #{unlink.size}" unless o[:q]
191
- end
192
-
data/actions/new.rb DELETED
@@ -1,44 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true, update: false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :project_type_req])
10
- opt.on('-n', '--name STRING', 'Name of the project.'){ |v| o[:name] = v }
11
- opt.on('-d', '--description STRING',
12
- 'Description of the project.'){ |v| o[:description] = v }
13
- opt.on('-c', '--comments STRING',
14
- 'Comments on the project.'){ |v| o[:comments] = v }
15
- opt.on('-m', '--metadata STRING',
16
- 'Metadata as key-value pairs separated by = and delimited by comma.',
17
- 'Values are saved as strings except for booleans (true / false) or nil.'
18
- ){ |v| o[:metadata] = v }
19
- opt.on('--update', 'Updates the project if it already exists.',
20
- 'Same as "miga edit".'){ o[:update] = true }
21
- opt_common(opt, o)
22
- end.parse!
23
-
24
- ##=> Main <=
25
- opt_require(o, project: '-P')
26
- opt_require_type(o, MiGA::Project) unless o[:update]
27
-
28
- unless File.exist? "#{ENV["HOME"]}/.miga_rc" and
29
- File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
30
- raise "You must initialize MiGA before creating the first project.\n" +
31
- 'Please use "miga init".'
32
- end
33
-
34
- $stderr.puts 'Creating project.' unless o[:q]
35
- raise 'Project already exists, aborting.' unless
36
- o[:update] or not MiGA::Project.exist? o[:project]
37
- p = MiGA::Project.new(o[:project], o[:update])
38
- # The following check is redundant with MiGA::Project#create,
39
- # but allows upgrading projects from (very) early code versions
40
- o[:name] = File.basename(p.path) if o[:update] and o[:name].nil?
41
- p = add_metadata(o, p)
42
- p.save
43
-
44
- $stderr.puts 'Done.' unless o[:q]
data/actions/next_step.rb DELETED
@@ -1,33 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt_common(opt, o)
11
- end.parse!
12
-
13
- ##=> Main <=
14
- opts.parse!
15
- opt_require(o, project: '-P')
16
-
17
- $stderr.puts 'Loading project.' unless o[:q]
18
- p = MiGA::Project.load(o[:project])
19
- raise "Impossible to load project: #{o[:project]}" if p.nil?
20
-
21
- n = nil
22
- if not o[:dataset].nil?
23
- $stderr.puts 'Loading dataset.' unless o[:q]
24
- d = p.dataset o[:dataset]
25
- raise "Impossible to load dataset: #{o[:dataset]}" if d.nil?
26
- n = d.next_preprocessing if d.is_active?
27
- else
28
- n = p.next_distances(false)
29
- n ||= p.next_inclade(false)
30
- end
31
- n ||= '?'
32
- puts n
33
-
data/actions/plugins.rb DELETED
@@ -1,25 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, update:false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project])
10
- opt.on("--install PATH",
11
- "Installs the specified plugin in the project."){ |v| o[:install]=v }
12
- opt.on("--uninstall PATH",
13
- "Uninstalls the specified plugin from the project."){ |v| o[:uninstall]=v }
14
- opt_common(opt, o)
15
- end.parse!
16
-
17
- ##=> Main <=
18
- opt_require(o, project:"-P")
19
-
20
- p = MiGA::Project.new(o[:project], true)
21
- p.install_plugin(o[:install]) unless o[:install].nil?
22
- p.uninstall_plugin(o[:uninstall]) unless o[:uninstall].nil?
23
- p.plugins.each { |i| puts i }
24
-
25
- $stderr.puts "Done." unless o[:q]
data/actions/rm.rb DELETED
@@ -1,29 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true, remove: false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o)
10
- opt.on('-r', '--remove', 'Also remove all associated files.',
11
- 'By default, only unlinks from metadata.'){ o[:remove] = true }
12
- opt_common(opt, o)
13
- end.parse!
14
-
15
- ##=> Main <=
16
- opt_require(o)
17
-
18
- $stderr.puts 'Loading project.' unless o[:q]
19
- p = MiGA::Project.load(o[:project])
20
- raise "Impossible to load project: #{o[:project]}" if p.nil?
21
-
22
- $stderr.puts 'Unlinking dataset.' unless o[:q]
23
- raise 'Dataset doesn\'t exist, aborting.' unless
24
- MiGA::Dataset.exist?(p, o[:dataset])
25
- d = p.unlink_dataset(o[:dataset])
26
- d.remove! if o[:remove]
27
-
28
- $stderr.puts 'Done.' unless o[:q]
29
-
data/actions/run.rb DELETED
@@ -1,45 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'shellwords'
7
-
8
- o = {q: true, try_load: false, thr: 1}
9
- OptionParser.new do |opt|
10
- opt_banner(opt)
11
- opt_object(opt, o, [:project, :dataset_opt, :result])
12
- opt.on('-t', '--threads INT',
13
- "Threads to use in the local run (by default: #{o[:thr]})."
14
- ){ |v| o[:thr] = v.to_i }
15
- opt_common(opt, o)
16
- end.parse!
17
-
18
- ##=> Main <=
19
- opt_require(o, project: '-P', name: '-r')
20
-
21
- $stderr.puts 'Loading project.' unless o[:q]
22
- p = MiGA::Project.load(o[:project])
23
- raise "Impossible to load project: #{o[:project]}" if p.nil?
24
-
25
- virtual_task = false
26
- miga = MiGA::MiGA.root_path
27
- cmd = ["PROJECT=#{p.path.shellescape}", 'RUNTYPE=bash',
28
- "MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
29
- if o[:dataset].nil?
30
- type = MiGA::Project
31
- virtual_task = true if o[:name] == :p
32
- else
33
- d = p.dataset(o[:dataset])
34
- raise 'Cannot load dataset.' if d.nil?
35
- cmd << "DATASET=#{d.name.shellescape}"
36
- type = MiGA::Dataset
37
- virtual_task = true if o[:name] == :d
38
- end
39
- raise "Unsupported #{type.to_s.gsub(/.*::/, '')} result: #{o[:name]}." if
40
- type.RESULT_DIRS[o[:name].to_sym].nil? and not virtual_task
41
- cmd << MiGA::MiGA.script_path(o[:name], miga: miga, project: p).shellescape
42
- pid = spawn cmd.join(' ')
43
- Process.wait pid
44
-
45
- $stderr.puts 'Done.' unless o[:q]
data/actions/stats.rb DELETED
@@ -1,149 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, try_load:false}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt, :result])
10
- opt.on("--key STRING",
11
- "Returns only the value of the requested key."){ |v| o[:key] = v }
12
- opt.on("--compute-and-save",
13
- "Computes and saves the statistics."){ |v| o[:compute] = v }
14
- opt.on("--try-load",
15
- "Checks if stat exists instead of computing on --compute-and-save."
16
- ){ |v| o[:try_load] = v }
17
- opt_common(opt, o)
18
- end.parse!
19
-
20
- ##=> Main <=
21
- opts.parse!
22
- opt_require(o, project:"-P", name:"-r")
23
-
24
- $stderr.puts "Loading project." unless o[:q]
25
- p = MiGA::Project.load(o[:project])
26
- raise "Impossible to load project: #{o[:project]}" if p.nil?
27
-
28
- $stderr.puts "Loading result." unless o[:q]
29
- d = nil
30
- if o[:dataset].nil?
31
- r = p.add_result(o[:name], false)
32
- else
33
- d = p.dataset(o[:dataset])
34
- r = d.add_result(o[:name], false)
35
- end
36
- raise "Cannot load result." if r.nil?
37
-
38
- o[:compute] = false if o[:try_load] and
39
- (not r[:stats].nil?) and (not r[:stats].empty?)
40
-
41
- if o[:compute]
42
- $stderr.puts "Computing statistics." unless o[:q]
43
- stats = {}
44
- case o[:name]
45
- when :raw_reads
46
- if r[:files][:pair1].nil?
47
- s = MiGA::MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
48
- stats = {
49
- reads: s[:n],
50
- length_average: [s[:avg], "bp"],
51
- length_standard_deviation: [s[:sd], "bp"],
52
- g_c_content: [s[:gc], "%"]}
53
- else
54
- s1 = MiGA::MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
55
- s2 = MiGA::MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
56
- stats = {
57
- read_pairs: s1[:n],
58
- forward_length_average: [s1[:avg], "bp"],
59
- forward_length_standard_deviation: [s1[:sd], "bp"],
60
- forward_g_c_content: [s1[:gc], "%"],
61
- reverse_length_average: [s2[:avg], "bp"],
62
- reverse_length_standard_deviation: [s2[:sd], "bp"],
63
- reverse_g_c_content: [s2[:gc], "%"]}
64
- end
65
- when :trimmed_fasta
66
- f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
67
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
68
- stats = {
69
- reads: s[:n],
70
- length_average: [s[:avg], "bp"],
71
- length_standard_deviation: [s[:sd], "bp"],
72
- g_c_content: [s[:gc], "%"]}
73
- when :assembly
74
- s = MiGA::MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
75
- n50: true, gc: true)
76
- stats = {
77
- contigs: s[:n],
78
- n50: [s[:n50], "bp"],
79
- total_length: [s[:tot], "bp"],
80
- g_c_content: [s[:gc], "%"]}
81
- when :cds
82
- s = MiGA::MiGA.seqs_length(r.file_path(:proteins), :fasta)
83
- stats = {
84
- predicted_proteins: s[:n],
85
- average_length: [s[:avg], "aa"]}
86
- asm = d.add_result(:assembly, false)
87
- unless asm.nil? or asm[:stats][:total_length].nil?
88
- stats[:coding_density] =
89
- [300.0 * s[:tot] / asm[:stats][:total_length][0], "%"]
90
- end
91
- when :essential_genes
92
- if d.is_multi?
93
- stats = {median_copies:0, mean_copies:0}
94
- File.open(r.file_path(:report), "r") do |fh|
95
- fh.each_line do |ln|
96
- if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
97
- stats["#{$1.downcase}_copies".to_sym] = $2.to_f
98
- end
99
- end
100
- end
101
- else
102
- # Fix estimate for Archaea
103
- if not d.metadata[:tax].nil? and
104
- d.metadata[:tax].is_in? MiGA::Taxonomy.new("d:Archaea") and
105
- r.file_path(:bac_report).nil?
106
- scr = "#{MiGA::MiGA.root_path}/utils/arch-ess-genes.rb"
107
- rep = r.file_path(:report)
108
- $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
109
- r.add_file(:bac_report, "#{d.name}.ess/log")
110
- r.add_file(:report, "#{d.name}.ess/log.archaea")
111
- end
112
- # Extract/compute quality values
113
- stats = {completeness: [0.0,"%"], contamination: [0.0,"%"]}
114
- File.open(r.file_path(:report), "r") do |fh|
115
- fh.each_line do |ln|
116
- if /^! (Completeness|Contamination): (.*)%/.match(ln)
117
- stats[$1.downcase.to_sym][0] = $2.to_f
118
- end
119
- end
120
- end
121
- stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
122
- d.metadata[:quality] = case stats[:quality]
123
- when 80..100 ; :excellent
124
- when 50..80 ; :high
125
- when 20..50 ; :intermediate
126
- else ; :low
127
- end
128
- d.save
129
- end
130
- else
131
- stats = nil
132
- end
133
- unless stats.nil?
134
- r[:stats] = stats
135
- r.save
136
- end
137
- end
138
-
139
- if o[:key].nil?
140
- r[:stats].each do |k,v|
141
- puts "#{k==:g_c_content ? "G+C content" : k.to_s.unmiga_name.capitalize}: #{
142
- v.is_a?(Array) ? v.join(" ") : v}."
143
- end
144
- else
145
- v = r[:stats][o[:key].downcase.miga_name.to_sym]
146
- puts v.is_a?(Array) ? v.first : v
147
- end
148
-
149
- $stderr.puts "Done." unless o[:q]
data/actions/summary.rb DELETED
@@ -1,57 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true, units: false, tabular: false}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt_filter_datasets(opt, o)
11
- opt_object(opt, o, [:result_dataset])
12
- opt.on("--tab",
13
- "Returns a tab-delimited table."){ |v| o[:tabular] = v }
14
- opt.on("--key STRING",
15
- "Returns only the value of the requested key."){ |v| o[:key_md] = v }
16
- opt.on("--with-units",
17
- "Includes units in each cell."){ |v| o[:units] = v }
18
- opt_common(opt, o)
19
- end.parse!
20
-
21
- ##=> Main <=
22
- opts.parse!
23
- opt_require(o, project:"-P", name:"-r")
24
-
25
- $stderr.puts "Loading project." unless o[:q]
26
- p = MiGA::Project.load(o[:project])
27
- raise "Impossible to load project: #{o[:project]}" if p.nil?
28
-
29
- $stderr.puts "Listing datasets." unless o[:q]
30
- if o[:dataset].nil?
31
- ds = p.datasets
32
- elsif MiGA::Dataset.exist? p, o[:dataset]
33
- ds = [p.dataset(o[:dataset])]
34
- else
35
- ds = []
36
- end
37
- ds = filter_datasets!(ds, o)
38
-
39
- $stderr.puts "Loading results." unless o[:q]
40
- stats = ds.map do |d|
41
- r = d.add_result(o[:name].to_sym, false)
42
- s = r.nil? ? {} : r[:stats]
43
- s.tap{ |i| i[:dataset] = d.name }
44
- end
45
- keys = o[:key_md].nil? ? stats.map(&:keys).flatten.uniq :
46
- [:dataset, o[:key_md].downcase.miga_name.to_sym]
47
- keys.delete :dataset
48
- keys.unshift :dataset
49
-
50
- table = o[:units] ?
51
- stats.map{ |s| keys.map{ |k|
52
- s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
53
- stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
54
- puts MiGA::MiGA.tabulate(keys, table, o[:tabular])
55
-
56
- $stderr.puts "Done." unless o[:q]
57
-
data/actions/tax_dist.rb DELETED
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'miga/tax_index'
7
- require 'zlib'
8
- require 'tmpdir'
9
-
10
- o = {q: true, format: :json}
11
- OptionParser.new do |opt|
12
- opt_banner(opt)
13
- opt_object(opt, o, [:project])
14
- opt_filter_datasets(opt, o)
15
- opt.on('-i', '--index FILE',
16
- 'Pre-calculated tax-index (in tabular format) to be used.',
17
- 'If passed, dataset filtering arguments are ignored.'
18
- ){ |v| o[:index] = v }
19
- opt_common(opt, o)
20
- end.parse!
21
-
22
- ##=> Functions <=
23
- # Returns the _cannonical_ ID between strings +a+ and +b+.
24
- def cannid(a, b) ; (a > b ? [b, a] : [a, b]).join('-') ; end
25
-
26
- ##=> Main <=
27
- opt_require(o, project: '-P')
28
-
29
- $stderr.puts 'Loading project.' unless o[:q]
30
- p = MiGA::Project.load(o[:project])
31
- raise "Impossible to load project: #{o[:project]}" if p.nil?
32
-
33
- metric = p.is_clade? ? 'ani' : 'aai'
34
- res_n = "#{metric}_distances"
35
- $stderr.puts "Reading distances (1-#{metric.upcase})." unless o[:q]
36
- res = p.result res_n
37
- raise "#{res_n} not yet calculated." if res.nil?
38
- matrix = res.file_path(:matrix)
39
- raise "#{res_n} has no matrix." if matrix.nil?
40
- dist = {}
41
- mfh = matrix =~ /\.gz$/ ? Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
42
- mfh.each_line do |ln|
43
- next if mfh.lineno==1
44
- row = ln.chomp.split("\t")
45
- dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
46
- $stderr.print(" Ln:#{mfh.lineno} \r") if !o[:q] and (mfh.lineno % 1_000) == 0
47
- end
48
- $stderr.puts " Lines: #{mfh.lineno}" unless o[:q]
49
- mfh.close
50
-
51
- Dir.mktmpdir do |dir|
52
- if o[:index].nil?
53
- $stderr.puts 'Loading datasets.' unless o[:q]
54
- ds = p.datasets
55
- ds.select!{ |d| not d.metadata[:tax].nil? }
56
- ds = filter_datasets!(ds, o)
57
-
58
- $stderr.puts 'Indexing taxonomy.' unless o[:q]
59
- tax_index = MiGA::TaxIndex.new
60
- ds.each { |d| tax_index << d }
61
- tab = File.expand_path('index.tab', dir)
62
- File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
63
- else
64
- tab = o[:index]
65
- end
66
-
67
- $stderr.puts 'Traversing taxonomy.' unless o[:q]
68
- rank_i = 0
69
- MiGA::Taxonomy.KNOWN_RANKS.each do |rank|
70
- $stderr.print "o #{rank}: " unless o[:q]
71
- rank_n = 0
72
- rank_i += 1
73
- in_rank = nil
74
- ds_name = []
75
- File.open(tab, 'r') do |fh|
76
- fh.each_line do |ln|
77
- if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
78
- in_rank = nil
79
- ds_name = []
80
- elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
81
- in_rank = $2 == '?' ? nil : $1
82
- ds_name = []
83
- elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
84
- ds_i = $1
85
- ds_name << ds_i
86
- ds_name.each do |ds_j|
87
- k = cannid(ds_i, ds_j)
88
- next if dist[k].nil?
89
- rank_n += 1
90
- dist[k][3] = rank_i
91
- dist[k][4].unshift in_rank
92
- end
93
- end
94
- end
95
- end
96
- $stderr.puts "#{rank_n} pairs of datasets." unless o[:q]
97
- end
98
- end
99
-
100
- $stderr.puts 'Generating report.' unless o[:q]
101
- dist.keys.each do |k|
102
- dist[k][5] = dist[k][4].reverse.join(' ')
103
- dist[k][4] = dist[k][4].first
104
- puts (k.split('-') + dist[k]).join("\t")
105
- end
106
-