miga-base 0.4.1.0 → 0.4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/bin/miga +2 -244
  3. data/lib/miga/cli/action/about.rb +44 -0
  4. data/lib/miga/cli/action/add.rb +139 -0
  5. data/lib/miga/cli/action/add_result.rb +26 -0
  6. data/lib/miga/cli/action/console.rb +19 -0
  7. data/lib/miga/cli/action/daemon.rb +74 -0
  8. data/lib/miga/cli/action/date.rb +18 -0
  9. data/lib/miga/cli/action/doctor.rb +210 -0
  10. data/lib/miga/cli/action/edit.rb +24 -0
  11. data/lib/miga/cli/action/files.rb +31 -0
  12. data/lib/miga/cli/action/find.rb +48 -0
  13. data/lib/miga/cli/action/generic.rb +44 -0
  14. data/lib/miga/cli/action/get.rb +132 -0
  15. data/lib/miga/cli/action/init.rb +343 -0
  16. data/lib/miga/cli/action/ln.rb +42 -0
  17. data/lib/miga/cli/action/ls.rb +55 -0
  18. data/lib/miga/cli/action/ncbi_get.rb +218 -0
  19. data/lib/miga/cli/action/new.rb +45 -0
  20. data/lib/miga/cli/action/next_step.rb +27 -0
  21. data/lib/miga/cli/action/plugins.rb +28 -0
  22. data/lib/miga/cli/action/rm.rb +25 -0
  23. data/lib/miga/cli/action/run.rb +39 -0
  24. data/lib/miga/cli/action/stats.rb +140 -0
  25. data/lib/miga/cli/action/summary.rb +49 -0
  26. data/lib/miga/cli/action/tax_dist.rb +102 -0
  27. data/lib/miga/cli/action/tax_index.rb +47 -0
  28. data/lib/miga/cli/action/tax_set.rb +59 -0
  29. data/lib/miga/cli/action/tax_test.rb +77 -0
  30. data/lib/miga/cli/action.rb +66 -0
  31. data/lib/miga/cli/base.rb +90 -0
  32. data/lib/miga/cli.rb +426 -0
  33. data/lib/miga/project/result.rb +14 -6
  34. data/lib/miga/remote_dataset.rb +1 -1
  35. data/lib/miga/tax_index.rb +5 -4
  36. data/lib/miga/taxonomy/base.rb +63 -0
  37. data/lib/miga/taxonomy.rb +87 -92
  38. data/lib/miga/version.rb +6 -6
  39. data/test/taxonomy_test.rb +49 -9
  40. data/utils/distance/commands.rb +11 -11
  41. data/utils/distance/pipeline.rb +5 -5
  42. metadata +43 -49
  43. data/actions/about.rb +0 -43
  44. data/actions/add.rb +0 -129
  45. data/actions/add_result.rb +0 -30
  46. data/actions/daemon.rb +0 -55
  47. data/actions/date.rb +0 -14
  48. data/actions/doctor.rb +0 -201
  49. data/actions/edit.rb +0 -33
  50. data/actions/files.rb +0 -43
  51. data/actions/find.rb +0 -41
  52. data/actions/get.rb +0 -105
  53. data/actions/init.rb +0 -301
  54. data/actions/ln.rb +0 -47
  55. data/actions/ls.rb +0 -61
  56. data/actions/ncbi_get.rb +0 -192
  57. data/actions/new.rb +0 -44
  58. data/actions/next_step.rb +0 -33
  59. data/actions/plugins.rb +0 -25
  60. data/actions/rm.rb +0 -29
  61. data/actions/run.rb +0 -45
  62. data/actions/stats.rb +0 -149
  63. data/actions/summary.rb +0 -57
  64. data/actions/tax_dist.rb +0 -106
  65. data/actions/tax_index.rb +0 -46
  66. data/actions/tax_set.rb +0 -63
  67. data/actions/tax_test.rb +0 -80
data/actions/ncbi_get.rb DELETED
@@ -1,192 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'miga/remote_dataset'
7
- require 'csv'
8
-
9
- o = {q: true, query: false, unlink: false,
10
- reference: false, legacy_name: false,
11
- complete: false, chromosome: false,
12
- scaffold: false, contig: false, add_version: true, dry: false,
13
- get_md: false, only_md: false, save_every: 1}
14
- OptionParser.new do |opt|
15
- opt_banner(opt)
16
- opt_object(opt, o, [:project])
17
- opt.on('-T', '--taxon STRING',
18
- '(Mandatory unless --reference) Taxon name (e.g., a species binomial).'
19
- ){ |v| o[:taxon]=v }
20
- opt.on('--reference',
21
- 'Download all reference genomes (ignores any other status).'
22
- ){ |v| o[:reference]=v }
23
- opt.on('--complete', 'Download complete genomes.'){ |v| o[:complete]=v }
24
- opt.on('--chromosome',
25
- 'Download complete chromosomes.'){ |v| o[:chromosome]=v }
26
- opt.on('--scaffold', 'Download genomes in scaffolds.'){ |v| o[:scaffold]=v }
27
- opt.on('--contig', 'Download genomes in contigs.'){ |v| o[:contig]=v }
28
- opt.on('--all', 'Download all genomes (in any status).') do
29
- o[:complete] = true
30
- o[:chromosome] = true
31
- o[:scaffold] = true
32
- o[:contig] = true
33
- end
34
- opt.on('--no-version-name',
35
- 'Do not add sequence version to the dataset name.',
36
- 'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
37
- opt.on('--legacy-name',
38
- 'Use dataset names based on chromosome entries instead of assembly.'
39
- ){ |v| o[:legacy_name] = v }
40
- opt.on('--blacklist PATH',
41
- 'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
42
- opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
43
- opt.on('--get-metadata',
44
- 'Only download and update metadata for existing datasets'
45
- ){ |v| o[:get_md] = v }
46
- opt.on('--only-metadata',
47
- 'Create datasets without input data but retrieve all metadata.'
48
- ){ |v| o[:only_md] = v }
49
- opt.on('--save-every INT',
50
- 'Save project every this many downloaded datasets.',
51
- 'If zero, it saves the project only once upon completion.',
52
- 'By default: 1.'){ |v| o[:save_every] = v.to_i }
53
- opt.on('-q', '--query',
54
- 'Register the datasets as queries, not reference datasets.'
55
- ){ |v| o[:query]=v }
56
- opt.on('-u', '--unlink',
57
- 'Unlink all datasets in the project missing from the download list.'
58
- ){ |v| o[:unlink]=v }
59
- opt.on('-R', '--remote-list PATH',
60
- 'Path to an output file with the list of all datasets listed remotely.'
61
- ){ |v| o[:remote_list]=v }
62
- opt.on('--api-key STRING', 'NCBI API key.'){ |v| ENV['NCBI_API_KEY'] = v }
63
- opt_common(opt, o)
64
- end.parse!
65
-
66
- opt_require(o, project: '-P')
67
- opt_require(o, taxon: '-T') unless o[:reference]
68
- unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
69
- raise 'No action requested. Pick at least one type of genome.'
70
- end
71
- o[:save_every] = 1 if o[:dry]
72
-
73
- ##=> Main <=
74
- $stderr.puts "Loading project." unless o[:q]
75
- p = MiGA::Project.load(o[:project])
76
- raise "Impossible to load project: #{o[:project]}" if p.nil?
77
- d = []
78
- ds = {}
79
- downloaded = 0
80
-
81
- url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
82
- url_param = {
83
- q: '[display()].' +
84
- 'from(GenomeAssemblies).' +
85
- 'usingschema(/schema/GenomeAssemblies).' +
86
- 'matching(tab==["Prokaryotes"] and q=="' + o[:taxon].tr('"',"'") + '"',
87
- fields: 'organism|organism,assembly|assembly,replicons|replicons,' +
88
- 'level|level,ftp_path_genbank|ftp_path_genbank,release_date|release_date,' +
89
- 'strain|strain',
90
- nolimit: 'on',
91
- }
92
- if o[:reference]
93
- url_param[:q] += ' and refseq_category==["representative"]'
94
- else
95
- status = {
96
- complete: 'Complete',
97
- chromosome: ' Chromosome', # <- The leading space is *VERY* important!
98
- scaffold: 'Scaffold',
99
- contig: 'Contig'
100
- }.map { |k, v| '"' + v + '"' if o[k] }.compact.join(',')
101
- url_param[:q] += ' and level==[' + status + ']'
102
- end
103
- url_param[:q] += ')'
104
- url = url_base + URI.encode_www_form(url_param)
105
- $stderr.puts 'Downloading genome list' unless o[:q]
106
- lineno = 0
107
- doc = MiGA::RemoteDataset.download_url(url)
108
- CSV.parse(doc, headers: true).each do |r|
109
- asm = r['assembly']
110
- next if asm.nil? or asm.empty? or asm == '-'
111
- next unless r['ftp_path_genbank']
112
-
113
- # Get replicons
114
- rep = r['replicons'].nil? ? nil : r['replicons'].
115
- split('; ').map{ |i| i.gsub(/.*:/,'') }.map{ |i| i.gsub(/\/.*/, '') }
116
-
117
- # Set name
118
- if o[:legacy_name] and o[:reference]
119
- n = r['#organism'].miga_name
120
- else
121
- if o[:legacy_name] and ['Complete',' Chromosome'].include? r['level']
122
- acc = rep.nil? ? '' : rep.first
123
- else
124
- acc = asm
125
- end
126
- acc.gsub!(/\.\d+\Z/, '') unless o[:add_version]
127
- n = "#{r['#organism']}_#{acc}".miga_name
128
- end
129
-
130
- # Register for download
131
- fna_url = r['ftp_path_genbank'] + '/' +
132
- File.basename(r['ftp_path_genbank']) + '_genomic.fna.gz'
133
- ds[n] = {
134
- ids: [fna_url], db: :assembly_gz, universe: :web,
135
- md: {
136
- type: :genome, ncbi_asm: asm, strain: r['strain']
137
- }
138
- }
139
- ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
140
- ds[n][:md][:release_date] =
141
- Time.parse(r['release_date']).to_s unless r['release_date'].nil?
142
- end
143
-
144
- # Discard blacklisted
145
- unless o[:blacklist].nil?
146
- $stderr.puts "Discarding datasets in #{o[:blacklist]}." unless o[:q]
147
- File.readlines(o[:blacklist]).
148
- select{ |i| i !~ /^#/ }.map(&:chomp).each{ |i| ds.delete i }
149
- end
150
-
151
- # Download entries
152
- $stderr.puts "Downloading #{ds.size} " +
153
- (ds.size == 1 ? 'entry' : 'entries') unless o[:q]
154
- p.do_not_save = true if o[:save_every] != 1
155
- ds.each do |name, body|
156
- d << name
157
- puts name
158
- next if p.dataset(name).nil? == o[:get_md]
159
- downloaded += 1
160
- next if o[:dry]
161
- $stderr.puts ' Locating remote dataset.' unless o[:q]
162
- body[:md][:metadata_only] = true if o[:only_md]
163
- rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
164
- if o[:get_md]
165
- $stderr.puts ' Updating dataset.' unless o[:q]
166
- rd.update_metadata(p.dataset(name), body[:md])
167
- else
168
- $stderr.puts ' Creating dataset.' unless o[:q]
169
- rd.save_to(p, name, !o[:query], body[:md])
170
- p.add_dataset(name)
171
- end
172
- p.save! if o[:save_every] > 1 and (downloaded % o[:save_every]) == 0
173
- end
174
-
175
- p.do_not_save = false
176
- p.save! if o[:save_every] != 1
177
-
178
- # Finalize
179
- $stderr.puts "Datasets listed: #{d.size}" unless o[:q]
180
- $stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
181
- downloaded.to_s unless o[:q]
182
- unless o[:remote_list].nil?
183
- File.open(o[:remote_list], 'w') do |fh|
184
- d.each { |i| fh.puts i }
185
- end
186
- end
187
- if o[:unlink]
188
- unlink = p.dataset_names - d
189
- unlink.each { |i| p.unlink_dataset(i).remove! }
190
- $stderr.puts "Datasets unlinked: #{unlink.size}" unless o[:q]
191
- end
192
-
data/actions/new.rb DELETED
@@ -1,44 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true, update: false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :project_type_req])
10
- opt.on('-n', '--name STRING', 'Name of the project.'){ |v| o[:name] = v }
11
- opt.on('-d', '--description STRING',
12
- 'Description of the project.'){ |v| o[:description] = v }
13
- opt.on('-c', '--comments STRING',
14
- 'Comments on the project.'){ |v| o[:comments] = v }
15
- opt.on('-m', '--metadata STRING',
16
- 'Metadata as key-value pairs separated by = and delimited by comma.',
17
- 'Values are saved as strings except for booleans (true / false) or nil.'
18
- ){ |v| o[:metadata] = v }
19
- opt.on('--update', 'Updates the project if it already exists.',
20
- 'Same as "miga edit".'){ o[:update] = true }
21
- opt_common(opt, o)
22
- end.parse!
23
-
24
- ##=> Main <=
25
- opt_require(o, project: '-P')
26
- opt_require_type(o, MiGA::Project) unless o[:update]
27
-
28
- unless File.exist? "#{ENV["HOME"]}/.miga_rc" and
29
- File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
30
- raise "You must initialize MiGA before creating the first project.\n" +
31
- 'Please use "miga init".'
32
- end
33
-
34
- $stderr.puts 'Creating project.' unless o[:q]
35
- raise 'Project already exists, aborting.' unless
36
- o[:update] or not MiGA::Project.exist? o[:project]
37
- p = MiGA::Project.new(o[:project], o[:update])
38
- # The following check is redundant with MiGA::Project#create,
39
- # but allows upgrading projects from (very) early code versions
40
- o[:name] = File.basename(p.path) if o[:update] and o[:name].nil?
41
- p = add_metadata(o, p)
42
- p.save
43
-
44
- $stderr.puts 'Done.' unless o[:q]
data/actions/next_step.rb DELETED
@@ -1,33 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt_common(opt, o)
11
- end.parse!
12
-
13
- ##=> Main <=
14
- opts.parse!
15
- opt_require(o, project: '-P')
16
-
17
- $stderr.puts 'Loading project.' unless o[:q]
18
- p = MiGA::Project.load(o[:project])
19
- raise "Impossible to load project: #{o[:project]}" if p.nil?
20
-
21
- n = nil
22
- if not o[:dataset].nil?
23
- $stderr.puts 'Loading dataset.' unless o[:q]
24
- d = p.dataset o[:dataset]
25
- raise "Impossible to load dataset: #{o[:dataset]}" if d.nil?
26
- n = d.next_preprocessing if d.is_active?
27
- else
28
- n = p.next_distances(false)
29
- n ||= p.next_inclade(false)
30
- end
31
- n ||= '?'
32
- puts n
33
-
data/actions/plugins.rb DELETED
@@ -1,25 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, update:false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project])
10
- opt.on("--install PATH",
11
- "Installs the specified plugin in the project."){ |v| o[:install]=v }
12
- opt.on("--uninstall PATH",
13
- "Uninstalls the specified plugin from the project."){ |v| o[:uninstall]=v }
14
- opt_common(opt, o)
15
- end.parse!
16
-
17
- ##=> Main <=
18
- opt_require(o, project:"-P")
19
-
20
- p = MiGA::Project.new(o[:project], true)
21
- p.install_plugin(o[:install]) unless o[:install].nil?
22
- p.uninstall_plugin(o[:uninstall]) unless o[:uninstall].nil?
23
- p.plugins.each { |i| puts i }
24
-
25
- $stderr.puts "Done." unless o[:q]
data/actions/rm.rb DELETED
@@ -1,29 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true, remove: false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o)
10
- opt.on('-r', '--remove', 'Also remove all associated files.',
11
- 'By default, only unlinks from metadata.'){ o[:remove] = true }
12
- opt_common(opt, o)
13
- end.parse!
14
-
15
- ##=> Main <=
16
- opt_require(o)
17
-
18
- $stderr.puts 'Loading project.' unless o[:q]
19
- p = MiGA::Project.load(o[:project])
20
- raise "Impossible to load project: #{o[:project]}" if p.nil?
21
-
22
- $stderr.puts 'Unlinking dataset.' unless o[:q]
23
- raise 'Dataset doesn\'t exist, aborting.' unless
24
- MiGA::Dataset.exist?(p, o[:dataset])
25
- d = p.unlink_dataset(o[:dataset])
26
- d.remove! if o[:remove]
27
-
28
- $stderr.puts 'Done.' unless o[:q]
29
-
data/actions/run.rb DELETED
@@ -1,45 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'shellwords'
7
-
8
- o = {q: true, try_load: false, thr: 1}
9
- OptionParser.new do |opt|
10
- opt_banner(opt)
11
- opt_object(opt, o, [:project, :dataset_opt, :result])
12
- opt.on('-t', '--threads INT',
13
- "Threads to use in the local run (by default: #{o[:thr]})."
14
- ){ |v| o[:thr] = v.to_i }
15
- opt_common(opt, o)
16
- end.parse!
17
-
18
- ##=> Main <=
19
- opt_require(o, project: '-P', name: '-r')
20
-
21
- $stderr.puts 'Loading project.' unless o[:q]
22
- p = MiGA::Project.load(o[:project])
23
- raise "Impossible to load project: #{o[:project]}" if p.nil?
24
-
25
- virtual_task = false
26
- miga = MiGA::MiGA.root_path
27
- cmd = ["PROJECT=#{p.path.shellescape}", 'RUNTYPE=bash',
28
- "MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
29
- if o[:dataset].nil?
30
- type = MiGA::Project
31
- virtual_task = true if o[:name] == :p
32
- else
33
- d = p.dataset(o[:dataset])
34
- raise 'Cannot load dataset.' if d.nil?
35
- cmd << "DATASET=#{d.name.shellescape}"
36
- type = MiGA::Dataset
37
- virtual_task = true if o[:name] == :d
38
- end
39
- raise "Unsupported #{type.to_s.gsub(/.*::/, '')} result: #{o[:name]}." if
40
- type.RESULT_DIRS[o[:name].to_sym].nil? and not virtual_task
41
- cmd << MiGA::MiGA.script_path(o[:name], miga: miga, project: p).shellescape
42
- pid = spawn cmd.join(' ')
43
- Process.wait pid
44
-
45
- $stderr.puts 'Done.' unless o[:q]
data/actions/stats.rb DELETED
@@ -1,149 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, try_load:false}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt, :result])
10
- opt.on("--key STRING",
11
- "Returns only the value of the requested key."){ |v| o[:key] = v }
12
- opt.on("--compute-and-save",
13
- "Computes and saves the statistics."){ |v| o[:compute] = v }
14
- opt.on("--try-load",
15
- "Checks if stat exists instead of computing on --compute-and-save."
16
- ){ |v| o[:try_load] = v }
17
- opt_common(opt, o)
18
- end.parse!
19
-
20
- ##=> Main <=
21
- opts.parse!
22
- opt_require(o, project:"-P", name:"-r")
23
-
24
- $stderr.puts "Loading project." unless o[:q]
25
- p = MiGA::Project.load(o[:project])
26
- raise "Impossible to load project: #{o[:project]}" if p.nil?
27
-
28
- $stderr.puts "Loading result." unless o[:q]
29
- d = nil
30
- if o[:dataset].nil?
31
- r = p.add_result(o[:name], false)
32
- else
33
- d = p.dataset(o[:dataset])
34
- r = d.add_result(o[:name], false)
35
- end
36
- raise "Cannot load result." if r.nil?
37
-
38
- o[:compute] = false if o[:try_load] and
39
- (not r[:stats].nil?) and (not r[:stats].empty?)
40
-
41
- if o[:compute]
42
- $stderr.puts "Computing statistics." unless o[:q]
43
- stats = {}
44
- case o[:name]
45
- when :raw_reads
46
- if r[:files][:pair1].nil?
47
- s = MiGA::MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
48
- stats = {
49
- reads: s[:n],
50
- length_average: [s[:avg], "bp"],
51
- length_standard_deviation: [s[:sd], "bp"],
52
- g_c_content: [s[:gc], "%"]}
53
- else
54
- s1 = MiGA::MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
55
- s2 = MiGA::MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
56
- stats = {
57
- read_pairs: s1[:n],
58
- forward_length_average: [s1[:avg], "bp"],
59
- forward_length_standard_deviation: [s1[:sd], "bp"],
60
- forward_g_c_content: [s1[:gc], "%"],
61
- reverse_length_average: [s2[:avg], "bp"],
62
- reverse_length_standard_deviation: [s2[:sd], "bp"],
63
- reverse_g_c_content: [s2[:gc], "%"]}
64
- end
65
- when :trimmed_fasta
66
- f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
67
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
68
- stats = {
69
- reads: s[:n],
70
- length_average: [s[:avg], "bp"],
71
- length_standard_deviation: [s[:sd], "bp"],
72
- g_c_content: [s[:gc], "%"]}
73
- when :assembly
74
- s = MiGA::MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
75
- n50: true, gc: true)
76
- stats = {
77
- contigs: s[:n],
78
- n50: [s[:n50], "bp"],
79
- total_length: [s[:tot], "bp"],
80
- g_c_content: [s[:gc], "%"]}
81
- when :cds
82
- s = MiGA::MiGA.seqs_length(r.file_path(:proteins), :fasta)
83
- stats = {
84
- predicted_proteins: s[:n],
85
- average_length: [s[:avg], "aa"]}
86
- asm = d.add_result(:assembly, false)
87
- unless asm.nil? or asm[:stats][:total_length].nil?
88
- stats[:coding_density] =
89
- [300.0 * s[:tot] / asm[:stats][:total_length][0], "%"]
90
- end
91
- when :essential_genes
92
- if d.is_multi?
93
- stats = {median_copies:0, mean_copies:0}
94
- File.open(r.file_path(:report), "r") do |fh|
95
- fh.each_line do |ln|
96
- if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
97
- stats["#{$1.downcase}_copies".to_sym] = $2.to_f
98
- end
99
- end
100
- end
101
- else
102
- # Fix estimate for Archaea
103
- if not d.metadata[:tax].nil? and
104
- d.metadata[:tax].is_in? MiGA::Taxonomy.new("d:Archaea") and
105
- r.file_path(:bac_report).nil?
106
- scr = "#{MiGA::MiGA.root_path}/utils/arch-ess-genes.rb"
107
- rep = r.file_path(:report)
108
- $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
109
- r.add_file(:bac_report, "#{d.name}.ess/log")
110
- r.add_file(:report, "#{d.name}.ess/log.archaea")
111
- end
112
- # Extract/compute quality values
113
- stats = {completeness: [0.0,"%"], contamination: [0.0,"%"]}
114
- File.open(r.file_path(:report), "r") do |fh|
115
- fh.each_line do |ln|
116
- if /^! (Completeness|Contamination): (.*)%/.match(ln)
117
- stats[$1.downcase.to_sym][0] = $2.to_f
118
- end
119
- end
120
- end
121
- stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
122
- d.metadata[:quality] = case stats[:quality]
123
- when 80..100 ; :excellent
124
- when 50..80 ; :high
125
- when 20..50 ; :intermediate
126
- else ; :low
127
- end
128
- d.save
129
- end
130
- else
131
- stats = nil
132
- end
133
- unless stats.nil?
134
- r[:stats] = stats
135
- r.save
136
- end
137
- end
138
-
139
- if o[:key].nil?
140
- r[:stats].each do |k,v|
141
- puts "#{k==:g_c_content ? "G+C content" : k.to_s.unmiga_name.capitalize}: #{
142
- v.is_a?(Array) ? v.join(" ") : v}."
143
- end
144
- else
145
- v = r[:stats][o[:key].downcase.miga_name.to_sym]
146
- puts v.is_a?(Array) ? v.first : v
147
- end
148
-
149
- $stderr.puts "Done." unless o[:q]
data/actions/summary.rb DELETED
@@ -1,57 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true, units: false, tabular: false}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt_filter_datasets(opt, o)
11
- opt_object(opt, o, [:result_dataset])
12
- opt.on("--tab",
13
- "Returns a tab-delimited table."){ |v| o[:tabular] = v }
14
- opt.on("--key STRING",
15
- "Returns only the value of the requested key."){ |v| o[:key_md] = v }
16
- opt.on("--with-units",
17
- "Includes units in each cell."){ |v| o[:units] = v }
18
- opt_common(opt, o)
19
- end.parse!
20
-
21
- ##=> Main <=
22
- opts.parse!
23
- opt_require(o, project:"-P", name:"-r")
24
-
25
- $stderr.puts "Loading project." unless o[:q]
26
- p = MiGA::Project.load(o[:project])
27
- raise "Impossible to load project: #{o[:project]}" if p.nil?
28
-
29
- $stderr.puts "Listing datasets." unless o[:q]
30
- if o[:dataset].nil?
31
- ds = p.datasets
32
- elsif MiGA::Dataset.exist? p, o[:dataset]
33
- ds = [p.dataset(o[:dataset])]
34
- else
35
- ds = []
36
- end
37
- ds = filter_datasets!(ds, o)
38
-
39
- $stderr.puts "Loading results." unless o[:q]
40
- stats = ds.map do |d|
41
- r = d.add_result(o[:name].to_sym, false)
42
- s = r.nil? ? {} : r[:stats]
43
- s.tap{ |i| i[:dataset] = d.name }
44
- end
45
- keys = o[:key_md].nil? ? stats.map(&:keys).flatten.uniq :
46
- [:dataset, o[:key_md].downcase.miga_name.to_sym]
47
- keys.delete :dataset
48
- keys.unshift :dataset
49
-
50
- table = o[:units] ?
51
- stats.map{ |s| keys.map{ |k|
52
- s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
53
- stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
54
- puts MiGA::MiGA.tabulate(keys, table, o[:tabular])
55
-
56
- $stderr.puts "Done." unless o[:q]
57
-
data/actions/tax_dist.rb DELETED
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'miga/tax_index'
7
- require 'zlib'
8
- require 'tmpdir'
9
-
10
- o = {q: true, format: :json}
11
- OptionParser.new do |opt|
12
- opt_banner(opt)
13
- opt_object(opt, o, [:project])
14
- opt_filter_datasets(opt, o)
15
- opt.on('-i', '--index FILE',
16
- 'Pre-calculated tax-index (in tabular format) to be used.',
17
- 'If passed, dataset filtering arguments are ignored.'
18
- ){ |v| o[:index] = v }
19
- opt_common(opt, o)
20
- end.parse!
21
-
22
- ##=> Functions <=
23
- # Returns the _canonical_ ID between strings +a+ and +b+.
24
- def cannid(a, b) ; (a > b ? [b, a] : [a, b]).join('-') ; end
25
-
26
- ##=> Main <=
27
- opt_require(o, project: '-P')
28
-
29
- $stderr.puts 'Loading project.' unless o[:q]
30
- p = MiGA::Project.load(o[:project])
31
- raise "Impossible to load project: #{o[:project]}" if p.nil?
32
-
33
- metric = p.is_clade? ? 'ani' : 'aai'
34
- res_n = "#{metric}_distances"
35
- $stderr.puts "Reading distances (1-#{metric.upcase})." unless o[:q]
36
- res = p.result res_n
37
- raise "#{res_n} not yet calculated." if res.nil?
38
- matrix = res.file_path(:matrix)
39
- raise "#{res_n} has no matrix." if matrix.nil?
40
- dist = {}
41
- mfh = matrix =~ /\.gz$/ ? Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
42
- mfh.each_line do |ln|
43
- next if mfh.lineno==1
44
- row = ln.chomp.split("\t")
45
- dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
46
- $stderr.print(" Ln:#{mfh.lineno} \r") if !o[:q] and (mfh.lineno % 1_000) == 0
47
- end
48
- $stderr.puts " Lines: #{mfh.lineno}" unless o[:q]
49
- mfh.close
50
-
51
- Dir.mktmpdir do |dir|
52
- if o[:index].nil?
53
- $stderr.puts 'Loading datasets.' unless o[:q]
54
- ds = p.datasets
55
- ds.select!{ |d| not d.metadata[:tax].nil? }
56
- ds = filter_datasets!(ds, o)
57
-
58
- $stderr.puts 'Indexing taxonomy.' unless o[:q]
59
- tax_index = MiGA::TaxIndex.new
60
- ds.each { |d| tax_index << d }
61
- tab = File.expand_path('index.tab', dir)
62
- File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
63
- else
64
- tab = o[:index]
65
- end
66
-
67
- $stderr.puts 'Traversing taxonomy.' unless o[:q]
68
- rank_i = 0
69
- MiGA::Taxonomy.KNOWN_RANKS.each do |rank|
70
- $stderr.print "o #{rank}: " unless o[:q]
71
- rank_n = 0
72
- rank_i += 1
73
- in_rank = nil
74
- ds_name = []
75
- File.open(tab, 'r') do |fh|
76
- fh.each_line do |ln|
77
- if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
78
- in_rank = nil
79
- ds_name = []
80
- elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
81
- in_rank = $2 == '?' ? nil : $1
82
- ds_name = []
83
- elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
84
- ds_i = $1
85
- ds_name << ds_i
86
- ds_name.each do |ds_j|
87
- k = cannid(ds_i, ds_j)
88
- next if dist[k].nil?
89
- rank_n += 1
90
- dist[k][3] = rank_i
91
- dist[k][4].unshift in_rank
92
- end
93
- end
94
- end
95
- end
96
- $stderr.puts "#{rank_n} pairs of datasets." unless o[:q]
97
- end
98
- end
99
-
100
- $stderr.puts 'Generating report.' unless o[:q]
101
- dist.keys.each do |k|
102
- dist[k][5] = dist[k][4].reverse.join(' ')
103
- dist[k][4] = dist[k][4].first
104
- puts (k.split('-') + dist[k]).join("\t")
105
- end
106
-