miga-base 0.4.1.0 → 0.4.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/bin/miga +2 -244
  3. data/lib/miga/cli/action/about.rb +44 -0
  4. data/lib/miga/cli/action/add.rb +139 -0
  5. data/lib/miga/cli/action/add_result.rb +26 -0
  6. data/lib/miga/cli/action/console.rb +19 -0
  7. data/lib/miga/cli/action/daemon.rb +74 -0
  8. data/lib/miga/cli/action/date.rb +18 -0
  9. data/lib/miga/cli/action/doctor.rb +210 -0
  10. data/lib/miga/cli/action/edit.rb +24 -0
  11. data/lib/miga/cli/action/files.rb +31 -0
  12. data/lib/miga/cli/action/find.rb +48 -0
  13. data/lib/miga/cli/action/generic.rb +44 -0
  14. data/lib/miga/cli/action/get.rb +132 -0
  15. data/lib/miga/cli/action/init.rb +343 -0
  16. data/lib/miga/cli/action/ln.rb +42 -0
  17. data/lib/miga/cli/action/ls.rb +55 -0
  18. data/lib/miga/cli/action/ncbi_get.rb +218 -0
  19. data/lib/miga/cli/action/new.rb +45 -0
  20. data/lib/miga/cli/action/next_step.rb +27 -0
  21. data/lib/miga/cli/action/plugins.rb +28 -0
  22. data/lib/miga/cli/action/rm.rb +25 -0
  23. data/lib/miga/cli/action/run.rb +39 -0
  24. data/lib/miga/cli/action/stats.rb +140 -0
  25. data/lib/miga/cli/action/summary.rb +49 -0
  26. data/lib/miga/cli/action/tax_dist.rb +102 -0
  27. data/lib/miga/cli/action/tax_index.rb +47 -0
  28. data/lib/miga/cli/action/tax_set.rb +59 -0
  29. data/lib/miga/cli/action/tax_test.rb +77 -0
  30. data/lib/miga/cli/action.rb +66 -0
  31. data/lib/miga/cli/base.rb +90 -0
  32. data/lib/miga/cli.rb +426 -0
  33. data/lib/miga/project/result.rb +14 -6
  34. data/lib/miga/remote_dataset.rb +1 -1
  35. data/lib/miga/tax_index.rb +5 -4
  36. data/lib/miga/taxonomy/base.rb +63 -0
  37. data/lib/miga/taxonomy.rb +87 -92
  38. data/lib/miga/version.rb +6 -6
  39. data/test/taxonomy_test.rb +49 -9
  40. data/utils/distance/commands.rb +11 -11
  41. data/utils/distance/pipeline.rb +5 -5
  42. metadata +43 -49
  43. data/actions/about.rb +0 -43
  44. data/actions/add.rb +0 -129
  45. data/actions/add_result.rb +0 -30
  46. data/actions/daemon.rb +0 -55
  47. data/actions/date.rb +0 -14
  48. data/actions/doctor.rb +0 -201
  49. data/actions/edit.rb +0 -33
  50. data/actions/files.rb +0 -43
  51. data/actions/find.rb +0 -41
  52. data/actions/get.rb +0 -105
  53. data/actions/init.rb +0 -301
  54. data/actions/ln.rb +0 -47
  55. data/actions/ls.rb +0 -61
  56. data/actions/ncbi_get.rb +0 -192
  57. data/actions/new.rb +0 -44
  58. data/actions/next_step.rb +0 -33
  59. data/actions/plugins.rb +0 -25
  60. data/actions/rm.rb +0 -29
  61. data/actions/run.rb +0 -45
  62. data/actions/stats.rb +0 -149
  63. data/actions/summary.rb +0 -57
  64. data/actions/tax_dist.rb +0 -106
  65. data/actions/tax_index.rb +0 -46
  66. data/actions/tax_set.rb +0 -63
  67. data/actions/tax_test.rb +0 -80
@@ -0,0 +1,45 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that creates a new project.
class MiGA::Cli::Action::New < MiGA::Cli::Action

  # Registers the project selectors (through +cli.opt_object+) and the
  # free-text options, each of which is stored verbatim under its own key
  # in +cli+.
  def parse_cli
    text_options = {
      name: ['-n', '--name STRING', 'Name of the project'],
      description: [
        '-d', '--description STRING', 'Description of the project'
      ],
      comments: ['-c', '--comments STRING', 'Comments on the project'],
      metadata: [
        '-m', '--metadata STRING',
        'Metadata as key-value pairs separated by = and delimited by comma',
        'Values are saved as strings except for booleans (true / false) or nil'
      ]
    }
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :project_type_req])
      text_options.each do |key, spec|
        opt.on(*spec) { |value| cli[key] = value }
      end
    end
  end

  # Creates and saves the project at +cli[:project]+.
  #
  # Raises if either ~/.miga_rc or ~/.miga_daemon.json is missing, or if
  # +Project.exist?+ reports that the project is already there.
  def perform
    cli.ensure_type(MiGA::Project)
    cli.ensure_par(project: '-P')
    init_files = %w[.miga_rc .miga_daemon.json].map do |file|
      "#{ENV["HOME"]}/#{file}"
    end
    unless init_files.all? { |file| File.exist?(file) }
      raise "You must initialize MiGA before creating the first project.\n" \
            'Please use "miga init".'
    end
    cli.say "Creating project: #{cli[:project]}"
    if Project.exist?(cli[:project])
      raise 'Project already exists, aborting.'
    end
    project = Project.new(cli[:project], false)
    # add_metadata returns the object to keep working with
    project = cli.add_metadata(project)
    project.save
  end
end
@@ -0,0 +1,27 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that prints the next pending step of a project or a dataset.
class MiGA::Cli::Action::NextStep < MiGA::Cli::Action

  # Registers the project and (optional) dataset selectors.
  def parse_cli
    cli.parse { |opt| cli.opt_object(opt, [:project, :dataset_opt]) }
  end

  # Prints the next step, or '?' when none is reported.
  #
  # Without a dataset, the project is asked for +next_distances+ and then
  # +next_inclade+. With a dataset, +next_preprocessing+ is only queried if
  # the dataset is active.
  def perform
    project = cli.load_project
    step =
      if cli[:dataset].nil?
        project.next_distances(false) || project.next_inclade(false)
      else
        dataset = cli.load_dataset
        dataset.is_active? ? dataset.next_preprocessing : nil
      end
    cli.puts(step || '?')
  end
end
@@ -0,0 +1,28 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that installs, uninstalls, and lists the plugins of a project.
class MiGA::Cli::Action::Plugins < MiGA::Cli::Action

  # Registers the project selector plus --install and --uninstall, each
  # taking the path of a plugin.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on(
        '--install PATH',
        'Install the specified plugin in the project'
      ) do |path|
        cli[:install] = path
      end
      opt.on(
        '--uninstall PATH',
        'Uninstall the specified plugin from the project'
      ) do |path|
        cli[:uninstall] = path
      end
    end
  end

  # Applies the requested installation first, then the requested removal,
  # and finally prints every plugin of the project, one per line.
  def perform
    project = cli.load_project
    to_install = cli[:install]
    project.install_plugin(to_install) unless to_install.nil?
    to_uninstall = cli[:uninstall]
    project.uninstall_plugin(to_uninstall) unless to_uninstall.nil?
    project.plugins.each do |plugin|
      cli.puts plugin
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that unlinks a dataset from its project and, on request, also
# removes the dataset itself.
class MiGA::Cli::Action::Rm < MiGA::Cli::Action

  # Registers the default object selectors and the -r/--remove switch,
  # which is off by default.
  def parse_cli
    cli.defaults = { remove: false }
    cli.parse do |opt|
      cli.opt_object(opt)
      opt.on(
        '-r', '--remove',
        'Also remove all associated files',
        'By default, only unlinks from metadata'
      ) do |flag|
        cli[:remove] = flag
      end
    end
  end

  # Unlinks the dataset by name; +remove!+ is only called with --remove.
  def perform
    dataset = cli.load_dataset
    project = cli.load_project
    project.unlink_dataset(dataset.name)
    return unless cli[:remove]

    dataset.remove!
  end
end
@@ -0,0 +1,39 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'shellwords'
6
+
7
# CLI action that runs the script of a result step locally and waits for it.
class MiGA::Cli::Action::Run < MiGA::Cli::Action

  # Registers the project, dataset, and result selectors plus the number of
  # threads (-t), which defaults to 1.
  def parse_cli
    cli.defaults = { try_load: false, thr: 1 }
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '-t', '--threads INT', Integer,
        "Threads to use in the local run (by default: #{cli[:thr]})."
      ) do |threads|
        cli[:thr] = threads
      end
    end
  end

  # Builds a shell command made of VAR=value assignments followed by the
  # script path (values are shell-escaped), spawns it, and waits for it.
  #
  # Raises if the result is not in +RESULT_DIRS+ of the loaded object's
  # class, unless the result is one of the virtual tasks :p or :d.
  def perform
    miga = MiGA.root_path
    project = cli.load_project
    cmd = [
      "PROJECT=#{project.path.shellescape}",
      'RUNTYPE=bash',
      "MIGA=#{miga.shellescape}",
      "CORES=#{cli[:thr]}"
    ]

    obj = cli.load_project_or_dataset
    klass = obj.class
    virtual_task = [:p, :d].include?(cli[:result])
    if obj.is_a?(MiGA::Dataset)
      cmd << "DATASET=#{obj.name.shellescape}"
    end

    if klass.RESULT_DIRS[cli[:result]].nil? && !virtual_task
      raise "Unsupported #{klass.to_s.gsub(/.*::/, '')} result: #{cli[:result]}."
    end
    script = MiGA.script_path(cli[:result], miga: miga, project: project)
    cmd << script.shellescape
    Process.wait(spawn(cmd.join(' ')))
  end
end
@@ -0,0 +1,140 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that computes, saves, and prints the statistics of a result.
class MiGA::Cli::Action::Stats < MiGA::Cli::Action

  # Registers the project, dataset, and result selectors plus:
  # - --key: print only the value of one statistic.
  # - --compute-and-save: compute the statistics and store them in the result.
  # - --try-load: skip the computation if the result already has statistics.
  def parse_cli
    cli.defaults = {try_load: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '--key STRING',
        'Return only the value of the requested key'
      ){ |v| cli[:key] = v }
      opt.on(
        '--compute-and-save',
        'Compute and saves the statistics'
      ){ |v| cli[:compute] = v }
      opt.on(
        '--try-load',
        'Check if stat exists instead of computing on --compute-and-save'
      ){ |v| cli[:try_load] = v }
    end
  end

  # Optionally computes and saves the statistics of the result, then prints
  # them: all of them as "Name: value [units]." lines, or only the value of
  # +cli[:key]+ when it is set.
  #
  # Statistics are only computed for :raw_reads, :trimmed_fasta, :assembly,
  # :cds, and :essential_genes; for any other result nothing is saved.
  # For non-multi datasets, :essential_genes also sets the dataset's
  # +metadata[:quality]+ and saves the dataset.
  def perform
    # The result must be loaded before --try-load can inspect its statistics
    r = cli.load_result
    if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
      cli[:compute] = false
    end

    if cli[:compute]
      cli.say 'Computing statistics'
      stats = {}
      case cli[:result]
      when :raw_reads
        if r[:files][:pair1].nil?
          s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
          stats = {
            reads: s[:n],
            length_average: [s[:avg], 'bp'],
            length_standard_deviation: [s[:sd], 'bp'],
            g_c_content: [s[:gc], '%']}
        else
          s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
          s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
          stats = {
            read_pairs: s1[:n],
            forward_length_average: [s1[:avg], 'bp'],
            forward_length_standard_deviation: [s1[:sd], 'bp'],
            forward_g_c_content: [s1[:gc], '%'],
            reverse_length_average: [s2[:avg], 'bp'],
            reverse_length_standard_deviation: [s2[:sd], 'bp'],
            reverse_g_c_content: [s2[:gc], '%']}
        end
      when :trimmed_fasta
        f = r[:files][:coupled].nil? ?
          r.file_path(:single) : r.file_path(:coupled)
        s = MiGA.seqs_length(f, :fasta, gc: true)
        stats = {
          reads: s[:n],
          length_average: [s[:avg], 'bp'],
          length_standard_deviation: [s[:sd], 'bp'],
          g_c_content: [s[:gc], '%']}
      when :assembly
        s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
          n50: true, gc: true)
        stats = {
          contigs: s[:n],
          n50: [s[:n50], 'bp'],
          total_length: [s[:tot], 'bp'],
          g_c_content: [s[:gc], '%']}
      when :cds
        s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
        stats = {
          predicted_proteins: s[:n],
          average_length: [s[:avg], 'aa']}
        asm = cli.load_dataset.add_result(:assembly, false)
        unless asm.nil? || asm[:stats][:total_length].nil?
          # 300 = 3 (scaling of the protein total length) x 100 (percentage)
          stats[:coding_density] =
            [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
        end
      when :essential_genes
        d = cli.load_dataset
        if d.is_multi?
          stats = {median_copies: 0, mean_copies: 0}
          File.open(r.file_path(:report), 'r') do |fh|
            fh.each_line do |ln|
              if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
                stats["#{$1.downcase}_copies".to_sym] = $2.to_f
              end
            end
          end
        else
          # Fix estimate for Archaea. `!` is required here: `not` binds
          # looser than `&&` and would negate the whole conjunction.
          tax = d.metadata[:tax]
          if !tax.nil? &&
                tax.in?(Taxonomy.new('d:Archaea')) &&
                r.file_path(:bac_report).nil?
            scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
            rep = r.file_path(:report)
            $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
            r.add_file(:bac_report, "#{d.name}.ess/log")
            r.add_file(:report, "#{d.name}.ess/log.archaea")
          end
          # Extract/compute quality values
          stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
          File.open(r.file_path(:report), 'r') do |fh|
            fh.each_line do |ln|
              if /^! (Completeness|Contamination): (.*)%/.match(ln)
                stats[$1.downcase.to_sym][0] = $2.to_f
              end
            end
          end
          stats[:quality] =
            stats[:completeness][0] - stats[:contamination][0] * 5
          d.metadata[:quality] = case stats[:quality]
            when 80..100 ; :excellent
            when 50..80  ; :high
            when 20..50  ; :intermediate
            else         ; :low
          end
          d.save
        end
      else
        # Unsupported result: leave any stored statistics untouched
        stats = nil
      end
      unless stats.nil?
        r[:stats] = stats
        r.save
      end
    end

    if cli[:key].nil?
      r[:stats].each do |k, v|
        label = k == :g_c_content ?
          'G+C content' : k.to_s.unmiga_name.capitalize
        value = v.is_a?(Array) ? v.join(' ') : v
        cli.puts "#{label}: #{value}."
      end
    else
      v = r[:stats][cli[:key].downcase.miga_name.to_sym]
      # Only the value is printed for [value, units] pairs
      puts v.is_a?(Array) ? v.first : v
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that tabulates the statistics of one result across datasets.
class MiGA::Cli::Action::Summary < MiGA::Cli::Action

  # Registers the project/dataset selectors, the dataset filters, the
  # result selector, and the --tab, --key, and --with-units options.
  def parse_cli
    cli.defaults = { units: false, tabular: false }
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      cli.opt_filter_datasets(opt)
      cli.opt_object(opt, [:result_dataset])
      opt.on('--tab', 'Return a tab-delimited table') do |flag|
        cli[:tabular] = flag
      end
      opt.on(
        '--key STRING',
        'Return only the value of the requested key'
      ) do |key|
        cli[:key_md] = key
      end
      opt.on('--with-units', 'Include units in each cell') do |flag|
        cli[:units] = flag
      end
    end
  end

  # Prints one row per dataset with the statistics of +cli[:result]+.
  #
  # The first column is always the dataset name. The remaining columns are
  # either every statistic key seen, or only +cli[:key_md]+ if given.
  # Array cells are reduced to their first element, or to all their
  # elements joined without separator if --with-units is set.
  def perform
    cli.ensure_par(result: '-r')
    datasets = cli.load_and_filter_datasets
    cli.say 'Loading results'
    stats = datasets.map do |dataset|
      result = dataset.add_result(cli[:result].to_sym, false)
      row = result.nil? ? {} : result[:stats]
      row[:dataset] = dataset.name
      row
    end

    keys =
      if cli[:key_md].nil?
        stats.flat_map(&:keys).uniq
      else
        [:dataset, cli[:key_md].downcase.miga_name.to_sym]
      end
    # Force the dataset name to be the first column
    keys.delete :dataset
    keys.unshift :dataset

    with_units = cli[:units]
    table = stats.map do |row|
      keys.map do |key|
        cell = row[key]
        if !cell.is_a?(Array)
          cell
        elsif with_units
          cell.map(&:to_s).join('')
        else
          cell.first
        end
      end
    end
    cli.puts MiGA.tabulate(keys, table, cli[:tabular])
  end
end
@@ -0,0 +1,102 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'miga/tax_index'
6
+ require 'zlib'
7
+ require 'tmpdir'
8
+
9
# CLI action that annotates each pairwise distance of a project with the
# lowest taxonomic rank shared by the two datasets.
class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action

  # Registers the project selector, the dataset filters, and -i/--index to
  # reuse an existing tabular tax-index instead of building one.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      cli.opt_filter_datasets(opt)
      opt.on(
        '-i', '--index FILE',
        'Pre-calculated tax-index (in tabular format) to be used',
        'If passed, dataset filtering arguments are ignored'
      ){ |v| cli[:index] = v }
    end
  end

  # Order-independent identifier for the pair +a+, +b+: both values sorted
  # and joined by a dash.
  def cannid(a, b)
    (a > b ? [b, a] : [a, b]).join('-')
  end

  # Reads the distance matrix of the project (ANI for clades, AAI
  # otherwise), traverses the tabular tax-index once per known rank, and
  # prints one tab-delimited line per pair of datasets in the matrix.
  #
  # Raises if the distances result or its matrix file are not available.
  def perform
    p = cli.load_project
    metric = p.is_clade? ? 'ani' : 'aai'
    res_n = "#{metric}_distances"
    cli.say "Reading distances: 1-#{metric.upcase}"
    res = p.result(res_n)
    raise "#{res_n} not yet calculated" if res.nil?
    matrix = res.file_path(:matrix)
    raise "#{res_n} has no matrix" if matrix.nil?

    # dist[pair] = [3 matrix columns, rank index, taxa from lowest rank up]
    dist = {}
    mfh = (matrix =~ /\.gz$/) ?
      Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
    begin
      mfh.each_line do |ln|
        next if mfh.lineno == 1 # Skip the header
        row = ln.chomp.split("\t")
        dist[cannid(row[1], row[2])] =
          [row[3], row[5], row[6], 0, ['root:biota']]
        cli.advance("Ln: #{mfh.lineno}") if (mfh.lineno % 1_000) == 0
      end
      cli.say " Lines: #{mfh.lineno}"
    ensure
      # Release the handle even if a line fails to parse
      mfh.close
    end

    Dir.mktmpdir do |dir|
      if cli[:index].nil?
        ds = cli.load_and_filter_datasets
        ds.keep_if { |d| !d.metadata[:tax].nil? }

        cli.say 'Indexing taxonomy'
        # Fully qualified: a bare TaxIndex could resolve to the sibling
        # action class MiGA::Cli::Action::TaxIndex if it has been loaded
        tax_index = MiGA::TaxIndex.new
        ds.each { |d| tax_index << d }
        tab = File.expand_path('index.tab', dir)
        File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
      else
        tab = cli[:index]
      end

      cli.say 'Traversing taxonomy'
      rank_i = 0
      Taxonomy.KNOWN_RANKS.each do |rank|
        cli.say "o #{rank}: "
        rank_n = 0
        rank_i += 1
        in_rank = nil
        ds_name = []
        # Built once per rank instead of once per line of the index
        parent_re = /^ {#{(rank_i - 1) * 2}}\S+:\S+:/
        rank_re = /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
        File.open(tab, 'r') do |fh|
          fh.each_line do |ln|
            if ln =~ parent_re
              # A new taxon of the parent rank closes the current group
              in_rank = nil
              ds_name = []
            elsif ln =~ rank_re
              # Unknown taxa ('?') don't define a shared classification
              in_rank = $2 == '?' ? nil : $1
              ds_name = []
            elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
              ds_i = $1
              ds_name << ds_i
              # Pair the dataset with all those seen so far in the taxon
              ds_name.each do |ds_j|
                k = cannid(ds_i, ds_j)
                next if dist[k].nil?
                rank_n += 1
                dist[k][3] = rank_i
                dist[k][4].unshift in_rank
              end
            end
          end
        end
        cli.say "#{rank_n} pairs of datasets"
      end
    end

    cli.say 'Generating report'
    dist.keys.each do |k|
      dist[k][5] = dist[k][4].reverse.join(' ')
      dist[k][4] = dist[k][4].first
      puts((k.split('-') + dist[k]).join("\t"))
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'miga/tax_index'
6
+
7
# CLI action that writes the taxonomy index of a project to a file.
class MiGA::Cli::Action::TaxIndex < MiGA::Cli::Action

  # Registers the project selector, the output file (-i), the output
  # format (-f, :json by default), and the dataset filters.
  def parse_cli
    cli.defaults = {format: :json}
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on(
        '-i', '--index PATH',
        '(Mandatory) File to create with the index'
      ){ |v| cli[:index] = v }
      opt.on(
        '-f', '--format STRING',
        "Format of the index file, by default: #{cli[:format]}",
        'Supported: json, tab.'
      ){ |v| cli[:format] = v.downcase.to_sym }
      cli.opt_filter_datasets(opt)
    end
  end

  # Indexes all the filtered datasets that have a taxonomy and saves the
  # index in +cli[:index]+ as JSON or tab-delimited text.
  #
  # Raises if the requested format is not supported; in that case the
  # output file is neither created nor truncated.
  def perform
    cli.ensure_par(index: '-i')
    ds = cli.load_and_filter_datasets
    ds.keep_if { |d| !d.metadata[:tax].nil? }

    cli.say 'Indexing taxonomy'
    tax_index = MiGA::TaxIndex.new
    ds.each { |d| tax_index << d }

    cli.say 'Saving index'
    # Serialize before opening the file, so an unsupported format cannot
    # clobber an existing index
    content =
      case cli[:format]
      when :json
        tax_index.to_json
      when :tab
        tax_index.to_tab
      else
        raise "Unsupported output format: #{cli[:format]}"
      end
    File.open(cli[:index], 'w') { |fh| fh.print content }
  end
end
@@ -0,0 +1,59 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI action that registers the taxonomy of one dataset (from a string) or
# of many datasets (from a tab-delimited file).
class MiGA::Cli::Action::TaxSet < MiGA::Cli::Action

  # Registers the project/dataset selectors, the taxonomy string (-s), and
  # the taxonomy file (-t).
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      opt.on(
        '-s', '--tax-string STRING',
        'String corresponding to the taxonomy of the dataset',
        'A space-delimited set of \'rank:name\' pairs'
      ){ |v| cli[:taxstring] = v }
      opt.on('-t', '--tax-file PATH',
        '(Mandatory unless -D and -s are provided)',
        'Tab-delimited file containing datasets taxonomy',
        'Each row corresponds to a datasets and each column to a rank',
        'The first row must be a header with the rank names,',
        'and the first column must contain dataset names'
      ){ |v| cli[:taxfile] = v }
    end
  end

  # With -t, reads the file line by line: the first non-blank line is the
  # header with the rank names, and each following line sets and saves the
  # taxonomy of the dataset named in its first column. Rows naming unknown
  # datasets are reported with a warning and skipped.
  #
  # Without -t, both -D and -s are mandatory and the taxonomy string is
  # registered for that single dataset.
  def perform
    p = cli.load_project
    if cli[:taxfile].nil?
      cli.ensure_par({dataset: '-D', taxstring: '-s'},
        '%<flag>s is mandatory unless -t is provided')
      cli.say 'Registering taxonomy'
      d = cli.load_dataset
      d.metadata[:tax] = Taxonomy.new(cli[:taxstring])
      d.save
    else
      cli.say 'Reading tax-file and registering taxonomy'
      # Block form, so the handle is closed even if a row raises
      File.open(cli[:taxfile], 'r') do |tfh|
        header = nil
        tfh.each_line do |ln|
          next if ln =~ /^\s*?$/
          # Limit -1 preserves trailing empty columns
          r = ln.chomp.split(/\t/, -1)
          dn = r.shift
          if header.nil?
            header = r
            next
          end
          d = p.dataset(dn)
          if d.nil?
            warn "Impossible to find dataset at line #{tfh.lineno}: #{dn}. " \
                 'Ignoring...'
            next
          end
          d.metadata[:tax] = Taxonomy.new(r, header)
          d.save
          cli.say "o #{d.name} registered"
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'miga/tax_dist'
6
+
7
# CLI action that tests the taxonomic classification and the taxonomic
# novelty of a dataset, based on the AAI to its closest relative.
class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action

  # Registers the project and dataset selectors, the --ref-project switch
  # (off by default), and the test to perform (-t, 'both' by default).
  def parse_cli
    cli.defaults = {test: 'both', ref_project: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset])
      opt.on(
        '--ref-project',
        'Use the taxonomy from the reference project, not the current project'
      ){ |v| cli[:ref_project] = v }
      opt.on(
        '-t', '--test STRING',
        'Test to perform. Supported values: intax, novel, both'
      ){ |v| cli[:test] = v.downcase }
    end
  end

  # Finds the closest relative of the dataset and prints the p-values
  # tables of the requested tests (intax and/or novel).
  #
  # The taxonomy of the relative is read from the current project or, with
  # --ref-project, from the project registered in the metadata of the
  # current one under :ref_project.
  #
  # Raises if closest relatives are not available or not found, or if
  # --ref-project is requested but the reference project is unset or
  # cannot be loaded.
  def perform
    d = cli.load_dataset
    cli.say 'Finding closest relative'
    cr = d.closest_relatives(1, cli[:ref_project])
    raise 'Action not supported for the project or dataset' if cr.nil?
    raise 'No close relatives found' if cr.empty?

    cli.say 'Querying probability distributions'
    cr = cr[0]
    cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."

    # The project must be loaded explicitly: a bare `p` is Kernel#p
    p = cli.load_project
    if cli[:ref_project]
      if (ref = p.metadata[:ref_project]).nil?
        raise '--ref-project requested but no reference project has been set'
      end
      if (q = MiGA::Project.load(ref)).nil?
        raise '--ref-project requested but reference project doesn\'t exist'
      end
      cr_d = q.dataset(cr[0])
    else
      cr_d = p.dataset(cr[0])
    end
    tax = cr_d.metadata[:tax] unless cr_d.nil?
    tax ||= {}

    if %w[intax both].include? cli[:test]
      # Intax
      r = TaxDist.aai_pvalues(cr[1], :intax).map do |k, v|
        [Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, significance(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic classification'
      cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
    end

    if %w[novel both].include? cli[:test]
      # Novel
      r = TaxDist.aai_pvalues(cr[1], :novel).map do |k, v|
        [Taxonomy.LONG_RANKS[k], v, significance(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic novelty'
      cli.puts MiGA.tabulate(%w[Rank P-value Signif.], r)
    end

    cli.puts ''
    cli.puts 'Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01.'
  end

  private

  # One asterisk for each threshold (0.5, 0.1, 0.05, 0.01) that +pvalue+ is
  # strictly below.
  def significance(pvalue)
    '*' * [0.5, 0.1, 0.05, 0.01].count { |i| pvalue < i }
  end
end