miga-base 0.4.1.0 → 0.4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/bin/miga +2 -244
  3. data/lib/miga/cli/action/about.rb +44 -0
  4. data/lib/miga/cli/action/add.rb +139 -0
  5. data/lib/miga/cli/action/add_result.rb +26 -0
  6. data/lib/miga/cli/action/console.rb +19 -0
  7. data/lib/miga/cli/action/daemon.rb +74 -0
  8. data/lib/miga/cli/action/date.rb +18 -0
  9. data/lib/miga/cli/action/doctor.rb +210 -0
  10. data/lib/miga/cli/action/edit.rb +24 -0
  11. data/lib/miga/cli/action/files.rb +31 -0
  12. data/lib/miga/cli/action/find.rb +48 -0
  13. data/lib/miga/cli/action/generic.rb +44 -0
  14. data/lib/miga/cli/action/get.rb +132 -0
  15. data/lib/miga/cli/action/init.rb +343 -0
  16. data/lib/miga/cli/action/ln.rb +42 -0
  17. data/lib/miga/cli/action/ls.rb +55 -0
  18. data/lib/miga/cli/action/ncbi_get.rb +218 -0
  19. data/lib/miga/cli/action/new.rb +45 -0
  20. data/lib/miga/cli/action/next_step.rb +27 -0
  21. data/lib/miga/cli/action/plugins.rb +28 -0
  22. data/lib/miga/cli/action/rm.rb +25 -0
  23. data/lib/miga/cli/action/run.rb +39 -0
  24. data/lib/miga/cli/action/stats.rb +140 -0
  25. data/lib/miga/cli/action/summary.rb +49 -0
  26. data/lib/miga/cli/action/tax_dist.rb +102 -0
  27. data/lib/miga/cli/action/tax_index.rb +47 -0
  28. data/lib/miga/cli/action/tax_set.rb +59 -0
  29. data/lib/miga/cli/action/tax_test.rb +77 -0
  30. data/lib/miga/cli/action.rb +66 -0
  31. data/lib/miga/cli/base.rb +90 -0
  32. data/lib/miga/cli.rb +426 -0
  33. data/lib/miga/project/result.rb +14 -6
  34. data/lib/miga/remote_dataset.rb +1 -1
  35. data/lib/miga/tax_index.rb +5 -4
  36. data/lib/miga/taxonomy/base.rb +63 -0
  37. data/lib/miga/taxonomy.rb +87 -92
  38. data/lib/miga/version.rb +6 -6
  39. data/test/taxonomy_test.rb +49 -9
  40. data/utils/distance/commands.rb +11 -11
  41. data/utils/distance/pipeline.rb +5 -5
  42. metadata +43 -49
  43. data/actions/about.rb +0 -43
  44. data/actions/add.rb +0 -129
  45. data/actions/add_result.rb +0 -30
  46. data/actions/daemon.rb +0 -55
  47. data/actions/date.rb +0 -14
  48. data/actions/doctor.rb +0 -201
  49. data/actions/edit.rb +0 -33
  50. data/actions/files.rb +0 -43
  51. data/actions/find.rb +0 -41
  52. data/actions/get.rb +0 -105
  53. data/actions/init.rb +0 -301
  54. data/actions/ln.rb +0 -47
  55. data/actions/ls.rb +0 -61
  56. data/actions/ncbi_get.rb +0 -192
  57. data/actions/new.rb +0 -44
  58. data/actions/next_step.rb +0 -33
  59. data/actions/plugins.rb +0 -25
  60. data/actions/rm.rb +0 -29
  61. data/actions/run.rb +0 -45
  62. data/actions/stats.rb +0 -149
  63. data/actions/summary.rb +0 -57
  64. data/actions/tax_dist.rb +0 -106
  65. data/actions/tax_index.rb +0 -46
  66. data/actions/tax_set.rb +0 -63
  67. data/actions/tax_test.rb +0 -80
@@ -0,0 +1,45 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that creates a new project at the location given by the
# +:project+ option.
class MiGA::Cli::Action::New < MiGA::Cli::Action

  # Registers the command-line options of this action. Each option stores
  # its raw value in +cli+ under the corresponding key (+:name+,
  # +:description+, +:comments+, +:metadata+).
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :project_type_req])
      opt.on('-n', '--name STRING', 'Name of the project') do |value|
        cli[:name] = value
      end
      opt.on('-d', '--description STRING', 'Description of the project') do |value|
        cli[:description] = value
      end
      opt.on('-c', '--comments STRING', 'Comments on the project') do |value|
        cli[:comments] = value
      end
      opt.on(
        '-m', '--metadata STRING',
        'Metadata as key-value pairs separated by = and delimited by comma',
        'Values are saved as strings except for booleans (true / false) or nil'
      ) do |value|
        cli[:metadata] = value
      end
    end
  end

  # Creates and saves the project.
  #
  # Raises if either +~/.miga_rc+ or +~/.miga_daemon.json+ is missing, or if
  # a project already exists at the requested location.
  def perform
    cli.ensure_type(MiGA::Project)
    cli.ensure_par(project: '-P')

    # Both per-user files must be present before any project can be created.
    home = ENV["HOME"]
    initialized = File.exist?("#{home}/.miga_rc") &&
                  File.exist?("#{home}/.miga_daemon.json")
    unless initialized
      raise "You must initialize MiGA before creating the first project.\n" +
        'Please use "miga init".'
    end

    cli.say "Creating project: #{cli[:project]}"
    raise 'Project already exists, aborting.' if Project.exist?(cli[:project])

    project = cli.add_metadata(Project.new(cli[:project], false))
    project.save
  end
end
@@ -0,0 +1,27 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that prints the next pending step of a project or, when a
# dataset is given, of that dataset.
class MiGA::Cli::Action::NextStep < MiGA::Cli::Action

  # Registers the project and (optional) dataset options.
  def parse_cli
    cli.parse { |opt| cli.opt_object(opt, [:project, :dataset_opt]) }
  end

  # Prints the next step, or '?' when none can be determined.
  #
  # Without a dataset, the project's next distances step is preferred and
  # the next in-clade step is the fallback. With a dataset, the next
  # preprocessing step is reported only if the dataset is active.
  def perform
    project = cli.load_project
    step =
      if cli[:dataset].nil?
        project.next_distances(false) || project.next_inclade(false)
      else
        dataset = cli.load_dataset
        dataset.next_preprocessing if dataset.is_active?
      end
    cli.puts(step || '?')
  end
end
@@ -0,0 +1,28 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that installs or uninstalls plugins in a project and then
# lists the project's plugins.
class MiGA::Cli::Action::Plugins < MiGA::Cli::Action

  # Registers the project option plus +--install+ and +--uninstall+, which
  # store the given path under +:install+ and +:uninstall+ respectively.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on('--install PATH', 'Install the specified plugin in the project') do |path|
        cli[:install] = path
      end
      opt.on('--uninstall PATH', 'Uninstall the specified plugin from the project') do |path|
        cli[:uninstall] = path
      end
    end
  end

  # Applies the requested installation first, then the requested removal,
  # and finally prints one plugin per line.
  def perform
    project = cli.load_project

    install_path = cli[:install]
    project.install_plugin(install_path) unless install_path.nil?

    uninstall_path = cli[:uninstall]
    project.uninstall_plugin(uninstall_path) unless uninstall_path.nil?

    project.plugins.each { |plugin| cli.puts plugin }
  end
end
@@ -0,0 +1,25 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that unlinks a dataset from its project, optionally removing
# the dataset itself.
class MiGA::Cli::Action::Rm < MiGA::Cli::Action

  # Registers the default object options and the +-r+/+--remove+ flag
  # (off by default).
  def parse_cli
    cli.defaults = { remove: false }
    cli.parse do |opt|
      cli.opt_object(opt)
      opt.on(
        '-r', '--remove',
        'Also remove all associated files',
        'By default, only unlinks from metadata'
      ) do |flag|
        cli[:remove] = flag
      end
    end
  end

  # Unlinks the dataset from the project; calls +remove!+ on the dataset
  # only when +--remove+ was passed.
  def perform
    dataset = cli.load_dataset
    project = cli.load_project
    project.unlink_dataset(dataset.name)
    dataset.remove! if cli[:remove]
  end
end
@@ -0,0 +1,39 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'shellwords'
6
+
# CLI action that runs the script of a result step locally, for a project
# or a dataset, and waits for it to finish.
class MiGA::Cli::Action::Run < MiGA::Cli::Action

  # Registers the project, optional dataset, and result options, plus
  # +-t+/+--threads+ (stored under +:thr+, default 1).
  def parse_cli
    cli.defaults = { try_load: false, thr: 1 }
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '-t', '--threads INT', Integer,
        "Threads to use in the local run (by default: #{cli[:thr]})."
      ) do |threads|
        cli[:thr] = threads
      end
    end
  end

  # Builds a shell command made of VAR=value assignments followed by the
  # script path, spawns it, and blocks until the child exits.
  #
  # Raises if the requested result is not among the target class's
  # RESULT_DIRS and is not one of the virtual tasks (+:p+, +:d+).
  def perform
    miga_root = MiGA.root_path
    project = cli.load_project
    parts = [
      "PROJECT=#{project.path.shellescape}",
      'RUNTYPE=bash',
      "MIGA=#{miga_root.shellescape}",
      "CORES=#{cli[:thr]}"
    ]

    target = cli.load_project_or_dataset
    target_class = target.class
    virtual_task = [:p, :d].include?(cli[:result])
    parts << "DATASET=#{target.name.shellescape}" if target.is_a? MiGA::Dataset

    if target_class.RESULT_DIRS[cli[:result]].nil? && !virtual_task
      raise "Unsupported #{target_class.to_s.gsub(/.*::/, '')} result: #{cli[:result]}."
    end

    script = MiGA.script_path(cli[:result], miga: miga_root, project: project)
    parts << script.shellescape
    Process.wait(spawn(parts.join(' ')))
  end
end
@@ -0,0 +1,140 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that prints the statistics stored in a result and, on request,
# computes and saves them first.
class MiGA::Cli::Action::Stats < MiGA::Cli::Action

  # Registers the project, optional dataset, and result options, plus
  # +--key+, +--compute-and-save+ (stored under +:compute+), and
  # +--try-load+ (stored under +:try_load+, default false).
  def parse_cli
    cli.defaults = {try_load: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '--key STRING',
        'Return only the value of the requested key'
      ){ |v| cli[:key] = v }
      opt.on(
        '--compute-and-save',
        'Compute and saves the statistics'
      ){ |v| cli[:compute] = v }
      opt.on(
        '--try-load',
        'Check if stat exists instead of computing on --compute-and-save'
      ){ |v| cli[:try_load] = v }
    end
  end

  # Loads the result, optionally computes and saves its statistics, and
  # prints either all statistics or only the one selected with +--key+.
  def perform
    # The result must be loaded before the --try-load check, which inspects
    # the statistics already stored in it.
    r = cli.load_result
    if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
      cli[:compute] = false
    end

    if cli[:compute]
      cli.say 'Computing statistics'
      stats = compute_stats(r)
      # Result types without a statistics routine leave the result untouched.
      unless stats.nil?
        r[:stats] = stats
        r.save
      end
    end

    print_stats(r)
  end

  private

  # Returns the statistics hash for the result type in +cli[:result]+, or
  # nil when that type has no statistics routine.
  def compute_stats(r)
    case cli[:result]
    when :raw_reads       then raw_reads_stats(r)
    when :trimmed_fasta   then trimmed_fasta_stats(r)
    when :assembly        then assembly_stats(r)
    when :cds             then cds_stats(r)
    when :essential_genes then essential_genes_stats(r)
    end
  end

  # Read counts, lengths, and G+C of the raw FastQ files; reported per
  # mate when the result has a +:pair1+ file, as a single set otherwise.
  def raw_reads_stats(r)
    if r[:files][:pair1].nil?
      s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
      {
        reads: s[:n],
        length_average: [s[:avg], 'bp'],
        length_standard_deviation: [s[:sd], 'bp'],
        g_c_content: [s[:gc], '%']
      }
    else
      s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
      s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
      {
        read_pairs: s1[:n],
        forward_length_average: [s1[:avg], 'bp'],
        forward_length_standard_deviation: [s1[:sd], 'bp'],
        forward_g_c_content: [s1[:gc], '%'],
        reverse_length_average: [s2[:avg], 'bp'],
        reverse_length_standard_deviation: [s2[:sd], 'bp'],
        reverse_g_c_content: [s2[:gc], '%']
      }
    end
  end

  # Read counts, lengths, and G+C of the trimmed FastA file (the coupled
  # file when registered, the single file otherwise).
  def trimmed_fasta_stats(r)
    f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
    s = MiGA.seqs_length(f, :fasta, gc: true)
    {
      reads: s[:n],
      length_average: [s[:avg], 'bp'],
      length_standard_deviation: [s[:sd], 'bp'],
      g_c_content: [s[:gc], '%']
    }
  end

  # Contig count, N50, total length, and G+C of the large contigs file.
  def assembly_stats(r)
    s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
      n50: true, gc: true)
    {
      contigs: s[:n],
      n50: [s[:n50], 'bp'],
      total_length: [s[:tot], 'bp'],
      g_c_content: [s[:gc], '%']
    }
  end

  # Protein count and average length; adds the coding density when the
  # dataset's assembly result already has a total length statistic.
  def cds_stats(r)
    s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
    stats = {
      predicted_proteins: s[:n],
      average_length: [s[:avg], 'aa']
    }
    asm = cli.load_dataset.add_result(:assembly, false)
    unless asm.nil? || asm[:stats][:total_length].nil?
      # NOTE(review): the 300.0 factor presumably converts residues to
      # nucleotides (x3) and a fraction to a percentage (x100) -- confirm.
      stats[:coding_density] =
        [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
    end
    stats
  end

  # Statistics parsed from the essential genes report: copy numbers for
  # multi datasets, completeness / contamination / quality otherwise. In
  # the latter case the dataset's +:quality+ metadata is also updated and
  # the dataset saved.
  def essential_genes_stats(r)
    d = cli.load_dataset
    return multi_copies_stats(r) if d.is_multi?

    fix_archaea_report(r, d)

    # Extract/compute quality values
    stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
    File.open(r.file_path(:report), 'r') do |fh|
      fh.each_line do |ln|
        m = /^! (Completeness|Contamination): (.*)%/.match(ln)
        stats[m[1].downcase.to_sym][0] = m[2].to_f if m
      end
    end
    stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
    d.metadata[:quality] =
      case stats[:quality]
      when 80..100 then :excellent
      when 50..80  then :high
      when 20..50  then :intermediate
      else              :low
      end
    d.save
    stats
  end

  # Mean and median copies per model, parsed from the report of a multi
  # dataset; both default to 0 when the report has no matching line.
  def multi_copies_stats(r)
    stats = {median_copies: 0, mean_copies: 0}
    File.open(r.file_path(:report), 'r') do |fh|
      fh.each_line do |ln|
        m = /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
        stats["#{m[1].downcase}_copies".to_sym] = m[2].to_f if m
      end
    end
    stats
  end

  # Fix estimate for Archaea: runs utils/arch-ess-genes.rb on the report
  # and re-registers the report files, but only for datasets whose taxonomy
  # is within d:Archaea and that have no +:bac_report+ file yet.
  def fix_archaea_report(r, d)
    tax = d.metadata[:tax]
    # `!` (unlike `not`) binds tighter than `&&`, so each condition below
    # is tested on its own and a missing taxonomy short-circuits safely.
    return unless !tax.nil? &&
                  tax.in?(Taxonomy.new('d:Archaea')) &&
                  r.file_path(:bac_report).nil?

    scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
    rep = r.file_path(:report)
    $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
    r.add_file(:bac_report, "#{d.name}.ess/log")
    r.add_file(:report, "#{d.name}.ess/log.archaea")
  end

  # Prints every stored statistic as "Label: value [unit].", or only the
  # bare value of the statistic selected with +--key+.
  def print_stats(r)
    if cli[:key].nil?
      r[:stats].each do |k, v|
        label = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
        value = v.is_a?(Array) ? v.join(' ') : v
        cli.puts "#{label}: #{value}."
      end
    else
      v = r[:stats][cli[:key].downcase.miga_name.to_sym]
      puts v.is_a?(Array) ? v.first : v
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that tabulates the statistics of one result type across the
# selected datasets of a project.
class MiGA::Cli::Action::Summary < MiGA::Cli::Action

  # Registers the project, dataset, dataset-filter, and result options,
  # plus +--tab+ (+:tabular+), +--key+ (+:key_md+), and +--with-units+
  # (+:units+). Both flags default to false.
  def parse_cli
    cli.defaults = { units: false, tabular: false }
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      cli.opt_filter_datasets(opt)
      cli.opt_object(opt, [:result_dataset])
      opt.on('--tab', 'Return a tab-delimited table') do |flag|
        cli[:tabular] = flag
      end
      opt.on('--key STRING', 'Return only the value of the requested key') do |key|
        cli[:key_md] = key
      end
      opt.on('--with-units', 'Include units in each cell') do |flag|
        cli[:units] = flag
      end
    end
  end

  # Prints one row per dataset with the dataset name in the first column,
  # followed by either every statistic found or only the one requested
  # with +--key+.
  def perform
    cli.ensure_par(result: '-r')
    datasets = cli.load_and_filter_datasets
    cli.say 'Loading results'

    rows = datasets.map do |dataset|
      result = dataset.add_result(cli[:result].to_sym, false)
      row = result.nil? ? {} : result[:stats]
      # The dataset name is stored in the statistics hash itself.
      row[:dataset] = dataset.name
      row
    end

    columns =
      if cli[:key_md].nil?
        rows.map(&:keys).flatten.uniq
      else
        [:dataset, cli[:key_md].downcase.miga_name.to_sym]
      end
    # Force the dataset name to be the first (and only) :dataset column.
    columns = [:dataset] + (columns - [:dataset])

    # Array cells are [value, unit] pairs: keep both or only the value.
    render =
      if cli[:units]
        ->(cell) { cell.is_a?(Array) ? cell.map(&:to_s).join('') : cell }
      else
        ->(cell) { cell.is_a?(Array) ? cell.first : cell }
      end
    table = rows.map { |row| columns.map { |col| render.call(row[col]) } }

    cli.puts MiGA.tabulate(columns, table, cli[:tabular])
  end
end
@@ -0,0 +1,102 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'miga/tax_index'
6
+ require 'zlib'
7
+ require 'tmpdir'
8
+
# CLI action that annotates each pairwise distance of a project with the
# lowest taxonomic rank shared by the two datasets, and prints the result
# as a tab-delimited table.
class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action

  # Registers the project and dataset-filter options, plus +-i+/+--index+
  # to reuse a pre-calculated tabular tax-index.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      cli.opt_filter_datasets(opt)
      opt.on(
        '-i', '--index FILE',
        'Pre-calculated tax-index (in tabular format) to be used',
        'If passed, dataset filtering arguments are ignored'
      ){ |v| cli[:index] = v }
    end
  end

  # Order-independent identifier of the pair (+a+, +b+): the two values in
  # ascending order joined by '-'.
  def cannid(a, b)
    (a > b ? [b, a] : [a, b]).join('-')
  end

  # Reads the project's distance matrix (ANI for clade projects, AAI
  # otherwise), walks the tax-index once per known rank, and prints one
  # line per pair of datasets.
  #
  # Raises if the distances result or its matrix file is missing.
  def perform
    p = cli.load_project
    metric = p.is_clade? ? 'ani' : 'aai'
    res_n = "#{metric}_distances"
    cli.say "Reading distances: 1-#{metric.upcase}"
    res = p.result(res_n)
    raise "#{res_n} not yet calculated" if res.nil?
    matrix = res.file_path(:matrix)
    raise "#{res_n} has no matrix" if matrix.nil?

    dist = read_distances(matrix)

    # The temporary directory only hosts the index built on the fly.
    Dir.mktmpdir do |dir|
      tab = cli[:index].nil? ? build_index(dir) : cli[:index]
      traverse_taxonomy(tab, dist)
    end

    cli.say 'Generating report'
    dist.each do |k, v|
      v[5] = v[4].reverse.join(' ')
      v[4] = v[4].first
      puts((k.split('-') + v).join("\t"))
    end
  end

  private

  # Loads the (optionally gzipped) matrix into a hash keyed by #cannid.
  # Each value holds columns 3, 5, and 6 of the row, the shared rank index
  # (initially 0), and the list of shared taxa (initially 'root:biota').
  def read_distances(matrix)
    dist = {}
    mfh = (matrix =~ /\.gz$/) ?
      Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
    begin
      mfh.each_line do |ln|
        next if mfh.lineno == 1 # header row
        row = ln.chomp.split("\t")
        dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
        cli.advance("Ln: #{mfh.lineno}") if (mfh.lineno % 1_000) == 0
      end
      cli.say " Lines: #{mfh.lineno}"
    ensure
      # Release the handle even if a row cannot be processed.
      mfh.close
    end
    dist
  end

  # Indexes the taxonomy of the filtered datasets that have one, writes it
  # in tabular format to index.tab inside +dir+, and returns that path.
  def build_index(dir)
    ds = cli.load_and_filter_datasets
    ds.keep_if { |d| !d.metadata[:tax].nil? }

    cli.say 'Indexing taxonomy'
    # Fully qualified: a bare TaxIndex can resolve to the sibling action
    # class MiGA::Cli::Action::TaxIndex when that action is loaded.
    tax_index = MiGA::TaxIndex.new
    ds.each { |d| tax_index << d }
    tab = File.expand_path('index.tab', dir)
    File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
    tab
  end

  # Scans the tabular index once per known rank and records, for every
  # pair of datasets listed under the same taxon of that rank, the rank
  # index and the taxon in +dist+.
  def traverse_taxonomy(tab, dist)
    cli.say 'Traversing taxonomy'
    rank_i = 0
    Taxonomy.KNOWN_RANKS.each do |rank|
      cli.say "o #{rank}: "
      rank_n = 0
      rank_i += 1
      # Loop-invariant patterns: an entry at the parent indentation level,
      # an entry of this rank, and a dataset line.
      parent_re = /^ {#{(rank_i-1)*2}}\S+:\S+:/
      rank_re = /^ {#{rank_i*2}}(#{rank}:(\S+)):/
      dataset_re = /^ *# (\S+)/
      in_rank = nil
      ds_name = []
      File.open(tab, 'r') do |fh|
        fh.each_line do |ln|
          if ln =~ parent_re
            in_rank = nil
            ds_name = []
          elsif (m = rank_re.match(ln))
            # Unknown taxa ('?') do not group datasets together.
            in_rank = m[2] == '?' ? nil : m[1]
            ds_name = []
          elsif (m = dataset_re.match(ln)) && !in_rank.nil?
            ds_i = m[1]
            ds_name << ds_i
            ds_name.each do |ds_j|
              k = cannid(ds_i, ds_j)
              next if dist[k].nil?
              rank_n += 1
              dist[k][3] = rank_i
              dist[k][4].unshift in_rank
            end
          end
        end
      end
      cli.say "#{rank_n} pairs of datasets"
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'miga/tax_index'
6
+
# CLI action that builds a taxonomy index of the selected datasets and
# saves it to a file in JSON or tabular format.
class MiGA::Cli::Action::TaxIndex < MiGA::Cli::Action

  # Registers the project and dataset-filter options, plus +-i+/+--index+
  # (output path) and +-f+/+--format+ (stored as a lowercase symbol,
  # default +:json+).
  def parse_cli
    cli.defaults = {format: :json}
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on(
        '-i', '--index PATH',
        '(Mandatory) File to create with the index'
      ){ |v| cli[:index] = v }
      opt.on(
        '-f', '--format STRING',
        "Format of the index file, by default: #{cli[:format]}",
        'Supported: json, tab.'
      ){ |v| cli[:format] = v.downcase.to_sym }
      cli.opt_filter_datasets(opt)
    end
  end

  # Indexes the taxonomy of every selected dataset that has one and writes
  # the index to the path given with +-i+.
  #
  # Raises if the requested format is neither +:json+ nor +:tab+.
  def perform
    cli.ensure_par(index: '-i')
    ds = cli.load_and_filter_datasets
    ds.keep_if { |d| !d.metadata[:tax].nil? }

    cli.say 'Indexing taxonomy'
    tax_index = MiGA::TaxIndex.new
    ds.each { |d| tax_index << d }

    # Serialize before opening the output: mode 'w' truncates the file, so
    # an unsupported format must be rejected while the file is untouched.
    content =
      case cli[:format]
      when :json then tax_index.to_json
      when :tab  then tax_index.to_tab
      else raise "Unsupported output format: #{cli[:format]}"
      end

    cli.say 'Saving index'
    File.open(cli[:index], 'w') { |fh| fh.print content }
  end
end
@@ -0,0 +1,59 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
# CLI action that registers the taxonomy of one dataset (from a string) or
# of many datasets (from a tab-delimited file).
class MiGA::Cli::Action::TaxSet < MiGA::Cli::Action

  # Registers the project and optional dataset options, plus
  # +-s+/+--tax-string+ (+:taxstring+) and +-t+/+--tax-file+ (+:taxfile+).
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      opt.on(
        '-s', '--tax-string STRING',
        'String corresponding to the taxonomy of the dataset',
        'A space-delimited set of \'rank:name\' pairs'
      ){ |v| cli[:taxstring] = v }
      opt.on('-t', '--tax-file PATH',
        '(Mandatory unless -D and -s are provided)',
        'Tab-delimited file containing datasets taxonomy',
        'Each row corresponds to a datasets and each column to a rank',
        'The first row must be a header with the rank names,',
        'and the first column must contain dataset names'
      ){ |v| cli[:taxfile] = v }
    end
  end

  # Saves the taxonomy into the +:tax+ metadata of the target dataset(s).
  #
  # With a tax-file, every non-blank row after the header is applied to
  # the dataset named in its first column; rows naming unknown datasets
  # are reported with a warning and skipped. Without a tax-file, both the
  # dataset and the tax-string are required.
  def perform
    p = cli.load_project
    if cli[:taxfile].nil?
      cli.ensure_par({dataset: '-D', taxstring: '-s'},
        '%<flag>s is mandatory unless -t is provided')
      cli.say 'Registering taxonomy'
      d = cli.load_dataset
      d.metadata[:tax] = Taxonomy.new(cli[:taxstring])
      d.save
    else
      cli.say 'Reading tax-file and registering taxonomy'
      header = nil
      # Block form closes the file even if a row raises.
      File.open(cli[:taxfile], 'r') do |tfh|
        tfh.each_line do |ln|
          next if ln =~ /^\s*?$/
          r = ln.chomp.split(/\t/, -1)
          dn = r.shift
          # The first non-blank row carries the rank names.
          if header.nil?
            header = r
            next
          end
          d = p.dataset(dn)
          if d.nil?
            # Use this handle's own counter: the global $. tracks whichever
            # stream was read last, which may not be the tax-file.
            warn "Impossible to find dataset at line #{tfh.lineno}: #{dn}. Ignoring..."
            next
          end
          d.metadata[:tax] = Taxonomy.new(r, header)
          d.save
          cli.say "o #{d.name} registered"
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'miga/tax_dist'
6
+
# CLI action that finds the closest relative of a dataset and prints the
# p-values of the taxonomic classification and/or novelty tests.
class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action

  # Registers the project and dataset options, plus +--ref-project+
  # (+:ref_project+, default false) and +-t+/+--test+ (+:test+, stored in
  # lowercase, default 'both').
  def parse_cli
    cli.defaults = {test: 'both', ref_project: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset])
      opt.on(
        '--ref-project',
        'Use the taxonomy from the reference project, not the current project'
      ){ |v| cli[:ref_project] = v }
      opt.on(
        '-t', '--test STRING',
        'Test to perform. Supported values: intax, novel, both'
      ){ |v| cli[:test] = v.downcase }
    end
  end

  # Prints the closest relative and the requested test tables.
  #
  # Raises if closest relatives are unavailable or empty, or if
  # +--ref-project+ is requested but the reference project is unset or
  # cannot be loaded.
  def perform
    d = cli.load_dataset
    cli.say 'Finding closest relative'
    cr = d.closest_relatives(1, cli[:ref_project])
    raise 'Action not supported for the project or dataset' if cr.nil?
    raise 'No close relatives found' if cr.empty?

    cli.say 'Querying probability distributions'
    cr = cr[0]
    cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."

    # The project must be loaded explicitly: a bare `p` would be Kernel#p.
    p = cli.load_project
    # Options live in `cli`, like everywhere else in this action.
    if cli[:ref_project]
      if (ref = p.metadata[:ref_project]).nil?
        raise '--ref-project requested but no reference project has been set'
      end
      if (q = MiGA::Project.load(ref)).nil?
        raise '--ref-project requested but reference project doesn\'t exist'
      end
      cr_d = q.dataset(cr[0])
    else
      cr_d = p.dataset(cr[0])
    end
    # A relative that cannot be found contributes no taxonomy ('?' cells).
    tax = cr_d.metadata[:tax] unless cr_d.nil?
    tax ||= {}

    if %w[intax both].include? cli[:test]
      # Intax
      # MiGA::TaxDist is fully qualified: a bare TaxDist can resolve to the
      # sibling action class MiGA::Cli::Action::TaxDist when it is loaded.
      r = MiGA::TaxDist.aai_pvalues(cr[1], :intax).map do |k, v|
        [Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, significance_stars(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic classification'
      cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
    end

    if %w[novel both].include? cli[:test]
      # Novel
      r = MiGA::TaxDist.aai_pvalues(cr[1], :novel).map do |k, v|
        [Taxonomy.LONG_RANKS[k], v, significance_stars(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic novelty'
      cli.puts MiGA.tabulate(%w[Rank P-value Signif.], r)
    end

    cli.puts ''
    cli.puts 'Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01.'
  end

  private

  # One '*' per threshold (0.5, 0.1, 0.05, 0.01) that +pvalue+ is below.
  def significance_stars(pvalue)
    '*' * [0.5, 0.1, 0.05, 0.01].count { |threshold| pvalue < threshold }
  end
end