miga-base 0.4.1.0 → 0.4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/miga +2 -244
- data/lib/miga/cli/action/about.rb +44 -0
- data/lib/miga/cli/action/add.rb +139 -0
- data/lib/miga/cli/action/add_result.rb +26 -0
- data/lib/miga/cli/action/console.rb +19 -0
- data/lib/miga/cli/action/daemon.rb +74 -0
- data/lib/miga/cli/action/date.rb +18 -0
- data/lib/miga/cli/action/doctor.rb +210 -0
- data/lib/miga/cli/action/edit.rb +24 -0
- data/lib/miga/cli/action/files.rb +31 -0
- data/lib/miga/cli/action/find.rb +48 -0
- data/lib/miga/cli/action/generic.rb +44 -0
- data/lib/miga/cli/action/get.rb +132 -0
- data/lib/miga/cli/action/init.rb +343 -0
- data/lib/miga/cli/action/ln.rb +42 -0
- data/lib/miga/cli/action/ls.rb +55 -0
- data/lib/miga/cli/action/ncbi_get.rb +218 -0
- data/lib/miga/cli/action/new.rb +45 -0
- data/lib/miga/cli/action/next_step.rb +27 -0
- data/lib/miga/cli/action/plugins.rb +28 -0
- data/lib/miga/cli/action/rm.rb +25 -0
- data/lib/miga/cli/action/run.rb +39 -0
- data/lib/miga/cli/action/stats.rb +140 -0
- data/lib/miga/cli/action/summary.rb +49 -0
- data/lib/miga/cli/action/tax_dist.rb +102 -0
- data/lib/miga/cli/action/tax_index.rb +47 -0
- data/lib/miga/cli/action/tax_set.rb +59 -0
- data/lib/miga/cli/action/tax_test.rb +77 -0
- data/lib/miga/cli/action.rb +66 -0
- data/lib/miga/cli/base.rb +90 -0
- data/lib/miga/cli.rb +426 -0
- data/lib/miga/project/result.rb +14 -6
- data/lib/miga/remote_dataset.rb +1 -1
- data/lib/miga/tax_index.rb +5 -4
- data/lib/miga/taxonomy/base.rb +63 -0
- data/lib/miga/taxonomy.rb +87 -92
- data/lib/miga/version.rb +6 -6
- data/test/taxonomy_test.rb +49 -9
- data/utils/distance/commands.rb +11 -11
- data/utils/distance/pipeline.rb +5 -5
- metadata +43 -49
- data/actions/about.rb +0 -43
- data/actions/add.rb +0 -129
- data/actions/add_result.rb +0 -30
- data/actions/daemon.rb +0 -55
- data/actions/date.rb +0 -14
- data/actions/doctor.rb +0 -201
- data/actions/edit.rb +0 -33
- data/actions/files.rb +0 -43
- data/actions/find.rb +0 -41
- data/actions/get.rb +0 -105
- data/actions/init.rb +0 -301
- data/actions/ln.rb +0 -47
- data/actions/ls.rb +0 -61
- data/actions/ncbi_get.rb +0 -192
- data/actions/new.rb +0 -44
- data/actions/next_step.rb +0 -33
- data/actions/plugins.rb +0 -25
- data/actions/rm.rb +0 -29
- data/actions/run.rb +0 -45
- data/actions/stats.rb +0 -149
- data/actions/summary.rb +0 -57
- data/actions/tax_dist.rb +0 -106
- data/actions/tax_index.rb +0 -46
- data/actions/tax_set.rb +0 -63
- data/actions/tax_test.rb +0 -80
@@ -0,0 +1,45 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that creates a new MiGA project.
class MiGA::Cli::Action::New < MiGA::Cli::Action

  # Registers the command-line options of the action: the project options
  # handled by +cli.opt_object+, plus name, description, comments, and
  # metadata, each stored verbatim in +cli+.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :project_type_req])
      opt.on(
        '-n', '--name STRING',
        'Name of the project'
      ){ |v| cli[:name] = v }
      opt.on(
        '-d', '--description STRING',
        'Description of the project'
      ){ |v| cli[:description] = v }
      opt.on(
        '-c', '--comments STRING',
        'Comments on the project'
      ){ |v| cli[:comments] = v }
      opt.on(
        '-m', '--metadata STRING',
        'Metadata as key-value pairs separated by = and delimited by comma',
        'Values are saved as strings except for booleans (true / false) or nil'
      ){ |v| cli[:metadata] = v }
    end
  end

  # Creates the project at +cli[:project]+, attaches the metadata collected
  # by the CLI, and saves it.
  #
  # Raises if either MiGA configuration file is missing from the home
  # directory, or if the project already exists.
  def perform
    cli.ensure_type(MiGA::Project)
    cli.ensure_par(project: '-P')
    home = ENV["HOME"]
    initialized = %w[.miga_rc .miga_daemon.json].all? do |file|
      File.exist? "#{home}/#{file}"
    end
    unless initialized
      raise "You must initialize MiGA before creating the first project.\n" +
        'Please use "miga init".'
    end
    cli.say "Creating project: #{cli[:project]}"
    # Fully-qualified constant, consistent with +ensure_type+ above: this
    # class is opened in compact form, so a bare +Project+ is not looked up
    # inside the MiGA namespace.
    if MiGA::Project.exist? cli[:project]
      raise 'Project already exists, aborting.'
    end
    p = MiGA::Project.new(cli[:project], false)
    p = cli.add_metadata(p)
    p.save
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that prints the next step reported by a project or by one of
# its datasets.
class MiGA::Cli::Action::NextStep < MiGA::Cli::Action

  # Registers the project and (optional) dataset options.
  def parse_cli
    cli.parse { |opt| cli.opt_object(opt, [:project, :dataset_opt]) }
  end

  # Prints the next step, or '?' when none is reported.
  #
  # Without a dataset, the project is asked for its next distances step and
  # then, if that is empty, for its next in-clade step. With a dataset, the
  # next preprocessing step is queried only if the dataset is active.
  def perform
    project = cli.load_project
    step =
      if cli[:dataset].nil?
        project.next_distances(false) || project.next_inclade(false)
      else
        dataset = cli.load_dataset
        dataset.is_active? ? dataset.next_preprocessing : nil
      end
    cli.puts(step || '?')
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that installs, uninstalls, and lists the plugins of a project.
class MiGA::Cli::Action::Plugins < MiGA::Cli::Action

  # Registers the project option plus --install and --uninstall.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on(
        '--install PATH',
        'Install the specified plugin in the project'
      ) do |path|
        cli[:install] = path
      end
      opt.on(
        '--uninstall PATH',
        'Uninstall the specified plugin from the project'
      ) do |path|
        cli[:uninstall] = path
      end
    end
  end

  # Applies the requested installation and/or removal (in that order), then
  # prints every plugin of the project, one per line.
  def perform
    project = cli.load_project
    to_install = cli[:install]
    project.install_plugin(to_install) unless to_install.nil?
    to_uninstall = cli[:uninstall]
    project.uninstall_plugin(to_uninstall) unless to_uninstall.nil?
    project.plugins.each do |plugin|
      cli.puts plugin
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that unlinks a dataset from its project and, optionally,
# removes the dataset itself.
class MiGA::Cli::Action::Rm < MiGA::Cli::Action

  # Registers the default object options plus -r/--remove (off by default).
  def parse_cli
    cli.defaults = {remove: false}
    cli.parse do |opt|
      cli.opt_object(opt)
      opt.on(
        '-r', '--remove',
        'Also remove all associated files',
        'By default, only unlinks from metadata'
      ) do |flag|
        cli[:remove] = flag
      end
    end
  end

  # Unlinks the dataset from the project; with --remove, also calls
  # +remove!+ on the dataset.
  def perform
    dataset = cli.load_dataset
    project = cli.load_project
    project.unlink_dataset(dataset.name)
    return unless cli[:remove]
    dataset.remove!
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'shellwords'
|
6
|
+
|
7
|
+
# CLI action that runs the script of a result step locally, for a project
# or for a dataset.
class MiGA::Cli::Action::Run < MiGA::Cli::Action

  # Registers the project, dataset, and result options plus -t/--threads
  # (1 by default).
  def parse_cli
    cli.defaults = {try_load: false, thr: 1}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '-t', '--threads INT', Integer,
        "Threads to use in the local run (by default: #{cli[:thr]})."
      ) do |threads|
        cli[:thr] = threads
      end
    end
  end

  # Builds a shell command of environment assignments followed by the
  # script path for the requested result, spawns it, and waits for it.
  #
  # Raises if the result is not among the RESULT_DIRS of the loaded object's
  # class, unless it is one of the virtual tasks :p or :d.
  def perform
    miga_root = MiGA.root_path
    project = cli.load_project
    command = [
      "PROJECT=#{project.path.shellescape}",
      'RUNTYPE=bash',
      "MIGA=#{miga_root.shellescape}",
      "CORES=#{cli[:thr]}"
    ]

    target = cli.load_project_or_dataset
    target_class = target.class
    is_virtual = [:p, :d].include?(cli[:result])
    if target.is_a? MiGA::Dataset
      command << "DATASET=#{target.name.shellescape}"
    end

    if !is_virtual && target_class.RESULT_DIRS[cli[:result]].nil?
      raise "Unsupported #{target_class.to_s.gsub(/.*::/, '')} result: #{cli[:result]}."
    end
    script = MiGA.script_path(cli[:result], miga: miga_root, project: project)
    command << script.shellescape
    Process.wait(spawn(command.join(' ')))
  end
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that computes (optionally) and prints the statistics stored in
# a result.
class MiGA::Cli::Action::Stats < MiGA::Cli::Action

  # Registers the project, dataset, and result options plus --key,
  # --compute-and-save, and --try-load.
  def parse_cli
    cli.defaults = {try_load: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '--key STRING',
        'Return only the value of the requested key'
      ){ |v| cli[:key] = v }
      opt.on(
        '--compute-and-save',
        'Compute and saves the statistics'
      ){ |v| cli[:compute] = v }
      opt.on(
        '--try-load',
        'Check if stat exists instead of computing on --compute-and-save'
      ){ |v| cli[:try_load] = v }
    end
  end

  # Loads the result, computes and saves its statistics when requested, and
  # prints either all statistics or only the one selected with --key.
  #
  # Statistics are computed for :raw_reads, :trimmed_fasta, :assembly, :cds,
  # and :essential_genes; for any other result nothing is computed or saved.
  def perform
    # The result must be loaded before the --try-load check, which reads
    # its stored statistics.
    r = cli.load_result
    cli[:compute] = false if cli[:try_load] &&
      !r[:stats].nil? && !r[:stats].empty?
    if cli[:compute]
      cli.say 'Computing statistics'
      stats = {}
      case cli[:result]
      when :raw_reads
        if r[:files][:pair1].nil?
          s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
          stats = {
            reads: s[:n],
            length_average: [s[:avg], 'bp'],
            length_standard_deviation: [s[:sd], 'bp'],
            g_c_content: [s[:gc], '%']}
        else
          s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
          s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
          stats = {
            read_pairs: s1[:n],
            forward_length_average: [s1[:avg], 'bp'],
            forward_length_standard_deviation: [s1[:sd], 'bp'],
            forward_g_c_content: [s1[:gc], '%'],
            reverse_length_average: [s2[:avg], 'bp'],
            reverse_length_standard_deviation: [s2[:sd], 'bp'],
            reverse_g_c_content: [s2[:gc], '%']}
        end
      when :trimmed_fasta
        f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
        s = MiGA.seqs_length(f, :fasta, gc: true)
        stats = {
          reads: s[:n],
          length_average: [s[:avg], 'bp'],
          length_standard_deviation: [s[:sd], 'bp'],
          g_c_content: [s[:gc], '%']}
      when :assembly
        s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
          n50: true, gc: true)
        stats = {
          contigs: s[:n],
          n50: [s[:n50], 'bp'],
          total_length: [s[:tot], 'bp'],
          g_c_content: [s[:gc], '%']}
      when :cds
        s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
        stats = {
          predicted_proteins: s[:n],
          average_length: [s[:avg], 'aa']}
        asm = cli.load_dataset.add_result(:assembly, false)
        unless asm.nil? or asm[:stats][:total_length].nil?
          # 300.0 = 3 (presumably nucleotides per amino acid) * 100 (%)
          stats[:coding_density] =
            [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
        end
      when :essential_genes
        d = cli.load_dataset
        if d.is_multi?
          stats = {median_copies: 0, mean_copies: 0}
          File.open(r.file_path(:report), 'r') do |fh|
            fh.each_line do |ln|
              if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
                stats["#{$1.downcase}_copies".to_sym] = $2.to_f
              end
            end
          end
        else
          # Fix estimate for Archaea.
          # `!` (not `not`) so that the negation applies only to the nil
          # check: the fix runs only for datasets with taxonomy, within
          # Archaea, and without a previously saved :bac_report.
          if !d.metadata[:tax].nil? &&
                d.metadata[:tax].in?(MiGA::Taxonomy.new('d:Archaea')) &&
                r.file_path(:bac_report).nil?
            scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
            rep = r.file_path(:report)
            $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
            r.add_file(:bac_report, "#{d.name}.ess/log")
            r.add_file(:report, "#{d.name}.ess/log.archaea")
          end
          # Extract/compute quality values
          stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
          File.open(r.file_path(:report), 'r') do |fh|
            fh.each_line do |ln|
              if /^! (Completeness|Contamination): (.*)%/.match(ln)
                stats[$1.downcase.to_sym][0] = $2.to_f
              end
            end
          end
          stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
          d.metadata[:quality] = case stats[:quality]
            when 80..100 ; :excellent
            when 50..80  ; :high
            when 20..50  ; :intermediate
            else         ; :low
          end
          d.save
        end
      else
        stats = nil
      end
      unless stats.nil?
        r[:stats] = stats
        r.save
      end
    end

    if cli[:key].nil?
      r[:stats].each do |k,v|
        cli.puts "#{k==:g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize}: #{
          v.is_a?(Array) ? v.join(' ') : v}."
      end
    else
      v = r[:stats][cli[:key].downcase.miga_name.to_sym]
      puts v.is_a?(Array) ? v.first : v
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that tabulates the statistics of one result across the
# selected datasets of a project.
class MiGA::Cli::Action::Summary < MiGA::Cli::Action

  # Registers the project, dataset, dataset-filtering, and result options
  # plus --tab, --key, and --with-units.
  def parse_cli
    cli.defaults = {units: false, tabular: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      cli.opt_filter_datasets(opt)
      cli.opt_object(opt, [:result_dataset])
      opt.on(
        '--tab',
        'Return a tab-delimited table'
      ) do |flag|
        cli[:tabular] = flag
      end
      opt.on(
        '--key STRING',
        'Return only the value of the requested key'
      ) do |key|
        cli[:key_md] = key
      end
      opt.on(
        '--with-units',
        'Include units in each cell'
      ) do |flag|
        cli[:units] = flag
      end
    end
  end

  # Prints a table with one row per dataset and one column per statistic,
  # with the dataset name always as the first column.
  def perform
    cli.ensure_par(result: '-r')
    datasets = cli.load_and_filter_datasets
    cli.say 'Loading results'
    stats = datasets.map do |dataset|
      result = dataset.add_result(cli[:result].to_sym, false)
      row = result.nil? ? {} : result[:stats]
      row[:dataset] = dataset.name
      row
    end

    keys =
      if cli[:key_md].nil?
        stats.map(&:keys).flatten.uniq
      else
        [:dataset, cli[:key_md].downcase.miga_name.to_sym]
      end
    # Force the dataset name into the first column
    keys.delete :dataset
    keys.unshift :dataset

    # Array values are [value, unit] pairs: either glue them together or
    # keep only the first element
    format_array =
      if cli[:units]
        ->(pair) { pair.map(&:to_s).join('') }
      else
        ->(pair) { pair.first }
      end
    table = stats.map do |row|
      keys.map do |key|
        cell = row[key]
        cell.is_a?(Array) ? format_array.call(cell) : cell
      end
    end
    cli.puts MiGA.tabulate(keys, table, cli[:tabular])
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'miga/tax_index'
|
6
|
+
require 'zlib'
|
7
|
+
require 'tmpdir'
|
8
|
+
|
9
|
+
# CLI action that annotates each pairwise distance of a project with the
# taxonomic ranks recorded for the pair in a taxonomy index.
class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action

  # Registers the project and dataset-filtering options plus -i/--index.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      cli.opt_filter_datasets(opt)
      opt.on(
        '-i', '--index FILE',
        'Pre-calculated tax-index (in tabular format) to be used',
        'If passed, dataset filtering arguments are ignored'
      ){ |v| cli[:index] = v }
    end
  end

  # Canonical identifier of an unordered pair: both names in ascending
  # order, joined by '-'.
  def cannid(a, b)
    (a > b ? [b, a] : [a, b]).join('-')
  end

  # Reads the distance matrix of the project (ANI for clades, AAI
  # otherwise), traverses the tabular taxonomy index once per known rank,
  # and prints one tab-delimited line per pair in the matrix.
  #
  # Raises if the distances result or its matrix file is missing.
  def perform
    p = cli.load_project
    metric = p.is_clade? ? 'ani' : 'aai'
    res_n = "#{metric}_distances"
    cli.say "Reading distances: 1-#{metric.upcase}"
    res = p.result(res_n)
    raise "#{res_n} not yet calculated" if res.nil?
    matrix = res.file_path(:matrix)
    raise "#{res_n} has no matrix" if matrix.nil?
    dist = {}
    mfh = (matrix =~ /\.gz$/) ?
      Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
    begin
      mfh.each_line do |ln|
        next if mfh.lineno == 1 # Skip the first line of the matrix
        row = ln.chomp.split("\t")
        # Slot 3 receives the index of the last rank found for the pair and
        # slot 4 accumulates the matching 'rank:name' entries
        dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
        cli.advance("Ln: #{mfh.lineno}") if (mfh.lineno % 1_000) == 0
      end
      cli.say " Lines: #{mfh.lineno}"
    ensure
      # Release the handle even if reading or parsing fails
      mfh.close
    end

    Dir.mktmpdir do |dir|
      if cli[:index].nil?
        ds = cli.load_and_filter_datasets
        ds.keep_if { |d| !d.metadata[:tax].nil? }

        cli.say 'Indexing taxonomy'
        # Fully qualified: this class is a sibling of the action class
        # MiGA::Cli::Action::TaxIndex, so a bare TaxIndex is ambiguous
        tax_index = MiGA::TaxIndex.new
        ds.each { |d| tax_index << d }
        tab = File.expand_path('index.tab', dir)
        File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
      else
        tab = cli[:index]
      end

      cli.say 'Traversing taxonomy'
      rank_i = 0
      MiGA::Taxonomy.KNOWN_RANKS.each do |rank|
        cli.say "o #{rank}: "
        rank_n = 0
        rank_i += 1
        in_rank = nil
        ds_name = []
        # Both patterns depend only on the rank, so build them once per
        # rank instead of once per line
        parent_re = /^ {#{(rank_i-1)*2}}\S+:\S+:/
        rank_re = /^ {#{rank_i*2}}(#{rank}:(\S+)):/
        File.open(tab, 'r') do |fh|
          fh.each_line do |ln|
            if ln =~ parent_re
              # Entry at the previous indentation level: reset the group
              in_rank = nil
              ds_name = []
            elsif ln =~ rank_re
              # Entry of the current rank; names equal to '?' are ignored
              in_rank = $2 == '?' ? nil : $1
              ds_name = []
            elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
              # Dataset line: pair it with every dataset seen so far under
              # the same entry of the current rank
              ds_i = $1
              ds_name << ds_i
              ds_name.each do |ds_j|
                k = cannid(ds_i, ds_j)
                next if dist[k].nil?
                rank_n += 1
                dist[k][3] = rank_i
                dist[k][4].unshift in_rank
              end
            end
          end
        end
        cli.say "#{rank_n} pairs of datasets"
      end
    end

    cli.say 'Generating report'
    dist.each do |k, v|
      v[5] = v[4].reverse.join(' ')
      v[4] = v[4].first
      puts((k.split('-') + v).join("\t"))
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'miga/tax_index'
|
6
|
+
|
7
|
+
# CLI action that builds a taxonomy index from the datasets of a project
# and writes it to a file.
class MiGA::Cli::Action::TaxIndex < MiGA::Cli::Action

  # Registers the project and dataset-filtering options plus -i/--index and
  # -f/--format (json by default).
  def parse_cli
    cli.defaults = {format: :json}
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on(
        '-i', '--index PATH',
        '(Mandatory) File to create with the index'
      ){ |v| cli[:index] = v }
      opt.on(
        '-f', '--format STRING',
        "Format of the index file, by default: #{cli[:format]}",
        'Supported: json, tab.'
      ){ |v| cli[:format] = v.downcase.to_sym }
      cli.opt_filter_datasets(opt)
    end
  end

  # Indexes every selected dataset that has taxonomy and saves the index to
  # +cli[:index]+ in the requested format.
  #
  # Raises if the format is neither :json nor :tab; in that case the output
  # file is left untouched.
  def perform
    cli.ensure_par(index: '-i')
    ds = cli.load_and_filter_datasets
    ds.keep_if { |d| !d.metadata[:tax].nil? }

    cli.say 'Indexing taxonomy'
    tax_index = MiGA::TaxIndex.new
    ds.each { |d| tax_index << d }

    cli.say 'Saving index'
    # Serialize before opening the file, so that an unsupported format does
    # not truncate an existing index
    content =
      case cli[:format]
      when :json
        tax_index.to_json
      when :tab
        tax_index.to_tab
      else
        raise "Unsupported output format: #{cli[:format]}"
      end
    File.open(cli[:index], 'w') { |fh| fh.print content }
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# CLI action that registers the taxonomy of one dataset (from a string) or
# of several datasets (from a tab-delimited file).
class MiGA::Cli::Action::TaxSet < MiGA::Cli::Action

  # Registers the project and dataset options plus -s/--tax-string and
  # -t/--tax-file.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      opt.on(
        '-s', '--tax-string STRING',
        'String corresponding to the taxonomy of the dataset',
        'A space-delimited set of \'rank:name\' pairs'
      ){ |v| cli[:taxstring] = v }
      opt.on('-t', '--tax-file PATH',
        '(Mandatory unless -D and -s are provided)',
        'Tab-delimited file containing datasets taxonomy',
        'Each row corresponds to a datasets and each column to a rank',
        'The first row must be a header with the rank names,',
        'and the first column must contain dataset names'
      ){ |v| cli[:taxfile] = v }
    end
  end

  # Saves the taxonomy in the metadata of the dataset(s).
  #
  # With a tax-file, every non-blank row after the header is registered for
  # the dataset named in its first column; rows naming unknown datasets are
  # reported with a warning and skipped. Otherwise, both the dataset and the
  # tax-string are mandatory.
  def perform
    p = cli.load_project
    if !cli[:taxfile].nil?
      cli.say 'Reading tax-file and registering taxonomy'
      # Block form closes the file even if a row fails to be registered
      File.open(cli[:taxfile], 'r') do |tfh|
        header = nil
        tfh.each_line do |ln|
          next if ln =~ /^\s*?$/
          r = ln.chomp.split(/\t/, -1)
          dn = r.shift
          if header.nil?
            # First non-blank row: rank names
            header = r
            next
          end
          d = p.dataset(dn)
          if d.nil?
            # Use the handle's own counter: the global $. follows whichever
            # stream was read last
            warn "Impossible to find dataset at line #{tfh.lineno}: #{dn}. Ignoring..."
            next
          end
          d.metadata[:tax] = MiGA::Taxonomy.new(r, header)
          d.save
          cli.say "o #{d.name} registered"
        end
      end
    else
      cli.ensure_par({dataset: '-D', taxstring: '-s'},
        '%<flag>s is mandatory unless -t is provided')
      cli.say 'Registering taxonomy'
      d = cli.load_dataset
      d.metadata[:tax] = MiGA::Taxonomy.new(cli[:taxstring])
      d.save
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'miga/tax_dist'
|
6
|
+
|
7
|
+
# CLI action that finds the closest relative of a dataset and prints the
# p-values of the taxonomic classification and/or novelty tests.
class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action

  # Registers the project and dataset options plus --ref-project and
  # -t/--test ('both' by default).
  def parse_cli
    cli.defaults = {test: 'both', ref_project: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset])
      opt.on(
        '--ref-project',
        'Use the taxonomy from the reference project, not the current project'
      ){ |v| cli[:ref_project] = v }
      opt.on(
        '-t', '--test STRING',
        'Test to perform. Supported values: intax, novel, both'
      ){ |v| cli[:test] = v.downcase }
    end
  end

  # Prints the closest relative of the dataset and a table for each
  # requested test, built from the AAI to that relative.
  #
  # Raises if closest relatives are unsupported or none is found, or if
  # --ref-project is requested but the reference project is not set or
  # cannot be loaded.
  def perform
    d = cli.load_dataset
    cli.say 'Finding closest relative'
    cr = d.closest_relatives(1, cli[:ref_project])
    raise 'Action not supported for the project or dataset' if cr.nil?
    raise 'No close relatives found' if cr.empty?

    cli.say 'Querying probability distributions'
    cr = cr[0]
    cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
    # The project must be loaded explicitly: without a local assignment,
    # +p+ is Kernel#p, which returns nil when called without arguments
    p = cli.load_project
    if cli[:ref_project]
      if (ref = p.metadata[:ref_project]).nil?
        raise '--ref-project requested but no reference project has been set'
      end
      if (q = MiGA::Project.load(ref)).nil?
        raise '--ref-project requested but reference project doesn\'t exist'
      end
      cr_d = q.dataset(cr[0])
    else
      cr_d = p.dataset(cr[0])
    end
    tax = cr_d.metadata[:tax] unless cr_d.nil?
    tax ||= {}

    # One asterisk per significance threshold above the p-value
    signif = lambda do |pvalue|
      '*' * [0.5, 0.1, 0.05, 0.01].count { |i| pvalue < i }
    end

    if %w[intax both].include? cli[:test]
      # Intax
      # MiGA::TaxDist is fully qualified: a bare TaxDist is ambiguous with
      # the sibling action class MiGA::Cli::Action::TaxDist
      r = MiGA::TaxDist.aai_pvalues(cr[1], :intax).map do |k,v|
        [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, signif.call(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic classification'
      cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
    end

    if %w[novel both].include? cli[:test]
      # Novel
      r = MiGA::TaxDist.aai_pvalues(cr[1], :novel).map do |k,v|
        [MiGA::Taxonomy.LONG_RANKS[k], v, signif.call(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic novelty'
      cli.puts MiGA.tabulate(%w[Rank P-value Signif.], r)
    end

    cli.puts ''
    cli.puts 'Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01.'
  end
end
|