miga-base 0.4.1.0 → 0.4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/miga +2 -244
- data/lib/miga/cli/action/about.rb +44 -0
- data/lib/miga/cli/action/add.rb +139 -0
- data/lib/miga/cli/action/add_result.rb +26 -0
- data/lib/miga/cli/action/console.rb +19 -0
- data/lib/miga/cli/action/daemon.rb +74 -0
- data/lib/miga/cli/action/date.rb +18 -0
- data/lib/miga/cli/action/doctor.rb +210 -0
- data/lib/miga/cli/action/edit.rb +24 -0
- data/lib/miga/cli/action/files.rb +31 -0
- data/lib/miga/cli/action/find.rb +48 -0
- data/lib/miga/cli/action/generic.rb +44 -0
- data/lib/miga/cli/action/get.rb +132 -0
- data/lib/miga/cli/action/init.rb +343 -0
- data/lib/miga/cli/action/ln.rb +42 -0
- data/lib/miga/cli/action/ls.rb +55 -0
- data/lib/miga/cli/action/ncbi_get.rb +218 -0
- data/lib/miga/cli/action/new.rb +45 -0
- data/lib/miga/cli/action/next_step.rb +27 -0
- data/lib/miga/cli/action/plugins.rb +28 -0
- data/lib/miga/cli/action/rm.rb +25 -0
- data/lib/miga/cli/action/run.rb +39 -0
- data/lib/miga/cli/action/stats.rb +140 -0
- data/lib/miga/cli/action/summary.rb +49 -0
- data/lib/miga/cli/action/tax_dist.rb +102 -0
- data/lib/miga/cli/action/tax_index.rb +47 -0
- data/lib/miga/cli/action/tax_set.rb +59 -0
- data/lib/miga/cli/action/tax_test.rb +77 -0
- data/lib/miga/cli/action.rb +66 -0
- data/lib/miga/cli/base.rb +90 -0
- data/lib/miga/cli.rb +426 -0
- data/lib/miga/project/result.rb +14 -6
- data/lib/miga/remote_dataset.rb +1 -1
- data/lib/miga/tax_index.rb +5 -4
- data/lib/miga/taxonomy/base.rb +63 -0
- data/lib/miga/taxonomy.rb +87 -92
- data/lib/miga/version.rb +6 -6
- data/test/taxonomy_test.rb +49 -9
- data/utils/distance/commands.rb +11 -11
- data/utils/distance/pipeline.rb +5 -5
- metadata +43 -49
- data/actions/about.rb +0 -43
- data/actions/add.rb +0 -129
- data/actions/add_result.rb +0 -30
- data/actions/daemon.rb +0 -55
- data/actions/date.rb +0 -14
- data/actions/doctor.rb +0 -201
- data/actions/edit.rb +0 -33
- data/actions/files.rb +0 -43
- data/actions/find.rb +0 -41
- data/actions/get.rb +0 -105
- data/actions/init.rb +0 -301
- data/actions/ln.rb +0 -47
- data/actions/ls.rb +0 -61
- data/actions/ncbi_get.rb +0 -192
- data/actions/new.rb +0 -44
- data/actions/next_step.rb +0 -33
- data/actions/plugins.rb +0 -25
- data/actions/rm.rb +0 -29
- data/actions/run.rb +0 -45
- data/actions/stats.rb +0 -149
- data/actions/summary.rb +0 -57
- data/actions/tax_dist.rb +0 -106
- data/actions/tax_index.rb +0 -46
- data/actions/tax_set.rb +0 -63
- data/actions/tax_test.rb +0 -80
@@ -0,0 +1,45 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::New < MiGA::Cli::Action

  # Registers the command-line options of this action: the shared object
  # options requested through +cli.opt_object+ plus four free-text options
  # (name, description, comments, metadata), each stored in +cli+ under the
  # key of the same name.
  def parse_cli
    # Each entry: short flag, long flag, key in +cli+, help line(s)
    text_options = [
      ['-n', '--name STRING', :name, 'Name of the project'],
      ['-d', '--description STRING', :description,
       'Description of the project'],
      ['-c', '--comments STRING', :comments, 'Comments on the project'],
      ['-m', '--metadata STRING', :metadata,
       'Metadata as key-value pairs separated by = and delimited by comma',
       'Values are saved as strings except for booleans (true / false) or nil']
    ]
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :project_type_req])
      text_options.each do |short, long, key, *help|
        opt.on(short, long, *help) { |v| cli[key] = v }
      end
    end
  end

  # Creates and saves a new project at the path given by +cli[:project]+.
  #
  # Raises a RuntimeError if either +.miga_rc+ or +.miga_daemon.json+ is
  # missing from the directory named by the HOME environment variable, or if
  # +Project.exist?+ reports that the project is already there.
  def perform
    cli.ensure_type(MiGA::Project)
    cli.ensure_par(project: '-P')

    home = ENV["HOME"]
    initialized = %w[.miga_rc .miga_daemon.json].all? do |file|
      File.exist? "#{home}/#{file}"
    end
    unless initialized
      raise "You must initialize MiGA before creating the first project.\n" \
            'Please use "miga init".'
    end

    cli.say "Creating project: #{cli[:project]}"
    raise 'Project already exists, aborting.' if Project.exist?(cli[:project])

    # NOTE(review): the meaning of the second argument (false) is defined by
    # Project.new, which is not visible here
    project = cli.add_metadata(Project.new(cli[:project], false))
    project.save
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::NextStep < MiGA::Cli::Action

  # Registers the shared object options requested through +cli.opt_object+
  # (project and optional dataset).
  def parse_cli
    cli.parse { |opt| cli.opt_object(opt, [:project, :dataset_opt]) }
  end

  # Prints the next pending step, or '?' when none is reported.
  #
  # Without a dataset, the project is asked first for +next_distances+ and
  # then for +next_inclade+. With a dataset, +next_preprocessing+ is only
  # consulted if the dataset is active.
  def perform
    project = cli.load_project
    step =
      if cli[:dataset].nil?
        project.next_distances(false) || project.next_inclade(false)
      else
        dataset = cli.load_dataset
        dataset.is_active? ? dataset.next_preprocessing : nil
      end
    cli.puts(step || '?')
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::Plugins < MiGA::Cli::Action

  # Registers the shared project option plus +--install+ and +--uninstall+,
  # each taking a path stored in +cli+ under the key of the same name.
  def parse_cli
    # Each entry: flag, key in +cli+, help line
    path_options = [
      ['--install PATH', :install,
       'Install the specified plugin in the project'],
      ['--uninstall PATH', :uninstall,
       'Uninstall the specified plugin from the project']
    ]
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      path_options.each do |flag, key, help|
        opt.on(flag, help) { |v| cli[key] = v }
      end
    end
  end

  # Installs and/or uninstalls the requested plugins (installation first),
  # and then prints every entry of the project's plugin list.
  def perform
    project = cli.load_project

    to_install = cli[:install]
    project.install_plugin(to_install) unless to_install.nil?

    to_uninstall = cli[:uninstall]
    project.uninstall_plugin(to_uninstall) unless to_uninstall.nil?

    project.plugins.each { |plugin| cli.puts plugin }
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::Rm < MiGA::Cli::Action

  # Registers the default object options of +cli.opt_object+ and the
  # +-r/--remove+ switch (off by default).
  def parse_cli
    cli.defaults = {remove: false}
    cli.parse do |opt|
      cli.opt_object(opt)
      opt.on(
        '-r', '--remove',
        'Also remove all associated files',
        'By default, only unlinks from metadata'
      ) { |flag| cli[:remove] = flag }
    end
  end

  # Unlinks the selected dataset from the project and, when +--remove+ was
  # given, additionally calls +remove!+ on the dataset.
  def perform
    dataset = cli.load_dataset
    project = cli.load_project
    project.unlink_dataset(dataset.name)
    dataset.remove! if cli[:remove]
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'shellwords'
|
6
|
+
|
7
|
+
class MiGA::Cli::Action::Run < MiGA::Cli::Action

  # Registers the shared project/dataset/result options and
  # +-t/--threads+ (Integer, 1 by default).
  def parse_cli
    cli.defaults = {try_load: false, thr: 1}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '-t', '--threads INT', Integer,
        "Threads to use in the local run (by default: #{cli[:thr]})."
      ) { |threads| cli[:thr] = threads }
    end
  end

  # Runs the script of the requested result locally and waits for it.
  #
  # The command line is a single shell string made of variable assignments
  # (PROJECT, RUNTYPE, MIGA, CORES, and DATASET when the target is a
  # dataset) followed by the shell-escaped script path.
  #
  # Raises a RuntimeError when the result is neither listed in the target
  # class's +RESULT_DIRS+ nor one of the special results +:p+ / +:d+.
  def perform
    miga = MiGA.root_path
    project = cli.load_project
    command = [
      "PROJECT=#{project.path.shellescape}",
      'RUNTYPE=bash',
      "MIGA=#{miga.shellescape}",
      "CORES=#{cli[:thr]}"
    ]

    target = cli.load_project_or_dataset
    target_class = target.class
    # :p and :d have no entry in RESULT_DIRS but are still accepted
    virtual_task = [:p, :d].include?(cli[:result])
    if target.is_a? MiGA::Dataset
      command << "DATASET=#{target.name.shellescape}"
    end

    known_result = !target_class.RESULT_DIRS[cli[:result]].nil?
    unless known_result || virtual_task
      type_name = target_class.to_s.gsub(/.*::/, '')
      raise "Unsupported #{type_name} result: #{cli[:result]}."
    end

    script = MiGA.script_path(cli[:result], miga: miga, project: project)
    command << script.shellescape
    Process.wait(spawn(command.join(' ')))
  end
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::Stats < MiGA::Cli::Action

  # Registers the shared project/dataset/result options plus +--key+,
  # +--compute-and-save+, and +--try-load+ (off by default).
  def parse_cli
    cli.defaults = {try_load: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt, :result])
      opt.on(
        '--key STRING',
        'Return only the value of the requested key'
      ){ |v| cli[:key] = v }
      opt.on(
        '--compute-and-save',
        'Compute and saves the statistics'
      ){ |v| cli[:compute] = v }
      opt.on(
        '--try-load',
        'Check if stat exists instead of computing on --compute-and-save'
      ){ |v| cli[:try_load] = v }
    end
  end

  # Optionally computes and saves the statistics of the selected result, and
  # then prints them (all of them, or only the one selected with +--key+).
  #
  # With +--try-load+, computation is skipped if the result already carries
  # a non-empty +:stats+ entry. Statistics are only computed for the results
  # +:raw_reads+, +:trimmed_fasta+, +:assembly+, +:cds+, and
  # +:essential_genes+; for any other result nothing is saved.
  def perform
    # The result must be loaded before its stored statistics are inspected
    r = cli.load_result
    if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
      cli[:compute] = false
    end

    if cli[:compute]
      cli.say 'Computing statistics'
      stats = {}
      case cli[:result]
      when :raw_reads
        if r[:files][:pair1].nil?
          # Single reads
          s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
          stats = {
            reads: s[:n],
            length_average: [s[:avg], 'bp'],
            length_standard_deviation: [s[:sd], 'bp'],
            g_c_content: [s[:gc], '%']}
        else
          # Paired reads: forward and reverse files are measured separately
          s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
          s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
          stats = {
            read_pairs: s1[:n],
            forward_length_average: [s1[:avg], 'bp'],
            forward_length_standard_deviation: [s1[:sd], 'bp'],
            forward_g_c_content: [s1[:gc], '%'],
            reverse_length_average: [s2[:avg], 'bp'],
            reverse_length_standard_deviation: [s2[:sd], 'bp'],
            reverse_g_c_content: [s2[:gc], '%']}
        end
      when :trimmed_fasta
        f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
        s = MiGA.seqs_length(f, :fasta, gc: true)
        stats = {
          reads: s[:n],
          length_average: [s[:avg], 'bp'],
          length_standard_deviation: [s[:sd], 'bp'],
          g_c_content: [s[:gc], '%']}
      when :assembly
        s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
          n50: true, gc: true)
        stats = {
          contigs: s[:n],
          n50: [s[:n50], 'bp'],
          total_length: [s[:tot], 'bp'],
          g_c_content: [s[:gc], '%']}
      when :cds
        s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
        stats = {
          predicted_proteins: s[:n],
          average_length: [s[:avg], 'aa']}
        asm = cli.load_dataset.add_result(:assembly, false)
        unless asm.nil? or asm[:stats][:total_length].nil?
          # 300.0 = 3 (protein length to nucleotides) * 100 (percentage)
          stats[:coding_density] =
            [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
        end
      when :essential_genes
        d = cli.load_dataset
        if d.is_multi?
          stats = {median_copies: 0, mean_copies: 0}
          File.open(r.file_path(:report), 'r') do |fh|
            fh.each_line do |ln|
              if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
                stats["#{$1.downcase}_copies".to_sym] = $2.to_f
              end
            end
          end
        else
          # Fix estimate for Archaea.
          # `!` is required here: `not` binds looser than `&&` and would
          # negate the whole conjunction instead of only the nil check.
          if !d.metadata[:tax].nil? &&
                d.metadata[:tax].in?(Taxonomy.new('d:Archaea')) &&
                r.file_path(:bac_report).nil?
            scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
            rep = r.file_path(:report)
            $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
            r.add_file(:bac_report, "#{d.name}.ess/log")
            r.add_file(:report, "#{d.name}.ess/log.archaea")
          end
          # Extract/compute quality values
          stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
          File.open(r.file_path(:report), 'r') do |fh|
            fh.each_line do |ln|
              if /^! (Completeness|Contamination): (.*)%/.match(ln)
                stats[$1.downcase.to_sym][0] = $2.to_f
              end
            end
          end
          # Quality = completeness - 5 * contamination
          stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
          d.metadata[:quality] = case stats[:quality]
            when 80..100 ; :excellent
            when 50..80  ; :high
            when 20..50  ; :intermediate
            else         ; :low
          end
          d.save
        end
      else
        # Unsupported result: nothing to save
        stats = nil
      end
      unless stats.nil?
        r[:stats] = stats
        r.save
      end
    end

    if cli[:key].nil?
      r[:stats].each do |k, v|
        label = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
        value = v.is_a?(Array) ? v.join(' ') : v
        cli.puts "#{label}: #{value}."
      end
    else
      v = r[:stats][cli[:key].downcase.miga_name.to_sym]
      # NOTE(review): this branch prints through Kernel#puts while the branch
      # above uses cli.puts; confirm whether the difference is intentional
      puts v.is_a?(Array) ? v.first : v
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::Summary < MiGA::Cli::Action

  # Registers the shared project/dataset options, the dataset filters, the
  # result option, and the switches +--tab+, +--key+, and +--with-units+.
  # Note that +--key+ is stored under +:key_md+.
  def parse_cli
    cli.defaults = {units: false, tabular: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      cli.opt_filter_datasets(opt)
      cli.opt_object(opt, [:result_dataset])
      opt.on('--tab', 'Return a tab-delimited table') do |flag|
        cli[:tabular] = flag
      end
      opt.on('--key STRING', 'Return only the value of the requested key') do |key|
        cli[:key_md] = key
      end
      opt.on('--with-units', 'Include units in each cell') do |flag|
        cli[:units] = flag
      end
    end
  end

  # Prints a table with the statistics of the selected result across the
  # filtered datasets, one row per dataset, with +:dataset+ always as the
  # first column.
  #
  # Array values are reduced to their first element, or to the concatenation
  # of all their elements when +--with-units+ is on.
  def perform
    cli.ensure_par(result: '-r')
    datasets = cli.load_and_filter_datasets
    cli.say 'Loading results'

    rows = datasets.map do |dataset|
      result = dataset.add_result(cli[:result].to_sym, false)
      row = result.nil? ? {} : result[:stats]
      # The dataset name is written into the stats hash itself
      row[:dataset] = dataset.name
      row
    end

    columns =
      if cli[:key_md].nil?
        rows.map(&:keys).flatten.uniq
      else
        [:dataset, cli[:key_md].downcase.miga_name.to_sym]
      end
    # Force :dataset to be the first column
    columns.delete :dataset
    columns.unshift :dataset

    with_units = cli[:units]
    table = rows.map do |row|
      columns.map do |column|
        cell = row[column]
        if !cell.is_a?(Array)
          cell
        elsif with_units
          cell.map(&:to_s).join('')
        else
          cell.first
        end
      end
    end
    cli.puts MiGA.tabulate(columns, table, cli[:tabular])
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'miga/tax_index'
|
6
|
+
require 'zlib'
|
7
|
+
require 'tmpdir'
|
8
|
+
|
9
|
+
class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action

  # Registers the shared project option, the dataset filters, and
  # +-i/--index+ (path to a pre-calculated tabular tax-index).
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      cli.opt_filter_datasets(opt)
      opt.on(
        '-i', '--index FILE',
        'Pre-calculated tax-index (in tabular format) to be used',
        'If passed, dataset filtering arguments are ignored'
      ){ |v| cli[:index] = v }
    end
  end

  # Returns an order-independent identifier for the pair +a+, +b+: both
  # values joined by '-', smallest first.
  def cannid(a, b)
    (a > b ? [b, a] : [a, b]).join('-')
  end

  # Prints, for each pair of datasets in the project's distance matrix, a
  # tab-delimited line with the pair, three columns copied from the matrix,
  # the index of the deepest rank in which both datasets were found under
  # the same taxon, that taxon, and the list of all shared taxa.
  #
  # The matrix comes from the +ani_distances+ result for clade projects and
  # from +aai_distances+ otherwise.
  #
  # Raises a RuntimeError if that result or its matrix file is missing.
  def perform
    p = cli.load_project
    metric = p.is_clade? ? 'ani' : 'aai'
    res_n = "#{metric}_distances"
    cli.say "Reading distances: 1-#{metric.upcase}"
    res = p.result(res_n)
    raise "#{res_n} not yet calculated" if res.nil?
    matrix = res.file_path(:matrix)
    raise "#{res_n} has no matrix" if matrix.nil?

    # dist[pair] = [row[3], row[5], row[6], rank index, shared taxa]
    # NOTE(review): the meaning of columns 3, 5, and 6 is defined by the
    # matrix format, which is not visible here
    dist = {}
    mfh = (matrix =~ /\.gz$/) ?
      Zlib::GzipReader.open(matrix) : File.open(matrix, 'r')
    begin
      mfh.each_line do |ln|
        next if mfh.lineno == 1 # Skip the header line
        row = ln.chomp.split("\t")
        dist[cannid(row[1], row[2])] = [row[3], row[5], row[6], 0, ['root:biota']]
        cli.advance("Ln: #{mfh.lineno}") if (mfh.lineno % 1_000) == 0
      end
      cli.say " Lines: #{mfh.lineno}"
    ensure
      # Close the handle even if reading or parsing fails
      mfh.close
    end

    Dir.mktmpdir do |dir|
      if cli[:index].nil?
        ds = cli.load_and_filter_datasets
        ds.keep_if { |d| !d.metadata[:tax].nil? }

        cli.say 'Indexing taxonomy'
        # Fully qualified: a bare TaxIndex could resolve to the sibling
        # action class MiGA::Cli::Action::TaxIndex when that one is loaded
        tax_index = MiGA::TaxIndex.new
        ds.each { |d| tax_index << d }
        tab = File.expand_path('index.tab', dir)
        File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
      else
        tab = cli[:index]
      end

      cli.say 'Traversing taxonomy'
      rank_i = 0
      Taxonomy.KNOWN_RANKS.each do |rank|
        cli.say "o #{rank}: "
        rank_n = 0
        rank_i += 1
        in_rank = nil
        ds_name = []
        # Patterns depend only on the rank, so build them once per rank.
        # The index is matched by indentation: two spaces per rank level.
        parent_re = /^ {#{(rank_i - 1) * 2}}\S+:\S+:/
        rank_re = /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
        File.open(tab, 'r') do |fh|
          fh.each_line do |ln|
            if ln =~ parent_re
              # A new taxon one level up: leave the current taxon
              in_rank = nil
              ds_name = []
            elsif ln =~ rank_re
              # A new taxon at this rank ('?' stands for unknown)
              in_rank = $2 == '?' ? nil : $1
              ds_name = []
            elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
              # A dataset within the current taxon: pair it with every
              # dataset seen so far in the same taxon
              ds_i = $1
              ds_name << ds_i
              ds_name.each do |ds_j|
                k = cannid(ds_i, ds_j)
                next if dist[k].nil?
                rank_n += 1
                dist[k][3] = rank_i
                dist[k][4].unshift in_rank
              end
            end
          end
        end
        cli.say "#{rank_n} pairs of datasets"
      end
    end

    cli.say 'Generating report'
    dist.keys.each do |k|
      dist[k][5] = dist[k][4].reverse.join(' ')
      dist[k][4] = dist[k][4].first
      puts((k.split('-') + dist[k]).join("\t"))
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'miga/tax_index'
|
6
|
+
|
7
|
+
class MiGA::Cli::Action::TaxIndex < MiGA::Cli::Action

  # Registers the shared project option, +-i/--index+ (output path),
  # +-f/--format+ (+:json+ by default; stored as a lowercase Symbol), and
  # the dataset filters.
  def parse_cli
    cli.defaults = {format: :json}
    cli.parse do |opt|
      cli.opt_object(opt, [:project])
      opt.on(
        '-i', '--index PATH',
        '(Mandatory) File to create with the index'
      ){ |v| cli[:index] = v }
      opt.on(
        '-f', '--format STRING',
        "Format of the index file, by default: #{cli[:format]}",
        'Supported: json, tab.'
      ){ |v| cli[:format] = v.downcase.to_sym }
      cli.opt_filter_datasets(opt)
    end
  end

  # Builds a taxonomy index with all the filtered datasets that have a
  # +:tax+ metadata entry and writes it to the file given by +--index+.
  #
  # Raises a RuntimeError if the requested format is neither +:json+ nor
  # +:tab+. The index is serialized before the output file is opened, so an
  # unsupported format never creates or truncates that file.
  def perform
    cli.ensure_par(index: '-i')
    ds = cli.load_and_filter_datasets
    ds.keep_if { |d| !d.metadata[:tax].nil? }

    cli.say 'Indexing taxonomy'
    tax_index = MiGA::TaxIndex.new
    ds.each { |d| tax_index << d }

    cli.say 'Saving index'
    content =
      case cli[:format]
      when :json
        tax_index.to_json
      when :tab
        tax_index.to_tab
      else
        raise "Unsupported output format: #{cli[:format]}"
      end
    File.open(cli[:index], 'w') { |fh| fh.print content }
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::TaxSet < MiGA::Cli::Action

  # Registers the shared project/dataset options, +-s/--tax-string+, and
  # +-t/--tax-file+.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset_opt])
      opt.on(
        '-s', '--tax-string STRING',
        'String corresponding to the taxonomy of the dataset',
        'A space-delimited set of \'rank:name\' pairs'
      ){ |v| cli[:taxstring] = v }
      opt.on('-t', '--tax-file PATH',
        '(Mandatory unless -D and -s are provided)',
        'Tab-delimited file containing datasets taxonomy',
        'Each row corresponds to a datasets and each column to a rank',
        'The first row must be a header with the rank names,',
        'and the first column must contain dataset names'
      ){ |v| cli[:taxfile] = v }
    end
  end

  # Registers the taxonomy of one or many datasets.
  #
  # With +--tax-file+, every non-blank row after the header sets the
  # taxonomy of the dataset named in its first column; rows naming unknown
  # datasets are reported with a warning and skipped. Otherwise, both a
  # dataset and +--tax-string+ are required, and the string is registered
  # as the taxonomy of that dataset.
  def perform
    p = cli.load_project
    if cli[:taxfile].nil?
      cli.ensure_par({dataset: '-D', taxstring: '-s'},
        '%<flag>s is mandatory unless -t is provided')
      cli.say 'Registering taxonomy'
      d = cli.load_dataset
      d.metadata[:tax] = Taxonomy.new(cli[:taxstring])
      d.save
    else
      cli.say 'Reading tax-file and registering taxonomy'
      # Block form guarantees the file is closed even if a row fails
      File.open(cli[:taxfile], 'r') do |tfh|
        header = nil
        tfh.each_line do |ln|
          next if ln =~ /^\s*?$/
          # Limit -1 keeps trailing empty cells, so rows align with the header
          r = ln.chomp.split(/\t/, -1)
          dn = r.shift
          if header.nil?
            # First non-blank row: rank names
            header = r
            next
          end
          d = p.dataset(dn)
          if d.nil?
            # Use this handle's own counter: the global `$.` follows the most
            # recently read IO, which is not necessarily the tax-file
            warn "Impossible to find dataset at line #{tfh.lineno}: #{dn}. Ignoring..."
            next
          end
          d.metadata[:tax] = Taxonomy.new(r, header)
          d.save
          cli.say "o #{d.name} registered"
        end
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'miga/tax_dist'
|
6
|
+
|
7
|
+
class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action

  # Registers the shared project/dataset options, +--ref-project+ (off by
  # default), and +-t/--test+ ('both' by default; stored in lowercase).
  def parse_cli
    cli.defaults = {test: 'both', ref_project: false}
    cli.parse do |opt|
      cli.opt_object(opt, [:project, :dataset])
      opt.on(
        '--ref-project',
        'Use the taxonomy from the reference project, not the current project'
      ){ |v| cli[:ref_project] = v }
      opt.on(
        '-t', '--test STRING',
        'Test to perform. Supported values: intax, novel, both'
      ){ |v| cli[:test] = v.downcase }
    end
  end

  # Finds the closest relative of the dataset and prints the p-value tables
  # of the requested tests ('intax', 'novel', or 'both') for the AAI
  # between them.
  #
  # The taxonomy of the closest relative is read from the current project,
  # or from the project named in its +:ref_project+ metadata entry when
  # +--ref-project+ is on.
  #
  # Raises a RuntimeError if closest relatives are unsupported (nil) or
  # absent (empty), or if +--ref-project+ is on but the reference project
  # is not set or cannot be loaded.
  def perform
    d = cli.load_dataset
    cli.say 'Finding closest relative'
    cr = d.closest_relatives(1, cli[:ref_project])
    raise 'Action not supported for the project or dataset' if cr.nil?
    raise 'No close relatives found' if cr.empty?

    cli.say 'Querying probability distributions'
    cr = cr[0] # [name, AAI] of the closest relative
    cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."

    # The project must be loaded explicitly: a bare `p` would call Kernel#p
    p = cli.load_project
    if cli[:ref_project]
      if (ref = p.metadata[:ref_project]).nil?
        raise '--ref-project requested but no reference project has been set'
      end
      if (q = MiGA::Project.load(ref)).nil?
        raise '--ref-project requested but reference project doesn\'t exist'
      end
      cr_d = q.dataset(cr[0])
    else
      cr_d = p.dataset(cr[0])
    end
    tax = cr_d.metadata[:tax] unless cr_d.nil?
    tax ||= {}

    # One star per significance threshold that the p-value falls below
    stars = lambda do |pvalue|
      '*' * [0.5, 0.1, 0.05, 0.01].count { |cutoff| pvalue < cutoff }
    end

    # Fully qualified: a bare TaxDist could resolve to the sibling action
    # class MiGA::Cli::Action::TaxDist when that one is loaded
    if %w[intax both].include? cli[:test]
      # Intax
      r = MiGA::TaxDist.aai_pvalues(cr[1], :intax).map do |k, v|
        [Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, stars.call(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic classification'
      cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
    end

    if %w[novel both].include? cli[:test]
      # Novel
      r = MiGA::TaxDist.aai_pvalues(cr[1], :novel).map do |k, v|
        [Taxonomy.LONG_RANKS[k], v, stars.call(v)]
      end
      cli.puts ''
      cli.puts 'Taxonomic novelty'
      cli.puts MiGA.tabulate(%w[Rank P-value Signif.], r)
    end

    cli.puts ''
    cli.puts 'Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01.'
  end
end
|