miga-base 0.4.3.0 → 0.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -0,0 +1,46 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
##
# CLI action for the preprocessing workflow: creates a project from the given
# input files with the project-level steps and dataset distances switched off,
# runs the daemon, and prints the summary.
class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
  require 'miga/cli/action/wf'
  require 'miga/cli/action/add'
  include MiGA::Cli::Action::Wf

  ##
  # Register the command-line options: the input type (-i), the MyTaxa scan
  # switch (-m), and the shared workflow options.
  def parse_cli
    default_opts_for_wf
    cli.defaults = { mytaxa: false }
    cli.parse do |opt|
      # One help line per supported input type
      input_types = MiGA::Cli::Action::Add.INPUT_TYPES.map do |name, info|
        "~ #{name}: #{info[0]}"
      end
      opt.on(
        '-i', '--input-type STRING',
        '(Mandatory) Type of input data, one of the following:',
        *input_types
      ) { |v| cli[:input_type] = v.downcase.to_sym }
      opt.on(
        '-m', '--mytaxa_scan',
        'Perform MyTaxa scan analysis'
      ) { |v| cli[:mytaxa] = v }
      opts_for_wf(
        opt, 'Input files as defined by --input-type',
        multi: true, cleanup: false, ncbi: false
      )
    end
  end

  ##
  # Create the project, run the daemon, and summarize the results.
  def perform
    # Input data
    cli.ensure_par(input_type: '-i')
    project_steps =
      %w[project_stats haai_distances aai_distances ani_distances clade_finding]
    p_metadata = project_steps.map { |step| ["run_#{step}", false] }.to_h
    d_metadata = { run_distances: false }
    unless cli[:mytaxa]
      # MyTaxa steps are only kept when explicitly requested with -m
      d_metadata.merge!(run_mytaxa_scan: false, run_mytaxa: false)
    end
    create_project(cli[:input_type], p_metadata, d_metadata)
    # Run
    run_daemon
    summarize
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
##
# CLI action for the quality workflow: creates a project from genome
# assemblies with the project-level steps and dataset distances switched off,
# runs the daemon, prints the summary and, when requested, gathers the MyTaxa
# scan reports into the output directory.
class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
  require 'miga/cli/action/wf'
  include MiGA::Cli::Action::Wf

  ##
  # Register the command-line options: the MyTaxa scan switch (-m) and the
  # shared workflow options.
  def parse_cli
    default_opts_for_wf
    cli.defaults = { mytaxa: false }
    cli.parse do |opt|
      opt.on(
        '-m', '--mytaxa-scan',
        'Perform MyTaxa scan analysis'
      ) { |v| cli[:mytaxa] = v }
      opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
    end
  end

  ##
  # Create the project, run the daemon, summarize the results, copy the
  # MyTaxa scan reports (only with -m), and clean up.
  def perform
    # Input data
    p_metadata = Hash[
      %w[project_stats haai_distances aai_distances ani_distances clade_finding]
        .map { |i| ["run_#{i}", false] }
    ]
    d_metadata = { run_distances: false }
    d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
    project = create_project(:assembly, p_metadata, d_metadata)
    # Run
    run_daemon
    summarize
    if cli[:mytaxa]
      dir = File.expand_path('mytaxa_scan', cli[:outdir])
      # The folder may be left over from a previous run on the same output
      # directory; an unconditional Dir.mkdir would raise Errno::EEXIST here
      # and skip the cleanup below.
      Dir.mkdir(dir) unless Dir.exist?(dir)
      project.each_dataset do |d|
        # Datasets without a MyTaxa scan result or report are skipped
        r = d.result(:mytaxa_scan) or next
        f = r.file_path(:report) or next
        FileUtils.cp(f, dir)
      end
    end
    cleanup
  end
end
|
@@ -6,21 +6,21 @@ require 'miga/cli/action'
|
|
6
6
|
class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
7
7
|
|
8
8
|
def parse_cli
|
9
|
-
cli.defaults = {try_load: false}
|
9
|
+
cli.defaults = { try_load: false }
|
10
10
|
cli.parse do |opt|
|
11
11
|
cli.opt_object(opt, [:project, :dataset_opt, :result])
|
12
12
|
opt.on(
|
13
13
|
'--key STRING',
|
14
14
|
'Return only the value of the requested key'
|
15
|
-
|
15
|
+
) { |v| cli[:key] = v }
|
16
16
|
opt.on(
|
17
17
|
'--compute-and-save',
|
18
18
|
'Compute and saves the statistics'
|
19
|
-
|
19
|
+
) { |v| cli[:compute] = v }
|
20
20
|
opt.on(
|
21
21
|
'--try-load',
|
22
22
|
'Check if stat exists instead of computing on --compute-and-save'
|
23
|
-
|
23
|
+
) { |v| cli[:try_load] = v }
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -30,111 +30,159 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
30
30
|
r = cli.load_result
|
31
31
|
if cli[:compute]
|
32
32
|
cli.say 'Computing statistics'
|
33
|
-
|
34
|
-
|
35
|
-
when :raw_reads
|
36
|
-
if r[:files][:pair1].nil?
|
37
|
-
s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
|
38
|
-
stats = {
|
39
|
-
reads: s[:n],
|
40
|
-
length_average: [s[:avg], 'bp'],
|
41
|
-
length_standard_deviation: [s[:sd], 'bp'],
|
42
|
-
g_c_content: [s[:gc], '%']}
|
43
|
-
else
|
44
|
-
s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
|
45
|
-
s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
|
46
|
-
stats = {
|
47
|
-
read_pairs: s1[:n],
|
48
|
-
forward_length_average: [s1[:avg], 'bp'],
|
49
|
-
forward_length_standard_deviation: [s1[:sd], 'bp'],
|
50
|
-
forward_g_c_content: [s1[:gc], '%'],
|
51
|
-
reverse_length_average: [s2[:avg], 'bp'],
|
52
|
-
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
53
|
-
reverse_g_c_content: [s2[:gc], '%']}
|
54
|
-
end
|
55
|
-
when :trimmed_fasta
|
56
|
-
f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
|
57
|
-
s = MiGA.seqs_length(f, :fasta, gc: true)
|
58
|
-
stats = {
|
59
|
-
reads: s[:n],
|
60
|
-
length_average: [s[:avg], 'bp'],
|
61
|
-
length_standard_deviation: [s[:sd], 'bp'],
|
62
|
-
g_c_content: [s[:gc], '%']}
|
63
|
-
when :assembly
|
64
|
-
s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
|
65
|
-
n50: true, gc: true)
|
66
|
-
stats = {
|
67
|
-
contigs: s[:n],
|
68
|
-
n50: [s[:n50], 'bp'],
|
69
|
-
total_length: [s[:tot], 'bp'],
|
70
|
-
g_c_content: [s[:gc], '%']}
|
71
|
-
when :cds
|
72
|
-
s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
|
73
|
-
stats = {
|
74
|
-
predicted_proteins: s[:n],
|
75
|
-
average_length: [s[:avg], 'aa']}
|
76
|
-
asm = cli.load_dataset.add_result(:assembly, false)
|
77
|
-
unless asm.nil? or asm[:stats][:total_length].nil?
|
78
|
-
stats[:coding_density] =
|
79
|
-
[300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
|
80
|
-
end
|
81
|
-
when :essential_genes
|
82
|
-
d = cli.load_dataset
|
83
|
-
if d.is_multi?
|
84
|
-
stats = {median_copies: 0, mean_copies: 0}
|
85
|
-
File.open(r.file_path(:report), 'r') do |fh|
|
86
|
-
fh.each_line do |ln|
|
87
|
-
if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
|
88
|
-
stats["#{$1.downcase}_copies".to_sym] = $2.to_f
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
else
|
93
|
-
# Fix estimate for Archaea
|
94
|
-
if not d.metadata[:tax].nil? &&
|
95
|
-
d.metadata[:tax].in?(Taxonomy.new('d:Archaea')) &&
|
96
|
-
r.file_path(:bac_report).nil?
|
97
|
-
scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
|
98
|
-
rep = r.file_path(:report)
|
99
|
-
$stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
|
100
|
-
r.add_file(:bac_report, "#{d.name}.ess/log")
|
101
|
-
r.add_file(:report, "#{d.name}.ess/log.archaea")
|
102
|
-
end
|
103
|
-
# Extract/compute quality values
|
104
|
-
stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
|
105
|
-
File.open(r.file_path(:report), 'r') do |fh|
|
106
|
-
fh.each_line do |ln|
|
107
|
-
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
108
|
-
stats[$1.downcase.to_sym][0] = $2.to_f
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
113
|
-
d.metadata[:quality] = case stats[:quality]
|
114
|
-
when 80..100 ; :excellent
|
115
|
-
when 50..80 ; :high
|
116
|
-
when 20..50 ; :intermediate
|
117
|
-
else ; :low
|
118
|
-
end
|
119
|
-
d.save
|
120
|
-
end
|
121
|
-
else
|
122
|
-
stats = nil
|
123
|
-
end
|
33
|
+
method = :"compute_#{cli[:result]}"
|
34
|
+
stats = self.respond_to?(method, true) ? send(method, r) : nil
|
124
35
|
unless stats.nil?
|
125
36
|
r[:stats] = stats
|
126
37
|
r.save
|
127
38
|
end
|
128
39
|
end
|
129
|
-
|
130
40
|
if cli[:key].nil?
|
131
|
-
r[:stats].each do |k,v|
|
132
|
-
|
133
|
-
|
41
|
+
r[:stats].each do |k, v|
|
42
|
+
k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
|
43
|
+
cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
|
134
44
|
end
|
135
45
|
else
|
136
46
|
v = r[:stats][cli[:key].downcase.miga_name.to_sym]
|
137
47
|
puts v.is_a?(Array) ? v.first : v
|
138
48
|
end
|
139
49
|
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
##
# Compute the statistics of the raw reads result +r+. When the result has no
# :pair1 file the :single file is measured; otherwise :pair1 and :pair2 are
# measured separately and reported as forward and reverse values.
# Returns a Hash of statistics.
def compute_raw_reads(r)
  if r[:files][:pair1].nil?
    single = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
    return {
      reads: single[:n],
      length_average: [single[:avg], 'bp'],
      length_standard_deviation: [single[:sd], 'bp'],
      g_c_content: [single[:gc], '%']
    }
  end

  fwd = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
  rev = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
  {
    # The number of pairs is taken from the forward file only
    read_pairs: fwd[:n],
    forward_length_average: [fwd[:avg], 'bp'],
    forward_length_standard_deviation: [fwd[:sd], 'bp'],
    forward_g_c_content: [fwd[:gc], '%'],
    reverse_length_average: [rev[:avg], 'bp'],
    reverse_length_standard_deviation: [rev[:sd], 'bp'],
    reverse_g_c_content: [rev[:gc], '%']
  }
end
|
76
|
+
|
77
|
+
##
# Compute the statistics of the trimmed FastA result +r+, measuring the
# :coupled file when the result registers one and the :single file otherwise.
# Returns a Hash of statistics.
def compute_trimmed_fasta(r)
  file_key = r[:files][:coupled].nil? ? :single : :coupled
  lengths = MiGA.seqs_length(r.file_path(file_key), :fasta, gc: true)
  stats = {}
  stats[:reads] = lengths[:n]
  stats[:length_average] = [lengths[:avg], 'bp']
  stats[:length_standard_deviation] = [lengths[:sd], 'bp']
  stats[:g_c_content] = [lengths[:gc], '%']
  stats
end
|
87
|
+
|
88
|
+
##
# Compute the statistics of the assembly result +r+ from its :largecontigs
# file, including N50 and G+C content. Returns a Hash of statistics.
def compute_assembly(r)
  contigs_file = r.file_path(:largecontigs)
  lengths = MiGA.seqs_length(contigs_file, :fasta, n50: true, gc: true)
  stats = {}
  stats[:contigs] = lengths[:n]
  stats[:n50] = [lengths[:n50], 'bp']
  stats[:total_length] = [lengths[:tot], 'bp']
  stats[:g_c_content] = [lengths[:gc], '%']
  stats
end
|
98
|
+
|
99
|
+
##
# Compute the statistics of the CDS result +r+ from its :proteins file.
# Returns a Hash with the number of predicted proteins and their average
# length; :coding_density is added only when the dataset's assembly result
# carries a positive total length.
def compute_cds(r)
  s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
  stats = {
    predicted_proteins: s[:n],
    average_length: [s[:avg], 'aa']
  }
  asm = cli.load_dataset.add_result(:assembly, false)
  # The assembly may be missing, or present without computed statistics
  asm_stats = asm.nil? ? nil : asm[:stats]
  total_length = asm_stats.nil? ? nil : asm_stats[:total_length]
  asm_bp = total_length.nil? ? nil : total_length[0]
  # A zero (or non-numeric) length would produce Infinity/NaN, so the density
  # is skipped in that case instead of being stored
  if asm_bp.is_a?(Numeric) && asm_bp > 0
    # 300.0 presumably converts amino acids to nucleotides (x3) and the
    # ratio to a percentage (x100) -- TODO confirm
    stats[:coding_density] = [300.0 * s[:tot] / asm_bp, '%']
  end
  stats
end
|
111
|
+
|
112
|
+
##
# Compute the statistics of the essential genes result +r+.
#
# For multi datasets, returns the mean and median number of copies per model
# as parsed from the :report file. For other datasets, returns completeness,
# contamination and quality, and also stores a quality category in the
# dataset metadata (the dataset is saved).
def compute_essential_genes(r)
  d = cli.load_dataset
  if d.is_multi?
    stats = { median_copies: 0, mean_copies: 0 }
    File.open(r.file_path(:report), 'r') do |fh|
      fh.each_line do |ln|
        if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
          stats["#{$1.downcase}_copies".to_sym] = $2.to_f
        end
      end
    end
    return stats
  end

  # Fix estimate for Archaea: only when the dataset is classified within
  # d:Archaea and no :bac_report has been registered yet. The constant is
  # fully qualified so it resolves even if MiGA is not mixed into Object.
  tax = d.metadata[:tax]
  if !tax.nil? &&
     tax.in?(MiGA::Taxonomy.new('d:Archaea')) &&
     r.file_path(:bac_report).nil?
    scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
    rep = r.file_path(:report)
    # Source ~/.miga_rc first when it exists, then run the correction script
    rc_p = File.expand_path('.miga_rc', ENV['HOME'])
    rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
    $stderr.print `#{rc} ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
    # The original log is kept as :bac_report and the corrected one becomes
    # the :report read below
    r.add_file(:bac_report, "#{d.name}.ess/log")
    r.add_file(:report, "#{d.name}.ess/log.archaea")
  end

  # Extract/compute quality values
  stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
  File.open(r.file_path(:report), 'r') do |fh|
    fh.each_line do |ln|
      if /^! (Completeness|Contamination): (.*)%/.match(ln)
        stats[$1.downcase.to_sym][0] = $2.to_f
      end
    end
  end
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
  # Ranges overlap at the boundaries; the first matching branch wins
  d.metadata[:quality] =
    case stats[:quality]
    when 80..100 then :excellent
    when 50..80 then :high
    when 20..50 then :intermediate
    else :low
    end
  d.save
  stats
end
|
157
|
+
|
158
|
+
##
# Compute the statistics of the SSU result +r+ from its gzipped :gff file.
# Returns a Hash with the number of records (:ssu), the number of records
# whose attributes column is not marked "(partial)" (:complete_ssu) and, when
# at least one record exists, the longest record (:max_length).
def compute_ssu(r)
  stats = { ssu: 0, complete_ssu: 0 }
  Zlib::GzipReader.open(r.file_path(:gff)) do |fh|
    fh.each_line do |ln|
      next if ln.start_with?('#')

      rl = ln.chomp.split("\t")
      # Blank or truncated lines carry no coordinates; counting them would
      # add a spurious record of length 1
      next if rl.size < 5

      # Columns 4 and 5 (indexes 3 and 4) hold the start and end coordinates
      len = (rl[4].to_i - rl[3].to_i).abs + 1
      stats[:max_length] = [stats[:max_length] || 0, len].max
      stats[:ssu] += 1
      stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
    end
  end
  stats
end
|
172
|
+
|
173
|
+
##
# Compute the statistics of the taxonomy result +r+ from its :intax_test
# file. The first line provides the closest relative and its AAI; after
# skipping three more lines, each row up to the first empty one contributes
# a "<first column>_pvalue" entry taken from its third column (rows whose
# first column is "root" are ignored). Returns a Hash of statistics.
def compute_taxonomy(r)
  stats = {}
  File.open(r.file_path(:intax_test), 'r') do |fh|
    header = fh.gets.chomp
    hit = /Closest relative: (\S+) with AAI: (\S+)\.?/.match(header)
    # Without a match both captures are nil (the AAI then becomes 0.0)
    stats[:closest_relative] = hit.nil? ? nil : hit[1]
    stats[:aai] = [(hit.nil? ? nil : hit[2]).to_f, '%']
    3.times { fh.gets }
    while (ln = fh.gets)
      fields = ln.chomp.gsub(/^\s*/,'').split(/\s+/)
      break if fields.empty?
      next if fields[0] == 'root'

      stats[:"#{fields[0]}_pvalue"] = fields[2].to_f
    end
  end
  stats
end
|
140
188
|
end
|
@@ -11,18 +11,22 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
11
11
|
cli.opt_object(opt, [:project, :dataset_opt])
|
12
12
|
cli.opt_filter_datasets(opt)
|
13
13
|
cli.opt_object(opt, [:result_dataset])
|
14
|
+
opt.on(
|
15
|
+
'-o', '--output PATH',
|
16
|
+
'Create output file instead of returning to STDOUT'
|
17
|
+
) { |v| cli[:output] = v }
|
14
18
|
opt.on(
|
15
19
|
'--tab',
|
16
20
|
'Return a tab-delimited table'
|
17
|
-
|
21
|
+
) { |v| cli[:tabular] = v }
|
18
22
|
opt.on(
|
19
23
|
'--key STRING',
|
20
24
|
'Return only the value of the requested key'
|
21
|
-
|
25
|
+
) { |v| cli[:key_md] = v }
|
22
26
|
opt.on(
|
23
27
|
'--with-units',
|
24
28
|
'Include units in each cell'
|
25
|
-
|
29
|
+
) { |v| cli[:units] = v }
|
26
30
|
end
|
27
31
|
end
|
28
32
|
|
@@ -44,6 +48,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
44
48
|
stats.map{ |s| keys.
|
45
49
|
map{ |k| s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
|
46
50
|
stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
|
47
|
-
cli.
|
51
|
+
io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
|
52
|
+
cli.puts(io, MiGA.tabulate(keys, table, cli[:tabular]))
|
53
|
+
io.close unless cli[:output].nil?
|
48
54
|
end
|
49
55
|
end
|
@@ -25,6 +25,23 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
|
|
25
25
|
end
|
26
26
|
|
27
27
|
##
# Read the distances, annotate them by traversing the taxonomy index, and
# print one tab-delimited line per pair of datasets.
def perform
  dist = read_distances
  Dir.mktmpdir do |dir|
    dist = traverse_taxonomy(get_tab_index(dir), dist)
  end

  cli.say 'Generating report'
  dist.keys.each do |pair|
    row = dist[pair]
    lineage = row[4]
    # Column 6 lists the collected taxa in reverse order; column 5 keeps
    # only the first one
    row[5] = lineage.reverse.join(' ')
    row[4] = lineage.first
    puts((pair.split('-') + row).join("\t"))
  end
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def read_distances
|
28
45
|
p = cli.load_project
|
29
46
|
metric = p.is_clade? ? 'ani' : 'aai'
|
30
47
|
res_n = "#{metric}_distances"
|
@@ -44,59 +61,57 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
|
|
44
61
|
end
|
45
62
|
cli.say " Lines: #{mfh.lineno}"
|
46
63
|
mfh.close
|
64
|
+
dist
|
65
|
+
end
|
47
66
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
67
|
+
##
# Return the path to the taxonomy index in tabular format. When an index was
# provided through cli[:index] it is returned as is; otherwise the index is
# built from the filtered datasets that have taxonomy metadata and written
# to "index.tab" inside +dir+.
def get_tab_index(dir)
  return cli[:index] unless cli[:index].nil?

  # Non-mutating filter: the collection handed out by the CLI is left intact
  ds = cli.load_and_filter_datasets.reject { |d| d.metadata[:tax].nil? }

  cli.say 'Indexing taxonomy'
  # Fully qualified so the constant resolves even if MiGA is not mixed into
  # Object
  tax_index = MiGA::TaxIndex.new
  ds.each { |d| tax_index << d }
  tab = File.expand_path('index.tab', dir)
  File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
  tab
end
|
61
82
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
83
|
+
##
# Annotate the distances in +dist+ using the tabular taxonomy index stored
# in the file +tab+. The index is scanned once per known rank; for every
# pair of datasets listed under the same taxon of that rank (and present in
# +dist+), the rank number is stored in position 3 and the taxon is
# prepended to the list in position 4. Returns +dist+.
def traverse_taxonomy(tab, dist)
  cli.say 'Traversing taxonomy'
  # Fully qualified so the constant resolves even if MiGA is not mixed into
  # Object
  MiGA::Taxonomy.KNOWN_RANKS.each_with_index do |rank, idx|
    rank_i = idx + 1
    cli.say "o #{rank}: "
    # Patterns depend only on the rank, so they are built once per rank.
    # The index is expected to indent each level with two more spaces.
    parent_re = /^ {#{(rank_i - 1) * 2}}\S+:\S+:/
    rank_re = /^ {#{rank_i * 2}}(#{rank}:(\S+)):/
    dataset_re = /^ *# (\S+)/
    rank_n = 0
    in_rank = nil
    ds_name = []
    File.open(tab, 'r') do |fh|
      fh.each_line do |ln|
        if ln =~ parent_re
          # A new taxon of the parent level closes the current group
          in_rank = nil
          ds_name = []
        elsif (taxon = rank_re.match(ln))
          # Undefined taxa ("?") do not group datasets
          in_rank = taxon[2] == '?' ? nil : taxon[1]
          ds_name = []
        elsif !in_rank.nil? && (dataset = dataset_re.match(ln))
          ds_i = dataset[1]
          ds_name << ds_i
          ds_name.each do |ds_j|
            k = cannid(ds_i, ds_j)
            next if dist[k].nil?

            rank_n += 1
            dist[k][3] = rank_i
            dist[k][4].unshift in_rank
          end
        end
      end
    end
    cli.say "#{rank_n} pairs of datasets"
  end
  dist
end
|
102
117
|
end
|