miga-base 0.4.3.0 → 0.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -0,0 +1,46 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
+ class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
7
+ require 'miga/cli/action/wf'
8
+ require 'miga/cli/action/add'
9
+ include MiGA::Cli::Action::Wf
10
+
11
+ def parse_cli
12
+ default_opts_for_wf
13
+ cli.defaults = { mytaxa: false }
14
+ cli.parse do |opt|
15
+ opt.on(
16
+ '-i', '--input-type STRING',
17
+ '(Mandatory) Type of input data, one of the following:',
18
+ *MiGA::Cli::Action::Add.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
19
+ ) { |v| cli[:input_type] = v.downcase.to_sym }
20
+ opt.on(
21
+ '-m', '--mytaxa_scan',
22
+ 'Perform MyTaxa scan analysis'
23
+ ) { |v| cli[:mytaxa] = v }
24
+ opts_for_wf(opt, 'Input files as defined by --input-type',
25
+ multi: true, cleanup: false, ncbi: false)
26
+ end
27
+ end
28
+
29
+ def perform
30
+ # Input data
31
+ cli.ensure_par(input_type: '-i')
32
+ p_metadata = Hash[
33
+ %w[project_stats haai_distances aai_distances ani_distances clade_finding]
34
+ .map { |i| ["run_#{i}", false] }
35
+ ]
36
+ d_metadata = { run_distances: false }
37
+ unless cli[:mytaxa]
38
+ d_metadata[:run_mytaxa_scan] = false
39
+ d_metadata[:run_mytaxa] = false
40
+ end
41
+ p = create_project(cli[:input_type], p_metadata, d_metadata)
42
+ # Run
43
+ run_daemon
44
+ summarize
45
+ end
46
+ end
@@ -0,0 +1,45 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
+ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
7
+ require 'miga/cli/action/wf'
8
+ include MiGA::Cli::Action::Wf
9
+
10
+ def parse_cli
11
+ default_opts_for_wf
12
+ cli.defaults = { mytaxa: false }
13
+ cli.parse do |opt|
14
+ opt.on(
15
+ '-m', '--mytaxa-scan',
16
+ 'Perform MyTaxa scan analysis'
17
+ ) { |v| cli[:mytaxa] = v }
18
+ opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
19
+ end
20
+ end
21
+
22
+ def perform
23
+ # Input data
24
+ p_metadata = Hash[
25
+ %w[project_stats haai_distances aai_distances ani_distances clade_finding]
26
+ .map { |i| ["run_#{i}", false] }
27
+ ]
28
+ d_metadata = { run_distances: false }
29
+ d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
30
+ p = create_project(:assembly, p_metadata, d_metadata)
31
+ # Run
32
+ run_daemon
33
+ summarize
34
+ if cli[:mytaxa]
35
+ dir = File.expand_path('mytaxa_scan', cli[:outdir])
36
+ Dir.mkdir(dir)
37
+ p.each_dataset do |d|
38
+ r = d.result(:mytaxa_scan) or next
39
+ f = r.file_path(:report) or next
40
+ FileUtils.cp(f, dir)
41
+ end
42
+ end
43
+ cleanup
44
+ end
45
+ end
@@ -6,21 +6,21 @@ require 'miga/cli/action'
6
6
  class MiGA::Cli::Action::Stats < MiGA::Cli::Action
7
7
 
8
8
  def parse_cli
9
- cli.defaults = {try_load: false}
9
+ cli.defaults = { try_load: false }
10
10
  cli.parse do |opt|
11
11
  cli.opt_object(opt, [:project, :dataset_opt, :result])
12
12
  opt.on(
13
13
  '--key STRING',
14
14
  'Return only the value of the requested key'
15
- ){ |v| cli[:key] = v }
15
+ ) { |v| cli[:key] = v }
16
16
  opt.on(
17
17
  '--compute-and-save',
18
18
  'Compute and saves the statistics'
19
- ){ |v| cli[:compute] = v }
19
+ ) { |v| cli[:compute] = v }
20
20
  opt.on(
21
21
  '--try-load',
22
22
  'Check if stat exists instead of computing on --compute-and-save'
23
- ){ |v| cli[:try_load] = v }
23
+ ) { |v| cli[:try_load] = v }
24
24
  end
25
25
  end
26
26
 
@@ -30,111 +30,159 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
30
30
  r = cli.load_result
31
31
  if cli[:compute]
32
32
  cli.say 'Computing statistics'
33
- stats = {}
34
- case cli[:result]
35
- when :raw_reads
36
- if r[:files][:pair1].nil?
37
- s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
38
- stats = {
39
- reads: s[:n],
40
- length_average: [s[:avg], 'bp'],
41
- length_standard_deviation: [s[:sd], 'bp'],
42
- g_c_content: [s[:gc], '%']}
43
- else
44
- s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
45
- s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
46
- stats = {
47
- read_pairs: s1[:n],
48
- forward_length_average: [s1[:avg], 'bp'],
49
- forward_length_standard_deviation: [s1[:sd], 'bp'],
50
- forward_g_c_content: [s1[:gc], '%'],
51
- reverse_length_average: [s2[:avg], 'bp'],
52
- reverse_length_standard_deviation: [s2[:sd], 'bp'],
53
- reverse_g_c_content: [s2[:gc], '%']}
54
- end
55
- when :trimmed_fasta
56
- f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
57
- s = MiGA.seqs_length(f, :fasta, gc: true)
58
- stats = {
59
- reads: s[:n],
60
- length_average: [s[:avg], 'bp'],
61
- length_standard_deviation: [s[:sd], 'bp'],
62
- g_c_content: [s[:gc], '%']}
63
- when :assembly
64
- s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
65
- n50: true, gc: true)
66
- stats = {
67
- contigs: s[:n],
68
- n50: [s[:n50], 'bp'],
69
- total_length: [s[:tot], 'bp'],
70
- g_c_content: [s[:gc], '%']}
71
- when :cds
72
- s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
73
- stats = {
74
- predicted_proteins: s[:n],
75
- average_length: [s[:avg], 'aa']}
76
- asm = cli.load_dataset.add_result(:assembly, false)
77
- unless asm.nil? or asm[:stats][:total_length].nil?
78
- stats[:coding_density] =
79
- [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
80
- end
81
- when :essential_genes
82
- d = cli.load_dataset
83
- if d.is_multi?
84
- stats = {median_copies: 0, mean_copies: 0}
85
- File.open(r.file_path(:report), 'r') do |fh|
86
- fh.each_line do |ln|
87
- if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
88
- stats["#{$1.downcase}_copies".to_sym] = $2.to_f
89
- end
90
- end
91
- end
92
- else
93
- # Fix estimate for Archaea
94
- if not d.metadata[:tax].nil? &&
95
- d.metadata[:tax].in?(Taxonomy.new('d:Archaea')) &&
96
- r.file_path(:bac_report).nil?
97
- scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
98
- rep = r.file_path(:report)
99
- $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
100
- r.add_file(:bac_report, "#{d.name}.ess/log")
101
- r.add_file(:report, "#{d.name}.ess/log.archaea")
102
- end
103
- # Extract/compute quality values
104
- stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
105
- File.open(r.file_path(:report), 'r') do |fh|
106
- fh.each_line do |ln|
107
- if /^! (Completeness|Contamination): (.*)%/.match(ln)
108
- stats[$1.downcase.to_sym][0] = $2.to_f
109
- end
110
- end
111
- end
112
- stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
113
- d.metadata[:quality] = case stats[:quality]
114
- when 80..100 ; :excellent
115
- when 50..80 ; :high
116
- when 20..50 ; :intermediate
117
- else ; :low
118
- end
119
- d.save
120
- end
121
- else
122
- stats = nil
123
- end
33
+ method = :"compute_#{cli[:result]}"
34
+ stats = self.respond_to?(method, true) ? send(method, r) : nil
124
35
  unless stats.nil?
125
36
  r[:stats] = stats
126
37
  r.save
127
38
  end
128
39
  end
129
-
130
40
  if cli[:key].nil?
131
- r[:stats].each do |k,v|
132
- cli.puts "#{k==:g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize}: #{
133
- v.is_a?(Array) ? v.join(' ') : v}."
41
+ r[:stats].each do |k, v|
42
+ k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
43
+ cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
134
44
  end
135
45
  else
136
46
  v = r[:stats][cli[:key].downcase.miga_name.to_sym]
137
47
  puts v.is_a?(Array) ? v.first : v
138
48
  end
139
49
  end
50
+
51
+ private
52
+
53
+ def compute_raw_reads(r)
54
+ stats = {}
55
+ if r[:files][:pair1].nil?
56
+ s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
57
+ stats = {
58
+ reads: s[:n],
59
+ length_average: [s[:avg], 'bp'],
60
+ length_standard_deviation: [s[:sd], 'bp'],
61
+ g_c_content: [s[:gc], '%']}
62
+ else
63
+ s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
64
+ s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
65
+ stats = {
66
+ read_pairs: s1[:n],
67
+ forward_length_average: [s1[:avg], 'bp'],
68
+ forward_length_standard_deviation: [s1[:sd], 'bp'],
69
+ forward_g_c_content: [s1[:gc], '%'],
70
+ reverse_length_average: [s2[:avg], 'bp'],
71
+ reverse_length_standard_deviation: [s2[:sd], 'bp'],
72
+ reverse_g_c_content: [s2[:gc], '%']}
73
+ end
74
+ stats
75
+ end
76
+
77
+ def compute_trimmed_fasta(r)
78
+ f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
79
+ s = MiGA.seqs_length(f, :fasta, gc: true)
80
+ {
81
+ reads: s[:n],
82
+ length_average: [s[:avg], 'bp'],
83
+ length_standard_deviation: [s[:sd], 'bp'],
84
+ g_c_content: [s[:gc], '%']
85
+ }
86
+ end
87
+
88
+ def compute_assembly(r)
89
+ s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
90
+ n50: true, gc: true)
91
+ {
92
+ contigs: s[:n],
93
+ n50: [s[:n50], 'bp'],
94
+ total_length: [s[:tot], 'bp'],
95
+ g_c_content: [s[:gc], '%']
96
+ }
97
+ end
98
+
99
+ def compute_cds(r)
100
+ s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
101
+ stats = {
102
+ predicted_proteins: s[:n],
103
+ average_length: [s[:avg], 'aa']}
104
+ asm = cli.load_dataset.add_result(:assembly, false)
105
+ unless asm.nil? or asm[:stats][:total_length].nil?
106
+ stats[:coding_density] =
107
+ [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
108
+ end
109
+ stats
110
+ end
111
+
112
+ def compute_essential_genes(r)
113
+ stats = {}
114
+ d = cli.load_dataset
115
+ if d.is_multi?
116
+ stats = {median_copies: 0, mean_copies: 0}
117
+ File.open(r.file_path(:report), 'r') do |fh|
118
+ fh.each_line do |ln|
119
+ if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
120
+ stats["#{$1.downcase}_copies".to_sym] = $2.to_f
121
+ end
122
+ end
123
+ end
124
+ else
125
+ # Fix estimate for Archaea
126
+ if !d.metadata[:tax].nil? &&
127
+ d.metadata[:tax].in?(Taxonomy.new('d:Archaea')) &&
128
+ r.file_path(:bac_report).nil?
129
+ scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
130
+ rep = r.file_path(:report)
131
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
132
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
133
+ $stderr.print `#{rc} ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
134
+ r.add_file(:bac_report, "#{d.name}.ess/log")
135
+ r.add_file(:report, "#{d.name}.ess/log.archaea")
136
+ end
137
+ # Extract/compute quality values
138
+ stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
139
+ File.open(r.file_path(:report), 'r') do |fh|
140
+ fh.each_line do |ln|
141
+ if /^! (Completeness|Contamination): (.*)%/.match(ln)
142
+ stats[$1.downcase.to_sym][0] = $2.to_f
143
+ end
144
+ end
145
+ end
146
+ stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
147
+ d.metadata[:quality] = case stats[:quality]
148
+ when 80..100 ; :excellent
149
+ when 50..80 ; :high
150
+ when 20..50 ; :intermediate
151
+ else ; :low
152
+ end
153
+ d.save
154
+ end
155
+ stats
156
+ end
157
+
158
+ def compute_ssu(r)
159
+ stats = {ssu: 0, complete_ssu: 0}
160
+ Zlib::GzipReader.open(r.file_path(:gff)) do |fh|
161
+ fh.each_line do |ln|
162
+ next if ln =~ /^#/
163
+ rl = ln.chomp.split("\t")
164
+ len = (rl[4].to_i - rl[3].to_i).abs + 1
165
+ stats[:max_length] = [stats[:max_length] || 0, len].max
166
+ stats[:ssu] += 1
167
+ stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
168
+ end
169
+ end
170
+ stats
171
+ end
172
+
173
+ def compute_taxonomy(r)
174
+ stats = {}
175
+ File.open(r.file_path(:intax_test), 'r') do |fh|
176
+ fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
177
+ stats[:closest_relative] = $1
178
+ stats[:aai] = [$2.to_f, '%']
179
+ 3.times { fh.gets }
180
+ fh.each_line do |ln|
181
+ row = ln.chomp.gsub(/^\s*/,'').split(/\s+/)
182
+ break if row.empty?
183
+ stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
184
+ end
185
+ end
186
+ stats
187
+ end
140
188
  end
@@ -11,18 +11,22 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
11
11
  cli.opt_object(opt, [:project, :dataset_opt])
12
12
  cli.opt_filter_datasets(opt)
13
13
  cli.opt_object(opt, [:result_dataset])
14
+ opt.on(
15
+ '-o', '--output PATH',
16
+ 'Create output file instead of returning to STDOUT'
17
+ ) { |v| cli[:output] = v }
14
18
  opt.on(
15
19
  '--tab',
16
20
  'Return a tab-delimited table'
17
- ){ |v| cli[:tabular] = v }
21
+ ) { |v| cli[:tabular] = v }
18
22
  opt.on(
19
23
  '--key STRING',
20
24
  'Return only the value of the requested key'
21
- ){ |v| cli[:key_md] = v }
25
+ ) { |v| cli[:key_md] = v }
22
26
  opt.on(
23
27
  '--with-units',
24
28
  'Include units in each cell'
25
- ){ |v| cli[:units] = v }
29
+ ) { |v| cli[:units] = v }
26
30
  end
27
31
  end
28
32
 
@@ -44,6 +48,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
44
48
  stats.map{ |s| keys.
45
49
  map{ |k| s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
46
50
  stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
47
- cli.puts MiGA.tabulate(keys, table, cli[:tabular])
51
+ io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
52
+ cli.puts(io, MiGA.tabulate(keys, table, cli[:tabular]))
53
+ io.close unless cli[:output].nil?
48
54
  end
49
55
  end
@@ -25,6 +25,23 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
25
25
  end
26
26
 
27
27
  def perform
28
+ dist = read_distances
29
+ Dir.mktmpdir do |dir|
30
+ tab = get_tab_index(dir)
31
+ dist = traverse_taxonomy(tab, dist)
32
+ end
33
+
34
+ cli.say 'Generating report'
35
+ dist.keys.each do |k|
36
+ dist[k][5] = dist[k][4].reverse.join(' ')
37
+ dist[k][4] = dist[k][4].first
38
+ puts (k.split('-') + dist[k]).join("\t")
39
+ end
40
+ end
41
+
42
+ private
43
+
44
+ def read_distances
28
45
  p = cli.load_project
29
46
  metric = p.is_clade? ? 'ani' : 'aai'
30
47
  res_n = "#{metric}_distances"
@@ -44,59 +61,57 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
44
61
  end
45
62
  cli.say " Lines: #{mfh.lineno}"
46
63
  mfh.close
64
+ dist
65
+ end
47
66
 
48
- Dir.mktmpdir do |dir|
49
- if cli[:index].nil?
50
- ds = cli.load_and_filter_datasets
51
- ds.keep_if { |d| !d.metadata[:tax].nil? }
67
+ def get_tab_index(dir)
68
+ if cli[:index].nil?
69
+ ds = cli.load_and_filter_datasets
70
+ ds.keep_if { |d| !d.metadata[:tax].nil? }
52
71
 
53
- cli.say 'Indexing taxonomy'
54
- tax_index = TaxIndex.new
55
- ds.each { |d| tax_index << d }
56
- tab = File.expand_path('index.tab', dir)
57
- File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
58
- else
59
- tab = cli[:index]
60
- end
72
+ cli.say 'Indexing taxonomy'
73
+ tax_index = TaxIndex.new
74
+ ds.each { |d| tax_index << d }
75
+ tab = File.expand_path('index.tab', dir)
76
+ File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
77
+ else
78
+ tab = cli[:index]
79
+ end
80
+ tab
81
+ end
61
82
 
62
- cli.say 'Traversing taxonomy'
63
- rank_i = 0
64
- Taxonomy.KNOWN_RANKS.each do |rank|
65
- cli.say "o #{rank}: "
66
- rank_n = 0
67
- rank_i += 1
68
- in_rank = nil
69
- ds_name = []
70
- File.open(tab, 'r') do |fh|
71
- fh.each_line do |ln|
72
- if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
73
- in_rank = nil
74
- ds_name = []
75
- elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
76
- in_rank = $2 == '?' ? nil : $1
77
- ds_name = []
78
- elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
79
- ds_i = $1
80
- ds_name << ds_i
81
- ds_name.each do |ds_j|
82
- k = cannid(ds_i, ds_j)
83
- next if dist[k].nil?
84
- rank_n += 1
85
- dist[k][3] = rank_i
86
- dist[k][4].unshift in_rank
87
- end
83
+ def traverse_taxonomy(tab, dist)
84
+ cli.say 'Traversing taxonomy'
85
+ rank_i = 0
86
+ Taxonomy.KNOWN_RANKS.each do |rank|
87
+ cli.say "o #{rank}: "
88
+ rank_n = 0
89
+ rank_i += 1
90
+ in_rank = nil
91
+ ds_name = []
92
+ File.open(tab, 'r') do |fh|
93
+ fh.each_line do |ln|
94
+ if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
95
+ in_rank = nil
96
+ ds_name = []
97
+ elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
98
+ in_rank = $2 == '?' ? nil : $1
99
+ ds_name = []
100
+ elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
101
+ ds_i = $1
102
+ ds_name << ds_i
103
+ ds_name.each do |ds_j|
104
+ k = cannid(ds_i, ds_j)
105
+ next if dist[k].nil?
106
+ rank_n += 1
107
+ dist[k][3] = rank_i
108
+ dist[k][4].unshift in_rank
88
109
  end
89
110
  end
90
111
  end
91
- cli.say "#{rank_n} pairs of datasets"
92
112
  end
113
+ cli.say "#{rank_n} pairs of datasets"
93
114
  end
94
-
95
- cli.say 'Generating report'
96
- dist.keys.each do |k|
97
- dist[k][5] = dist[k][4].reverse.join(' ')
98
- dist[k][4] = dist[k][4].first
99
- puts (k.split('-') + dist[k]).join("\t")
100
- end
115
+ dist
101
116
  end
102
117
  end