miga-base 0.4.3.0 → 0.5.0.0

This diff shows the differences between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -0,0 +1,46 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
+ class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
7
+ require 'miga/cli/action/wf'
8
+ require 'miga/cli/action/add'
9
+ include MiGA::Cli::Action::Wf
10
+
11
+ def parse_cli
12
+ default_opts_for_wf
13
+ cli.defaults = { mytaxa: false }
14
+ cli.parse do |opt|
15
+ opt.on(
16
+ '-i', '--input-type STRING',
17
+ '(Mandatory) Type of input data, one of the following:',
18
+ *MiGA::Cli::Action::Add.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
19
+ ) { |v| cli[:input_type] = v.downcase.to_sym }
20
+ opt.on(
21
+ '-m', '--mytaxa_scan',
22
+ 'Perform MyTaxa scan analysis'
23
+ ) { |v| cli[:mytaxa] = v }
24
+ opts_for_wf(opt, 'Input files as defined by --input-type',
25
+ multi: true, cleanup: false, ncbi: false)
26
+ end
27
+ end
28
+
29
+ def perform
30
+ # Input data
31
+ cli.ensure_par(input_type: '-i')
32
+ p_metadata = Hash[
33
+ %w[project_stats haai_distances aai_distances ani_distances clade_finding]
34
+ .map { |i| ["run_#{i}", false] }
35
+ ]
36
+ d_metadata = { run_distances: false }
37
+ unless cli[:mytaxa]
38
+ d_metadata[:run_mytaxa_scan] = false
39
+ d_metadata[:run_mytaxa] = false
40
+ end
41
+ p = create_project(cli[:input_type], p_metadata, d_metadata)
42
+ # Run
43
+ run_daemon
44
+ summarize
45
+ end
46
+ end
@@ -0,0 +1,45 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
+ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
7
+ require 'miga/cli/action/wf'
8
+ include MiGA::Cli::Action::Wf
9
+
10
+ def parse_cli
11
+ default_opts_for_wf
12
+ cli.defaults = { mytaxa: false }
13
+ cli.parse do |opt|
14
+ opt.on(
15
+ '-m', '--mytaxa-scan',
16
+ 'Perform MyTaxa scan analysis'
17
+ ) { |v| cli[:mytaxa] = v }
18
+ opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
19
+ end
20
+ end
21
+
22
+ def perform
23
+ # Input data
24
+ p_metadata = Hash[
25
+ %w[project_stats haai_distances aai_distances ani_distances clade_finding]
26
+ .map { |i| ["run_#{i}", false] }
27
+ ]
28
+ d_metadata = { run_distances: false }
29
+ d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
30
+ p = create_project(:assembly, p_metadata, d_metadata)
31
+ # Run
32
+ run_daemon
33
+ summarize
34
+ if cli[:mytaxa]
35
+ dir = File.expand_path('mytaxa_scan', cli[:outdir])
36
+ Dir.mkdir(dir)
37
+ p.each_dataset do |d|
38
+ r = d.result(:mytaxa_scan) or next
39
+ f = r.file_path(:report) or next
40
+ FileUtils.cp(f, dir)
41
+ end
42
+ end
43
+ cleanup
44
+ end
45
+ end
@@ -6,21 +6,21 @@ require 'miga/cli/action'
6
6
  class MiGA::Cli::Action::Stats < MiGA::Cli::Action
7
7
 
8
8
  def parse_cli
9
- cli.defaults = {try_load: false}
9
+ cli.defaults = { try_load: false }
10
10
  cli.parse do |opt|
11
11
  cli.opt_object(opt, [:project, :dataset_opt, :result])
12
12
  opt.on(
13
13
  '--key STRING',
14
14
  'Return only the value of the requested key'
15
- ){ |v| cli[:key] = v }
15
+ ) { |v| cli[:key] = v }
16
16
  opt.on(
17
17
  '--compute-and-save',
18
18
  'Compute and saves the statistics'
19
- ){ |v| cli[:compute] = v }
19
+ ) { |v| cli[:compute] = v }
20
20
  opt.on(
21
21
  '--try-load',
22
22
  'Check if stat exists instead of computing on --compute-and-save'
23
- ){ |v| cli[:try_load] = v }
23
+ ) { |v| cli[:try_load] = v }
24
24
  end
25
25
  end
26
26
 
@@ -30,111 +30,159 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
30
30
  r = cli.load_result
31
31
  if cli[:compute]
32
32
  cli.say 'Computing statistics'
33
- stats = {}
34
- case cli[:result]
35
- when :raw_reads
36
- if r[:files][:pair1].nil?
37
- s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
38
- stats = {
39
- reads: s[:n],
40
- length_average: [s[:avg], 'bp'],
41
- length_standard_deviation: [s[:sd], 'bp'],
42
- g_c_content: [s[:gc], '%']}
43
- else
44
- s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
45
- s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
46
- stats = {
47
- read_pairs: s1[:n],
48
- forward_length_average: [s1[:avg], 'bp'],
49
- forward_length_standard_deviation: [s1[:sd], 'bp'],
50
- forward_g_c_content: [s1[:gc], '%'],
51
- reverse_length_average: [s2[:avg], 'bp'],
52
- reverse_length_standard_deviation: [s2[:sd], 'bp'],
53
- reverse_g_c_content: [s2[:gc], '%']}
54
- end
55
- when :trimmed_fasta
56
- f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
57
- s = MiGA.seqs_length(f, :fasta, gc: true)
58
- stats = {
59
- reads: s[:n],
60
- length_average: [s[:avg], 'bp'],
61
- length_standard_deviation: [s[:sd], 'bp'],
62
- g_c_content: [s[:gc], '%']}
63
- when :assembly
64
- s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
65
- n50: true, gc: true)
66
- stats = {
67
- contigs: s[:n],
68
- n50: [s[:n50], 'bp'],
69
- total_length: [s[:tot], 'bp'],
70
- g_c_content: [s[:gc], '%']}
71
- when :cds
72
- s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
73
- stats = {
74
- predicted_proteins: s[:n],
75
- average_length: [s[:avg], 'aa']}
76
- asm = cli.load_dataset.add_result(:assembly, false)
77
- unless asm.nil? or asm[:stats][:total_length].nil?
78
- stats[:coding_density] =
79
- [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
80
- end
81
- when :essential_genes
82
- d = cli.load_dataset
83
- if d.is_multi?
84
- stats = {median_copies: 0, mean_copies: 0}
85
- File.open(r.file_path(:report), 'r') do |fh|
86
- fh.each_line do |ln|
87
- if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
88
- stats["#{$1.downcase}_copies".to_sym] = $2.to_f
89
- end
90
- end
91
- end
92
- else
93
- # Fix estimate for Archaea
94
- if not d.metadata[:tax].nil? &&
95
- d.metadata[:tax].in?(Taxonomy.new('d:Archaea')) &&
96
- r.file_path(:bac_report).nil?
97
- scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
98
- rep = r.file_path(:report)
99
- $stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
100
- r.add_file(:bac_report, "#{d.name}.ess/log")
101
- r.add_file(:report, "#{d.name}.ess/log.archaea")
102
- end
103
- # Extract/compute quality values
104
- stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
105
- File.open(r.file_path(:report), 'r') do |fh|
106
- fh.each_line do |ln|
107
- if /^! (Completeness|Contamination): (.*)%/.match(ln)
108
- stats[$1.downcase.to_sym][0] = $2.to_f
109
- end
110
- end
111
- end
112
- stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
113
- d.metadata[:quality] = case stats[:quality]
114
- when 80..100 ; :excellent
115
- when 50..80 ; :high
116
- when 20..50 ; :intermediate
117
- else ; :low
118
- end
119
- d.save
120
- end
121
- else
122
- stats = nil
123
- end
33
+ method = :"compute_#{cli[:result]}"
34
+ stats = self.respond_to?(method, true) ? send(method, r) : nil
124
35
  unless stats.nil?
125
36
  r[:stats] = stats
126
37
  r.save
127
38
  end
128
39
  end
129
-
130
40
  if cli[:key].nil?
131
- r[:stats].each do |k,v|
132
- cli.puts "#{k==:g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize}: #{
133
- v.is_a?(Array) ? v.join(' ') : v}."
41
+ r[:stats].each do |k, v|
42
+ k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
43
+ cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
134
44
  end
135
45
  else
136
46
  v = r[:stats][cli[:key].downcase.miga_name.to_sym]
137
47
  puts v.is_a?(Array) ? v.first : v
138
48
  end
139
49
  end
50
+
51
+ private
52
+
53
+ def compute_raw_reads(r)
54
+ stats = {}
55
+ if r[:files][:pair1].nil?
56
+ s = MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
57
+ stats = {
58
+ reads: s[:n],
59
+ length_average: [s[:avg], 'bp'],
60
+ length_standard_deviation: [s[:sd], 'bp'],
61
+ g_c_content: [s[:gc], '%']}
62
+ else
63
+ s1 = MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
64
+ s2 = MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
65
+ stats = {
66
+ read_pairs: s1[:n],
67
+ forward_length_average: [s1[:avg], 'bp'],
68
+ forward_length_standard_deviation: [s1[:sd], 'bp'],
69
+ forward_g_c_content: [s1[:gc], '%'],
70
+ reverse_length_average: [s2[:avg], 'bp'],
71
+ reverse_length_standard_deviation: [s2[:sd], 'bp'],
72
+ reverse_g_c_content: [s2[:gc], '%']}
73
+ end
74
+ stats
75
+ end
76
+
77
+ def compute_trimmed_fasta(r)
78
+ f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
79
+ s = MiGA.seqs_length(f, :fasta, gc: true)
80
+ {
81
+ reads: s[:n],
82
+ length_average: [s[:avg], 'bp'],
83
+ length_standard_deviation: [s[:sd], 'bp'],
84
+ g_c_content: [s[:gc], '%']
85
+ }
86
+ end
87
+
88
+ def compute_assembly(r)
89
+ s = MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
90
+ n50: true, gc: true)
91
+ {
92
+ contigs: s[:n],
93
+ n50: [s[:n50], 'bp'],
94
+ total_length: [s[:tot], 'bp'],
95
+ g_c_content: [s[:gc], '%']
96
+ }
97
+ end
98
+
99
+ def compute_cds(r)
100
+ s = MiGA.seqs_length(r.file_path(:proteins), :fasta)
101
+ stats = {
102
+ predicted_proteins: s[:n],
103
+ average_length: [s[:avg], 'aa']}
104
+ asm = cli.load_dataset.add_result(:assembly, false)
105
+ unless asm.nil? or asm[:stats][:total_length].nil?
106
+ stats[:coding_density] =
107
+ [300.0 * s[:tot] / asm[:stats][:total_length][0], '%']
108
+ end
109
+ stats
110
+ end
111
+
112
+ def compute_essential_genes(r)
113
+ stats = {}
114
+ d = cli.load_dataset
115
+ if d.is_multi?
116
+ stats = {median_copies: 0, mean_copies: 0}
117
+ File.open(r.file_path(:report), 'r') do |fh|
118
+ fh.each_line do |ln|
119
+ if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
120
+ stats["#{$1.downcase}_copies".to_sym] = $2.to_f
121
+ end
122
+ end
123
+ end
124
+ else
125
+ # Fix estimate for Archaea
126
+ if !d.metadata[:tax].nil? &&
127
+ d.metadata[:tax].in?(Taxonomy.new('d:Archaea')) &&
128
+ r.file_path(:bac_report).nil?
129
+ scr = "#{MiGA.root_path}/utils/arch-ess-genes.rb"
130
+ rep = r.file_path(:report)
131
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
132
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
133
+ $stderr.print `#{rc} ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
134
+ r.add_file(:bac_report, "#{d.name}.ess/log")
135
+ r.add_file(:report, "#{d.name}.ess/log.archaea")
136
+ end
137
+ # Extract/compute quality values
138
+ stats = {completeness: [0.0, '%'], contamination: [0.0, '%']}
139
+ File.open(r.file_path(:report), 'r') do |fh|
140
+ fh.each_line do |ln|
141
+ if /^! (Completeness|Contamination): (.*)%/.match(ln)
142
+ stats[$1.downcase.to_sym][0] = $2.to_f
143
+ end
144
+ end
145
+ end
146
+ stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
147
+ d.metadata[:quality] = case stats[:quality]
148
+ when 80..100 ; :excellent
149
+ when 50..80 ; :high
150
+ when 20..50 ; :intermediate
151
+ else ; :low
152
+ end
153
+ d.save
154
+ end
155
+ stats
156
+ end
157
+
158
+ def compute_ssu(r)
159
+ stats = {ssu: 0, complete_ssu: 0}
160
+ Zlib::GzipReader.open(r.file_path(:gff)) do |fh|
161
+ fh.each_line do |ln|
162
+ next if ln =~ /^#/
163
+ rl = ln.chomp.split("\t")
164
+ len = (rl[4].to_i - rl[3].to_i).abs + 1
165
+ stats[:max_length] = [stats[:max_length] || 0, len].max
166
+ stats[:ssu] += 1
167
+ stats[:complete_ssu] += 1 unless rl[8] =~ /\(partial\)/
168
+ end
169
+ end
170
+ stats
171
+ end
172
+
173
+ def compute_taxonomy(r)
174
+ stats = {}
175
+ File.open(r.file_path(:intax_test), 'r') do |fh|
176
+ fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
177
+ stats[:closest_relative] = $1
178
+ stats[:aai] = [$2.to_f, '%']
179
+ 3.times { fh.gets }
180
+ fh.each_line do |ln|
181
+ row = ln.chomp.gsub(/^\s*/,'').split(/\s+/)
182
+ break if row.empty?
183
+ stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
184
+ end
185
+ end
186
+ stats
187
+ end
140
188
  end
@@ -11,18 +11,22 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
11
11
  cli.opt_object(opt, [:project, :dataset_opt])
12
12
  cli.opt_filter_datasets(opt)
13
13
  cli.opt_object(opt, [:result_dataset])
14
+ opt.on(
15
+ '-o', '--output PATH',
16
+ 'Create output file instead of returning to STDOUT'
17
+ ) { |v| cli[:output] = v }
14
18
  opt.on(
15
19
  '--tab',
16
20
  'Return a tab-delimited table'
17
- ){ |v| cli[:tabular] = v }
21
+ ) { |v| cli[:tabular] = v }
18
22
  opt.on(
19
23
  '--key STRING',
20
24
  'Return only the value of the requested key'
21
- ){ |v| cli[:key_md] = v }
25
+ ) { |v| cli[:key_md] = v }
22
26
  opt.on(
23
27
  '--with-units',
24
28
  'Include units in each cell'
25
- ){ |v| cli[:units] = v }
29
+ ) { |v| cli[:units] = v }
26
30
  end
27
31
  end
28
32
 
@@ -44,6 +48,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
44
48
  stats.map{ |s| keys.
45
49
  map{ |k| s[k].is_a?(Array) ? s[k].map(&:to_s).join('') : s[k] } } :
46
50
  stats.map{ |s| keys.map{ |k| s[k].is_a?(Array) ? s[k].first : s[k] } }
47
- cli.puts MiGA.tabulate(keys, table, cli[:tabular])
51
+ io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
52
+ cli.puts(io, MiGA.tabulate(keys, table, cli[:tabular]))
53
+ io.close unless cli[:output].nil?
48
54
  end
49
55
  end
@@ -25,6 +25,23 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
25
25
  end
26
26
 
27
27
  def perform
28
+ dist = read_distances
29
+ Dir.mktmpdir do |dir|
30
+ tab = get_tab_index(dir)
31
+ dist = traverse_taxonomy(tab, dist)
32
+ end
33
+
34
+ cli.say 'Generating report'
35
+ dist.keys.each do |k|
36
+ dist[k][5] = dist[k][4].reverse.join(' ')
37
+ dist[k][4] = dist[k][4].first
38
+ puts (k.split('-') + dist[k]).join("\t")
39
+ end
40
+ end
41
+
42
+ private
43
+
44
+ def read_distances
28
45
  p = cli.load_project
29
46
  metric = p.is_clade? ? 'ani' : 'aai'
30
47
  res_n = "#{metric}_distances"
@@ -44,59 +61,57 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
44
61
  end
45
62
  cli.say " Lines: #{mfh.lineno}"
46
63
  mfh.close
64
+ dist
65
+ end
47
66
 
48
- Dir.mktmpdir do |dir|
49
- if cli[:index].nil?
50
- ds = cli.load_and_filter_datasets
51
- ds.keep_if { |d| !d.metadata[:tax].nil? }
67
+ def get_tab_index(dir)
68
+ if cli[:index].nil?
69
+ ds = cli.load_and_filter_datasets
70
+ ds.keep_if { |d| !d.metadata[:tax].nil? }
52
71
 
53
- cli.say 'Indexing taxonomy'
54
- tax_index = TaxIndex.new
55
- ds.each { |d| tax_index << d }
56
- tab = File.expand_path('index.tab', dir)
57
- File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
58
- else
59
- tab = cli[:index]
60
- end
72
+ cli.say 'Indexing taxonomy'
73
+ tax_index = TaxIndex.new
74
+ ds.each { |d| tax_index << d }
75
+ tab = File.expand_path('index.tab', dir)
76
+ File.open(tab, 'w') { |fh| fh.print tax_index.to_tab }
77
+ else
78
+ tab = cli[:index]
79
+ end
80
+ tab
81
+ end
61
82
 
62
- cli.say 'Traversing taxonomy'
63
- rank_i = 0
64
- Taxonomy.KNOWN_RANKS.each do |rank|
65
- cli.say "o #{rank}: "
66
- rank_n = 0
67
- rank_i += 1
68
- in_rank = nil
69
- ds_name = []
70
- File.open(tab, 'r') do |fh|
71
- fh.each_line do |ln|
72
- if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
73
- in_rank = nil
74
- ds_name = []
75
- elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
76
- in_rank = $2 == '?' ? nil : $1
77
- ds_name = []
78
- elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
79
- ds_i = $1
80
- ds_name << ds_i
81
- ds_name.each do |ds_j|
82
- k = cannid(ds_i, ds_j)
83
- next if dist[k].nil?
84
- rank_n += 1
85
- dist[k][3] = rank_i
86
- dist[k][4].unshift in_rank
87
- end
83
+ def traverse_taxonomy(tab, dist)
84
+ cli.say 'Traversing taxonomy'
85
+ rank_i = 0
86
+ Taxonomy.KNOWN_RANKS.each do |rank|
87
+ cli.say "o #{rank}: "
88
+ rank_n = 0
89
+ rank_i += 1
90
+ in_rank = nil
91
+ ds_name = []
92
+ File.open(tab, 'r') do |fh|
93
+ fh.each_line do |ln|
94
+ if ln =~ /^ {#{(rank_i-1)*2}}\S+:\S+:/
95
+ in_rank = nil
96
+ ds_name = []
97
+ elsif ln =~ /^ {#{rank_i*2}}(#{rank}:(\S+)):/
98
+ in_rank = $2 == '?' ? nil : $1
99
+ ds_name = []
100
+ elsif ln =~ /^ *# (\S+)/ and not in_rank.nil?
101
+ ds_i = $1
102
+ ds_name << ds_i
103
+ ds_name.each do |ds_j|
104
+ k = cannid(ds_i, ds_j)
105
+ next if dist[k].nil?
106
+ rank_n += 1
107
+ dist[k][3] = rank_i
108
+ dist[k][4].unshift in_rank
88
109
  end
89
110
  end
90
111
  end
91
- cli.say "#{rank_n} pairs of datasets"
92
112
  end
113
+ cli.say "#{rank_n} pairs of datasets"
93
114
  end
94
-
95
- cli.say 'Generating report'
96
- dist.keys.each do |k|
97
- dist[k][5] = dist[k][4].reverse.join(' ')
98
- dist[k][4] = dist[k][4].first
99
- puts (k.split('-') + dist[k]).join("\t")
100
- end
115
+ dist
101
116
  end
102
117
  end