miga-base 0.7.4.0 → 0.7.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +28 -20
  14. data/lib/miga/cli/action/doctor/base.rb +29 -6
  15. data/lib/miga/cli/action/edit.rb +1 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +34 -29
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +3 -3
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +21 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +24 -17
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +51 -35
  53. data/lib/miga/daemon/base.rb +0 -2
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/json.rb +5 -7
  59. data/lib/miga/lair.rb +4 -0
  60. data/lib/miga/metadata.rb +4 -3
  61. data/lib/miga/project.rb +29 -20
  62. data/lib/miga/project/base.rb +52 -37
  63. data/lib/miga/project/dataset.rb +27 -13
  64. data/lib/miga/project/hooks.rb +0 -3
  65. data/lib/miga/project/result.rb +14 -5
  66. data/lib/miga/remote_dataset.rb +85 -72
  67. data/lib/miga/remote_dataset/base.rb +11 -13
  68. data/lib/miga/remote_dataset/download.rb +33 -12
  69. data/lib/miga/result.rb +34 -25
  70. data/lib/miga/result/base.rb +0 -2
  71. data/lib/miga/result/dates.rb +1 -3
  72. data/lib/miga/result/source.rb +15 -16
  73. data/lib/miga/result/stats.rb +36 -25
  74. data/lib/miga/tax_dist.rb +6 -3
  75. data/lib/miga/tax_index.rb +17 -17
  76. data/lib/miga/taxonomy.rb +6 -1
  77. data/lib/miga/taxonomy/base.rb +19 -15
  78. data/lib/miga/version.rb +19 -16
  79. data/test/common_test.rb +3 -11
  80. data/test/daemon_helper.rb +38 -0
  81. data/test/daemon_test.rb +73 -101
  82. data/test/dataset_test.rb +58 -59
  83. data/test/format_test.rb +3 -11
  84. data/test/hook_test.rb +50 -55
  85. data/test/json_test.rb +7 -8
  86. data/test/lair_test.rb +22 -28
  87. data/test/metadata_test.rb +6 -14
  88. data/test/project_test.rb +33 -39
  89. data/test/remote_dataset_test.rb +20 -28
  90. data/test/result_stats_test.rb +17 -27
  91. data/test/result_test.rb +41 -34
  92. data/test/tax_dist_test.rb +0 -2
  93. data/test/tax_index_test.rb +4 -10
  94. data/test/taxonomy_test.rb +7 -9
  95. data/test/test_helper.rb +42 -1
  96. data/test/with_daemon_test.rb +14 -22
  97. data/utils/cleanup-databases.rb +6 -5
  98. data/utils/distance/base.rb +0 -1
  99. data/utils/distance/commands.rb +19 -12
  100. data/utils/distance/database.rb +24 -21
  101. data/utils/distance/pipeline.rb +12 -9
  102. data/utils/distance/runner.rb +14 -13
  103. data/utils/distance/temporal.rb +1 -3
  104. data/utils/distances.rb +1 -1
  105. data/utils/domain-ess-genes.rb +7 -7
  106. data/utils/index_metadata.rb +4 -2
  107. data/utils/mytaxa_scan.rb +18 -16
  108. data/utils/representatives.rb +5 -4
  109. data/utils/requirements.txt +1 -1
  110. data/utils/subclade/base.rb +0 -1
  111. data/utils/subclade/pipeline.rb +7 -6
  112. data/utils/subclade/runner.rb +9 -9
  113. data/utils/subclade/temporal.rb +0 -2
  114. data/utils/subclades-compile.rb +39 -37
  115. data/utils/subclades.rb +1 -1
  116. metadata +3 -2
@@ -1,13 +1,12 @@
1
-
2
1
  # High-end pipelines for DistanceRunner
3
2
  module MiGA::DistanceRunner::Pipeline
4
-
5
3
  # Recursively classify the dataset, returning an Array with two entries:
6
4
  # classification and cluster number
7
5
  def classify(clades, classif, metric, result_fh, val_cls = nil)
8
6
  dir = File.expand_path(classif, clades)
9
7
  med = File.expand_path('miga-project.medoids', dir)
10
- return [classif,val_cls] unless File.size? med
8
+ return [classif, val_cls] unless File.size? med
9
+
11
10
  max_val = 0
12
11
  val_med = ''
13
12
  val_cls = nil
@@ -35,6 +34,7 @@ module MiGA::DistanceRunner::Pipeline
35
34
  $stderr.puts "Building medoids tree (metric = #{metric})"
36
35
  db = query_db(metric)
37
36
  return unless File.size? db
37
+
38
38
  out_base = File.expand_path(dataset.name, home)
39
39
  ds_matrix = "#{out_base}.txt"
40
40
  ds_matrix_fh = File.open(ds_matrix, 'w')
@@ -43,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
43
43
  seq2 = []
44
44
  foreach_in_db(db, metric) do |r|
45
45
  seq2 << r[0]
46
- ds_matrix_fh.puts r[0,3].join("\t")
46
+ ds_matrix_fh.puts r[0, 3].join("\t")
47
47
  end
48
48
  # Find all values among visited datasets in ref_project
49
49
  ref_r = ref_project.result("#{metric}_distances") or return
@@ -51,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
51
51
  fh.each_line do |ln|
52
52
  r = ln.chomp.split("\t")
53
53
  next unless seq2.include?(r[1]) or seq2.include?(r[2])
54
- ds_matrix_fh.puts r[1,3].join("\t")
54
+
55
+ ds_matrix_fh.puts r[1, 3].join("\t")
55
56
  end
56
57
  end
57
58
  ds_matrix_fh.close
@@ -74,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
74
75
  dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
75
76
  cr = dataset.closest_relatives(1, from_ref_project)
76
77
  return if cr.nil? or cr.empty?
78
+
77
79
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
78
80
 
79
81
  # Run the test for each rank
80
82
  tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
81
- r = tax_test.map do |k,v|
83
+ r = tax_test.map do |k, v|
82
84
  sig = ''
83
85
  [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
84
86
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
@@ -99,10 +101,11 @@ module MiGA::DistanceRunner::Pipeline
99
101
  def transfer_taxonomy(tax)
100
102
  $stderr.puts "Transferring taxonomy"
101
103
  return if tax.nil?
104
+
102
105
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
103
- tax_a = tax.
104
- select { |i| i[1] != '?' && i[2] <= pval }.
105
- map { |i| i[0,2].join(':') }
106
+ tax_a = tax
107
+ .select { |i| i[1] != '?' && i[2] <= pval }
108
+ .map { |i| i[0, 2].join(':') }
106
109
  dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
107
110
  dataset.save
108
111
  end
@@ -1,13 +1,10 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'database.rb'
5
4
  require_relative 'commands.rb'
6
5
  require_relative 'pipeline.rb'
7
6
 
8
-
9
7
  class MiGA::DistanceRunner
10
-
11
8
  include MiGA::DistanceRunner::Temporal
12
9
  include MiGA::DistanceRunner::Database
13
10
  include MiGA::DistanceRunner::Commands
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
16
13
  attr_reader :project, :ref_project, :dataset, :opts, :home
17
14
  attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
18
15
 
19
- def initialize(project_path, dataset_name, opts_hash={})
16
+ def initialize(project_path, dataset_name, opts_hash = {})
20
17
  @opts = opts_hash
21
18
  @project = MiGA::Project.load(project_path) or
22
19
  raise "No project at #{project_path}"
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
30
27
  @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
31
28
  project.is_clade? ? 'save-rbm' : 'no-save-rbm'
32
29
  end
33
- @opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
30
+ @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
34
31
  if opts[:run_taxonomy] and project.metadata[:ref_project]
35
32
  ref_path = project.metadata[:ref_project]
36
33
  @home = File.expand_path('05.taxonomy', @home)
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
60
57
  def go!
61
58
  $stderr.puts "Launching analysis"
62
59
  return if dataset.is_multi?
60
+
63
61
  Dir.mktmpdir do |tmp_dir|
64
62
  @tmp = tmp_dir
65
63
  create_temporals
@@ -76,12 +74,13 @@ class MiGA::DistanceRunner
76
74
  # first-come-first-serve traverse
77
75
  ref_project.each_dataset do |ds|
78
76
  next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
77
+
79
78
  puts "[ #{Time.now} ] #{ds.name}"
80
79
  ani_after_aai(ds)
81
80
  end
82
81
 
83
82
  # Finalize
84
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
83
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
85
84
  end
86
85
 
87
86
  ##
@@ -104,14 +103,15 @@ class MiGA::DistanceRunner
104
103
  # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
105
104
  par_dir = File.dirname(File.expand_path(classif, res.dir))
106
105
  par = File.expand_path('miga-project.classif', par_dir)
107
- closest = {dataset: nil, ani: 0.0}
106
+ closest = { dataset: nil, ani: 0.0 }
108
107
  if File.size? par
109
108
  File.open(par, 'r') do |fh|
110
109
  fh.each_line do |ln|
111
110
  r = ln.chomp.split("\t")
112
111
  next unless r[1].to_i == val_cls
112
+
113
113
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
114
- closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
114
+ closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
115
115
  end
116
116
  end
117
117
  end
@@ -119,14 +119,14 @@ class MiGA::DistanceRunner
119
119
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
120
120
  cl_path = res.file_path :clades_ani95
121
121
  if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
122
- File.foreach(cl_path).
123
- map { |i| i.chomp.split(',') }.
124
- find( lambda{[]} ){ |i| i.include? closest[:ds] }.
125
- each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
122
+ File.foreach(cl_path)
123
+ .map { |i| i.chomp.split(',') }
124
+ .find(lambda { [] }) { |i| i.include? closest[:ds] }
125
+ .each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
126
126
  end
127
127
 
128
128
  # Finalize
129
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
129
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
130
130
  build_medoids_tree(tsk[1])
131
131
  transfer_taxonomy(tax_test)
132
132
  end
@@ -135,6 +135,7 @@ class MiGA::DistanceRunner
135
135
  def go_taxonomy!
136
136
  $stderr.puts "Launching taxonomy analysis"
137
137
  return unless project.metadata[:ref_project]
138
+
138
139
  go_query! # <- yeah, it's actually the same, just different ref_project
139
140
  end
140
141
  end
@@ -1,9 +1,7 @@
1
-
2
1
  require 'tmpdir'
3
2
  require 'zlib'
4
3
 
5
4
  module MiGA::DistanceRunner::Temporal
6
-
7
5
  # Copy input files to the (local) temporal folder
8
6
  def create_temporals
9
7
  rf = {
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
43
41
  def checkpoint!(metric)
44
42
  $stderr.puts "Checkpoint (metric = #{metric})"
45
43
  SQLite3::Database.new(tmp_dbs[metric]) do |conn|
46
- conn.execute("select count(*) from #{metric==:haai ? :aai : metric}")
44
+ conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
47
45
  end
48
46
  FileUtils.cp(tmp_dbs[metric], dbs[metric])
49
47
  @db_counts[metric] = 0
data/utils/distances.rb CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  dataset = ARGV.shift
6
6
  project = ARGV.shift
7
- opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
7
+ opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(dataset, project, opts)
9
9
  runner.go!
@@ -6,10 +6,10 @@ domain = ARGV.shift
6
6
 
7
7
  def quality(hsh)
8
8
  q = {}
9
- q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
10
- q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
11
- q[:cmp] = 100.0*q[:found].to_f/hsh.size
12
- q[:cnt] = 100.0*q[:multi].to_f/hsh.size
9
+ q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
10
+ q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
11
+ q[:cmp] = 100.0 * q[:found].to_f / hsh.size
12
+ q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
13
13
  q
14
14
  end
15
15
 
@@ -39,7 +39,7 @@ end
39
39
  # Find expected genes for domain
40
40
  n_dom = Hash[
41
41
  `HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
42
- .chomp.split("\n").map { |i| i.split("\t") }
42
+ .chomp.split("\n").map { |i| i.split("\t") }
43
43
  ]
44
44
  l_dom = n_dom.keys
45
45
  cnt_dom = {}
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
54
54
  ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
55
55
  if q[:multi] > 0
56
56
  ofh.puts "! Multiple copies: "
57
- cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
57
+ cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
58
58
  end
59
59
  if q[:found] < cnt_dom.size
60
60
  ofh.puts "! Missing genes: "
61
- cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
61
+ cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
62
62
  end
63
63
  end
@@ -13,16 +13,18 @@ db.execute 'create table metadata(' \
13
13
 
14
14
  def searchable(db, d, k, v)
15
15
  db.execute 'insert into metadata values(?,?,?)',
16
- d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
16
+ d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
17
17
  end
18
18
 
19
19
  p.each_dataset do |d|
20
20
  next unless d.is_ref?
21
21
  next unless d.is_active?
22
+
22
23
  searchable(db, d, :name, d.name)
23
24
  d.metadata.each do |k, v|
24
25
  next if [:created, :updated].include? k
25
- v = v.sorted_ranks.map{ |r| r[1] }.join(' ') if k == :tax
26
+
27
+ v = v.sorted_ranks.map { |r| r[1] }.join(' ') if k == :tax
26
28
  searchable(db, d, k, v)
27
29
  end
28
30
  end
data/utils/mytaxa_scan.rb CHANGED
@@ -15,42 +15,45 @@ begin
15
15
 
16
16
  # Extract gene IDs
17
17
  ifh = faa =~ /\.gz/ ? Zlib::GzipReader.open(faa) : File.open(faa, 'r')
18
- ids = ifh.each_line.grep(/^>/).map{|dl| dl.chomp.sub(/^>/,'').sub(/\s.*/,'')}
18
+ ids = ifh.each_line.grep(/^>/).map { |dl| dl.chomp.sub(/^>/, '').sub(/\s.*/, '') }
19
19
  ifh.close
20
- tax = Hash[ids.map{|k| [k, "NA"]}]
20
+ tax = Hash[ids.map { |k| [k, "NA"] }]
21
21
 
22
22
  # Get MyTaxa distributions
23
23
  k, l = nil
24
24
  File.open(mytaxa).each do |ln|
25
25
  ln.chomp!
26
- if $.%2 == 1
26
+ if $. % 2 == 1
27
27
  k, l = ln.split /\t/
28
28
  else
29
- tax[k] = ln.gsub(/<[^>]+>/,"").gsub(/;/,"::")
29
+ tax[k] = ln.gsub(/<[^>]+>/, '').gsub(/;/, '::')
30
30
  end
31
31
  end
32
- all_tax = tax.values.uniq.sort{|x,y| tax.values.count(y) <=> tax.values.count(x) }
32
+ all_tax = tax.values.uniq.sort do |x, y|
33
+ tax.values.count(y) <=> tax.values.count(x)
34
+ end
33
35
 
34
36
  # Estimate Windows and save gene IDs
35
- fh = File.open(outdata + ".genes", "w")
37
+ fh = File.open(outdata + '.genes', 'w')
36
38
  c = []
37
- c << all_tax.map{|t| tax.values.count(t) }
38
- n_wins = (ids.size/winsize).ceil
39
- (0 .. (n_wins-1)).each do |win|
40
- k = ids[win*winsize, winsize]
39
+ c << all_tax.map { |t| tax.values.count(t) }
40
+ n_wins = (ids.size / winsize).ceil
41
+ (0..(n_wins - 1)).each do |win|
42
+ k = ids[win * winsize, winsize]
41
43
  win_t = tax.values_at(*k)
42
44
  fh.puts k.join("\t")
43
- c << all_tax.map{|t| win_t.count(t)}
45
+ c << all_tax.map { |t| win_t.count(t) }
44
46
  end
45
- p = c.map{|col| col.map{|cell| cell.to_f/col.inject(:+)}}
47
+ p = c.map { |col| col.map { |cell| cell.to_f / col.inject(:+) } }
46
48
  fh.close
47
49
 
48
50
  # Save window profiles
49
51
  fh = File.open(outdata, "w")
50
52
  fh.puts "# Data derived from #{mytaxa}, with #{winsize}-genes windows"
51
- fh.puts "# " + (["Tax-label", "Genome"] + (1 .. n_wins).map{|i| "Win_#{i}"}).join("\t")
52
- (0 .. (all_tax.size - 1)).each do |row|
53
- fh.puts ([all_tax[row]] + p.map{|col| col[row]}).join "\t"
53
+ fh.puts '# ' + (['Tax-label', 'Genome'] +
54
+ (1..n_wins).map { |i| "Win_#{i}" }).join("\t")
55
+ (0..(all_tax.size - 1)).each do |row|
56
+ fh.puts ([all_tax[row]] + p.map { |col| col[row] }).join "\t"
54
57
  end
55
58
  fh.close
56
59
  rescue => err
@@ -58,4 +61,3 @@ rescue => err
58
61
  err.backtrace.each { |l| $stderr.puts l + "\n" }
59
62
  err
60
63
  end
61
-
@@ -19,7 +19,8 @@ end
19
19
  ani_spp = []
20
20
  File.open(pf, 'r') do |fh|
21
21
  fh.each_line do |ln|
22
- next if $.==1 and ln.chomp == 'G' # <- Legacy check
22
+ next if $. == 1 and ln.chomp == 'G' # <- Legacy check
23
+
23
24
  ani_spp << ln.chomp.split(',')
24
25
  end
25
26
  end
@@ -32,10 +33,10 @@ ani_spp.each_with_index do |datasets, i|
32
33
  dr = d.result(:essential_genes) or next
33
34
  q = dr[:stats][:quality] or next
34
35
  if best.nil? or q > best[:q]
35
- best = {d: d, q: q}
36
+ best = { d: d, q: q }
36
37
  end
37
38
  end
38
39
  raise "Unavailable statistics for any of:\n#{datasets}\n" if best.nil?
39
- puts "ANIsp_#{i+1}\t#{best[:d].name}"
40
- end
41
40
 
41
+ puts "ANIsp_#{i + 1}\t#{best[:d].name}"
42
+ end
@@ -1,6 +1,6 @@
1
1
  Software Test exec Website Notes
2
2
  -------- --------- ------- -----
3
- Ruby ruby https://www.ruby-lang.org/ Required version: 2.1+
3
+ Ruby ruby https://www.ruby-lang.org/ Required version: 2.3+
4
4
  Python python https://www.python.org/
5
5
  R R http://www.r-project.org/
6
6
  SQLite3 sqlite3 https://www.sqlite.org/
@@ -1,4 +1,3 @@
1
-
2
1
  require 'zlib'
3
2
  require 'miga'
4
3
 
@@ -1,7 +1,5 @@
1
-
2
1
  # High-end pipelines for SubcladeRunner
3
2
  module MiGA::SubcladeRunner::Pipeline
4
-
5
3
  # Run species-level clusterings using ANI > 95% / AAI > 90%
6
4
  def cluster_species
7
5
  tasks = {
@@ -12,7 +10,7 @@ module MiGA::SubcladeRunner::Pipeline
12
10
  # Final output
13
11
  ogs_file = "miga-project.#{k}-clades"
14
12
  next if File.size? ogs_file
15
-
13
+
16
14
  # Build ABC files
17
15
  abc_path = tmp_file("#{k}.abc")
18
16
  ofh = File.open(abc_path, 'w')
@@ -20,6 +18,7 @@ module MiGA::SubcladeRunner::Pipeline
20
18
  Zlib::GzipReader.open(metric_res.file_path(:matrix)) do |ifh|
21
19
  ifh.each_line do |ln|
22
20
  next if ln =~ /^metric\t/
21
+
23
22
  r = ln.chomp.split("\t")
24
23
  ofh.puts "G>#{r[1]}\tG>#{r[2]}\t#{r[3]}" if r[3].to_f >= par[1]
25
24
  end
@@ -55,7 +54,8 @@ module MiGA::SubcladeRunner::Pipeline
55
54
  ofh = File.open('miga-project.proposed-clades', 'w')
56
55
  File.open('miga-project.gsp-clades', 'r') do |ifh|
57
56
  ifh.each_line do |ln|
58
- next if $.==1
57
+ next if $. == 1
58
+
59
59
  r = ln.chomp.split(',')
60
60
  ofh.puts r.join("\t") if r.size >= 5
61
61
  end
@@ -70,8 +70,9 @@ module MiGA::SubcladeRunner::Pipeline
70
70
  matrix = metric_res.file_path(:matrix)
71
71
  `Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
72
72
  miga-project.ani95-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
73
- File.rename('miga-project.nwk',"miga-project.#{metric}.nwk") if
74
- File.exist? 'miga-project.nwk'
73
+ if File.exist? 'miga-project.nwk'
74
+ File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
75
+ end
75
76
  end
76
77
 
77
78
  def compile
@@ -1,33 +1,34 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'pipeline.rb'
5
4
 
6
5
  class MiGA::SubcladeRunner
7
-
8
6
  include MiGA::SubcladeRunner::Temporal
9
7
  include MiGA::SubcladeRunner::Pipeline
10
8
 
11
9
  attr_reader :project, :step, :opts, :home, :tmp
12
10
 
13
- def initialize(project_path, step, opts_hash={})
11
+ def initialize(project_path, step, opts_hash = {})
14
12
  @opts = opts_hash
15
13
  @project = MiGA::Project.load(project_path) or
16
- raise "No project at #{project_path}"
14
+ raise "No project at #{project_path}"
17
15
  @step = step.to_sym
18
- clades_dir = File.expand_path('data/10.clades', project.path)
19
- @home = File.expand_path(@step == :clade_finding ? '01.find' : '02.ani',
20
- clades_dir)
16
+ @home = File.join(
17
+ File.join(project.path, 'data', '10.clades'),
18
+ @step == :clade_finding ? '01.find' : '02.ani'
19
+ )
21
20
  @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
22
21
  @opts[:run_clades] = !!@project.metadata.data.fetch(:run_clades) { true }
23
22
  @opts[:gsp_ani] = @project.metadata.data.fetch(:gsp_ani) { 95.0 }.to_f
24
23
  @opts[:gsp_aai] = @project.metadata.data.fetch(:gsp_aai) { 90.0 }.to_f
25
- @opts[:gsp_metric] = @project.metadata.data.fetch(:gsp_metric){ 'ani' }.to_s
24
+ @opts[:gsp_metric] =
25
+ @project.metadata.data.fetch(:gsp_metric) { 'ani' }.to_s
26
26
  end
27
27
 
28
28
  # Launch the appropriate analysis
29
29
  def go!
30
30
  return if project.type == :metagenomes
31
+
31
32
  unless @project.dataset_names.any? { |i| @project.dataset(i).is_ref? }
32
33
  FileUtils.touch(File.expand_path('miga-project.empty', @home))
33
34
  return
@@ -54,5 +55,4 @@ class MiGA::SubcladeRunner
54
55
  subclades :ani
55
56
  compile
56
57
  end
57
-
58
58
  end