miga-base 0.7.4.0 → 0.7.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +28 -20
  14. data/lib/miga/cli/action/doctor/base.rb +29 -6
  15. data/lib/miga/cli/action/edit.rb +1 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +34 -29
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +3 -3
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +21 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +24 -17
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +51 -35
  53. data/lib/miga/daemon/base.rb +0 -2
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/json.rb +5 -7
  59. data/lib/miga/lair.rb +4 -0
  60. data/lib/miga/metadata.rb +4 -3
  61. data/lib/miga/project.rb +29 -20
  62. data/lib/miga/project/base.rb +52 -37
  63. data/lib/miga/project/dataset.rb +27 -13
  64. data/lib/miga/project/hooks.rb +0 -3
  65. data/lib/miga/project/result.rb +14 -5
  66. data/lib/miga/remote_dataset.rb +85 -72
  67. data/lib/miga/remote_dataset/base.rb +11 -13
  68. data/lib/miga/remote_dataset/download.rb +33 -12
  69. data/lib/miga/result.rb +34 -25
  70. data/lib/miga/result/base.rb +0 -2
  71. data/lib/miga/result/dates.rb +1 -3
  72. data/lib/miga/result/source.rb +15 -16
  73. data/lib/miga/result/stats.rb +36 -25
  74. data/lib/miga/tax_dist.rb +6 -3
  75. data/lib/miga/tax_index.rb +17 -17
  76. data/lib/miga/taxonomy.rb +6 -1
  77. data/lib/miga/taxonomy/base.rb +19 -15
  78. data/lib/miga/version.rb +19 -16
  79. data/test/common_test.rb +3 -11
  80. data/test/daemon_helper.rb +38 -0
  81. data/test/daemon_test.rb +73 -101
  82. data/test/dataset_test.rb +58 -59
  83. data/test/format_test.rb +3 -11
  84. data/test/hook_test.rb +50 -55
  85. data/test/json_test.rb +7 -8
  86. data/test/lair_test.rb +22 -28
  87. data/test/metadata_test.rb +6 -14
  88. data/test/project_test.rb +33 -39
  89. data/test/remote_dataset_test.rb +20 -28
  90. data/test/result_stats_test.rb +17 -27
  91. data/test/result_test.rb +41 -34
  92. data/test/tax_dist_test.rb +0 -2
  93. data/test/tax_index_test.rb +4 -10
  94. data/test/taxonomy_test.rb +7 -9
  95. data/test/test_helper.rb +42 -1
  96. data/test/with_daemon_test.rb +14 -22
  97. data/utils/cleanup-databases.rb +6 -5
  98. data/utils/distance/base.rb +0 -1
  99. data/utils/distance/commands.rb +19 -12
  100. data/utils/distance/database.rb +24 -21
  101. data/utils/distance/pipeline.rb +12 -9
  102. data/utils/distance/runner.rb +14 -13
  103. data/utils/distance/temporal.rb +1 -3
  104. data/utils/distances.rb +1 -1
  105. data/utils/domain-ess-genes.rb +7 -7
  106. data/utils/index_metadata.rb +4 -2
  107. data/utils/mytaxa_scan.rb +18 -16
  108. data/utils/representatives.rb +5 -4
  109. data/utils/requirements.txt +1 -1
  110. data/utils/subclade/base.rb +0 -1
  111. data/utils/subclade/pipeline.rb +7 -6
  112. data/utils/subclade/runner.rb +9 -9
  113. data/utils/subclade/temporal.rb +0 -2
  114. data/utils/subclades-compile.rb +39 -37
  115. data/utils/subclades.rb +1 -1
  116. metadata +3 -2
@@ -1,13 +1,12 @@
1
-
2
1
  # High-end pipelines for DistanceRunner
3
2
  module MiGA::DistanceRunner::Pipeline
4
-
5
3
  # Recursively classify the dataset, returning an Array with two entries:
6
4
  # classification and cluster number
7
5
  def classify(clades, classif, metric, result_fh, val_cls = nil)
8
6
  dir = File.expand_path(classif, clades)
9
7
  med = File.expand_path('miga-project.medoids', dir)
10
- return [classif,val_cls] unless File.size? med
8
+ return [classif, val_cls] unless File.size? med
9
+
11
10
  max_val = 0
12
11
  val_med = ''
13
12
  val_cls = nil
@@ -35,6 +34,7 @@ module MiGA::DistanceRunner::Pipeline
35
34
  $stderr.puts "Building medoids tree (metric = #{metric})"
36
35
  db = query_db(metric)
37
36
  return unless File.size? db
37
+
38
38
  out_base = File.expand_path(dataset.name, home)
39
39
  ds_matrix = "#{out_base}.txt"
40
40
  ds_matrix_fh = File.open(ds_matrix, 'w')
@@ -43,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
43
43
  seq2 = []
44
44
  foreach_in_db(db, metric) do |r|
45
45
  seq2 << r[0]
46
- ds_matrix_fh.puts r[0,3].join("\t")
46
+ ds_matrix_fh.puts r[0, 3].join("\t")
47
47
  end
48
48
  # Find all values among visited datasets in ref_project
49
49
  ref_r = ref_project.result("#{metric}_distances") or return
@@ -51,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
51
51
  fh.each_line do |ln|
52
52
  r = ln.chomp.split("\t")
53
53
  next unless seq2.include?(r[1]) or seq2.include?(r[2])
54
- ds_matrix_fh.puts r[1,3].join("\t")
54
+
55
+ ds_matrix_fh.puts r[1, 3].join("\t")
55
56
  end
56
57
  end
57
58
  ds_matrix_fh.close
@@ -74,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
74
75
  dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
75
76
  cr = dataset.closest_relatives(1, from_ref_project)
76
77
  return if cr.nil? or cr.empty?
78
+
77
79
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
78
80
 
79
81
  # Run the test for each rank
80
82
  tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
81
- r = tax_test.map do |k,v|
83
+ r = tax_test.map do |k, v|
82
84
  sig = ''
83
85
  [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
84
86
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
@@ -99,10 +101,11 @@ module MiGA::DistanceRunner::Pipeline
99
101
  def transfer_taxonomy(tax)
100
102
  $stderr.puts "Transferring taxonomy"
101
103
  return if tax.nil?
104
+
102
105
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
103
- tax_a = tax.
104
- select { |i| i[1] != '?' && i[2] <= pval }.
105
- map { |i| i[0,2].join(':') }
106
+ tax_a = tax
107
+ .select { |i| i[1] != '?' && i[2] <= pval }
108
+ .map { |i| i[0, 2].join(':') }
106
109
  dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
107
110
  dataset.save
108
111
  end
@@ -1,13 +1,10 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'database.rb'
5
4
  require_relative 'commands.rb'
6
5
  require_relative 'pipeline.rb'
7
6
 
8
-
9
7
  class MiGA::DistanceRunner
10
-
11
8
  include MiGA::DistanceRunner::Temporal
12
9
  include MiGA::DistanceRunner::Database
13
10
  include MiGA::DistanceRunner::Commands
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
16
13
  attr_reader :project, :ref_project, :dataset, :opts, :home
17
14
  attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
18
15
 
19
- def initialize(project_path, dataset_name, opts_hash={})
16
+ def initialize(project_path, dataset_name, opts_hash = {})
20
17
  @opts = opts_hash
21
18
  @project = MiGA::Project.load(project_path) or
22
19
  raise "No project at #{project_path}"
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
30
27
  @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
31
28
  project.is_clade? ? 'save-rbm' : 'no-save-rbm'
32
29
  end
33
- @opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
30
+ @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
34
31
  if opts[:run_taxonomy] and project.metadata[:ref_project]
35
32
  ref_path = project.metadata[:ref_project]
36
33
  @home = File.expand_path('05.taxonomy', @home)
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
60
57
  def go!
61
58
  $stderr.puts "Launching analysis"
62
59
  return if dataset.is_multi?
60
+
63
61
  Dir.mktmpdir do |tmp_dir|
64
62
  @tmp = tmp_dir
65
63
  create_temporals
@@ -76,12 +74,13 @@ class MiGA::DistanceRunner
76
74
  # first-come-first-serve traverse
77
75
  ref_project.each_dataset do |ds|
78
76
  next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
77
+
79
78
  puts "[ #{Time.now} ] #{ds.name}"
80
79
  ani_after_aai(ds)
81
80
  end
82
81
 
83
82
  # Finalize
84
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
83
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
85
84
  end
86
85
 
87
86
  ##
@@ -104,14 +103,15 @@ class MiGA::DistanceRunner
104
103
  # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
105
104
  par_dir = File.dirname(File.expand_path(classif, res.dir))
106
105
  par = File.expand_path('miga-project.classif', par_dir)
107
- closest = {dataset: nil, ani: 0.0}
106
+ closest = { dataset: nil, ani: 0.0 }
108
107
  if File.size? par
109
108
  File.open(par, 'r') do |fh|
110
109
  fh.each_line do |ln|
111
110
  r = ln.chomp.split("\t")
112
111
  next unless r[1].to_i == val_cls
112
+
113
113
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
114
- closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
114
+ closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
115
115
  end
116
116
  end
117
117
  end
@@ -119,14 +119,14 @@ class MiGA::DistanceRunner
119
119
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
120
120
  cl_path = res.file_path :clades_ani95
121
121
  if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
122
- File.foreach(cl_path).
123
- map { |i| i.chomp.split(',') }.
124
- find( lambda{[]} ){ |i| i.include? closest[:ds] }.
125
- each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
122
+ File.foreach(cl_path)
123
+ .map { |i| i.chomp.split(',') }
124
+ .find(lambda { [] }) { |i| i.include? closest[:ds] }
125
+ .each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
126
126
  end
127
127
 
128
128
  # Finalize
129
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
129
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
130
130
  build_medoids_tree(tsk[1])
131
131
  transfer_taxonomy(tax_test)
132
132
  end
@@ -135,6 +135,7 @@ class MiGA::DistanceRunner
135
135
  def go_taxonomy!
136
136
  $stderr.puts "Launching taxonomy analysis"
137
137
  return unless project.metadata[:ref_project]
138
+
138
139
  go_query! # <- yeah, it's actually the same, just different ref_project
139
140
  end
140
141
  end
@@ -1,9 +1,7 @@
1
-
2
1
  require 'tmpdir'
3
2
  require 'zlib'
4
3
 
5
4
  module MiGA::DistanceRunner::Temporal
6
-
7
5
  # Copy input files to the (local) temporal folder
8
6
  def create_temporals
9
7
  rf = {
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
43
41
  def checkpoint!(metric)
44
42
  $stderr.puts "Checkpoint (metric = #{metric})"
45
43
  SQLite3::Database.new(tmp_dbs[metric]) do |conn|
46
- conn.execute("select count(*) from #{metric==:haai ? :aai : metric}")
44
+ conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
47
45
  end
48
46
  FileUtils.cp(tmp_dbs[metric], dbs[metric])
49
47
  @db_counts[metric] = 0
data/utils/distances.rb CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  dataset = ARGV.shift
6
6
  project = ARGV.shift
7
- opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
7
+ opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(dataset, project, opts)
9
9
  runner.go!
@@ -6,10 +6,10 @@ domain = ARGV.shift
6
6
 
7
7
  def quality(hsh)
8
8
  q = {}
9
- q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
10
- q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
11
- q[:cmp] = 100.0*q[:found].to_f/hsh.size
12
- q[:cnt] = 100.0*q[:multi].to_f/hsh.size
9
+ q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
10
+ q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
11
+ q[:cmp] = 100.0 * q[:found].to_f / hsh.size
12
+ q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
13
13
  q
14
14
  end
15
15
 
@@ -39,7 +39,7 @@ end
39
39
  # Find expected genes for domain
40
40
  n_dom = Hash[
41
41
  `HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
42
- .chomp.split("\n").map { |i| i.split("\t") }
42
+ .chomp.split("\n").map { |i| i.split("\t") }
43
43
  ]
44
44
  l_dom = n_dom.keys
45
45
  cnt_dom = {}
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
54
54
  ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
55
55
  if q[:multi] > 0
56
56
  ofh.puts "! Multiple copies: "
57
- cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
57
+ cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
58
58
  end
59
59
  if q[:found] < cnt_dom.size
60
60
  ofh.puts "! Missing genes: "
61
- cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
61
+ cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
62
62
  end
63
63
  end
@@ -13,16 +13,18 @@ db.execute 'create table metadata(' \
13
13
 
14
14
  def searchable(db, d, k, v)
15
15
  db.execute 'insert into metadata values(?,?,?)',
16
- d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
16
+ d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
17
17
  end
18
18
 
19
19
  p.each_dataset do |d|
20
20
  next unless d.is_ref?
21
21
  next unless d.is_active?
22
+
22
23
  searchable(db, d, :name, d.name)
23
24
  d.metadata.each do |k, v|
24
25
  next if [:created, :updated].include? k
25
- v = v.sorted_ranks.map{ |r| r[1] }.join(' ') if k == :tax
26
+
27
+ v = v.sorted_ranks.map { |r| r[1] }.join(' ') if k == :tax
26
28
  searchable(db, d, k, v)
27
29
  end
28
30
  end
data/utils/mytaxa_scan.rb CHANGED
@@ -15,42 +15,45 @@ begin
15
15
 
16
16
  # Extract gene IDs
17
17
  ifh = faa =~ /\.gz/ ? Zlib::GzipReader.open(faa) : File.open(faa, 'r')
18
- ids = ifh.each_line.grep(/^>/).map{|dl| dl.chomp.sub(/^>/,'').sub(/\s.*/,'')}
18
+ ids = ifh.each_line.grep(/^>/).map { |dl| dl.chomp.sub(/^>/, '').sub(/\s.*/, '') }
19
19
  ifh.close
20
- tax = Hash[ids.map{|k| [k, "NA"]}]
20
+ tax = Hash[ids.map { |k| [k, "NA"] }]
21
21
 
22
22
  # Get MyTaxa distributions
23
23
  k, l = nil
24
24
  File.open(mytaxa).each do |ln|
25
25
  ln.chomp!
26
- if $.%2 == 1
26
+ if $. % 2 == 1
27
27
  k, l = ln.split /\t/
28
28
  else
29
- tax[k] = ln.gsub(/<[^>]+>/,"").gsub(/;/,"::")
29
+ tax[k] = ln.gsub(/<[^>]+>/, '').gsub(/;/, '::')
30
30
  end
31
31
  end
32
- all_tax = tax.values.uniq.sort{|x,y| tax.values.count(y) <=> tax.values.count(x) }
32
+ all_tax = tax.values.uniq.sort do |x, y|
33
+ tax.values.count(y) <=> tax.values.count(x)
34
+ end
33
35
 
34
36
  # Estimate Windows and save gene IDs
35
- fh = File.open(outdata + ".genes", "w")
37
+ fh = File.open(outdata + '.genes', 'w')
36
38
  c = []
37
- c << all_tax.map{|t| tax.values.count(t) }
38
- n_wins = (ids.size/winsize).ceil
39
- (0 .. (n_wins-1)).each do |win|
40
- k = ids[win*winsize, winsize]
39
+ c << all_tax.map { |t| tax.values.count(t) }
40
+ n_wins = (ids.size / winsize).ceil
41
+ (0..(n_wins - 1)).each do |win|
42
+ k = ids[win * winsize, winsize]
41
43
  win_t = tax.values_at(*k)
42
44
  fh.puts k.join("\t")
43
- c << all_tax.map{|t| win_t.count(t)}
45
+ c << all_tax.map { |t| win_t.count(t) }
44
46
  end
45
- p = c.map{|col| col.map{|cell| cell.to_f/col.inject(:+)}}
47
+ p = c.map { |col| col.map { |cell| cell.to_f / col.inject(:+) } }
46
48
  fh.close
47
49
 
48
50
  # Save window profiles
49
51
  fh = File.open(outdata, "w")
50
52
  fh.puts "# Data derived from #{mytaxa}, with #{winsize}-genes windows"
51
- fh.puts "# " + (["Tax-label", "Genome"] + (1 .. n_wins).map{|i| "Win_#{i}"}).join("\t")
52
- (0 .. (all_tax.size - 1)).each do |row|
53
- fh.puts ([all_tax[row]] + p.map{|col| col[row]}).join "\t"
53
+ fh.puts '# ' + (['Tax-label', 'Genome'] +
54
+ (1..n_wins).map { |i| "Win_#{i}" }).join("\t")
55
+ (0..(all_tax.size - 1)).each do |row|
56
+ fh.puts ([all_tax[row]] + p.map { |col| col[row] }).join "\t"
54
57
  end
55
58
  fh.close
56
59
  rescue => err
@@ -58,4 +61,3 @@ rescue => err
58
61
  err.backtrace.each { |l| $stderr.puts l + "\n" }
59
62
  err
60
63
  end
61
-
@@ -19,7 +19,8 @@ end
19
19
  ani_spp = []
20
20
  File.open(pf, 'r') do |fh|
21
21
  fh.each_line do |ln|
22
- next if $.==1 and ln.chomp == 'G' # <- Legacy check
22
+ next if $. == 1 and ln.chomp == 'G' # <- Legacy check
23
+
23
24
  ani_spp << ln.chomp.split(',')
24
25
  end
25
26
  end
@@ -32,10 +33,10 @@ ani_spp.each_with_index do |datasets, i|
32
33
  dr = d.result(:essential_genes) or next
33
34
  q = dr[:stats][:quality] or next
34
35
  if best.nil? or q > best[:q]
35
- best = {d: d, q: q}
36
+ best = { d: d, q: q }
36
37
  end
37
38
  end
38
39
  raise "Unavailable statistics for any of:\n#{datasets}\n" if best.nil?
39
- puts "ANIsp_#{i+1}\t#{best[:d].name}"
40
- end
41
40
 
41
+ puts "ANIsp_#{i + 1}\t#{best[:d].name}"
42
+ end
@@ -1,6 +1,6 @@
1
1
  Software Test exec Website Notes
2
2
  -------- --------- ------- -----
3
- Ruby ruby https://www.ruby-lang.org/ Required version: 2.1+
3
+ Ruby ruby https://www.ruby-lang.org/ Required version: 2.3+
4
4
  Python python https://www.python.org/
5
5
  R R http://www.r-project.org/
6
6
  SQLite3 sqlite3 https://www.sqlite.org/
@@ -1,4 +1,3 @@
1
-
2
1
  require 'zlib'
3
2
  require 'miga'
4
3
 
@@ -1,7 +1,5 @@
1
-
2
1
  # High-end pipelines for SubcladeRunner
3
2
  module MiGA::SubcladeRunner::Pipeline
4
-
5
3
  # Run species-level clusterings using ANI > 95% / AAI > 90%
6
4
  def cluster_species
7
5
  tasks = {
@@ -12,7 +10,7 @@ module MiGA::SubcladeRunner::Pipeline
12
10
  # Final output
13
11
  ogs_file = "miga-project.#{k}-clades"
14
12
  next if File.size? ogs_file
15
-
13
+
16
14
  # Build ABC files
17
15
  abc_path = tmp_file("#{k}.abc")
18
16
  ofh = File.open(abc_path, 'w')
@@ -20,6 +18,7 @@ module MiGA::SubcladeRunner::Pipeline
20
18
  Zlib::GzipReader.open(metric_res.file_path(:matrix)) do |ifh|
21
19
  ifh.each_line do |ln|
22
20
  next if ln =~ /^metric\t/
21
+
23
22
  r = ln.chomp.split("\t")
24
23
  ofh.puts "G>#{r[1]}\tG>#{r[2]}\t#{r[3]}" if r[3].to_f >= par[1]
25
24
  end
@@ -55,7 +54,8 @@ module MiGA::SubcladeRunner::Pipeline
55
54
  ofh = File.open('miga-project.proposed-clades', 'w')
56
55
  File.open('miga-project.gsp-clades', 'r') do |ifh|
57
56
  ifh.each_line do |ln|
58
- next if $.==1
57
+ next if $. == 1
58
+
59
59
  r = ln.chomp.split(',')
60
60
  ofh.puts r.join("\t") if r.size >= 5
61
61
  end
@@ -70,8 +70,9 @@ module MiGA::SubcladeRunner::Pipeline
70
70
  matrix = metric_res.file_path(:matrix)
71
71
  `Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
72
72
  miga-project.ani95-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
73
- File.rename('miga-project.nwk',"miga-project.#{metric}.nwk") if
74
- File.exist? 'miga-project.nwk'
73
+ if File.exist? 'miga-project.nwk'
74
+ File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
75
+ end
75
76
  end
76
77
 
77
78
  def compile
@@ -1,33 +1,34 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'pipeline.rb'
5
4
 
6
5
  class MiGA::SubcladeRunner
7
-
8
6
  include MiGA::SubcladeRunner::Temporal
9
7
  include MiGA::SubcladeRunner::Pipeline
10
8
 
11
9
  attr_reader :project, :step, :opts, :home, :tmp
12
10
 
13
- def initialize(project_path, step, opts_hash={})
11
+ def initialize(project_path, step, opts_hash = {})
14
12
  @opts = opts_hash
15
13
  @project = MiGA::Project.load(project_path) or
16
- raise "No project at #{project_path}"
14
+ raise "No project at #{project_path}"
17
15
  @step = step.to_sym
18
- clades_dir = File.expand_path('data/10.clades', project.path)
19
- @home = File.expand_path(@step == :clade_finding ? '01.find' : '02.ani',
20
- clades_dir)
16
+ @home = File.join(
17
+ File.join(project.path, 'data', '10.clades'),
18
+ @step == :clade_finding ? '01.find' : '02.ani'
19
+ )
21
20
  @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
22
21
  @opts[:run_clades] = !!@project.metadata.data.fetch(:run_clades) { true }
23
22
  @opts[:gsp_ani] = @project.metadata.data.fetch(:gsp_ani) { 95.0 }.to_f
24
23
  @opts[:gsp_aai] = @project.metadata.data.fetch(:gsp_aai) { 90.0 }.to_f
25
- @opts[:gsp_metric] = @project.metadata.data.fetch(:gsp_metric){ 'ani' }.to_s
24
+ @opts[:gsp_metric] =
25
+ @project.metadata.data.fetch(:gsp_metric) { 'ani' }.to_s
26
26
  end
27
27
 
28
28
  # Launch the appropriate analysis
29
29
  def go!
30
30
  return if project.type == :metagenomes
31
+
31
32
  unless @project.dataset_names.any? { |i| @project.dataset(i).is_ref? }
32
33
  FileUtils.touch(File.expand_path('miga-project.empty', @home))
33
34
  return
@@ -54,5 +55,4 @@ class MiGA::SubcladeRunner
54
55
  subclades :ani
55
56
  compile
56
57
  end
57
-
58
58
  end