miga-base 0.7.3.1 → 0.7.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +71 -82
  14. data/lib/miga/cli/action/doctor/base.rb +102 -0
  15. data/lib/miga/cli/action/edit.rb +14 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +8 -4
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +25 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +93 -44
  53. data/lib/miga/daemon/base.rb +30 -11
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/dataset/status.rb +6 -5
  59. data/lib/miga/json.rb +5 -7
  60. data/lib/miga/lair.rb +4 -0
  61. data/lib/miga/metadata.rb +4 -3
  62. data/lib/miga/project.rb +29 -20
  63. data/lib/miga/project/base.rb +52 -37
  64. data/lib/miga/project/dataset.rb +33 -26
  65. data/lib/miga/project/hooks.rb +0 -3
  66. data/lib/miga/project/result.rb +14 -5
  67. data/lib/miga/remote_dataset.rb +85 -72
  68. data/lib/miga/remote_dataset/base.rb +11 -13
  69. data/lib/miga/remote_dataset/download.rb +34 -12
  70. data/lib/miga/result.rb +48 -53
  71. data/lib/miga/result/base.rb +0 -2
  72. data/lib/miga/result/dates.rb +1 -3
  73. data/lib/miga/result/source.rb +15 -16
  74. data/lib/miga/result/stats.rb +37 -27
  75. data/lib/miga/tax_dist.rb +6 -3
  76. data/lib/miga/tax_index.rb +17 -17
  77. data/lib/miga/taxonomy.rb +6 -1
  78. data/lib/miga/taxonomy/base.rb +19 -15
  79. data/lib/miga/version.rb +19 -16
  80. data/scripts/project_stats.bash +3 -0
  81. data/scripts/stats.bash +1 -1
  82. data/test/common_test.rb +3 -11
  83. data/test/daemon_helper.rb +38 -0
  84. data/test/daemon_test.rb +91 -99
  85. data/test/dataset_test.rb +63 -59
  86. data/test/format_test.rb +3 -11
  87. data/test/hook_test.rb +50 -55
  88. data/test/json_test.rb +7 -8
  89. data/test/lair_test.rb +22 -28
  90. data/test/metadata_test.rb +6 -14
  91. data/test/project_test.rb +33 -40
  92. data/test/remote_dataset_test.rb +26 -32
  93. data/test/result_stats_test.rb +17 -27
  94. data/test/result_test.rb +41 -34
  95. data/test/tax_dist_test.rb +2 -4
  96. data/test/tax_index_test.rb +4 -10
  97. data/test/taxonomy_test.rb +7 -9
  98. data/test/test_helper.rb +42 -1
  99. data/test/with_daemon_test.rb +14 -22
  100. data/utils/adapters.fa +13 -0
  101. data/utils/cleanup-databases.rb +6 -5
  102. data/utils/distance/base.rb +0 -1
  103. data/utils/distance/commands.rb +19 -12
  104. data/utils/distance/database.rb +24 -21
  105. data/utils/distance/pipeline.rb +23 -10
  106. data/utils/distance/runner.rb +20 -16
  107. data/utils/distance/temporal.rb +1 -3
  108. data/utils/distances.rb +1 -1
  109. data/utils/domain-ess-genes.rb +7 -7
  110. data/utils/index_metadata.rb +5 -4
  111. data/utils/mytaxa_scan.rb +18 -16
  112. data/utils/representatives.rb +5 -4
  113. data/utils/requirements.txt +1 -1
  114. data/utils/subclade/base.rb +0 -1
  115. data/utils/subclade/pipeline.rb +7 -6
  116. data/utils/subclade/runner.rb +9 -9
  117. data/utils/subclade/temporal.rb +0 -2
  118. data/utils/subclades-compile.rb +39 -37
  119. data/utils/subclades.rb +1 -1
  120. metadata +6 -4
@@ -1,4 +1,3 @@
1
-
2
1
  require 'miga'
3
2
  require 'miga/tax_dist'
4
3
 
@@ -1,12 +1,13 @@
1
-
2
1
  module MiGA::DistanceRunner::Commands
3
2
  # Estimates or calculates AAI against +target+
4
3
  def aai(target)
5
4
  # Check if the request makes sense
6
5
  return nil if target.nil? || target.result(:essential_genes).nil?
6
+
7
7
  # Check if it's been calculated
8
8
  y = stored_value(target, :aai)
9
9
  return y unless y.nil? || y.zero?
10
+
10
11
  # Try hAAI (except in clade projects)
11
12
  unless @ref_project.is_clade?
12
13
  y = haai(target)
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
14
15
  end
15
16
  # Full AAI
16
17
  aai_cmd(
17
- tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
18
- dataset.name, target.name, tmp_dbs[:aai]).tap{ checkpoint :aai }
18
+ tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
19
+ dataset.name, target.name, tmp_dbs[:aai]
20
+ ).tap { checkpoint :aai }
19
21
  end
20
22
 
21
23
  ##
22
24
  # Estimates AAI against +target+ using hAAI
23
25
  def haai(target)
24
26
  return nil if opts[:haai_p] == 'no'
27
+
25
28
  haai = aai_cmd(tmp_file('ess_genes.fa'),
26
- target.result(:essential_genes).file_path(:ess_genes),
27
- dataset.name, target.name, tmp_dbs[:haai],
28
- aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
29
+ target.result(:essential_genes).file_path(:ess_genes),
30
+ dataset.name, target.name, tmp_dbs[:haai],
31
+ aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
29
32
  checkpoint :haai
30
33
  return nil if haai.nil? || haai.zero? || haai > 90.0
31
- aai = 100.0 - Math.exp(2.435076 + 0.4275193*Math.log(100.0-haai))
34
+
35
+ aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
32
36
  SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
33
37
  conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
34
- [dataset.name, target.name, aai]
38
+ [dataset.name, target.name, aai]
35
39
  end
36
40
  checkpoint :aai
37
41
  aai
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
44
48
  t = tmp_file('largecontigs.fa')
45
49
  r = target.result(:assembly)
46
50
  return nil if r.nil? || !File.size?(t)
51
+
47
52
  # Check if it's been calculated
48
53
  y = stored_value(target, :ani)
49
54
  return y unless y.nil? || y.zero?
55
+
50
56
  # Run it
51
57
  ani_cmd(
52
- t, r.file_path(:largecontigs),
53
- dataset.name, target.name, tmp_dbs[:ani]).tap{ checkpoint :ani }
58
+ t, r.file_path(:largecontigs),
59
+ dataset.name, target.name, tmp_dbs[:ani]
60
+ ).tap { checkpoint :ani }
54
61
  end
55
62
 
56
63
  ##
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
74
81
 
75
82
  ##
76
83
  # Execute an ANI command
77
- def ani_cmd(f1, f2, n1, n2, db, o={})
84
+ def ani_cmd(f1, f2, n1, n2, db, o = {})
78
85
  o = opts.merge(o)
79
86
  v = nil
80
87
  if o[:ani_p] == 'fastani'
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
83
90
  unless out.empty?
84
91
  SQLite3::Database.new(db) do |conn|
85
92
  conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
86
- [n1, n2, out[2], out[3], out[4]]
93
+ [n1, n2, out[2], out[3], out[4]]
87
94
  end
88
95
  end
89
96
  v = out[2]
@@ -1,4 +1,3 @@
1
-
2
1
  require 'sqlite3'
3
2
 
4
3
  module MiGA::DistanceRunner::Database
@@ -9,7 +8,7 @@ module MiGA::DistanceRunner::Database
9
8
  @dbs = {}
10
9
  @tmp_dbs = {}
11
10
  @db_counts = {}
12
- {haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
11
+ { haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
13
12
  @db_counts[m] = 0
14
13
  @dbs[m] = for_ref ? ref_db(m) : query_db(m)
15
14
  # Remove if corrupt
@@ -25,9 +24,9 @@ module MiGA::DistanceRunner::Database
25
24
  # Initialize if it doesn't exist
26
25
  SQLite3::Database.new(dbs[m]) do |conn|
27
26
  conn.execute "create table if not exists #{t}(" +
28
- "seq1 varchar(256), seq2 varchar(256), " +
29
- "#{t} float, sd float, n int, omega int" +
30
- ")"
27
+ "seq1 varchar(256), seq2 varchar(256), " +
28
+ "#{t} float, sd float, n int, omega int" +
29
+ ")"
31
30
  end unless File.size? dbs[m]
32
31
  # Copy over to (local) temporals
33
32
  @tmp_dbs[m] = tmp_file("#{m}.db")
@@ -38,16 +37,17 @@ module MiGA::DistanceRunner::Database
38
37
  ##
39
38
  # Path to the database +metric+ for +dataset_name+ in +project+
40
39
  # (assumes that +dataset_name+ is a reference dataset)
41
- def ref_db(metric, dataset_name=nil)
40
+ def ref_db(metric, dataset_name = nil)
42
41
  dataset_name ||= dataset.name
43
- b = case metric
44
- when :haai
45
- "01.haai/#{dataset_name}.db"
46
- when :aai
47
- "02.aai/#{dataset_name}.db"
48
- when :ani
49
- "03.ani/#{dataset_name}.db"
50
- end
42
+ b =
43
+ case metric
44
+ when :haai
45
+ "01.haai/#{dataset_name}.db"
46
+ when :aai
47
+ "02.aai/#{dataset_name}.db"
48
+ when :ani
49
+ "03.ani/#{dataset_name}.db"
50
+ end
51
51
  File.expand_path(b, home)
52
52
  end
53
53
 
@@ -63,13 +63,14 @@ module MiGA::DistanceRunner::Database
63
63
  def stored_value(target, metric)
64
64
  # Check if self.dataset -> target is done (previous run)
65
65
  y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
66
- return y unless y.nil? or y.zero?
66
+ return y unless y.nil? || y.zero?
67
67
 
68
68
  # Check if self.dataset <- target is done (another thread)
69
- if dataset.is_ref? and project.path == ref_project.path
69
+ if dataset.is_ref? && project.path == ref_project.path
70
70
  y = data_from_db(
71
- target.name, dataset.name, ref_db(metric, target.name), metric)
72
- unless y.nil? or y.first.nil? or y.first.zero?
71
+ target.name, dataset.name, ref_db(metric, target.name), metric
72
+ )
73
+ unless y.nil? || y.first.nil? || y.first.zero?
73
74
  # Store a copy
74
75
  data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
75
76
  return y.first
@@ -94,7 +95,8 @@ module MiGA::DistanceRunner::Database
94
95
  SQLite3::Database.new(db) do |conn|
95
96
  y = conn.execute(
96
97
  "select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
97
- [n1, n2]).first
98
+ [n1, n2]
99
+ ).first
98
100
  end if File.size? db
99
101
  y
100
102
  end
@@ -105,7 +107,8 @@ module MiGA::DistanceRunner::Database
105
107
  SQLite3::Database.new(db) do |conn|
106
108
  conn.execute(
107
109
  "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
108
- "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data)
110
+ "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
111
+ )
109
112
  end
110
113
  checkpoint metric
111
114
  end
@@ -114,7 +117,7 @@ module MiGA::DistanceRunner::Database
114
117
  # Iterates for each entry in +db+
115
118
  def foreach_in_db(db, metric, &blk)
116
119
  SQLite3::Database.new(db) do |conn|
117
- conn.execute("select * from #{metric}").each{ |r| blk[r] }
120
+ conn.execute("select * from #{metric}").each { |r| blk[r] }
118
121
  end
119
122
  end
120
123
  end
@@ -1,13 +1,12 @@
1
-
2
1
  # High-end pipelines for DistanceRunner
3
2
  module MiGA::DistanceRunner::Pipeline
4
-
5
3
  # Recursively classify the dataset, returning an Array with two entries:
6
4
  # classification and cluster number
7
5
  def classify(clades, classif, metric, result_fh, val_cls = nil)
8
6
  dir = File.expand_path(classif, clades)
9
7
  med = File.expand_path('miga-project.medoids', dir)
10
- return [classif,val_cls] unless File.size? med
8
+ return [classif, val_cls] unless File.size? med
9
+
11
10
  max_val = 0
12
11
  val_med = ''
13
12
  val_cls = nil
@@ -30,11 +29,22 @@ module MiGA::DistanceRunner::Pipeline
30
29
  classify(clades, classif, metric, result_fh, val_cls)
31
30
  end
32
31
 
32
+ # Run distances against datasets listed in metadata's +:dist_req+
33
+ def distances_by_request(metric)
34
+ return unless dataset.metadata[:dist_req]
35
+
36
+ $stderr.puts 'Running distances by request'
37
+ dataset.metadata[:dist_req].each do |target|
38
+ ds = ref_project.dataset(target) and send(metric, ds)
39
+ end
40
+ end
41
+
33
42
  # Builds a tree with all visited medoids from any classification level
34
43
  def build_medoids_tree(metric)
35
44
  $stderr.puts "Building medoids tree (metric = #{metric})"
36
45
  db = query_db(metric)
37
46
  return unless File.size? db
47
+
38
48
  out_base = File.expand_path(dataset.name, home)
39
49
  ds_matrix = "#{out_base}.txt"
40
50
  ds_matrix_fh = File.open(ds_matrix, 'w')
@@ -43,7 +53,7 @@ module MiGA::DistanceRunner::Pipeline
43
53
  seq2 = []
44
54
  foreach_in_db(db, metric) do |r|
45
55
  seq2 << r[0]
46
- ds_matrix_fh.puts r[0,3].join("\t")
56
+ ds_matrix_fh.puts r[0, 3].join("\t")
47
57
  end
48
58
  # Find all values among visited datasets in ref_project
49
59
  ref_r = ref_project.result("#{metric}_distances") or return
@@ -51,7 +61,8 @@ module MiGA::DistanceRunner::Pipeline
51
61
  fh.each_line do |ln|
52
62
  r = ln.chomp.split("\t")
53
63
  next unless seq2.include?(r[1]) or seq2.include?(r[2])
54
- ds_matrix_fh.puts r[1,3].join("\t")
64
+
65
+ ds_matrix_fh.puts r[1, 3].join("\t")
55
66
  end
56
67
  end
57
68
  ds_matrix_fh.close
@@ -74,11 +85,12 @@ module MiGA::DistanceRunner::Pipeline
74
85
  dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
75
86
  cr = dataset.closest_relatives(1, from_ref_project)
76
87
  return if cr.nil? or cr.empty?
88
+
77
89
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
78
90
 
79
91
  # Run the test for each rank
80
92
  tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
81
- r = tax_test.map do |k,v|
93
+ r = tax_test.map do |k, v|
82
94
  sig = ''
83
95
  [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
84
96
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
@@ -97,12 +109,13 @@ module MiGA::DistanceRunner::Pipeline
97
109
 
98
110
  # Transfer the taxonomy to the current dataset
99
111
  def transfer_taxonomy(tax)
100
- $stderr.puts "Transferring taxonomy"
112
+ $stderr.puts 'Transferring taxonomy'
101
113
  return if tax.nil?
114
+
102
115
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
103
- tax_a = tax.
104
- select { |i| i[1] != '?' && i[2] <= pval }.
105
- map { |i| i[0,2].join(':') }
116
+ tax_a = tax
117
+ .select { |i| i[1] != '?' && i[2] <= pval }
118
+ .map { |i| i[0, 2].join(':') }
106
119
  dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
107
120
  dataset.save
108
121
  end
@@ -1,13 +1,10 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'database.rb'
5
4
  require_relative 'commands.rb'
6
5
  require_relative 'pipeline.rb'
7
6
 
8
-
9
7
  class MiGA::DistanceRunner
10
-
11
8
  include MiGA::DistanceRunner::Temporal
12
9
  include MiGA::DistanceRunner::Database
13
10
  include MiGA::DistanceRunner::Commands
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
16
13
  attr_reader :project, :ref_project, :dataset, :opts, :home
17
14
  attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
18
15
 
19
- def initialize(project_path, dataset_name, opts_hash={})
16
+ def initialize(project_path, dataset_name, opts_hash = {})
20
17
  @opts = opts_hash
21
18
  @project = MiGA::Project.load(project_path) or
22
19
  raise "No project at #{project_path}"
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
30
27
  @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
31
28
  project.is_clade? ? 'save-rbm' : 'no-save-rbm'
32
29
  end
33
- @opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
30
+ @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
34
31
  if opts[:run_taxonomy] and project.metadata[:ref_project]
35
32
  ref_path = project.metadata[:ref_project]
36
33
  @home = File.expand_path('05.taxonomy', @home)
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
60
57
  def go!
61
58
  $stderr.puts "Launching analysis"
62
59
  return if dataset.is_multi?
60
+
63
61
  Dir.mktmpdir do |tmp_dir|
64
62
  @tmp = tmp_dir
65
63
  create_temporals
@@ -69,25 +67,26 @@ class MiGA::DistanceRunner
69
67
 
70
68
  # Launch analysis for reference datasets
71
69
  def go_ref!
72
- $stderr.puts "Launching analysis for reference dataset"
70
+ $stderr.puts 'Launching analysis for reference dataset'
73
71
  # Initialize databases
74
72
  initialize_dbs! true
75
73
 
76
74
  # first-come-first-serve traverse
77
75
  ref_project.each_dataset do |ds|
78
76
  next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
77
+
79
78
  puts "[ #{Time.now} ] #{ds.name}"
80
79
  ani_after_aai(ds)
81
80
  end
82
81
 
83
82
  # Finalize
84
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
83
+ %i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
85
84
  end
86
85
 
87
86
  ##
88
87
  # Launch analysis for query datasets
89
88
  def go_query!
90
- $stderr.puts "Launching analysis for query dataset"
89
+ $stderr.puts 'Launching analysis for query dataset'
91
90
  # Check if project is ready
92
91
  tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
93
92
  res = ref_project.result(tsk[0])
@@ -95,6 +94,7 @@ class MiGA::DistanceRunner
95
94
 
96
95
  # Initialize the databases
97
96
  initialize_dbs! false
97
+ distances_by_request(tsk[1])
98
98
  # Calculate the classification-informed AAI/ANI traverse
99
99
  results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
100
100
  fh = File.open(results, 'w')
@@ -104,14 +104,17 @@ class MiGA::DistanceRunner
104
104
  # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
105
105
  par_dir = File.dirname(File.expand_path(classif, res.dir))
106
106
  par = File.expand_path('miga-project.classif', par_dir)
107
- closest = {dataset: nil, ani: 0.0}
107
+ closest = { dataset: nil, ani: 0.0 }
108
108
  if File.size? par
109
109
  File.open(par, 'r') do |fh|
110
110
  fh.each_line do |ln|
111
111
  r = ln.chomp.split("\t")
112
112
  next unless r[1].to_i == val_cls
113
+
113
114
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
114
- closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
115
+ unless ani.nil? || ani < closest[:ani]
116
+ closest = { ds: r[0], ani: ani }
117
+ end
115
118
  end
116
119
  end
117
120
  end
@@ -119,22 +122,23 @@ class MiGA::DistanceRunner
119
122
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
120
123
  cl_path = res.file_path :clades_ani95
121
124
  if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
122
- File.foreach(cl_path).
123
- map { |i| i.chomp.split(',') }.
124
- find( lambda{[]} ){ |i| i.include? closest[:ds] }.
125
- each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
125
+ File.foreach(cl_path)
126
+ .map { |i| i.chomp.split(',') }
127
+ .find(lambda { [] }) { |i| i.include? closest[:ds] }
128
+ .each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
126
129
  end
127
130
 
128
131
  # Finalize
129
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
132
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
130
133
  build_medoids_tree(tsk[1])
131
134
  transfer_taxonomy(tax_test)
132
135
  end
133
136
 
134
137
  # Launch analysis for taxonomy jobs
135
138
  def go_taxonomy!
136
- $stderr.puts "Launching taxonomy analysis"
139
+ $stderr.puts 'Launching taxonomy analysis'
137
140
  return unless project.metadata[:ref_project]
141
+
138
142
  go_query! # <- yeah, it's actually the same, just different ref_project
139
143
  end
140
144
  end
@@ -1,9 +1,7 @@
1
-
2
1
  require 'tmpdir'
3
2
  require 'zlib'
4
3
 
5
4
  module MiGA::DistanceRunner::Temporal
6
-
7
5
  # Copy input files to the (local) temporal folder
8
6
  def create_temporals
9
7
  rf = {
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
43
41
  def checkpoint!(metric)
44
42
  $stderr.puts "Checkpoint (metric = #{metric})"
45
43
  SQLite3::Database.new(tmp_dbs[metric]) do |conn|
46
- conn.execute("select count(*) from #{metric==:haai ? :aai : metric}")
44
+ conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
47
45
  end
48
46
  FileUtils.cp(tmp_dbs[metric], dbs[metric])
49
47
  @db_counts[metric] = 0
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  dataset = ARGV.shift
6
6
  project = ARGV.shift
7
- opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
7
+ opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(dataset, project, opts)
9
9
  runner.go!
@@ -6,10 +6,10 @@ domain = ARGV.shift
6
6
 
7
7
  def quality(hsh)
8
8
  q = {}
9
- q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
10
- q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
11
- q[:cmp] = 100.0*q[:found].to_f/hsh.size
12
- q[:cnt] = 100.0*q[:multi].to_f/hsh.size
9
+ q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
10
+ q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
11
+ q[:cmp] = 100.0 * q[:found].to_f / hsh.size
12
+ q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
13
13
  q
14
14
  end
15
15
 
@@ -39,7 +39,7 @@ end
39
39
  # Find expected genes for domain
40
40
  n_dom = Hash[
41
41
  `HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
42
- .chomp.split("\n").map { |i| i.split("\t") }
42
+ .chomp.split("\n").map { |i| i.split("\t") }
43
43
  ]
44
44
  l_dom = n_dom.keys
45
45
  cnt_dom = {}
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
54
54
  ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
55
55
  if q[:multi] > 0
56
56
  ofh.puts "! Multiple copies: "
57
- cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
57
+ cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
58
58
  end
59
59
  if q[:found] < cnt_dom.size
60
60
  ofh.puts "! Missing genes: "
61
- cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
61
+ cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
62
62
  end
63
63
  end