miga-base 0.7.4.0 → 0.7.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +28 -20
  14. data/lib/miga/cli/action/doctor/base.rb +29 -6
  15. data/lib/miga/cli/action/edit.rb +1 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +9 -5
  35. data/lib/miga/cli/action/summary.rb +13 -7
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +25 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +11 -6
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +51 -35
  53. data/lib/miga/daemon/base.rb +0 -2
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/json.rb +5 -7
  59. data/lib/miga/lair.rb +4 -0
  60. data/lib/miga/metadata.rb +4 -3
  61. data/lib/miga/project.rb +29 -20
  62. data/lib/miga/project/base.rb +52 -37
  63. data/lib/miga/project/dataset.rb +27 -13
  64. data/lib/miga/project/hooks.rb +0 -3
  65. data/lib/miga/project/result.rb +14 -5
  66. data/lib/miga/remote_dataset.rb +85 -72
  67. data/lib/miga/remote_dataset/base.rb +11 -13
  68. data/lib/miga/remote_dataset/download.rb +34 -12
  69. data/lib/miga/result.rb +48 -53
  70. data/lib/miga/result/base.rb +0 -2
  71. data/lib/miga/result/dates.rb +1 -3
  72. data/lib/miga/result/source.rb +15 -16
  73. data/lib/miga/result/stats.rb +37 -27
  74. data/lib/miga/tax_dist.rb +6 -3
  75. data/lib/miga/tax_index.rb +17 -17
  76. data/lib/miga/taxonomy.rb +6 -1
  77. data/lib/miga/taxonomy/base.rb +19 -15
  78. data/lib/miga/version.rb +19 -16
  79. data/scripts/project_stats.bash +3 -0
  80. data/scripts/stats.bash +1 -1
  81. data/test/common_test.rb +3 -11
  82. data/test/daemon_helper.rb +38 -0
  83. data/test/daemon_test.rb +73 -101
  84. data/test/dataset_test.rb +63 -59
  85. data/test/format_test.rb +3 -11
  86. data/test/hook_test.rb +50 -55
  87. data/test/json_test.rb +7 -8
  88. data/test/lair_test.rb +22 -28
  89. data/test/metadata_test.rb +6 -14
  90. data/test/project_test.rb +33 -39
  91. data/test/remote_dataset_test.rb +26 -32
  92. data/test/result_stats_test.rb +17 -27
  93. data/test/result_test.rb +41 -34
  94. data/test/tax_dist_test.rb +0 -2
  95. data/test/tax_index_test.rb +4 -10
  96. data/test/taxonomy_test.rb +7 -9
  97. data/test/test_helper.rb +42 -1
  98. data/test/with_daemon_test.rb +14 -22
  99. data/utils/adapters.fa +13 -0
  100. data/utils/cleanup-databases.rb +6 -5
  101. data/utils/distance/base.rb +0 -1
  102. data/utils/distance/commands.rb +19 -12
  103. data/utils/distance/database.rb +24 -21
  104. data/utils/distance/pipeline.rb +23 -10
  105. data/utils/distance/runner.rb +20 -16
  106. data/utils/distance/temporal.rb +1 -3
  107. data/utils/distances.rb +1 -1
  108. data/utils/domain-ess-genes.rb +7 -7
  109. data/utils/index_metadata.rb +4 -2
  110. data/utils/mytaxa_scan.rb +18 -16
  111. data/utils/representatives.rb +5 -4
  112. data/utils/requirements.txt +1 -1
  113. data/utils/subclade/base.rb +0 -1
  114. data/utils/subclade/pipeline.rb +7 -6
  115. data/utils/subclade/runner.rb +9 -9
  116. data/utils/subclade/temporal.rb +0 -2
  117. data/utils/subclades-compile.rb +39 -37
  118. data/utils/subclades.rb +1 -1
  119. metadata +5 -4
@@ -1,12 +1,13 @@
1
-
2
1
  module MiGA::DistanceRunner::Commands
3
2
  # Estimates or calculates AAI against +target+
4
3
  def aai(target)
5
4
  # Check if the request makes sense
6
5
  return nil if target.nil? || target.result(:essential_genes).nil?
6
+
7
7
  # Check if it's been calculated
8
8
  y = stored_value(target, :aai)
9
9
  return y unless y.nil? || y.zero?
10
+
10
11
  # Try hAAI (except in clade projects)
11
12
  unless @ref_project.is_clade?
12
13
  y = haai(target)
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
14
15
  end
15
16
  # Full AAI
16
17
  aai_cmd(
17
- tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
18
- dataset.name, target.name, tmp_dbs[:aai]).tap{ checkpoint :aai }
18
+ tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
19
+ dataset.name, target.name, tmp_dbs[:aai]
20
+ ).tap { checkpoint :aai }
19
21
  end
20
22
 
21
23
  ##
22
24
  # Estimates AAI against +target+ using hAAI
23
25
  def haai(target)
24
26
  return nil if opts[:haai_p] == 'no'
27
+
25
28
  haai = aai_cmd(tmp_file('ess_genes.fa'),
26
- target.result(:essential_genes).file_path(:ess_genes),
27
- dataset.name, target.name, tmp_dbs[:haai],
28
- aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
29
+ target.result(:essential_genes).file_path(:ess_genes),
30
+ dataset.name, target.name, tmp_dbs[:haai],
31
+ aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
29
32
  checkpoint :haai
30
33
  return nil if haai.nil? || haai.zero? || haai > 90.0
31
- aai = 100.0 - Math.exp(2.435076 + 0.4275193*Math.log(100.0-haai))
34
+
35
+ aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
32
36
  SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
33
37
  conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
34
- [dataset.name, target.name, aai]
38
+ [dataset.name, target.name, aai]
35
39
  end
36
40
  checkpoint :aai
37
41
  aai
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
44
48
  t = tmp_file('largecontigs.fa')
45
49
  r = target.result(:assembly)
46
50
  return nil if r.nil? || !File.size?(t)
51
+
47
52
  # Check if it's been calculated
48
53
  y = stored_value(target, :ani)
49
54
  return y unless y.nil? || y.zero?
55
+
50
56
  # Run it
51
57
  ani_cmd(
52
- t, r.file_path(:largecontigs),
53
- dataset.name, target.name, tmp_dbs[:ani]).tap{ checkpoint :ani }
58
+ t, r.file_path(:largecontigs),
59
+ dataset.name, target.name, tmp_dbs[:ani]
60
+ ).tap { checkpoint :ani }
54
61
  end
55
62
 
56
63
  ##
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
74
81
 
75
82
  ##
76
83
  # Execute an ANI command
77
- def ani_cmd(f1, f2, n1, n2, db, o={})
84
+ def ani_cmd(f1, f2, n1, n2, db, o = {})
78
85
  o = opts.merge(o)
79
86
  v = nil
80
87
  if o[:ani_p] == 'fastani'
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
83
90
  unless out.empty?
84
91
  SQLite3::Database.new(db) do |conn|
85
92
  conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
86
- [n1, n2, out[2], out[3], out[4]]
93
+ [n1, n2, out[2], out[3], out[4]]
87
94
  end
88
95
  end
89
96
  v = out[2]
@@ -1,4 +1,3 @@
1
-
2
1
  require 'sqlite3'
3
2
 
4
3
  module MiGA::DistanceRunner::Database
@@ -9,7 +8,7 @@ module MiGA::DistanceRunner::Database
9
8
  @dbs = {}
10
9
  @tmp_dbs = {}
11
10
  @db_counts = {}
12
- {haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
11
+ { haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
13
12
  @db_counts[m] = 0
14
13
  @dbs[m] = for_ref ? ref_db(m) : query_db(m)
15
14
  # Remove if corrupt
@@ -25,9 +24,9 @@ module MiGA::DistanceRunner::Database
25
24
  # Initialize if it doesn't exist
26
25
  SQLite3::Database.new(dbs[m]) do |conn|
27
26
  conn.execute "create table if not exists #{t}(" +
28
- "seq1 varchar(256), seq2 varchar(256), " +
29
- "#{t} float, sd float, n int, omega int" +
30
- ")"
27
+ "seq1 varchar(256), seq2 varchar(256), " +
28
+ "#{t} float, sd float, n int, omega int" +
29
+ ")"
31
30
  end unless File.size? dbs[m]
32
31
  # Copy over to (local) temporals
33
32
  @tmp_dbs[m] = tmp_file("#{m}.db")
@@ -38,16 +37,17 @@ module MiGA::DistanceRunner::Database
38
37
  ##
39
38
  # Path to the database +metric+ for +dataset_name+ in +project+
40
39
  # (assumes that +dataset_name+ is a reference dataset)
41
- def ref_db(metric, dataset_name=nil)
40
+ def ref_db(metric, dataset_name = nil)
42
41
  dataset_name ||= dataset.name
43
- b = case metric
44
- when :haai
45
- "01.haai/#{dataset_name}.db"
46
- when :aai
47
- "02.aai/#{dataset_name}.db"
48
- when :ani
49
- "03.ani/#{dataset_name}.db"
50
- end
42
+ b =
43
+ case metric
44
+ when :haai
45
+ "01.haai/#{dataset_name}.db"
46
+ when :aai
47
+ "02.aai/#{dataset_name}.db"
48
+ when :ani
49
+ "03.ani/#{dataset_name}.db"
50
+ end
51
51
  File.expand_path(b, home)
52
52
  end
53
53
 
@@ -63,13 +63,14 @@ module MiGA::DistanceRunner::Database
63
63
  def stored_value(target, metric)
64
64
  # Check if self.dataset -> target is done (previous run)
65
65
  y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
66
- return y unless y.nil? or y.zero?
66
+ return y unless y.nil? || y.zero?
67
67
 
68
68
  # Check if self.dataset <- target is done (another thread)
69
- if dataset.is_ref? and project.path == ref_project.path
69
+ if dataset.is_ref? && project.path == ref_project.path
70
70
  y = data_from_db(
71
- target.name, dataset.name, ref_db(metric, target.name), metric)
72
- unless y.nil? or y.first.nil? or y.first.zero?
71
+ target.name, dataset.name, ref_db(metric, target.name), metric
72
+ )
73
+ unless y.nil? || y.first.nil? || y.first.zero?
73
74
  # Store a copy
74
75
  data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
75
76
  return y.first
@@ -94,7 +95,8 @@ module MiGA::DistanceRunner::Database
94
95
  SQLite3::Database.new(db) do |conn|
95
96
  y = conn.execute(
96
97
  "select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
97
- [n1, n2]).first
98
+ [n1, n2]
99
+ ).first
98
100
  end if File.size? db
99
101
  y
100
102
  end
@@ -105,7 +107,8 @@ module MiGA::DistanceRunner::Database
105
107
  SQLite3::Database.new(db) do |conn|
106
108
  conn.execute(
107
109
  "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
108
- "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data)
110
+ "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
111
+ )
109
112
  end
110
113
  checkpoint metric
111
114
  end
@@ -114,7 +117,7 @@ module MiGA::DistanceRunner::Database
114
117
  # Iterates for each entry in +db+
115
118
  def foreach_in_db(db, metric, &blk)
116
119
  SQLite3::Database.new(db) do |conn|
117
- conn.execute("select * from #{metric}").each{ |r| blk[r] }
120
+ conn.execute("select * from #{metric}").each { |r| blk[r] }
118
121
  end
119
122
  end
120
123
  end
@@ -1,13 +1,12 @@
1
-
2
1
  # High-end pipelines for DistanceRunner
3
2
  module MiGA::DistanceRunner::Pipeline
4
-
5
3
  # Recursively classify the dataset, returning an Array with two entries:
6
4
  # classification and cluster number
7
5
  def classify(clades, classif, metric, result_fh, val_cls = nil)
8
6
  dir = File.expand_path(classif, clades)
9
7
  med = File.expand_path('miga-project.medoids', dir)
10
- return [classif,val_cls] unless File.size? med
8
+ return [classif, val_cls] unless File.size? med
9
+
11
10
  max_val = 0
12
11
  val_med = ''
13
12
  val_cls = nil
@@ -30,11 +29,22 @@ module MiGA::DistanceRunner::Pipeline
30
29
  classify(clades, classif, metric, result_fh, val_cls)
31
30
  end
32
31
 
32
+ # Run distances against datasets listed in metadata's +:dist_req+
33
+ def distances_by_request(metric)
34
+ return unless dataset.metadata[:dist_req]
35
+
36
+ $stderr.puts 'Running distances by request'
37
+ dataset.metadata[:dist_req].each do |target|
38
+ ds = ref_project.dataset(target) and send(metric, ds)
39
+ end
40
+ end
41
+
33
42
  # Builds a tree with all visited medoids from any classification level
34
43
  def build_medoids_tree(metric)
35
44
  $stderr.puts "Building medoids tree (metric = #{metric})"
36
45
  db = query_db(metric)
37
46
  return unless File.size? db
47
+
38
48
  out_base = File.expand_path(dataset.name, home)
39
49
  ds_matrix = "#{out_base}.txt"
40
50
  ds_matrix_fh = File.open(ds_matrix, 'w')
@@ -43,7 +53,7 @@ module MiGA::DistanceRunner::Pipeline
43
53
  seq2 = []
44
54
  foreach_in_db(db, metric) do |r|
45
55
  seq2 << r[0]
46
- ds_matrix_fh.puts r[0,3].join("\t")
56
+ ds_matrix_fh.puts r[0, 3].join("\t")
47
57
  end
48
58
  # Find all values among visited datasets in ref_project
49
59
  ref_r = ref_project.result("#{metric}_distances") or return
@@ -51,7 +61,8 @@ module MiGA::DistanceRunner::Pipeline
51
61
  fh.each_line do |ln|
52
62
  r = ln.chomp.split("\t")
53
63
  next unless seq2.include?(r[1]) or seq2.include?(r[2])
54
- ds_matrix_fh.puts r[1,3].join("\t")
64
+
65
+ ds_matrix_fh.puts r[1, 3].join("\t")
55
66
  end
56
67
  end
57
68
  ds_matrix_fh.close
@@ -74,11 +85,12 @@ module MiGA::DistanceRunner::Pipeline
74
85
  dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
75
86
  cr = dataset.closest_relatives(1, from_ref_project)
76
87
  return if cr.nil? or cr.empty?
88
+
77
89
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
78
90
 
79
91
  # Run the test for each rank
80
92
  tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
81
- r = tax_test.map do |k,v|
93
+ r = tax_test.map do |k, v|
82
94
  sig = ''
83
95
  [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
84
96
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
@@ -97,12 +109,13 @@ module MiGA::DistanceRunner::Pipeline
97
109
 
98
110
  # Transfer the taxonomy to the current dataset
99
111
  def transfer_taxonomy(tax)
100
- $stderr.puts "Transferring taxonomy"
112
+ $stderr.puts 'Transferring taxonomy'
101
113
  return if tax.nil?
114
+
102
115
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
103
- tax_a = tax.
104
- select { |i| i[1] != '?' && i[2] <= pval }.
105
- map { |i| i[0,2].join(':') }
116
+ tax_a = tax
117
+ .select { |i| i[1] != '?' && i[2] <= pval }
118
+ .map { |i| i[0, 2].join(':') }
106
119
  dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
107
120
  dataset.save
108
121
  end
@@ -1,13 +1,10 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'database.rb'
5
4
  require_relative 'commands.rb'
6
5
  require_relative 'pipeline.rb'
7
6
 
8
-
9
7
  class MiGA::DistanceRunner
10
-
11
8
  include MiGA::DistanceRunner::Temporal
12
9
  include MiGA::DistanceRunner::Database
13
10
  include MiGA::DistanceRunner::Commands
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
16
13
  attr_reader :project, :ref_project, :dataset, :opts, :home
17
14
  attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
18
15
 
19
- def initialize(project_path, dataset_name, opts_hash={})
16
+ def initialize(project_path, dataset_name, opts_hash = {})
20
17
  @opts = opts_hash
21
18
  @project = MiGA::Project.load(project_path) or
22
19
  raise "No project at #{project_path}"
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
30
27
  @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
31
28
  project.is_clade? ? 'save-rbm' : 'no-save-rbm'
32
29
  end
33
- @opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
30
+ @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
34
31
  if opts[:run_taxonomy] and project.metadata[:ref_project]
35
32
  ref_path = project.metadata[:ref_project]
36
33
  @home = File.expand_path('05.taxonomy', @home)
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
60
57
  def go!
61
58
  $stderr.puts "Launching analysis"
62
59
  return if dataset.is_multi?
60
+
63
61
  Dir.mktmpdir do |tmp_dir|
64
62
  @tmp = tmp_dir
65
63
  create_temporals
@@ -69,25 +67,26 @@ class MiGA::DistanceRunner
69
67
 
70
68
  # Launch analysis for reference datasets
71
69
  def go_ref!
72
- $stderr.puts "Launching analysis for reference dataset"
70
+ $stderr.puts 'Launching analysis for reference dataset'
73
71
  # Initialize databases
74
72
  initialize_dbs! true
75
73
 
76
74
  # first-come-first-serve traverse
77
75
  ref_project.each_dataset do |ds|
78
76
  next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
77
+
79
78
  puts "[ #{Time.now} ] #{ds.name}"
80
79
  ani_after_aai(ds)
81
80
  end
82
81
 
83
82
  # Finalize
84
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
83
+ %i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
85
84
  end
86
85
 
87
86
  ##
88
87
  # Launch analysis for query datasets
89
88
  def go_query!
90
- $stderr.puts "Launching analysis for query dataset"
89
+ $stderr.puts 'Launching analysis for query dataset'
91
90
  # Check if project is ready
92
91
  tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
93
92
  res = ref_project.result(tsk[0])
@@ -95,6 +94,7 @@ class MiGA::DistanceRunner
95
94
 
96
95
  # Initialize the databases
97
96
  initialize_dbs! false
97
+ distances_by_request(tsk[1])
98
98
  # Calculate the classification-informed AAI/ANI traverse
99
99
  results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
100
100
  fh = File.open(results, 'w')
@@ -104,14 +104,17 @@ class MiGA::DistanceRunner
104
104
  # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
105
105
  par_dir = File.dirname(File.expand_path(classif, res.dir))
106
106
  par = File.expand_path('miga-project.classif', par_dir)
107
- closest = {dataset: nil, ani: 0.0}
107
+ closest = { dataset: nil, ani: 0.0 }
108
108
  if File.size? par
109
109
  File.open(par, 'r') do |fh|
110
110
  fh.each_line do |ln|
111
111
  r = ln.chomp.split("\t")
112
112
  next unless r[1].to_i == val_cls
113
+
113
114
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
114
- closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
115
+ unless ani.nil? || ani < closest[:ani]
116
+ closest = { ds: r[0], ani: ani }
117
+ end
115
118
  end
116
119
  end
117
120
  end
@@ -119,22 +122,23 @@ class MiGA::DistanceRunner
119
122
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
120
123
  cl_path = res.file_path :clades_ani95
121
124
  if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
122
- File.foreach(cl_path).
123
- map { |i| i.chomp.split(',') }.
124
- find( lambda{[]} ){ |i| i.include? closest[:ds] }.
125
- each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
125
+ File.foreach(cl_path)
126
+ .map { |i| i.chomp.split(',') }
127
+ .find(lambda { [] }) { |i| i.include? closest[:ds] }
128
+ .each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
126
129
  end
127
130
 
128
131
  # Finalize
129
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
132
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
130
133
  build_medoids_tree(tsk[1])
131
134
  transfer_taxonomy(tax_test)
132
135
  end
133
136
 
134
137
  # Launch analysis for taxonomy jobs
135
138
  def go_taxonomy!
136
- $stderr.puts "Launching taxonomy analysis"
139
+ $stderr.puts 'Launching taxonomy analysis'
137
140
  return unless project.metadata[:ref_project]
141
+
138
142
  go_query! # <- yeah, it's actually the same, just different ref_project
139
143
  end
140
144
  end
@@ -1,9 +1,7 @@
1
-
2
1
  require 'tmpdir'
3
2
  require 'zlib'
4
3
 
5
4
  module MiGA::DistanceRunner::Temporal
6
-
7
5
  # Copy input files to the (local) temporal folder
8
6
  def create_temporals
9
7
  rf = {
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
43
41
  def checkpoint!(metric)
44
42
  $stderr.puts "Checkpoint (metric = #{metric})"
45
43
  SQLite3::Database.new(tmp_dbs[metric]) do |conn|
46
- conn.execute("select count(*) from #{metric==:haai ? :aai : metric}")
44
+ conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
47
45
  end
48
46
  FileUtils.cp(tmp_dbs[metric], dbs[metric])
49
47
  @db_counts[metric] = 0
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  dataset = ARGV.shift
6
6
  project = ARGV.shift
7
- opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
7
+ opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(dataset, project, opts)
9
9
  runner.go!
@@ -6,10 +6,10 @@ domain = ARGV.shift
6
6
 
7
7
  def quality(hsh)
8
8
  q = {}
9
- q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
10
- q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
11
- q[:cmp] = 100.0*q[:found].to_f/hsh.size
12
- q[:cnt] = 100.0*q[:multi].to_f/hsh.size
9
+ q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
10
+ q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
11
+ q[:cmp] = 100.0 * q[:found].to_f / hsh.size
12
+ q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
13
13
  q
14
14
  end
15
15
 
@@ -39,7 +39,7 @@ end
39
39
  # Find expected genes for domain
40
40
  n_dom = Hash[
41
41
  `HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
42
- .chomp.split("\n").map { |i| i.split("\t") }
42
+ .chomp.split("\n").map { |i| i.split("\t") }
43
43
  ]
44
44
  l_dom = n_dom.keys
45
45
  cnt_dom = {}
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
54
54
  ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
55
55
  if q[:multi] > 0
56
56
  ofh.puts "! Multiple copies: "
57
- cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
57
+ cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
58
58
  end
59
59
  if q[:found] < cnt_dom.size
60
60
  ofh.puts "! Missing genes: "
61
- cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
61
+ cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
62
62
  end
63
63
  end