miga-base 0.7.3.0 → 0.7.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +71 -82
  14. data/lib/miga/cli/action/doctor/base.rb +102 -0
  15. data/lib/miga/cli/action/edit.rb +14 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +8 -4
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +21 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +6 -3
  50. data/lib/miga/common/with_daemon_class.rb +3 -2
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +93 -44
  53. data/lib/miga/daemon/base.rb +30 -11
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/dataset/status.rb +6 -5
  59. data/lib/miga/json.rb +5 -7
  60. data/lib/miga/lair.rb +4 -0
  61. data/lib/miga/metadata.rb +4 -3
  62. data/lib/miga/project.rb +29 -20
  63. data/lib/miga/project/base.rb +52 -37
  64. data/lib/miga/project/dataset.rb +33 -26
  65. data/lib/miga/project/hooks.rb +0 -3
  66. data/lib/miga/project/result.rb +14 -5
  67. data/lib/miga/remote_dataset.rb +85 -72
  68. data/lib/miga/remote_dataset/base.rb +11 -13
  69. data/lib/miga/remote_dataset/download.rb +34 -12
  70. data/lib/miga/result.rb +34 -25
  71. data/lib/miga/result/base.rb +0 -2
  72. data/lib/miga/result/dates.rb +1 -3
  73. data/lib/miga/result/source.rb +15 -16
  74. data/lib/miga/result/stats.rb +37 -27
  75. data/lib/miga/tax_dist.rb +6 -4
  76. data/lib/miga/tax_index.rb +17 -17
  77. data/lib/miga/taxonomy.rb +6 -1
  78. data/lib/miga/taxonomy/base.rb +19 -15
  79. data/lib/miga/version.rb +19 -16
  80. data/scripts/project_stats.bash +3 -0
  81. data/scripts/stats.bash +1 -1
  82. data/test/common_test.rb +3 -11
  83. data/test/daemon_helper.rb +38 -0
  84. data/test/daemon_test.rb +91 -99
  85. data/test/dataset_test.rb +63 -59
  86. data/test/format_test.rb +3 -11
  87. data/test/hook_test.rb +50 -55
  88. data/test/json_test.rb +7 -8
  89. data/test/lair_test.rb +22 -28
  90. data/test/metadata_test.rb +6 -14
  91. data/test/project_test.rb +33 -40
  92. data/test/remote_dataset_test.rb +26 -32
  93. data/test/result_stats_test.rb +17 -27
  94. data/test/result_test.rb +41 -34
  95. data/test/tax_dist_test.rb +2 -4
  96. data/test/tax_index_test.rb +4 -10
  97. data/test/taxonomy_test.rb +7 -9
  98. data/test/test_helper.rb +42 -1
  99. data/test/with_daemon_test.rb +14 -22
  100. data/utils/adapters.fa +13 -0
  101. data/utils/cleanup-databases.rb +6 -5
  102. data/utils/distance/base.rb +0 -1
  103. data/utils/distance/commands.rb +19 -12
  104. data/utils/distance/database.rb +25 -21
  105. data/utils/distance/pipeline.rb +16 -10
  106. data/utils/distance/runner.rb +19 -13
  107. data/utils/distance/temporal.rb +7 -4
  108. data/utils/distances.rb +1 -1
  109. data/utils/domain-ess-genes.rb +7 -7
  110. data/utils/index_metadata.rb +5 -4
  111. data/utils/mytaxa_scan.rb +18 -16
  112. data/utils/representatives.rb +5 -4
  113. data/utils/requirements.txt +1 -1
  114. data/utils/subclade/base.rb +0 -1
  115. data/utils/subclade/pipeline.rb +7 -6
  116. data/utils/subclade/runner.rb +9 -9
  117. data/utils/subclade/temporal.rb +0 -2
  118. data/utils/subclades-compile.rb +39 -37
  119. data/utils/subclades.rb +1 -1
  120. metadata +6 -4
@@ -1,4 +1,3 @@
1
-
2
1
  require 'miga'
3
2
  require 'miga/tax_dist'
4
3
 
@@ -1,12 +1,13 @@
1
-
2
1
  module MiGA::DistanceRunner::Commands
3
2
  # Estimates or calculates AAI against +target+
4
3
  def aai(target)
5
4
  # Check if the request makes sense
6
5
  return nil if target.nil? || target.result(:essential_genes).nil?
6
+
7
7
  # Check if it's been calculated
8
8
  y = stored_value(target, :aai)
9
9
  return y unless y.nil? || y.zero?
10
+
10
11
  # Try hAAI (except in clade projects)
11
12
  unless @ref_project.is_clade?
12
13
  y = haai(target)
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
14
15
  end
15
16
  # Full AAI
16
17
  aai_cmd(
17
- tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
18
- dataset.name, target.name, tmp_dbs[:aai]).tap{ checkpoint :aai }
18
+ tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
19
+ dataset.name, target.name, tmp_dbs[:aai]
20
+ ).tap { checkpoint :aai }
19
21
  end
20
22
 
21
23
  ##
22
24
  # Estimates AAI against +target+ using hAAI
23
25
  def haai(target)
24
26
  return nil if opts[:haai_p] == 'no'
27
+
25
28
  haai = aai_cmd(tmp_file('ess_genes.fa'),
26
- target.result(:essential_genes).file_path(:ess_genes),
27
- dataset.name, target.name, tmp_dbs[:haai],
28
- aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
29
+ target.result(:essential_genes).file_path(:ess_genes),
30
+ dataset.name, target.name, tmp_dbs[:haai],
31
+ aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
29
32
  checkpoint :haai
30
33
  return nil if haai.nil? || haai.zero? || haai > 90.0
31
- aai = 100.0 - Math.exp(2.435076 + 0.4275193*Math.log(100.0-haai))
34
+
35
+ aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
32
36
  SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
33
37
  conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
34
- [dataset.name, target.name, aai]
38
+ [dataset.name, target.name, aai]
35
39
  end
36
40
  checkpoint :aai
37
41
  aai
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
44
48
  t = tmp_file('largecontigs.fa')
45
49
  r = target.result(:assembly)
46
50
  return nil if r.nil? || !File.size?(t)
51
+
47
52
  # Check if it's been calculated
48
53
  y = stored_value(target, :ani)
49
54
  return y unless y.nil? || y.zero?
55
+
50
56
  # Run it
51
57
  ani_cmd(
52
- t, r.file_path(:largecontigs),
53
- dataset.name, target.name, tmp_dbs[:ani]).tap{ checkpoint :ani }
58
+ t, r.file_path(:largecontigs),
59
+ dataset.name, target.name, tmp_dbs[:ani]
60
+ ).tap { checkpoint :ani }
54
61
  end
55
62
 
56
63
  ##
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
74
81
 
75
82
  ##
76
83
  # Execute an ANI command
77
- def ani_cmd(f1, f2, n1, n2, db, o={})
84
+ def ani_cmd(f1, f2, n1, n2, db, o = {})
78
85
  o = opts.merge(o)
79
86
  v = nil
80
87
  if o[:ani_p] == 'fastani'
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
83
90
  unless out.empty?
84
91
  SQLite3::Database.new(db) do |conn|
85
92
  conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
86
- [n1, n2, out[2], out[3], out[4]]
93
+ [n1, n2, out[2], out[3], out[4]]
87
94
  end
88
95
  end
89
96
  v = out[2]
@@ -1,14 +1,14 @@
1
-
2
1
  require 'sqlite3'
3
2
 
4
3
  module MiGA::DistanceRunner::Database
5
4
  ##
6
5
  # Check for corrupt files and create empty databases
7
6
  def initialize_dbs!(for_ref)
7
+ $stderr.puts "Initializing databases (for_ref = #{for_ref})"
8
8
  @dbs = {}
9
9
  @tmp_dbs = {}
10
10
  @db_counts = {}
11
- {haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
11
+ { haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
12
12
  @db_counts[m] = 0
13
13
  @dbs[m] = for_ref ? ref_db(m) : query_db(m)
14
14
  # Remove if corrupt
@@ -24,9 +24,9 @@ module MiGA::DistanceRunner::Database
24
24
  # Initialize if it doesn't exist
25
25
  SQLite3::Database.new(dbs[m]) do |conn|
26
26
  conn.execute "create table if not exists #{t}(" +
27
- "seq1 varchar(256), seq2 varchar(256), " +
28
- "#{t} float, sd float, n int, omega int" +
29
- ")"
27
+ "seq1 varchar(256), seq2 varchar(256), " +
28
+ "#{t} float, sd float, n int, omega int" +
29
+ ")"
30
30
  end unless File.size? dbs[m]
31
31
  # Copy over to (local) temporals
32
32
  @tmp_dbs[m] = tmp_file("#{m}.db")
@@ -37,16 +37,17 @@ module MiGA::DistanceRunner::Database
37
37
  ##
38
38
  # Path to the database +metric+ for +dataset_name+ in +project+
39
39
  # (assumes that +dataset_name+ is a reference dataset)
40
- def ref_db(metric, dataset_name=nil)
40
+ def ref_db(metric, dataset_name = nil)
41
41
  dataset_name ||= dataset.name
42
- b = case metric
43
- when :haai
44
- "01.haai/#{dataset_name}.db"
45
- when :aai
46
- "02.aai/#{dataset_name}.db"
47
- when :ani
48
- "03.ani/#{dataset_name}.db"
49
- end
42
+ b =
43
+ case metric
44
+ when :haai
45
+ "01.haai/#{dataset_name}.db"
46
+ when :aai
47
+ "02.aai/#{dataset_name}.db"
48
+ when :ani
49
+ "03.ani/#{dataset_name}.db"
50
+ end
50
51
  File.expand_path(b, home)
51
52
  end
52
53
 
@@ -62,13 +63,14 @@ module MiGA::DistanceRunner::Database
62
63
  def stored_value(target, metric)
63
64
  # Check if self.dataset -> target is done (previous run)
64
65
  y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
65
- return y unless y.nil? or y.zero?
66
+ return y unless y.nil? || y.zero?
66
67
 
67
68
  # Check if self.dataset <- target is done (another thread)
68
- if dataset.is_ref? and project.path == ref_project.path
69
+ if dataset.is_ref? && project.path == ref_project.path
69
70
  y = data_from_db(
70
- target.name, dataset.name, ref_db(metric, target.name), metric)
71
- unless y.nil? or y.first.nil? or y.first.zero?
71
+ target.name, dataset.name, ref_db(metric, target.name), metric
72
+ )
73
+ unless y.nil? || y.first.nil? || y.first.zero?
72
74
  # Store a copy
73
75
  data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
74
76
  return y.first
@@ -93,7 +95,8 @@ module MiGA::DistanceRunner::Database
93
95
  SQLite3::Database.new(db) do |conn|
94
96
  y = conn.execute(
95
97
  "select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
96
- [n1, n2]).first
98
+ [n1, n2]
99
+ ).first
97
100
  end if File.size? db
98
101
  y
99
102
  end
@@ -104,7 +107,8 @@ module MiGA::DistanceRunner::Database
104
107
  SQLite3::Database.new(db) do |conn|
105
108
  conn.execute(
106
109
  "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
107
- "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data)
110
+ "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
111
+ )
108
112
  end
109
113
  checkpoint metric
110
114
  end
@@ -113,7 +117,7 @@ module MiGA::DistanceRunner::Database
113
117
  # Iterates for each entry in +db+
114
118
  def foreach_in_db(db, metric, &blk)
115
119
  SQLite3::Database.new(db) do |conn|
116
- conn.execute("select * from #{metric}").each{ |r| blk[r] }
120
+ conn.execute("select * from #{metric}").each { |r| blk[r] }
117
121
  end
118
122
  end
119
123
  end
@@ -1,13 +1,12 @@
1
-
2
1
  # High-end pipelines for DistanceRunner
3
2
  module MiGA::DistanceRunner::Pipeline
4
-
5
3
  # Recursively classify the dataset, returning an Array with two entries:
6
4
  # classification and cluster number
7
- def classify(clades, classif, metric, result_fh, val_cls=nil)
5
+ def classify(clades, classif, metric, result_fh, val_cls = nil)
8
6
  dir = File.expand_path(classif, clades)
9
7
  med = File.expand_path('miga-project.medoids', dir)
10
- return [classif,val_cls] unless File.size? med
8
+ return [classif, val_cls] unless File.size? med
9
+
11
10
  max_val = 0
12
11
  val_med = ''
13
12
  val_cls = nil
@@ -32,8 +31,10 @@ module MiGA::DistanceRunner::Pipeline
32
31
 
33
32
  # Builds a tree with all visited medoids from any classification level
34
33
  def build_medoids_tree(metric)
34
+ $stderr.puts "Building medoids tree (metric = #{metric})"
35
35
  db = query_db(metric)
36
36
  return unless File.size? db
37
+
37
38
  out_base = File.expand_path(dataset.name, home)
38
39
  ds_matrix = "#{out_base}.txt"
39
40
  ds_matrix_fh = File.open(ds_matrix, 'w')
@@ -42,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
42
43
  seq2 = []
43
44
  foreach_in_db(db, metric) do |r|
44
45
  seq2 << r[0]
45
- ds_matrix_fh.puts r[0,3].join("\t")
46
+ ds_matrix_fh.puts r[0, 3].join("\t")
46
47
  end
47
48
  # Find all values among visited datasets in ref_project
48
49
  ref_r = ref_project.result("#{metric}_distances") or return
@@ -50,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
50
51
  fh.each_line do |ln|
51
52
  r = ln.chomp.split("\t")
52
53
  next unless seq2.include?(r[1]) or seq2.include?(r[2])
53
- ds_matrix_fh.puts r[1,3].join("\t")
54
+
55
+ ds_matrix_fh.puts r[1, 3].join("\t")
54
56
  end
55
57
  end
56
58
  ds_matrix_fh.close
@@ -61,6 +63,7 @@ module MiGA::DistanceRunner::Pipeline
61
63
 
62
64
  # Tests taxonomy
63
65
  def tax_test
66
+ $stderr.puts "Testing taxonomy | opts = #{opts}"
64
67
  # Get taxonomy of closest relative
65
68
  from_ref_project = (project != ref_project)
66
69
  res_dir = from_ref_project ?
@@ -72,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
72
75
  dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
73
76
  cr = dataset.closest_relatives(1, from_ref_project)
74
77
  return if cr.nil? or cr.empty?
78
+
75
79
  tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
76
80
 
77
81
  # Run the test for each rank
78
82
  tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
79
- r = tax_test.map do |k,v|
83
+ r = tax_test.map do |k, v|
80
84
  sig = ''
81
85
  [0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
82
86
  [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
@@ -95,11 +99,13 @@ module MiGA::DistanceRunner::Pipeline
95
99
 
96
100
  # Transfer the taxonomy to the current dataset
97
101
  def transfer_taxonomy(tax)
102
+ $stderr.puts "Transferring taxonomy"
98
103
  return if tax.nil?
104
+
99
105
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
100
- tax_a = tax.
101
- select { |i| i[1] != '?' && i[2] <= pval }.
102
- map { |i| i[0,2].join(':') }
106
+ tax_a = tax
107
+ .select { |i| i[1] != '?' && i[2] <= pval }
108
+ .map { |i| i[0, 2].join(':') }
103
109
  dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
104
110
  dataset.save
105
111
  end
@@ -1,13 +1,10 @@
1
-
2
1
  require_relative 'base.rb'
3
2
  require_relative 'temporal.rb'
4
3
  require_relative 'database.rb'
5
4
  require_relative 'commands.rb'
6
5
  require_relative 'pipeline.rb'
7
6
 
8
-
9
7
  class MiGA::DistanceRunner
10
-
11
8
  include MiGA::DistanceRunner::Temporal
12
9
  include MiGA::DistanceRunner::Database
13
10
  include MiGA::DistanceRunner::Commands
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
16
13
  attr_reader :project, :ref_project, :dataset, :opts, :home
17
14
  attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
18
15
 
19
- def initialize(project_path, dataset_name, opts_hash={})
16
+ def initialize(project_path, dataset_name, opts_hash = {})
20
17
  @opts = opts_hash
21
18
  @project = MiGA::Project.load(project_path) or
22
19
  raise "No project at #{project_path}"
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
30
27
  @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
31
28
  project.is_clade? ? 'save-rbm' : 'no-save-rbm'
32
29
  end
33
- @opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
30
+ @opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
34
31
  if opts[:run_taxonomy] and project.metadata[:ref_project]
35
32
  ref_path = project.metadata[:ref_project]
36
33
  @home = File.expand_path('05.taxonomy', @home)
@@ -53,11 +50,14 @@ class MiGA::DistanceRunner
53
50
  @opts[:ani_p] ||= 'blast+'
54
51
  @opts[:distances_checkpoint] ||= 10
55
52
  @opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
53
+ $stderr.puts "Options: #{opts}"
56
54
  end
57
55
 
58
56
  # Launch the appropriate analysis
59
57
  def go!
58
+ $stderr.puts "Launching analysis"
60
59
  return if dataset.is_multi?
60
+
61
61
  Dir.mktmpdir do |tmp_dir|
62
62
  @tmp = tmp_dir
63
63
  create_temporals
@@ -67,23 +67,26 @@ class MiGA::DistanceRunner
67
67
 
68
68
  # Launch analysis for reference datasets
69
69
  def go_ref!
70
+ $stderr.puts "Launching analysis for reference dataset"
70
71
  # Initialize databases
71
72
  initialize_dbs! true
72
73
 
73
74
  # first-come-first-serve traverse
74
75
  ref_project.each_dataset do |ds|
75
76
  next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
77
+
76
78
  puts "[ #{Time.now} ] #{ds.name}"
77
79
  ani_after_aai(ds)
78
80
  end
79
81
 
80
82
  # Finalize
81
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
83
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
82
84
  end
83
85
 
84
86
  ##
85
87
  # Launch analysis for query datasets
86
88
  def go_query!
89
+ $stderr.puts "Launching analysis for query dataset"
87
90
  # Check if project is ready
88
91
  tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
89
92
  res = ref_project.result(tsk[0])
@@ -100,14 +103,15 @@ class MiGA::DistanceRunner
100
103
  # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
101
104
  par_dir = File.dirname(File.expand_path(classif, res.dir))
102
105
  par = File.expand_path('miga-project.classif', par_dir)
103
- closest = {dataset: nil, ani: 0.0}
106
+ closest = { dataset: nil, ani: 0.0 }
104
107
  if File.size? par
105
108
  File.open(par, 'r') do |fh|
106
109
  fh.each_line do |ln|
107
110
  r = ln.chomp.split("\t")
108
111
  next unless r[1].to_i == val_cls
112
+
109
113
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
110
- closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
114
+ closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
111
115
  end
112
116
  end
113
117
  end
@@ -115,21 +119,23 @@ class MiGA::DistanceRunner
115
119
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
116
120
  cl_path = res.file_path :clades_ani95
117
121
  if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
118
- File.foreach(cl_path).
119
- map { |i| i.chomp.split(',') }.
120
- find( lambda{[]} ){ |i| i.include? closest[:ds] }.
121
- each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
122
+ File.foreach(cl_path)
123
+ .map { |i| i.chomp.split(',') }
124
+ .find(lambda { [] }) { |i| i.include? closest[:ds] }
125
+ .each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
122
126
  end
123
127
 
124
128
  # Finalize
125
- [:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
129
+ [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
126
130
  build_medoids_tree(tsk[1])
127
131
  transfer_taxonomy(tax_test)
128
132
  end
129
133
 
130
134
  # Launch analysis for taxonomy jobs
131
135
  def go_taxonomy!
136
+ $stderr.puts "Launching taxonomy analysis"
132
137
  return unless project.metadata[:ref_project]
138
+
133
139
  go_query! # <- yeah, it's actually the same, just different ref_project
134
140
  end
135
141
  end
@@ -1,12 +1,14 @@
1
-
2
1
  require 'tmpdir'
3
2
  require 'zlib'
4
3
 
5
4
  module MiGA::DistanceRunner::Temporal
6
-
7
5
  # Copy input files to the (local) temporal folder
8
6
  def create_temporals
9
- rf = {essential_genes: :ess_genes, cds: :proteins, assembly: :largecontigs}
7
+ rf = {
8
+ essential_genes: :ess_genes,
9
+ cds: :proteins,
10
+ assembly: :largecontigs
11
+ }
10
12
  rf.each do |res, file|
11
13
  r = dataset.result(res)
12
14
  f = r.nil? ? nil : r.file_path(file)
@@ -37,8 +39,9 @@ module MiGA::DistanceRunner::Temporal
37
39
 
38
40
  # Copies temporal databases back to the MiGA Project
39
41
  def checkpoint!(metric)
42
+ $stderr.puts "Checkpoint (metric = #{metric})"
40
43
  SQLite3::Database.new(tmp_dbs[metric]) do |conn|
41
- conn.execute("select count(*) from #{metric==:haai ? :aai : metric}")
44
+ conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
42
45
  end
43
46
  FileUtils.cp(tmp_dbs[metric], dbs[metric])
44
47
  @db_counts[metric] = 0
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  dataset = ARGV.shift
6
6
  project = ARGV.shift
7
- opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
7
+ opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(dataset, project, opts)
9
9
  runner.go!
@@ -6,10 +6,10 @@ domain = ARGV.shift
6
6
 
7
7
  def quality(hsh)
8
8
  q = {}
9
- q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
10
- q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
11
- q[:cmp] = 100.0*q[:found].to_f/hsh.size
12
- q[:cnt] = 100.0*q[:multi].to_f/hsh.size
9
+ q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
10
+ q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
11
+ q[:cmp] = 100.0 * q[:found].to_f / hsh.size
12
+ q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
13
13
  q
14
14
  end
15
15
 
@@ -39,7 +39,7 @@ end
39
39
  # Find expected genes for domain
40
40
  n_dom = Hash[
41
41
  `HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
42
- .chomp.split("\n").map { |i| i.split("\t") }
42
+ .chomp.split("\n").map { |i| i.split("\t") }
43
43
  ]
44
44
  l_dom = n_dom.keys
45
45
  cnt_dom = {}
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
54
54
  ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
55
55
  if q[:multi] > 0
56
56
  ofh.puts "! Multiple copies: "
57
- cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
57
+ cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
58
58
  end
59
59
  if q[:found] < cnt_dom.size
60
60
  ofh.puts "! Missing genes: "
61
- cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
61
+ cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
62
62
  end
63
63
  end