miga-base 0.7.22.0 → 0.7.25.0

Files changed (66)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +10 -8
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/index_wf.rb +4 -2
  14. data/lib/miga/cli/action/init.rb +93 -144
  15. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  16. data/lib/miga/cli/action/init/files_helper.rb +119 -0
  17. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  18. data/lib/miga/cli/action/new.rb +15 -9
  19. data/lib/miga/cli/action/option.rb +44 -0
  20. data/lib/miga/cli/action/preproc_wf.rb +7 -5
  21. data/lib/miga/cli/action/quality_wf.rb +3 -3
  22. data/lib/miga/cli/action/tax_dist.rb +1 -1
  23. data/lib/miga/cli/action/tax_test.rb +1 -1
  24. data/lib/miga/cli/action/wf.rb +71 -53
  25. data/lib/miga/cli/base.rb +17 -5
  26. data/lib/miga/cli/objects_helper.rb +23 -18
  27. data/lib/miga/common.rb +4 -2
  28. data/lib/miga/common/net.rb +74 -0
  29. data/lib/miga/common/with_option.rb +83 -0
  30. data/lib/miga/common/with_result.rb +3 -2
  31. data/lib/miga/dataset/base.rb +20 -2
  32. data/lib/miga/dataset/result.rb +5 -3
  33. data/lib/miga/metadata.rb +25 -13
  34. data/lib/miga/project/base.rb +82 -2
  35. data/lib/miga/project/result.rb +4 -4
  36. data/lib/miga/remote_dataset.rb +2 -0
  37. data/lib/miga/result/stats.rb +2 -2
  38. data/lib/miga/version.rb +4 -2
  39. data/scripts/essential_genes.bash +18 -3
  40. data/scripts/miga.bash +8 -2
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/lair_test.rb +1 -2
  47. data/test/net_test.rb +34 -0
  48. data/test/with_option_test.rb +115 -0
  49. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  50. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  51. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  52. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  53. data/utils/FastAAI/README.md +84 -0
  54. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  55. data/utils/cleanup-databases.rb +2 -3
  56. data/utils/distance/base.rb +9 -0
  57. data/utils/distance/commands.rb +183 -81
  58. data/utils/distance/database.rb +69 -10
  59. data/utils/distance/pipeline.rb +15 -21
  60. data/utils/distance/runner.rb +27 -49
  61. data/utils/distance/temporal.rb +4 -2
  62. data/utils/distances.rb +2 -2
  63. data/utils/index_metadata.rb +1 -2
  64. data/utils/requirements.txt +6 -5
  65. data/utils/subclade/runner.rb +10 -11
  66. metadata +18 -6
data/utils/cleanup-databases.rb
@@ -15,11 +15,11 @@ m.say 'Cleaning Databases'
 (0..thr - 1).each do |t|
   fork do
     dsn.each_with_index do |i, idx|
-      m.advance('Dataset:', dsn.size, idx + 1) if t == 0
+      m.advance('Dataset:', idx + 1, dsn.size) if t == 0
       next unless (idx % thr) == t

       d = p.dataset(i)
-      next unless d.is_ref? and d.is_active?
+      next unless d.ref? && d.active?

       d.cleanup_distances!
     end
@@ -28,4 +28,3 @@ end
 Process.waitall
 m.advance('Dataset:', dsn.size, dsn.size)
 m.say
-
data/utils/distance/base.rb
@@ -2,4 +2,13 @@ require 'miga'
 require 'miga/tax_dist'

 class MiGA::DistanceRunner
+  require_relative 'temporal.rb'
+  require_relative 'database.rb'
+  require_relative 'commands.rb'
+  require_relative 'pipeline.rb'
+
+  include MiGA::DistanceRunner::Temporal
+  include MiGA::DistanceRunner::Database
+  include MiGA::DistanceRunner::Commands
+  include MiGA::DistanceRunner::Pipeline
 end
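The new base.rb composes the runner from the four helper modules via require_relative plus include. A minimal standalone sketch of the same pattern, with illustrative module names that are not part of MiGA:

  require 'tmpdir'

  # Each concern lives in its own module and the runner class mixes them in
  # (names below are hypothetical; only the pattern mirrors the diff above).
  module Toolbox
    module Temporal
      def tmp_file(name)
        File.join(Dir.tmpdir, name)
      end
    end

    module Commands
      def run_cmd(cmd)
        puts "CMD: #{cmd}"
      end
    end

    class Runner
      include Temporal
      include Commands
    end
  end

  r = Toolbox::Runner.new
  puts r.tmp_file('aai.db')  # e.g. /tmp/aai.db
  r.run_cmd('aai.rb --help') # prints the command it would run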
data/utils/distance/commands.rb
@@ -1,105 +1,207 @@
 module MiGA::DistanceRunner::Commands
-  # Estimates or calculates AAI against +target+
-  def aai(target)
-    # Check if the request makes sense
-    return nil if target.nil? || target.result(:essential_genes).nil?
-
-    # Check if it's been calculated
-    y = stored_value(target, :aai)
-    return y unless y.nil? || y.zero?
-
-    # Try hAAI (except in clade projects)
-    unless @ref_project.is_clade?
-      y = haai(target)
-      return y unless y.nil? || y.zero?
+  ##
+  # Estimates AAI against +targets+ using hAAI
+  def haai(targets)
+    puts "[#{Time.now}] hAAI: #{dataset.name} vs #{targets.size} targets"
+    empty_vals = targets.map { |_i| nil }
+    return empty_vals if opts[:haai_p] == 'no'
+
+    # Launch comparisons
+    sbj = pending_targets(targets, :haai)
+    unless sbj.empty?
+      opts[:haai_p] == 'fastaai' ? fastaai_cmd(sbj) : haai_cmd(sbj)
     end
-    # Full AAI
-    aai_cmd(
-      tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
-      dataset.name, target.name, tmp_dbs[:aai]
-    ).tap { checkpoint :aai }
+
+    # Return AAI estimates from the database
+    batch_values_from_db(:aai, targets.map { |i| i&.name })
   end

   ##
-  # Estimates AAI against +target+ using hAAI
-  def haai(target)
-    return nil if opts[:haai_p] == 'no'
-
-    haai = aai_cmd(tmp_file('ess_genes.fa'),
-      target.result(:essential_genes).file_path(:ess_genes),
-      dataset.name, target.name, tmp_dbs[:haai],
-      aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
-    checkpoint :haai
-    return nil if haai.nil? || haai.zero? || haai > 90.0
-
-    aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
-    SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
-      conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
-        [dataset.name, target.name, aai]
+  # Estimates or calculates AAI against +targets+
+  def aai(targets)
+    puts "[#{Time.now}] AAI: #{dataset.name} vs #{targets.size} targets"
+
+    # Try hAAI first
+    haai(targets)
+
+    # Launch comparisons
+    pending_targets(targets, :aai).each do |target|
+      # Full AAI
+      target_cds = target.result(:cds).file_path(:proteins) or next
+      aairb_cmd(
+        tmp_file('proteins.fa'), target_cds,
+        dataset.name, target.name, tmp_dbs[:aai], checkpoint: :aai
+      )
     end
-    checkpoint :aai
-    aai
+
+    # Return AAI from the database
+    batch_values_from_db(:aai, targets.map { |i| i&.name })
   end

   ##
-  # Calculates ANI against +target+
-  def ani(target)
-    # Check if the request makes sense
-    t = tmp_file('largecontigs.fa')
-    r = target.result(:assembly)
-    return nil if r.nil? || !File.size?(t)
-
-    # Check if it's been calculated
-    y = stored_value(target, :ani)
-    return y unless y.nil? || y.zero?
-
-    # Run it
-    ani_cmd(
-      t, r.file_path(:largecontigs),
-      dataset.name, target.name, tmp_dbs[:ani]
-    ).tap { checkpoint :ani }
+  # Calculates ANI against +targets+
+  def ani(targets)
+    puts "[#{Time.now}] ANI: #{dataset.name} vs #{targets.size} targets"
+    empty_vals = targets.map { |_i| nil }
+    return empty_vals unless File.size?(tmp_file('largecontigs.fa'))
+
+    # Launch comparisons
+    sbj = pending_targets(targets, :ani)
+    unless sbj.empty?
+      opts[:ani_p] == 'fastani' ? fastani_cmd(sbj) : anirb_cmd(sbj)
+    end
+
+    # Return ANI from the database
+    batch_values_from_db(:ani, targets.map { |i| i&.name })
   end

   ##
-  # Calculates and returns ANI against +target+ if AAI >= +aai_limit+.
-  # Returns +nil+ otherwise
-  def ani_after_aai(target, aai_limit = 85.0)
-    aai = aai(target)
-    (aai.nil? || aai < aai_limit) ? nil : ani(target)
+  # Calculates and returns ANI against +targets+ if AAI >= +aai_limit+.
+  # Note that ANI values may be returned for lower (or failing) AAIs if the
+  # value is already stored in the database
+  def ani_after_aai(targets, aai_limit = 85.0)
+    # Run AAI and select targets with AAI ≥ aai_limit
+    aai = aai(targets)
+    sbj = aai.each_with_index.map { |i, k| targets[k] if i&.> aai_limit }
+    sbj.compact!
+
+    # Run ANI
+    ani(sbj) unless sbj.empty?
+
+    # Return ANI from the database
+    batch_values_from_db(:ani, targets.map { |i| i&.name })
   end

   ##
   # Execute an AAI command
-  def aai_cmd(f1, f2, n1, n2, db, o = {})
+  def aairb_cmd(f1, f2, n1, n2, db, o = {})
     o = opts.merge(o)
-    v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
-      --name1 "#{n1}" --name2 "#{n2}" \
-      -t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
-      -p "#{o[:aai_p] || 'blast+'}"`.chomp
-    (v.nil? || v.empty?) ? 0 : v.to_f
+    run_cmd <<~CMD
+      aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
+        --name1 "#{n1}" --name2 "#{n2}" \
+        -t "#{o[:thr]}" -a --#{'no-' unless o[:aai_save_rbm]}save-rbm \
+        -p "#{o[:aai_p]}"
+    CMD
+  ensure
+    checkpoint(o[:checkpoint]) if o[:checkpoint]
   end

   ##
-  # Execute an ANI command
-  def ani_cmd(f1, f2, n1, n2, db, o = {})
-    o = opts.merge(o)
-    v = nil
-    if o[:ani_p] == 'fastani'
-      out = `fastANI -r "#{f1}" -q "#{f2}" \
-        -o /dev/stdout 2>/dev/null`.chomp.split(/\s+/)
-      unless out.empty?
-        SQLite3::Database.new(db) do |conn|
-          conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
-            [n1, n2, out[2], out[3], out[4]]
-        end
+  # Execute an ani.rb command
+  def anirb_cmd(targets)
+    f1 = tmp_file('largecontigs.fa')
+    return unless File.size?(f1)
+
+    targets.each do |target|
+      target_asm = target&.result(:assembly)&.file_path(:largecontigs) or next
+      run_cmd <<~CMD
+        ani.rb -1 "#{f1}" -2 "#{target_asm}" -S "#{tmp_dbs[:ani]}" \
+          --name1 "#{dataset.name}" --name2 "#{target.name}" \
+          -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
+          -p "#{opts[:ani_p]}"
+      CMD
+      checkpoint(:ani)
+    end
+  end
+
+  ##
+  # Execute a FastANI command
+  def fastani_cmd(targets)
+    f1 = tmp_file('largecontigs.fa')
+    return unless File.size?(f1)
+
+    # Run FastANI
+    File.open(f2 = tmp_file, 'w') do |fh|
+      targets.each do |target|
+        target_asm = target&.result(:assembly)&.file_path(:largecontigs)
+        fh.puts target_asm if target_asm
+      end
+    end
+    run_cmd <<~CMD
+      fastANI -q "#{f1}" --rl "#{f2}" -t #{opts[:thr]} \
+        -o "#{f3 = tmp_file}"
+    CMD
+
+    # Retrieve resulting data and save to DB
+    data = {}
+    File.open(f3, 'r') do |fh|
+      fh.each do |ln|
+        row = ln.chomp.split("\t")
+        n2 = File.basename(row[1], '.gz')
+        n2 = File.basename(n2, '.LargeContigs.fna')
+        data[n2] = [row[2].to_f, 0.0, row[3].to_i, row[4].to_i]
+      end
+    end
+    batch_data_to_db(:ani, data)
+
+    # Cleanup
+    [f2, f3].each { |i| File.unlink(i) }
+  end
+
+  ##
+  # Execute a FastAAI command
+  def fastaai_cmd(targets)
+    qry_idx = dataset.result(:essential_genes).file_path(:fastaai_index)
+    return nil unless qry_idx
+
+    # Run FastAAI
+    File.open(f1 = tmp_file, 'w') { |fh| fh.puts qry_idx }
+    File.open(f2 = tmp_file, 'w') do |fh|
+      targets.each do |target|
+        target_idx = target&.result(:essential_genes)&.file_path(:fastaai_index)
+        fh.puts target_idx if target_idx
+      end
+    end
+    run_cmd <<~CMD
+      FastAAI --qd "#{f1}" --rd "#{f2}" --output "#{f3 = tmp_file}" \
+        --threads #{opts[:thr]}
+    CMD
+
+    # Save values in the databases
+    haai_data = {}
+    aai_data = {}
+    File.open(f3, 'r') do |fh|
+      fh.each do |ln|
+        out = ln.chomp.split("\t")
+        haai_data[out[1]] = [
+          out[2].to_f * 100, out[3].to_f * 100, out[4].to_i, out[5].to_i
+        ]
+        aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
       end
-      v = out[2]
-    else
-      v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
-        --name1 "#{n1}" --name2 "#{n2}" \
-        -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
-        --lookup-first -p "#{o[:ani_p] || 'blast+'}"`.chomp
     end
-    v.nil? || v.empty? ? 0 : v.to_f
+    batch_data_to_db(:haai, haai_data)
+    batch_data_to_db(:aai, aai_data)
+
+    # Cleanup
+    [f1, f2, f3].each { |i| File.unlink(i) }
+  end
+
+  ##
+  # Execute an hAAI command
+  def haai_cmd(targets)
+    aai_data = {}
+    targets.each do |target|
+      target_ess = target&.result(:essential_genes)&.file_path(:ess_genes)
+      next unless target_ess
+
+      # hAAI
+      h = aairb_cmd(
+        tmp_file('ess_genes.fa'), target_ess,
+        dataset.name, target.name, tmp_dbs[:haai],
+        aai_save_rbm: false, aai_p: opts[:haai_p], checkpoint: :haai
+      )&.chomp&.to_f
+      next if h.nil? || h.zero? || h > 90.0
+
+      # Estimated AAI
+      aai_data[target.name] = [
+        100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - h)), 0, 0, 0
+      ] unless h&.zero? || h > 90.0
+    end
+    batch_data_to_db(:aai, aai_data)
+  end
+
+  def run_cmd(cmd)
+    puts "CMD: #{cmd}"
+    `#{cmd}`
   end
 end
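The haai_cmd above keeps the same empirical hAAI-to-AAI conversion the old haai method used, applied only when 0 < hAAI <= 90. A standalone sketch of that arithmetic (plain Ruby, no MiGA dependencies):

  # hAAI -> estimated AAI, as stored into the aai table with sd = n = omega = 0
  def estimate_aai_from_haai(haai)
    return nil if haai.nil? || haai.zero? || haai > 90.0

    100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
  end

  puts estimate_aai_from_haai(70.0)          # => ~51.1
  puts estimate_aai_from_haai(95.0).inspect  # => nil (no estimate stored, so the
                                             #    target stays pending for full AAI)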
data/utils/distance/database.rb
@@ -22,12 +22,16 @@ module MiGA::DistanceRunner::Database
         end
       end
       # Initialize if it doesn't exist
-      SQLite3::Database.new(dbs[m]) do |conn|
-        conn.execute "create table if not exists #{t}(" +
-          "seq1 varchar(256), seq2 varchar(256), " +
-          "#{t} float, sd float, n int, omega int" +
-          ")"
-      end unless File.size? dbs[m]
+      unless File.size? dbs[m]
+        SQLite3::Database.new(dbs[m]) do |conn|
+          conn.execute <<~SQL
+            create table if not exists #{t}(
+              seq1 varchar(256), seq2 varchar(256),
+              #{t} float, sd float, n int, omega int
+            )
+          SQL
+        end
+      end
       # Copy over to (local) temporals
       @tmp_dbs[m] = tmp_file("#{m}.db")
       FileUtils.cp(dbs[m], tmp_dbs[m])
@@ -66,7 +70,7 @@ module MiGA::DistanceRunner::Database
     return y unless y.nil? || y.zero?

     # Check if self.dataset <- target is done (another thread)
-    if dataset.is_ref? && project.path == ref_project.path
+    if dataset.ref? && project.path == ref_project.path
       y = data_from_db(
         target.name, dataset.name, ref_db(metric, target.name), metric
       )
@@ -92,27 +96,72 @@ module MiGA::DistanceRunner::Database
   # possible number of matches
   def data_from_db(n1, n2, db, metric)
     y = nil
+    table = metric == :haai ? :aai : metric
     SQLite3::Database.new(db) do |conn|
       y = conn.execute(
-        "select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
+        "select #{table}, sd, n, omega from #{table} where seq1=? and seq2=?",
         [n1, n2]
       ).first
-    end if File.size? db
+    end if File.size?(db)
     y
   end

   ##
   # Save +data+ of +metric+ between +n1+ and +n2+ in the +db+ database.
   def data_to_db(n1, n2, db, metric, data)
+    table = metric == :haai ? :aai : metric
     SQLite3::Database.new(db) do |conn|
       conn.execute(
-        "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
+        "insert into #{table} (seq1, seq2, #{table}, sd, n, omega) " +
         "values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
       )
     end
     checkpoint metric
   end

+  ##
+  # Saves +data+ of +metric+ in batch to the temporary database,
+  # and assumes query is +#dataset+. +data+ must be a hash with target names
+  # as key and arrays as values with: [val, sd, n, omega]
+  def batch_data_to_db(metric, data)
+    db = tmp_dbs[metric]
+    table = metric == :haai ? :aai : metric
+    `cp #{db} ~/here.db`
+    SQLite3::Database.new(db) do |conn|
+      data.each do |k, v|
+        sql = <<~SQL
+          insert into #{table} (
+            seq1, seq2, #{table}, sd, n, omega
+          ) values (?, ?, ?, ?, ?, ?)
+        SQL
+        conn.execute(sql, [dataset.name, k] + v)
+      end
+    end
+    checkpoint(metric)
+  end
+
+  ##
+  # Retrieves data of +metric+ in batch from the temporary database,
+  # and assumes query is +#dataset+. The output data is a hash with the same
+  # structure described for +#batch_data_to_db+
+  def batch_data_from_db(metric)
+    db = tmp_dbs[metric]
+    table = metric == :haai ? :aai : metric
+    data = {}
+    SQLite3::Database.new(db) do |conn|
+      sql = "select seq2, #{table}, sd, n, omega from #{table}"
+      conn.execute(sql).each { |row| data[row.shift] = row }
+    end
+    data
+  end
+
+  ##
+  # Retrieve only +metric+ values against +names+
+  def batch_values_from_db(metric, names)
+    data = batch_data_from_db(metric)
+    names.map { |i| data[i]&.first }
+  end
+
   ##
   # Iterates for each entry in +db+
   def foreach_in_db(db, metric, &blk)
@@ -120,4 +169,14 @@ module MiGA::DistanceRunner::Database
       conn.execute("select * from #{metric}").each { |r| blk[r] }
     end
   end
+
+  ##
+  # Select only those targets that are not yet stored in either direction
+  def pending_targets(targets, metric)
+    saved = batch_data_from_db(metric).keys
+    targets
+      .compact
+      .select { |i| !saved.include?(i.name) }
+      .select { |i| !stored_value(i, metric)&.> 0.0 }
+  end
 end
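The batch helpers above move data as a hash of target name => [value, sd, n, omega] through the per-metric SQLite tables. A self-contained sketch of that round trip (requires the sqlite3 gem; names and values below are made up, only the table layout follows the diff):

  require 'sqlite3'

  # Same schema as the table created in the initialize hunk above
  db = SQLite3::Database.new(':memory:')
  db.execute <<~SQL
    create table if not exists aai(
      seq1 varchar(256), seq2 varchar(256),
      aai float, sd float, n int, omega int
    )
  SQL

  # Shape expected by batch_data_to_db: target name => [val, sd, n, omega]
  data = { 'target_A' => [74.2, 1.3, 120, 150], 'target_B' => [51.1, 0.0, 0, 0] }
  data.each do |k, v|
    db.execute(
      'insert into aai (seq1, seq2, aai, sd, n, omega) values (?, ?, ?, ?, ?, ?)',
      ['query_ds', k] + v
    )
  end

  # Equivalent of batch_data_from_db / batch_values_from_db on this toy table
  rows = {}
  db.execute('select seq2, aai, sd, n, omega from aai').each { |r| rows[r.shift] = r }
  p %w[target_A target_B target_C].map { |n| rows[n]&.first } # => [74.2, 51.1, nil]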
data/utils/distance/pipeline.rb
@@ -11,19 +11,14 @@ module MiGA::DistanceRunner::Pipeline
     val_med = ''
     val_cls = nil
     i_n = 0
-    File.open(med, 'r') do |med_fh|
-      med_fh.each_line do |med_ln|
-        i_n += 1
-        med_ln.chomp!
-        val = send(metric, ref_project.dataset(med_ln))
-        if !val.nil? and val >= max_val
-          max_val = val
-          val_med = med_ln
-          val_cls = i_n
-          puts "[#{classif}] New max: #{val_med} (#{val_cls}): #{max_val}"
-        end
-      end
-    end
+    sbj_datasets = File.foreach(med).map { |i| ref_project.dataset(i.chomp) }
+    values = send(metric, sbj_datasets)
+    max_idx = values.map(&:to_f).each_with_index.max[1]
+    max_val = values[max_idx]
+    val_med = sbj_dataset[max_idx].name
+    val_cls = max_idx + 1
+    puts "[#{classif}] New max: #{val_med} (#{val_cls}): #{max_val}"
+
     classif = "#{classif}/miga-project.sc-#{val_cls}"
     result_fh.puts [val_cls, val_med, max_val, classif].join("\t")
     classify(clades, classif, metric, result_fh, val_cls)
@@ -31,12 +26,9 @@ module MiGA::DistanceRunner::Pipeline

   # Run distances against datasets listed in metadata's +:dist_req+
   def distances_by_request(metric)
-    return unless dataset.metadata[:dist_req]
-
     $stderr.puts 'Running distances by request'
-    dataset.metadata[:dist_req].each do |target|
-      ds = ref_project.dataset(target) and send(metric, ds)
-    end
+    sbj_datasets = dataset.option(:dist_req).map { |i| ref_project.dataset(i) }
+    send(metric, sbj_datasets)
   end

   # Builds a tree with all visited medoids from any classification level
@@ -76,8 +68,10 @@ module MiGA::DistanceRunner::Pipeline
     $stderr.puts "Testing taxonomy | opts = #{opts}"
     # Get taxonomy of closest relative
     from_ref_project = (project != ref_project)
-    res_dir = from_ref_project ?
-      File.expand_path('data/09.distances/05.taxonomy', project.path) : home
+    res_dir =
+      from_ref_project ?
+        File.expand_path('data/09.distances/05.taxonomy', project.path) :
+        home
     Dir.mkdir res_dir unless Dir.exist? res_dir
     File.open(File.expand_path("#{dataset.name}.done", res_dir), 'w') do |fh|
       fh.puts Time.now.to_s
@@ -112,7 +106,7 @@ module MiGA::DistanceRunner::Pipeline
     $stderr.puts 'Transferring taxonomy'
     return if tax.nil?

-    pval = (project.metadata[:tax_pvalue] || 0.05).to_f
+    pval = project.option(:tax_pvalue)
     tax_a = tax
       .select { |i| i[1] != '?' && i[2] <= pval }
       .map { |i| i[0, 2].join(':') }
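The rewritten classify step above picks the winning medoid from one batch of metric values; nil entries (no stored comparison) are coerced to 0.0 before taking the index of the maximum. In isolation:

  values = [nil, 92.4, 88.1, nil]                     # metric vs each medoid
  max_idx = values.map(&:to_f).each_with_index.max[1] # index of the best hit
  max_val = values[max_idx]                           # 92.4
  val_cls = max_idx + 1                               # 1-based subclade index
  puts "New max: ##{val_cls} with #{max_val}"         # => New max: #2 with 92.4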