miga-base 0.7.5.0 → 0.7.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6361b203b4612214936255e8b285959cbf556e7e64f88119a058e167774264f9
4
- data.tar.gz: 1a3d8df11d57a363a49eecc88011a5337ddb40573cf8727942eea24e5071ecf5
3
+ metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
4
+ data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
5
5
  SHA512:
6
- metadata.gz: 53786e1830ed8b3c56bffbf1fc581745185084e96b68631323b01fd9994c2b59cdeac582c2f4cef8d0ed622136a2c2cf4f68e06cc3a9e7dbf580d10d1aedc9e5
7
- data.tar.gz: 26322daf6a52906466c4f28cc93ef455bac22e98e115fc6f6b3ad685998d58854454c8044126491e5a886910ce5ddceece2713e94acafaf86459442970617f4f
6
+ metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
7
+ data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
data/README.md CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
41
41
  collaboration between [Kostas Lab][kostas] at the Georgia Institute of
42
42
  Technology and [RDP][rdp] at Michigan State University.
43
43
 
44
+ See also the [complete list of contributors](manual/part1/contributors.md).
44
45
 
45
46
  # License
46
47
 
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
42
42
  '--no-summaries',
43
43
  'Do not generate intermediate step summaries'
44
44
  ) { |v| cli[:summaries] = v }
45
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
45
+ opts_for_wf(
46
+ opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
47
+ )
46
48
  end
47
49
  end
48
50
 
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
17
17
  'Activate dataset; requires -D'
18
18
  ) { |v| cli[:activate] = v }
19
19
  opt.on(
20
- '--inactivate',
21
- 'Inactivate dataset; requires -D'
22
- ) { |v| cli[:activate] = !v }
20
+ '--inactivate [reason]',
21
+ 'Inactivate dataset; requires -D',
22
+ 'The argument is optional: reason to inactivate dataset'
23
+ ) { |v| cli[:activate] = false ; cli[:reason] = v }
23
24
  end
24
25
  end
25
26
 
26
27
  def perform
27
28
  obj = cli.load_project_or_dataset
28
29
  unless cli[:activate].nil?
29
- cli.ensure_par({ dataset: '-D' },
30
- '%<name>s is mandatory with --[in-]activate: please provide %<flag>s')
31
- cli[:activate] ? obj.activate! : obj.inactivate!
30
+ cli.ensure_par(
31
+ { dataset: '-D' },
32
+ '%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
33
+ )
34
+ cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
32
35
  end
33
36
  cli.add_metadata(obj)
34
37
  obj.save
@@ -66,8 +66,10 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
66
66
  end
67
67
 
68
68
  def run_r_cmd(cli, paths, cmd)
69
- run_cmd(cli,
70
- "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
69
+ run_cmd(
70
+ cli,
71
+ "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1"
72
+ )
71
73
  end
72
74
 
73
75
  def test_r_package(cli, paths, pkg)
@@ -81,16 +83,21 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
81
83
  end
82
84
 
83
85
  def test_ruby_gem(cli, paths, pkg)
84
- run_cmd(cli,
85
- "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
86
+ run_cmd(
87
+ cli,
88
+ "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
89
+ )
86
90
  $?.success?
87
91
  end
88
92
 
89
93
  def install_ruby_gem(cli, paths, pkg)
90
94
  gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
91
- run_cmd(cli, "#{paths['ruby'].shellescape} \
95
+ run_cmd(
96
+ cli,
97
+ "#{paths['ruby'].shellescape} \
92
98
  -r rubygems -r rubygems/gem_runner \
93
- -e #{gem_cmd.shellescape} 2>&1")
99
+ -e #{gem_cmd.shellescape} 2>&1"
100
+ )
94
101
  end
95
102
 
96
103
  def list_requirements
@@ -99,7 +106,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
99
106
  'no', %w(yes no)
100
107
  ) == 'yes'
101
108
  cli.puts ''
102
- req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
109
+ req_path = File.join(MiGA.root_path, 'utils', 'requirements.txt')
103
110
  File.open(req_path, 'r') do |fh|
104
111
  fh.each_line { |ln| cli.puts ln }
105
112
  end
@@ -205,18 +212,18 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
205
212
  def check_additional_files(paths)
206
213
  if cli[:mytaxa]
207
214
  cli.puts 'Looking for MyTaxa databases:'
208
- mt = File.dirname paths["MyTaxa"]
215
+ mt = File.dirname paths['MyTaxa']
209
216
  cli.print 'Looking for scores... '
210
217
  unless Dir.exist?(File.expand_path('db', mt))
211
- cli.puts "no.\nExecute 'python2 #{mt}/utils/download_db.py'."
218
+ cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
212
219
  exit(1)
213
220
  end
214
- cli.puts 'yes.'
221
+ cli.puts 'yes'
215
222
  cli.print 'Looking for diamond db... '
216
223
  unless File.exist?(File.expand_path('AllGenomes.faa.dmnd', mt))
217
- cli.puts "no.\nDownload " \
224
+ cli.puts "no\nDownload " \
218
225
  "'http://enve-omics.ce.gatech.edu/data/public_mytaxa/" \
219
- "AllGenomes.faa.dmnd' into #{mt}."
226
+ "AllGenomes.faa.dmnd' into #{mt}"
220
227
  exit(1)
221
228
  end
222
229
  cli.puts ''
@@ -228,7 +235,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
228
235
  %w(ape cluster vegan).each do |pkg|
229
236
  cli.print "Testing #{pkg}... "
230
237
  if test_r_package(cli, paths, pkg)
231
- cli.puts 'yes.'
238
+ cli.puts 'yes'
232
239
  else
233
240
  cli.puts 'no, installing'
234
241
  cli.print '' + install_r_package(cli, paths, pkg)
@@ -245,7 +252,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
245
252
  %w(sqlite3 daemons json).each do |pkg|
246
253
  cli.print "Testing #{pkg}... "
247
254
  if test_ruby_gem(cli, paths, pkg)
248
- cli.puts 'yes.'
255
+ cli.puts 'yes'
249
256
  else
250
257
  cli.puts 'no, installing'
251
258
  # This hackey mess is meant to ensure the test and installation are done
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
15
15
  '-m', '--mytaxa-scan',
16
16
  'Perform MyTaxa scan analysis'
17
17
  ) { |v| cli[:mytaxa] = v }
18
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
18
+ opts_for_wf(
19
+ opt, 'Input genome assemblies (nucleotides, FastA)',
20
+ qual: false
21
+ )
19
22
  end
20
23
  end
21
24
 
@@ -14,12 +14,16 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
14
14
  ) { |v| cli[:key] = v }
15
15
  opt.on(
16
16
  '--compute-and-save',
17
- 'Compute and saves the statistics'
17
+ 'Compute and save the statistics'
18
18
  ) { |v| cli[:compute] = v }
19
19
  opt.on(
20
20
  '--try-load',
21
21
  'Check if stat exists instead of computing on --compute-and-save'
22
22
  ) { |v| cli[:try_load] = v }
23
+ opt.on(
24
+ '--ignore-empty',
25
+ 'If the result does not exist, exit without throwing exceptions'
26
+ ) { |v| cli[:ignore_result_empty] = v }
23
27
  end
24
28
  end
25
29
 
@@ -27,7 +31,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
27
31
  if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
28
32
  cli[:compute] = false
29
33
  end
30
- r = cli.load_result
34
+ r = cli.load_result or return
31
35
  if cli[:compute]
32
36
  cli.say 'Computing statistics'
33
37
  r.compute_stats
@@ -26,6 +26,10 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
26
26
  '--with-units',
27
27
  'Include units in each cell'
28
28
  ) { |v| cli[:units] = v }
29
+ opt.on(
30
+ '--compute-and-save',
31
+ 'Compute and save the statistics if not yet available'
32
+ ) { |v| cli[:compute] = v }
29
33
  end
30
34
  end
31
35
 
@@ -34,7 +38,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
34
38
  ds = cli.load_and_filter_datasets
35
39
  cli.say 'Loading results'
36
40
  stats = ds.map do |d|
37
- r = d.add_result(cli[:result].to_sym, false)
41
+ r = d.result(cli[:result])
42
+ r.compute_stats if cli[:compute] && !r.nil? && r[:stats].empty?
38
43
  s = r.nil? ? {} : r[:stats]
39
44
  s.tap { |i| i[:dataset] = d.name }
40
45
  end
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
15
15
 
16
16
  def opts_for_wf(opt, files_desc, params = {})
17
17
  {
18
- multi: false, cleanup: true, project_type: false, ncbi: true
18
+ multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
19
19
  }.each { |k, v| params[k] = v if params[k].nil? }
20
20
  opt.on(
21
21
  '-o', '--out_dir PATH',
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
40
40
  'Only download complete genomes, not drafts'
41
41
  ) { |v| cli[:ncbi_draft] = v }
42
42
  end
43
+ if params[:qual]
44
+ opt.on(
45
+ '--min-qual FLOAT', Float,
46
+ 'Minimum genome quality to include in analysis',
47
+ 'By default: 50.0'
48
+ ) { |v| cli[:min_qual] = v }
49
+ end
43
50
  if params[:cleanup]
44
51
  opt.on(
45
52
  '-c', '--clean',
@@ -89,6 +96,10 @@ module MiGA::Cli::Action::Wf
89
96
  end
90
97
 
91
98
  def opts_for_wf_distances(opt)
99
+ opt.on('--sensitive', 'Alias to: --aai-p blast+ --ani-p blast+') do
100
+ cli[:aai_p] = 'blast+'
101
+ cli[:ani_p] = 'blast+'
102
+ end
92
103
  opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
93
104
  cli[:aai_p] = 'diamond'
94
105
  cli[:ani_p] = 'fastani'
@@ -121,7 +132,7 @@ module MiGA::Cli::Action::Wf
121
132
  ]) unless MiGA::Project.exist? cli[:outdir]
122
133
  # Define project metadata
123
134
  p = cli.load_project(:outdir, '-o')
124
- [:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
135
+ %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
125
136
  p_metadata[:type] = cli[:project_type]
126
137
  transfer_metadata(p, p_metadata)
127
138
  # Download datasets
@@ -155,7 +166,7 @@ module MiGA::Cli::Action::Wf
155
166
  '-P', cli[:outdir],
156
167
  '-r', r,
157
168
  '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
158
- '--tab'
169
+ '--tab', '--ref', '--active'
159
170
  ])
160
171
  end
161
172
  end
@@ -80,7 +80,9 @@ module MiGA::Cli::ObjectsHelper
80
80
  raise "Unsupported result for #{klass}: #{self[:result]}"
81
81
  end
82
82
  r = obj.add_result(self[:result], false)
83
- raise "Cannot load result: #{self[:result]}" if r.nil?
83
+ if r.nil? && !self[:ignore_result_empty]
84
+ raise "Cannot load result: #{self[:result]}"
85
+ end
84
86
 
85
87
  @objects[:result] = r
86
88
  end
@@ -90,7 +90,8 @@ module MiGA::Common::Format
90
90
  end
91
91
  fh.close
92
92
 
93
- o = { n: l.size, tot: l.inject(:+), max: l.max }
93
+ o = { n: l.size, tot: l.inject(0, :+), max: l.max }
94
+ return o if o[:tot].zero?
94
95
  o[:avg] = o[:tot].to_f / l.size
95
96
  o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
96
97
  o[:sd] = Math.sqrt o[:var]
@@ -106,7 +107,8 @@ module MiGA::Common::Format
106
107
  break if pos >= thr
107
108
  end
108
109
  o[:med] = o[:n].even? ?
109
- 0.5 * l[o[:n] / 2 - 1, 2].inject(:+) : l[(o[:n] - 1) / 2]
110
+ 0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
111
+ l[(o[:n] - 1) / 2]
110
112
  end
111
113
  o
112
114
  end
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
72
72
  say '-----------------------------------'
73
73
  say 'MiGA:%s launched' % project.name
74
74
  say '-----------------------------------'
75
+ recalculate_status!
75
76
  load_status
76
77
  say 'Configuration options:'
77
78
  say @runopts.to_s
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
99
100
  end
100
101
 
101
102
  def recalculate_status!
103
+ say 'Recalculating status for all datasets'
102
104
  project.each_dataset(&:recalculate_status)
103
105
  end
104
106
 
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
158
160
  end
159
161
 
160
162
  ##
161
- # Traverse datasets, and returns boolean indicating if at any datasets
162
- # are incomplete
163
+ # Traverse datasets, and returns boolean indicating if at any reference
164
+ # datasets are incomplete
163
165
  def check_datasets
164
166
  l_say(2, 'Checking datasets')
165
167
  o = false
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
167
169
  next unless ds.status == :incomplete
168
170
  next if ds.next_preprocessing(false).nil?
169
171
 
170
- o = true
172
+ o = true if ds.ref?
171
173
  queue_job(:d, ds)
172
174
  end
173
175
  o
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
97
97
 
98
98
  ##
99
99
  # Inactivate a dataset. This halts automated processing by the daemon
100
- def inactivate!
100
+ #
101
+ # If given, the +reason+ string is saved as a metadata +:warn+ entry
102
+ def inactivate!(reason = nil)
103
+ metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
101
104
  metadata[:inactive] = true
102
105
  metadata.save
103
106
  pull_hook :on_inactivate
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
107
110
  # Activate a dataset. This removes the +:inactive+ flag
108
111
  def activate!
109
112
  metadata[:inactive] = nil
113
+ metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
110
114
  metadata.save
111
115
  pull_hook :on_activate
112
116
  end
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
35
35
  mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
36
36
  mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
37
37
  # Distances (for single-species datasets)
38
- distances: '09.distances',
39
38
  taxonomy: '09.distances/05.taxonomy',
39
+ distances: '09.distances',
40
40
  # General statistics
41
41
  stats: '90.stats'
42
42
  }
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
52
52
  end
53
53
 
54
54
  ##
55
- # Run +cmd+ in the command-line with {{variables}}: dataset, project, miga,
56
- # object (as defined for the event, if any)
55
+ # Run +cmd+ in the command-line with {{variables}}:
56
+ # dataset, project, project_name, miga, object (if defined for the event)
57
57
  # - +hook_args+: +[cmd]+
58
58
  # - +event_args+: +[object (optional)]+
59
59
  def hook_run_cmd(hook_args, event_args)
60
60
  Process.wait(
61
61
  spawn hook_args.first.miga_variables(
62
- dataset: name, project: project.path, miga: MiGA::MiGA.root_path,
63
- object: event_args.first
62
+ dataset: name, project: project.path, project_name: project.name,
63
+ miga: MiGA::MiGA.root_path, object: event_args.first
64
64
  )
65
65
  )
66
66
  end
@@ -70,7 +70,7 @@ module MiGA::Dataset::Result
70
70
  false
71
71
  elsif add_result(t, save).nil?
72
72
  if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
73
- inactivate!
73
+ inactivate! "Too many errors in step #{t}"
74
74
  false
75
75
  else
76
76
  true
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
26
26
  end
27
27
 
28
28
  ##
29
- # Run +cmd+ in the command-line with {{variables}}: project, miga,
30
- # object (as defined by the event, if any)
29
+ # Run +cmd+ in the command-line with {{variables}}:
30
+ # project, project_name, miga, object (if defined by the event)
31
31
  # - +hook_args+: +[cmd]+
32
32
  # - +event_args+: +[object (optional)]+
33
33
  def hook_run_cmd(hook_args, event_args)
34
34
  Process.wait(
35
35
  spawn hook_args.first.miga_variables(
36
- project: path, miga: MiGA::MiGA.root_path, object: event_args.first
36
+ project: path, project_name: name,
37
+ miga: MiGA::MiGA.root_path, object: event_args.first
37
38
  )
38
39
  )
39
40
  end
@@ -94,11 +94,13 @@ class MiGA::RemoteDataset
94
94
  @timeout_try = 0
95
95
  begin
96
96
  DEBUG 'GET: ' + url
97
- open(url, read_timeout: 600) { |f| doc = f.read }
97
+ URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
98
98
  rescue => e
99
99
  @timeout_try += 1
100
100
  raise e if @timeout_try >= 3
101
101
 
102
+ sleep 5 # <- For: 429 Too Many Requests
103
+ DEBUG "RETRYING after: #{e}"
102
104
  retry
103
105
  end
104
106
  doc
@@ -45,10 +45,6 @@ class MiGA::Result < MiGA::MiGA
45
45
  # Hash with the result metadata
46
46
  attr_reader :data
47
47
 
48
- ##
49
- # Array of MiGA::Result objects nested within the result (if any)
50
- attr_reader :results
51
-
52
48
  ##
53
49
  # Load or create the MiGA::Result described by the JSON file +path+
54
50
  def initialize(path)
@@ -78,9 +74,9 @@ class MiGA::Result < MiGA::MiGA
78
74
  when :json
79
75
  @path
80
76
  when :start
81
- @path.sub(/\.json$/, ".start")
77
+ @path.sub(/\.json$/, '.start')
82
78
  when :done
83
- @path.sub(/\.json$/, ".done")
79
+ @path.sub(/\.json$/, '.done')
84
80
  end
85
81
  end
86
82
 
@@ -134,7 +130,7 @@ class MiGA::Result < MiGA::MiGA
134
130
  ##
135
131
  # Initialize and #save empty result
136
132
  def create
137
- @data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
133
+ @data = { created: Time.now.to_s, stats: {}, files: {} }
138
134
  save
139
135
  end
140
136
 
@@ -156,19 +152,20 @@ class MiGA::Result < MiGA::MiGA
156
152
  def load
157
153
  @data = MiGA::Json.parse(path)
158
154
  @data[:files] ||= {}
159
- @results = (self[:results] || []).map { |rs| MiGA::Result.new rs }
160
155
  end
161
156
 
162
157
  ##
163
158
  # Remove result, including all associated files
164
159
  def remove!
165
- each_file do |file|
166
- f = File.expand_path(file, dir)
167
- FileUtils.rm_rf(f)
168
- end
169
- %w(.start .done).each do |ext|
170
- f = path.sub(/\.json$/, ext)
171
- File.unlink f if File.exist? f
160
+ each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
161
+ unlink
162
+ end
163
+
164
+ # Unlink result by removing the .done and .start timestamps and the
165
+ # .json descriptor, but don't remove any other associated files
166
+ def unlink
167
+ %i(start done).each do |i|
168
+ f = path(i) and File.exists?(f) and File.unlink(f)
172
169
  end
173
170
  File.unlink path
174
171
  end
@@ -182,28 +179,19 @@ class MiGA::Result < MiGA::MiGA
182
179
  # Note that multiple files may have the same symbol (file_sym), since
183
180
  # arrays of files are supported.
184
181
  def each_file(&blk)
182
+ return to_enum(:each_file) unless block_given?
183
+
185
184
  @data[:files] ||= {}
186
185
  self[:files].each do |k, files|
187
186
  files = [files] unless files.kind_of? Array
188
187
  files.each do |file|
189
188
  case blk.arity
190
- when 1
191
- blk.call(file)
192
- when 2
193
- blk.call(k, file)
194
- when 3
195
- blk.call(k, file, File.expand_path(file, dir))
196
- else
197
- raise "Wrong number of arguments: #{blk.arity} for 1..3"
189
+ when 1; blk.call(file)
190
+ when 2; blk.call(k, file)
191
+ when 3; blk.call(k, file, File.expand_path(file, dir))
192
+ else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
198
193
  end
199
194
  end
200
195
  end
201
196
  end
202
-
203
- ##
204
- # Add the MiGA::Result +result+ as part of the current result
205
- def add_result(result)
206
- @data[:results] << result.path
207
- save
208
- end
209
197
  end
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
8
8
  # (Re-)calculate and save the statistics for the result
9
9
  def compute_stats
10
10
  method = :"compute_stats_#{key}"
11
+ MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
11
12
  stats = self.respond_to?(method, true) ? send(method) : nil
12
13
  unless stats.nil?
13
14
  self[:stats] = stats
@@ -109,20 +110,8 @@ module MiGA::Result::Stats
109
110
  end
110
111
  end
111
112
  else
112
- # Fix estimate by domain
113
- if !(tax = source.metadata[:tax]).nil? &&
114
- %w[Archaea Bacteria].include?(tax[:d]) &&
115
- file_path(:raw_report).nil?
116
- scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
117
- rep = file_path(:report)
118
- rc_p = File.expand_path('.miga_rc', ENV['HOME'])
119
- rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
120
- $stderr.print `#{rc} ruby '#{scr}' \
121
- '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
122
- add_file(:raw_report, "#{source.name}.ess/log")
123
- add_file(:report, "#{source.name}.ess/log.domain")
124
- end
125
- # Extract/compute quality values
113
+ # Estimate quality metrics
114
+ fix_essential_genes_by_domain
126
115
  stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
127
116
  File.open(file_path(:report), 'r') do |fh|
128
117
  fh.each_line do |ln|
@@ -131,6 +120,8 @@ module MiGA::Result::Stats
131
120
  end
132
121
  end
133
122
  end
123
+
124
+ # Determine qualitative range
134
125
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
135
126
  source.metadata[:quality] =
136
127
  case stats[:quality]
@@ -140,6 +131,12 @@ module MiGA::Result::Stats
140
131
  else; :low
141
132
  end
142
133
  source.save
134
+
135
+ # Inactivate low-quality datasets
136
+ min_qual = (project.metadata[:min_qual] || 50)
137
+ if min_qual != 'no' && stats[:quality] < min_qual
138
+ source.inactivate! 'Low genome quality'
139
+ end
143
140
  end
144
141
  stats
145
142
  end
@@ -168,12 +165,28 @@ module MiGA::Result::Stats
168
165
  stats[:aai] = [$2.to_f, '%']
169
166
  3.times { fh.gets }
170
167
  fh.each_line do |ln|
171
- row = ln.chomp.gsub(/^\s*/, '').split(/\s+/)
172
- break if row.empty?
168
+ next unless ln.chomp =~ /^\s*(\S+)\s+(.+)\s+([0-9\.e-]+)\s+\**\s*$/
173
169
 
174
- stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
170
+ stats[:"#{$1}_pvalue"] = $3.to_f unless $1 == 'root'
175
171
  end
176
172
  end
177
173
  stats
178
174
  end
175
+
176
+ # Fix estimates based on essential genes based on taxonomy
177
+ def fix_essential_genes_by_domain
178
+ return if (tax = source.metadata[:tax]).nil? ||
179
+ !%w[Archaea Bacteria].include?(tax[:d]) ||
180
+ file_path(:raw_report)
181
+
182
+ MiGA::MiGA.DEBUG "Fixing essential genes by domain"
183
+ scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
184
+ rep = file_path(:report)
185
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
186
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
187
+ $stderr.print `#{rc} ruby '#{scr}' \
188
+ '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
189
+ add_file(:raw_report, "#{source.name}.ess/log")
190
+ add_file(:report, "#{source.name}.ess/log.domain")
191
+ end
179
192
  end
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 5, 0]
11
+ VERSION = [0.7, 10, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 5, 13)
19
+ VERSION_DATE = Date.new(2020, 6, 29)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
9
9
  # Initialize
10
10
  miga date > "$DATASET.start"
11
11
 
12
- # Run
12
+ # Check quality first
13
+ miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
14
+ inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
15
+ [[ "$inactive" == "true" ]] && exit
16
+
17
+ # Run distances
13
18
  ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
14
19
 
15
20
  # Finalize
@@ -11,6 +11,9 @@ cd "$DIR"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
+ # Execute doctor
15
+ miga doctor -P "$PROJECT" -v
16
+
14
17
  # Index taxonomy
15
18
  miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
16
19
 
@@ -14,7 +14,7 @@ miga date > "$DATASET.start"
14
14
  # Calculate statistics
15
15
  for i in raw_reads trimmed_fasta assembly cds essential_genes ssu distances taxonomy ; do
16
16
  echo "# $i"
17
- miga stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
17
+ miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
18
18
  done
19
19
 
20
20
  # Finalize
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
93
93
  0 => /-{20}\n/,
94
94
  1 => /MiGA:#{p.name} launched/,
95
95
  2 => /-{20}\n/,
96
- 5 => /Probing running jobs\n/
96
+ 6 => /Probing running jobs\n/
97
97
  }.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
98
98
  ensure
99
99
  begin
@@ -185,11 +185,18 @@ class DatasetTest < Test::Unit::TestCase
185
185
  d = dataset
186
186
  assert_equal(:incomplete, d.status)
187
187
  assert_predicate(d, :active?)
188
- d.inactivate!
188
+ d.inactivate! 'Too annoying'
189
189
  assert_equal(:inactive, d.status)
190
+ assert_equal('Inactive: Too annoying', d.metadata[:warn])
190
191
  assert_not_predicate(d, :active?)
191
192
  d.activate!
192
193
  assert_equal(:incomplete, d.status)
194
+ assert_nil(d.metadata[:warn])
193
195
  assert_predicate(d, :active?)
194
196
  end
197
+
198
+ def test_preprocessing_tasks
199
+ assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :cds)
200
+ assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :taxonomy)
201
+ end
195
202
  end
@@ -31,13 +31,15 @@ class RemoteDatasetTest < Test::Unit::TestCase
31
31
  assert_equal(MiGA::Taxonomy, tx.class, msg)
32
32
  assert_equal('Lentivirus', tx[:g], msg)
33
33
  assert_equal(
34
- 'ns:ncbi o:Ortervirales f:Retroviridae ' \
35
- 'g:Lentivirus s:Human_immunodeficiency_virus_2',
34
+ 'ns:ncbi k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
35
+ 'o:Ortervirales f:Retroviridae g:Lentivirus ' \
36
+ 's:Human_immunodeficiency_virus_2',
36
37
  tx.to_s, msg
37
38
  )
38
39
  assert_equal(
39
- 'ns:ncbi d: k: p: c: o:Ortervirales f:Retroviridae ' \
40
- 'g:Lentivirus s:Human_immunodeficiency_virus_2 ssp: str: ds:',
40
+ 'ns:ncbi d: k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
41
+ 'o:Ortervirales f:Retroviridae g:Lentivirus ' \
42
+ 's:Human_immunodeficiency_virus_2 ssp: str: ds:',
41
43
  tx.to_s(true), msg
42
44
  )
43
45
  assert_equal('ncbi', tx.namespace, msg)
@@ -99,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
99
101
 
100
102
  def test_ref_type_status
101
103
  declare_remote_access
102
- rd = MiGA::RemoteDataset.new('GCA_002849345', :assembly, :ncbi)
104
+ rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
103
105
  assert { !rd.get_metadata[:is_type] }
104
106
  assert { rd.get_metadata[:is_ref_type] }
105
107
  end
@@ -300,3 +300,16 @@ AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
300
300
  AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
301
301
  >TruSeq3_UniversalAdapter
302
302
  AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
303
+
304
+ >Nextera_PE_PrefixNX/1
305
+ AGATGTGTATAAGAGACAG
306
+ >Nextera_PE_PrefixNX/2
307
+ AGATGTGTATAAGAGACAG
308
+ >Nextera_PE_Trans1
309
+ TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG
310
+ >Nextera_PE_Trans1_rc
311
+ CTGTCTCTTATACACATCTGACGCTGCCGACGA
312
+ >Nextera_PE_Trans2
313
+ GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG
314
+ >Nextera_PE_Trans2_rc
315
+ CTGTCTCTTATACACATCTCCGAGCCCACGAGAC
@@ -29,6 +29,16 @@ module MiGA::DistanceRunner::Pipeline
29
29
  classify(clades, classif, metric, result_fh, val_cls)
30
30
  end
31
31
 
32
+ # Run distances against datasets listed in metadata's +:dist_req+
33
+ def distances_by_request(metric)
34
+ return unless dataset.metadata[:dist_req]
35
+
36
+ $stderr.puts 'Running distances by request'
37
+ dataset.metadata[:dist_req].each do |target|
38
+ ds = ref_project.dataset(target) and send(metric, ds)
39
+ end
40
+ end
41
+
32
42
  # Builds a tree with all visited medoids from any classification level
33
43
  def build_medoids_tree(metric)
34
44
  $stderr.puts "Building medoids tree (metric = #{metric})"
@@ -99,7 +109,7 @@ module MiGA::DistanceRunner::Pipeline
99
109
 
100
110
  # Transfer the taxonomy to the current dataset
101
111
  def transfer_taxonomy(tax)
102
- $stderr.puts "Transferring taxonomy"
112
+ $stderr.puts 'Transferring taxonomy'
103
113
  return if tax.nil?
104
114
 
105
115
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
@@ -67,7 +67,7 @@ class MiGA::DistanceRunner
67
67
 
68
68
  # Launch analysis for reference datasets
69
69
  def go_ref!
70
- $stderr.puts "Launching analysis for reference dataset"
70
+ $stderr.puts 'Launching analysis for reference dataset'
71
71
  # Initialize databases
72
72
  initialize_dbs! true
73
73
 
@@ -80,13 +80,13 @@ class MiGA::DistanceRunner
80
80
  end
81
81
 
82
82
  # Finalize
83
- [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
83
+ %i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
84
84
  end
85
85
 
86
86
  ##
87
87
  # Launch analysis for query datasets
88
88
  def go_query!
89
- $stderr.puts "Launching analysis for query dataset"
89
+ $stderr.puts 'Launching analysis for query dataset'
90
90
  # Check if project is ready
91
91
  tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
92
92
  res = ref_project.result(tsk[0])
@@ -94,6 +94,7 @@ class MiGA::DistanceRunner
94
94
 
95
95
  # Initialize the databases
96
96
  initialize_dbs! false
97
+ distances_by_request(tsk[1])
97
98
  # Calculate the classification-informed AAI/ANI traverse
98
99
  results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
99
100
  fh = File.open(results, 'w')
@@ -111,7 +112,9 @@ class MiGA::DistanceRunner
111
112
  next unless r[1].to_i == val_cls
112
113
 
113
114
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
114
- closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
115
+ unless ani.nil? || ani < closest[:ani]
116
+ closest = { ds: r[0], ani: ani }
117
+ end
115
118
  end
116
119
  end
117
120
  end
@@ -133,7 +136,7 @@ class MiGA::DistanceRunner
133
136
 
134
137
  # Launch analysis for taxonomy jobs
135
138
  def go_taxonomy!
136
- $stderr.puts "Launching taxonomy analysis"
139
+ $stderr.puts 'Launching taxonomy analysis'
137
140
  return unless project.metadata[:ref_project]
138
141
 
139
142
  go_query! # <- yeah, it's actually the same, just different ref_project
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5.0
4
+ version: 0.7.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-13 00:00:00.000000000 Z
11
+ date: 2020-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.4'
47
+ version: '1.3'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.4'
54
+ version: '1.3'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -529,7 +529,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
529
529
  licenses:
530
530
  - Artistic-2.0
531
531
  metadata: {}
532
- post_install_message:
532
+ post_install_message:
533
533
  rdoc_options:
534
534
  - lib
535
535
  - README.md
@@ -550,8 +550,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
550
550
  - !ruby/object:Gem::Version
551
551
  version: '0'
552
552
  requirements: []
553
- rubygems_version: 3.0.3
554
- signing_key:
553
+ rubygems_version: 3.1.2
554
+ signing_key:
555
555
  specification_version: 4
556
556
  summary: MiGA
557
557
  test_files: []