miga-base 0.7.5.0 → 0.7.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6361b203b4612214936255e8b285959cbf556e7e64f88119a058e167774264f9
4
- data.tar.gz: 1a3d8df11d57a363a49eecc88011a5337ddb40573cf8727942eea24e5071ecf5
3
+ metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
4
+ data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
5
5
  SHA512:
6
- metadata.gz: 53786e1830ed8b3c56bffbf1fc581745185084e96b68631323b01fd9994c2b59cdeac582c2f4cef8d0ed622136a2c2cf4f68e06cc3a9e7dbf580d10d1aedc9e5
7
- data.tar.gz: 26322daf6a52906466c4f28cc93ef455bac22e98e115fc6f6b3ad685998d58854454c8044126491e5a886910ce5ddceece2713e94acafaf86459442970617f4f
6
+ metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
7
+ data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
data/README.md CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
41
41
  collaboration between [Kostas Lab][kostas] at the Georgia Institute of
42
42
  Technology and [RDP][rdp] at Michigan State University.
43
43
 
44
+ See also the [complete list of contributors](manual/part1/contributors.md).
44
45
 
45
46
  # License
46
47
 
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
42
42
  '--no-summaries',
43
43
  'Do not generate intermediate step summaries'
44
44
  ) { |v| cli[:summaries] = v }
45
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
45
+ opts_for_wf(
46
+ opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
47
+ )
46
48
  end
47
49
  end
48
50
 
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
17
17
  'Activate dataset; requires -D'
18
18
  ) { |v| cli[:activate] = v }
19
19
  opt.on(
20
- '--inactivate',
21
- 'Inactivate dataset; requires -D'
22
- ) { |v| cli[:activate] = !v }
20
+ '--inactivate [reason]',
21
+ 'Inactivate dataset; requires -D',
22
+ 'The argument is optional: reason to inactivate dataset'
23
+ ) { |v| cli[:activate] = false ; cli[:reason] = v }
23
24
  end
24
25
  end
25
26
 
26
27
  def perform
27
28
  obj = cli.load_project_or_dataset
28
29
  unless cli[:activate].nil?
29
- cli.ensure_par({ dataset: '-D' },
30
- '%<name>s is mandatory with --[in-]activate: please provide %<flag>s')
31
- cli[:activate] ? obj.activate! : obj.inactivate!
30
+ cli.ensure_par(
31
+ { dataset: '-D' },
32
+ '%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
33
+ )
34
+ cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
32
35
  end
33
36
  cli.add_metadata(obj)
34
37
  obj.save
@@ -66,8 +66,10 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
66
66
  end
67
67
 
68
68
  def run_r_cmd(cli, paths, cmd)
69
- run_cmd(cli,
70
- "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
69
+ run_cmd(
70
+ cli,
71
+ "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1"
72
+ )
71
73
  end
72
74
 
73
75
  def test_r_package(cli, paths, pkg)
@@ -81,16 +83,21 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
81
83
  end
82
84
 
83
85
  def test_ruby_gem(cli, paths, pkg)
84
- run_cmd(cli,
85
- "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
86
+ run_cmd(
87
+ cli,
88
+ "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
89
+ )
86
90
  $?.success?
87
91
  end
88
92
 
89
93
  def install_ruby_gem(cli, paths, pkg)
90
94
  gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
91
- run_cmd(cli, "#{paths['ruby'].shellescape} \
95
+ run_cmd(
96
+ cli,
97
+ "#{paths['ruby'].shellescape} \
92
98
  -r rubygems -r rubygems/gem_runner \
93
- -e #{gem_cmd.shellescape} 2>&1")
99
+ -e #{gem_cmd.shellescape} 2>&1"
100
+ )
94
101
  end
95
102
 
96
103
  def list_requirements
@@ -99,7 +106,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
99
106
  'no', %w(yes no)
100
107
  ) == 'yes'
101
108
  cli.puts ''
102
- req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
109
+ req_path = File.join(MiGA.root_path, 'utils', 'requirements.txt')
103
110
  File.open(req_path, 'r') do |fh|
104
111
  fh.each_line { |ln| cli.puts ln }
105
112
  end
@@ -205,18 +212,18 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
205
212
  def check_additional_files(paths)
206
213
  if cli[:mytaxa]
207
214
  cli.puts 'Looking for MyTaxa databases:'
208
- mt = File.dirname paths["MyTaxa"]
215
+ mt = File.dirname paths['MyTaxa']
209
216
  cli.print 'Looking for scores... '
210
217
  unless Dir.exist?(File.expand_path('db', mt))
211
- cli.puts "no.\nExecute 'python2 #{mt}/utils/download_db.py'."
218
+ cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
212
219
  exit(1)
213
220
  end
214
- cli.puts 'yes.'
221
+ cli.puts 'yes'
215
222
  cli.print 'Looking for diamond db... '
216
223
  unless File.exist?(File.expand_path('AllGenomes.faa.dmnd', mt))
217
- cli.puts "no.\nDownload " \
224
+ cli.puts "no\nDownload " \
218
225
  "'http://enve-omics.ce.gatech.edu/data/public_mytaxa/" \
219
- "AllGenomes.faa.dmnd' into #{mt}."
226
+ "AllGenomes.faa.dmnd' into #{mt}"
220
227
  exit(1)
221
228
  end
222
229
  cli.puts ''
@@ -228,7 +235,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
228
235
  %w(ape cluster vegan).each do |pkg|
229
236
  cli.print "Testing #{pkg}... "
230
237
  if test_r_package(cli, paths, pkg)
231
- cli.puts 'yes.'
238
+ cli.puts 'yes'
232
239
  else
233
240
  cli.puts 'no, installing'
234
241
  cli.print '' + install_r_package(cli, paths, pkg)
@@ -245,7 +252,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
245
252
  %w(sqlite3 daemons json).each do |pkg|
246
253
  cli.print "Testing #{pkg}... "
247
254
  if test_ruby_gem(cli, paths, pkg)
248
- cli.puts 'yes.'
255
+ cli.puts 'yes'
249
256
  else
250
257
  cli.puts 'no, installing'
251
258
  # This hackey mess is meant to ensure the test and installation are done
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
15
15
  '-m', '--mytaxa-scan',
16
16
  'Perform MyTaxa scan analysis'
17
17
  ) { |v| cli[:mytaxa] = v }
18
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
18
+ opts_for_wf(
19
+ opt, 'Input genome assemblies (nucleotides, FastA)',
20
+ qual: false
21
+ )
19
22
  end
20
23
  end
21
24
 
@@ -14,12 +14,16 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
14
14
  ) { |v| cli[:key] = v }
15
15
  opt.on(
16
16
  '--compute-and-save',
17
- 'Compute and saves the statistics'
17
+ 'Compute and save the statistics'
18
18
  ) { |v| cli[:compute] = v }
19
19
  opt.on(
20
20
  '--try-load',
21
21
  'Check if stat exists instead of computing on --compute-and-save'
22
22
  ) { |v| cli[:try_load] = v }
23
+ opt.on(
24
+ '--ignore-empty',
25
+ 'If the result does not exist, exit without throwing exceptions'
26
+ ) { |v| cli[:ignore_result_empty] = v }
23
27
  end
24
28
  end
25
29
 
@@ -27,7 +31,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
27
31
  if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
28
32
  cli[:compute] = false
29
33
  end
30
- r = cli.load_result
34
+ r = cli.load_result or return
31
35
  if cli[:compute]
32
36
  cli.say 'Computing statistics'
33
37
  r.compute_stats
@@ -26,6 +26,10 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
26
26
  '--with-units',
27
27
  'Include units in each cell'
28
28
  ) { |v| cli[:units] = v }
29
+ opt.on(
30
+ '--compute-and-save',
31
+ 'Compute and save the statistics if not yet available'
32
+ ) { |v| cli[:compute] = v }
29
33
  end
30
34
  end
31
35
 
@@ -34,7 +38,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
34
38
  ds = cli.load_and_filter_datasets
35
39
  cli.say 'Loading results'
36
40
  stats = ds.map do |d|
37
- r = d.add_result(cli[:result].to_sym, false)
41
+ r = d.result(cli[:result])
42
+ r.compute_stats if cli[:compute] && !r.nil? && r[:stats].empty?
38
43
  s = r.nil? ? {} : r[:stats]
39
44
  s.tap { |i| i[:dataset] = d.name }
40
45
  end
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
15
15
 
16
16
  def opts_for_wf(opt, files_desc, params = {})
17
17
  {
18
- multi: false, cleanup: true, project_type: false, ncbi: true
18
+ multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
19
19
  }.each { |k, v| params[k] = v if params[k].nil? }
20
20
  opt.on(
21
21
  '-o', '--out_dir PATH',
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
40
40
  'Only download complete genomes, not drafts'
41
41
  ) { |v| cli[:ncbi_draft] = v }
42
42
  end
43
+ if params[:qual]
44
+ opt.on(
45
+ '--min-qual FLOAT', Float,
46
+ 'Minimum genome quality to include in analysis',
47
+ 'By default: 50.0'
48
+ ) { |v| cli[:min_qual] = v }
49
+ end
43
50
  if params[:cleanup]
44
51
  opt.on(
45
52
  '-c', '--clean',
@@ -89,6 +96,10 @@ module MiGA::Cli::Action::Wf
89
96
  end
90
97
 
91
98
  def opts_for_wf_distances(opt)
99
+ opt.on('--sensitive', 'Alias to: --aai-p blast+ --ani-p blast+') do
100
+ cli[:aai_p] = 'blast+'
101
+ cli[:ani_p] = 'blast+'
102
+ end
92
103
  opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
93
104
  cli[:aai_p] = 'diamond'
94
105
  cli[:ani_p] = 'fastani'
@@ -121,7 +132,7 @@ module MiGA::Cli::Action::Wf
121
132
  ]) unless MiGA::Project.exist? cli[:outdir]
122
133
  # Define project metadata
123
134
  p = cli.load_project(:outdir, '-o')
124
- [:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
135
+ %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
125
136
  p_metadata[:type] = cli[:project_type]
126
137
  transfer_metadata(p, p_metadata)
127
138
  # Download datasets
@@ -155,7 +166,7 @@ module MiGA::Cli::Action::Wf
155
166
  '-P', cli[:outdir],
156
167
  '-r', r,
157
168
  '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
158
- '--tab'
169
+ '--tab', '--ref', '--active'
159
170
  ])
160
171
  end
161
172
  end
@@ -80,7 +80,9 @@ module MiGA::Cli::ObjectsHelper
80
80
  raise "Unsupported result for #{klass}: #{self[:result]}"
81
81
  end
82
82
  r = obj.add_result(self[:result], false)
83
- raise "Cannot load result: #{self[:result]}" if r.nil?
83
+ if r.nil? && !self[:ignore_result_empty]
84
+ raise "Cannot load result: #{self[:result]}"
85
+ end
84
86
 
85
87
  @objects[:result] = r
86
88
  end
@@ -90,7 +90,8 @@ module MiGA::Common::Format
90
90
  end
91
91
  fh.close
92
92
 
93
- o = { n: l.size, tot: l.inject(:+), max: l.max }
93
+ o = { n: l.size, tot: l.inject(0, :+), max: l.max }
94
+ return o if o[:tot].zero?
94
95
  o[:avg] = o[:tot].to_f / l.size
95
96
  o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
96
97
  o[:sd] = Math.sqrt o[:var]
@@ -106,7 +107,8 @@ module MiGA::Common::Format
106
107
  break if pos >= thr
107
108
  end
108
109
  o[:med] = o[:n].even? ?
109
- 0.5 * l[o[:n] / 2 - 1, 2].inject(:+) : l[(o[:n] - 1) / 2]
110
+ 0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
111
+ l[(o[:n] - 1) / 2]
110
112
  end
111
113
  o
112
114
  end
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
72
72
  say '-----------------------------------'
73
73
  say 'MiGA:%s launched' % project.name
74
74
  say '-----------------------------------'
75
+ recalculate_status!
75
76
  load_status
76
77
  say 'Configuration options:'
77
78
  say @runopts.to_s
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
99
100
  end
100
101
 
101
102
  def recalculate_status!
103
+ say 'Recalculating status for all datasets'
102
104
  project.each_dataset(&:recalculate_status)
103
105
  end
104
106
 
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
158
160
  end
159
161
 
160
162
  ##
161
- # Traverse datasets, and returns boolean indicating if at any datasets
162
- # are incomplete
163
+ # Traverse datasets, and returns boolean indicating if at any reference
164
+ # datasets are incomplete
163
165
  def check_datasets
164
166
  l_say(2, 'Checking datasets')
165
167
  o = false
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
167
169
  next unless ds.status == :incomplete
168
170
  next if ds.next_preprocessing(false).nil?
169
171
 
170
- o = true
172
+ o = true if ds.ref?
171
173
  queue_job(:d, ds)
172
174
  end
173
175
  o
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
97
97
 
98
98
  ##
99
99
  # Inactivate a dataset. This halts automated processing by the daemon
100
- def inactivate!
100
+ #
101
+ # If given, the +reason+ string is saved as a metadata +:warn+ entry
102
+ def inactivate!(reason = nil)
103
+ metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
101
104
  metadata[:inactive] = true
102
105
  metadata.save
103
106
  pull_hook :on_inactivate
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
107
110
  # Activate a dataset. This removes the +:inactive+ flag
108
111
  def activate!
109
112
  metadata[:inactive] = nil
113
+ metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
110
114
  metadata.save
111
115
  pull_hook :on_activate
112
116
  end
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
35
35
  mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
36
36
  mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
37
37
  # Distances (for single-species datasets)
38
- distances: '09.distances',
39
38
  taxonomy: '09.distances/05.taxonomy',
39
+ distances: '09.distances',
40
40
  # General statistics
41
41
  stats: '90.stats'
42
42
  }
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
52
52
  end
53
53
 
54
54
  ##
55
- # Run +cmd+ in the command-line with {{variables}}: dataset, project, miga,
56
- # object (as defined for the event, if any)
55
+ # Run +cmd+ in the command-line with {{variables}}:
56
+ # dataset, project, project_name, miga, object (if defined for the event)
57
57
  # - +hook_args+: +[cmd]+
58
58
  # - +event_args+: +[object (optional)]+
59
59
  def hook_run_cmd(hook_args, event_args)
60
60
  Process.wait(
61
61
  spawn hook_args.first.miga_variables(
62
- dataset: name, project: project.path, miga: MiGA::MiGA.root_path,
63
- object: event_args.first
62
+ dataset: name, project: project.path, project_name: project.name,
63
+ miga: MiGA::MiGA.root_path, object: event_args.first
64
64
  )
65
65
  )
66
66
  end
@@ -70,7 +70,7 @@ module MiGA::Dataset::Result
70
70
  false
71
71
  elsif add_result(t, save).nil?
72
72
  if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
73
- inactivate!
73
+ inactivate! "Too many errors in step #{t}"
74
74
  false
75
75
  else
76
76
  true
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
26
26
  end
27
27
 
28
28
  ##
29
- # Run +cmd+ in the command-line with {{variables}}: project, miga,
30
- # object (as defined by the event, if any)
29
+ # Run +cmd+ in the command-line with {{variables}}:
30
+ # project, project_name, miga, object (if defined by the event)
31
31
  # - +hook_args+: +[cmd]+
32
32
  # - +event_args+: +[object (optional)]+
33
33
  def hook_run_cmd(hook_args, event_args)
34
34
  Process.wait(
35
35
  spawn hook_args.first.miga_variables(
36
- project: path, miga: MiGA::MiGA.root_path, object: event_args.first
36
+ project: path, project_name: name,
37
+ miga: MiGA::MiGA.root_path, object: event_args.first
37
38
  )
38
39
  )
39
40
  end
@@ -94,11 +94,13 @@ class MiGA::RemoteDataset
94
94
  @timeout_try = 0
95
95
  begin
96
96
  DEBUG 'GET: ' + url
97
- open(url, read_timeout: 600) { |f| doc = f.read }
97
+ URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
98
98
  rescue => e
99
99
  @timeout_try += 1
100
100
  raise e if @timeout_try >= 3
101
101
 
102
+ sleep 5 # <- For: 429 Too Many Requests
103
+ DEBUG "RETRYING after: #{e}"
102
104
  retry
103
105
  end
104
106
  doc
@@ -45,10 +45,6 @@ class MiGA::Result < MiGA::MiGA
45
45
  # Hash with the result metadata
46
46
  attr_reader :data
47
47
 
48
- ##
49
- # Array of MiGA::Result objects nested within the result (if any)
50
- attr_reader :results
51
-
52
48
  ##
53
49
  # Load or create the MiGA::Result described by the JSON file +path+
54
50
  def initialize(path)
@@ -78,9 +74,9 @@ class MiGA::Result < MiGA::MiGA
78
74
  when :json
79
75
  @path
80
76
  when :start
81
- @path.sub(/\.json$/, ".start")
77
+ @path.sub(/\.json$/, '.start')
82
78
  when :done
83
- @path.sub(/\.json$/, ".done")
79
+ @path.sub(/\.json$/, '.done')
84
80
  end
85
81
  end
86
82
 
@@ -134,7 +130,7 @@ class MiGA::Result < MiGA::MiGA
134
130
  ##
135
131
  # Initialize and #save empty result
136
132
  def create
137
- @data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
133
+ @data = { created: Time.now.to_s, stats: {}, files: {} }
138
134
  save
139
135
  end
140
136
 
@@ -156,19 +152,20 @@ class MiGA::Result < MiGA::MiGA
156
152
  def load
157
153
  @data = MiGA::Json.parse(path)
158
154
  @data[:files] ||= {}
159
- @results = (self[:results] || []).map { |rs| MiGA::Result.new rs }
160
155
  end
161
156
 
162
157
  ##
163
158
  # Remove result, including all associated files
164
159
  def remove!
165
- each_file do |file|
166
- f = File.expand_path(file, dir)
167
- FileUtils.rm_rf(f)
168
- end
169
- %w(.start .done).each do |ext|
170
- f = path.sub(/\.json$/, ext)
171
- File.unlink f if File.exist? f
160
+ each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
161
+ unlink
162
+ end
163
+
164
+ # Unlink result by removing the .done and .start timestamps and the
165
+ # .json descriptor, but don't remove any other associated files
166
+ def unlink
167
+ %i(start done).each do |i|
168
+ f = path(i) and File.exists?(f) and File.unlink(f)
172
169
  end
173
170
  File.unlink path
174
171
  end
@@ -182,28 +179,19 @@ class MiGA::Result < MiGA::MiGA
182
179
  # Note that multiple files may have the same symbol (file_sym), since
183
180
  # arrays of files are supported.
184
181
  def each_file(&blk)
182
+ return to_enum(:each_file) unless block_given?
183
+
185
184
  @data[:files] ||= {}
186
185
  self[:files].each do |k, files|
187
186
  files = [files] unless files.kind_of? Array
188
187
  files.each do |file|
189
188
  case blk.arity
190
- when 1
191
- blk.call(file)
192
- when 2
193
- blk.call(k, file)
194
- when 3
195
- blk.call(k, file, File.expand_path(file, dir))
196
- else
197
- raise "Wrong number of arguments: #{blk.arity} for 1..3"
189
+ when 1; blk.call(file)
190
+ when 2; blk.call(k, file)
191
+ when 3; blk.call(k, file, File.expand_path(file, dir))
192
+ else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
198
193
  end
199
194
  end
200
195
  end
201
196
  end
202
-
203
- ##
204
- # Add the MiGA::Result +result+ as part of the current result
205
- def add_result(result)
206
- @data[:results] << result.path
207
- save
208
- end
209
197
  end
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
8
8
  # (Re-)calculate and save the statistics for the result
9
9
  def compute_stats
10
10
  method = :"compute_stats_#{key}"
11
+ MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
11
12
  stats = self.respond_to?(method, true) ? send(method) : nil
12
13
  unless stats.nil?
13
14
  self[:stats] = stats
@@ -109,20 +110,8 @@ module MiGA::Result::Stats
109
110
  end
110
111
  end
111
112
  else
112
- # Fix estimate by domain
113
- if !(tax = source.metadata[:tax]).nil? &&
114
- %w[Archaea Bacteria].include?(tax[:d]) &&
115
- file_path(:raw_report).nil?
116
- scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
117
- rep = file_path(:report)
118
- rc_p = File.expand_path('.miga_rc', ENV['HOME'])
119
- rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
120
- $stderr.print `#{rc} ruby '#{scr}' \
121
- '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
122
- add_file(:raw_report, "#{source.name}.ess/log")
123
- add_file(:report, "#{source.name}.ess/log.domain")
124
- end
125
- # Extract/compute quality values
113
+ # Estimate quality metrics
114
+ fix_essential_genes_by_domain
126
115
  stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
127
116
  File.open(file_path(:report), 'r') do |fh|
128
117
  fh.each_line do |ln|
@@ -131,6 +120,8 @@ module MiGA::Result::Stats
131
120
  end
132
121
  end
133
122
  end
123
+
124
+ # Determine qualitative range
134
125
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
135
126
  source.metadata[:quality] =
136
127
  case stats[:quality]
@@ -140,6 +131,12 @@ module MiGA::Result::Stats
140
131
  else; :low
141
132
  end
142
133
  source.save
134
+
135
+ # Inactivate low-quality datasets
136
+ min_qual = (project.metadata[:min_qual] || 50)
137
+ if min_qual != 'no' && stats[:quality] < min_qual
138
+ source.inactivate! 'Low genome quality'
139
+ end
143
140
  end
144
141
  stats
145
142
  end
@@ -168,12 +165,28 @@ module MiGA::Result::Stats
168
165
  stats[:aai] = [$2.to_f, '%']
169
166
  3.times { fh.gets }
170
167
  fh.each_line do |ln|
171
- row = ln.chomp.gsub(/^\s*/, '').split(/\s+/)
172
- break if row.empty?
168
+ next unless ln.chomp =~ /^\s*(\S+)\s+(.+)\s+([0-9\.e-]+)\s+\**\s*$/
173
169
 
174
- stats[:"#{row[0]}_pvalue"] = row[2].to_f unless row[0] == 'root'
170
+ stats[:"#{$1}_pvalue"] = $3.to_f unless $1 == 'root'
175
171
  end
176
172
  end
177
173
  stats
178
174
  end
175
+
176
+ # Fix estimates based on essential genes based on taxonomy
177
+ def fix_essential_genes_by_domain
178
+ return if (tax = source.metadata[:tax]).nil? ||
179
+ !%w[Archaea Bacteria].include?(tax[:d]) ||
180
+ file_path(:raw_report)
181
+
182
+ MiGA::MiGA.DEBUG "Fixing essential genes by domain"
183
+ scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
184
+ rep = file_path(:report)
185
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
186
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
187
+ $stderr.print `#{rc} ruby '#{scr}' \
188
+ '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
189
+ add_file(:raw_report, "#{source.name}.ess/log")
190
+ add_file(:report, "#{source.name}.ess/log.domain")
191
+ end
179
192
  end
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 5, 0]
11
+ VERSION = [0.7, 10, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 5, 13)
19
+ VERSION_DATE = Date.new(2020, 6, 29)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
9
9
  # Initialize
10
10
  miga date > "$DATASET.start"
11
11
 
12
- # Run
12
+ # Check quality first
13
+ miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
14
+ inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
15
+ [[ "$inactive" == "true" ]] && exit
16
+
17
+ # Run distances
13
18
  ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
14
19
 
15
20
  # Finalize
@@ -11,6 +11,9 @@ cd "$DIR"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
+ # Execute doctor
15
+ miga doctor -P "$PROJECT" -v
16
+
14
17
  # Index taxonomy
15
18
  miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
16
19
 
@@ -14,7 +14,7 @@ miga date > "$DATASET.start"
14
14
  # Calculate statistics
15
15
  for i in raw_reads trimmed_fasta assembly cds essential_genes ssu distances taxonomy ; do
16
16
  echo "# $i"
17
- miga stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
17
+ miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
18
18
  done
19
19
 
20
20
  # Finalize
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
93
93
  0 => /-{20}\n/,
94
94
  1 => /MiGA:#{p.name} launched/,
95
95
  2 => /-{20}\n/,
96
- 5 => /Probing running jobs\n/
96
+ 6 => /Probing running jobs\n/
97
97
  }.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
98
98
  ensure
99
99
  begin
@@ -185,11 +185,18 @@ class DatasetTest < Test::Unit::TestCase
185
185
  d = dataset
186
186
  assert_equal(:incomplete, d.status)
187
187
  assert_predicate(d, :active?)
188
- d.inactivate!
188
+ d.inactivate! 'Too annoying'
189
189
  assert_equal(:inactive, d.status)
190
+ assert_equal('Inactive: Too annoying', d.metadata[:warn])
190
191
  assert_not_predicate(d, :active?)
191
192
  d.activate!
192
193
  assert_equal(:incomplete, d.status)
194
+ assert_nil(d.metadata[:warn])
193
195
  assert_predicate(d, :active?)
194
196
  end
197
+
198
+ def test_preprocessing_tasks
199
+ assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :cds)
200
+ assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :taxonomy)
201
+ end
195
202
  end
@@ -31,13 +31,15 @@ class RemoteDatasetTest < Test::Unit::TestCase
31
31
  assert_equal(MiGA::Taxonomy, tx.class, msg)
32
32
  assert_equal('Lentivirus', tx[:g], msg)
33
33
  assert_equal(
34
- 'ns:ncbi o:Ortervirales f:Retroviridae ' \
35
- 'g:Lentivirus s:Human_immunodeficiency_virus_2',
34
+ 'ns:ncbi k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
35
+ 'o:Ortervirales f:Retroviridae g:Lentivirus ' \
36
+ 's:Human_immunodeficiency_virus_2',
36
37
  tx.to_s, msg
37
38
  )
38
39
  assert_equal(
39
- 'ns:ncbi d: k: p: c: o:Ortervirales f:Retroviridae ' \
40
- 'g:Lentivirus s:Human_immunodeficiency_virus_2 ssp: str: ds:',
40
+ 'ns:ncbi d: k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
41
+ 'o:Ortervirales f:Retroviridae g:Lentivirus ' \
42
+ 's:Human_immunodeficiency_virus_2 ssp: str: ds:',
41
43
  tx.to_s(true), msg
42
44
  )
43
45
  assert_equal('ncbi', tx.namespace, msg)
@@ -99,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
99
101
 
100
102
  def test_ref_type_status
101
103
  declare_remote_access
102
- rd = MiGA::RemoteDataset.new('GCA_002849345', :assembly, :ncbi)
104
+ rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
103
105
  assert { !rd.get_metadata[:is_type] }
104
106
  assert { rd.get_metadata[:is_ref_type] }
105
107
  end
@@ -300,3 +300,16 @@ AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
300
300
  AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
301
301
  >TruSeq3_UniversalAdapter
302
302
  AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
303
+
304
+ >Nextera_PE_PrefixNX/1
305
+ AGATGTGTATAAGAGACAG
306
+ >Nextera_PE_PrefixNX/2
307
+ AGATGTGTATAAGAGACAG
308
+ >Nextera_PE_Trans1
309
+ TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG
310
+ >Nextera_PE_Trans1_rc
311
+ CTGTCTCTTATACACATCTGACGCTGCCGACGA
312
+ >Nextera_PE_Trans2
313
+ GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG
314
+ >Nextera_PE_Trans2_rc
315
+ CTGTCTCTTATACACATCTCCGAGCCCACGAGAC
@@ -29,6 +29,16 @@ module MiGA::DistanceRunner::Pipeline
29
29
  classify(clades, classif, metric, result_fh, val_cls)
30
30
  end
31
31
 
32
+ # Run distances against datasets listed in metadata's +:dist_req+
33
+ def distances_by_request(metric)
34
+ return unless dataset.metadata[:dist_req]
35
+
36
+ $stderr.puts 'Running distances by request'
37
+ dataset.metadata[:dist_req].each do |target|
38
+ ds = ref_project.dataset(target) and send(metric, ds)
39
+ end
40
+ end
41
+
32
42
  # Builds a tree with all visited medoids from any classification level
33
43
  def build_medoids_tree(metric)
34
44
  $stderr.puts "Building medoids tree (metric = #{metric})"
@@ -99,7 +109,7 @@ module MiGA::DistanceRunner::Pipeline
99
109
 
100
110
  # Transfer the taxonomy to the current dataset
101
111
  def transfer_taxonomy(tax)
102
- $stderr.puts "Transferring taxonomy"
112
+ $stderr.puts 'Transferring taxonomy'
103
113
  return if tax.nil?
104
114
 
105
115
  pval = (project.metadata[:tax_pvalue] || 0.05).to_f
@@ -67,7 +67,7 @@ class MiGA::DistanceRunner
67
67
 
68
68
  # Launch analysis for reference datasets
69
69
  def go_ref!
70
- $stderr.puts "Launching analysis for reference dataset"
70
+ $stderr.puts 'Launching analysis for reference dataset'
71
71
  # Initialize databases
72
72
  initialize_dbs! true
73
73
 
@@ -80,13 +80,13 @@ class MiGA::DistanceRunner
80
80
  end
81
81
 
82
82
  # Finalize
83
- [:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
83
+ %i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
84
84
  end
85
85
 
86
86
  ##
87
87
  # Launch analysis for query datasets
88
88
  def go_query!
89
- $stderr.puts "Launching analysis for query dataset"
89
+ $stderr.puts 'Launching analysis for query dataset'
90
90
  # Check if project is ready
91
91
  tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
92
92
  res = ref_project.result(tsk[0])
@@ -94,6 +94,7 @@ class MiGA::DistanceRunner
94
94
 
95
95
  # Initialize the databases
96
96
  initialize_dbs! false
97
+ distances_by_request(tsk[1])
97
98
  # Calculate the classification-informed AAI/ANI traverse
98
99
  results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
99
100
  fh = File.open(results, 'w')
@@ -111,7 +112,9 @@ class MiGA::DistanceRunner
111
112
  next unless r[1].to_i == val_cls
112
113
 
113
114
  ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
114
- closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
115
+ unless ani.nil? || ani < closest[:ani]
116
+ closest = { ds: r[0], ani: ani }
117
+ end
115
118
  end
116
119
  end
117
120
  end
@@ -133,7 +136,7 @@ class MiGA::DistanceRunner
133
136
 
134
137
  # Launch analysis for taxonomy jobs
135
138
  def go_taxonomy!
136
- $stderr.puts "Launching taxonomy analysis"
139
+ $stderr.puts 'Launching taxonomy analysis'
137
140
  return unless project.metadata[:ref_project]
138
141
 
139
142
  go_query! # <- yeah, it's actually the same, just different ref_project
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5.0
4
+ version: 0.7.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-13 00:00:00.000000000 Z
11
+ date: 2020-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.4'
47
+ version: '1.3'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.4'
54
+ version: '1.3'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -529,7 +529,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
529
529
  licenses:
530
530
  - Artistic-2.0
531
531
  metadata: {}
532
- post_install_message:
532
+ post_install_message:
533
533
  rdoc_options:
534
534
  - lib
535
535
  - README.md
@@ -550,8 +550,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
550
550
  - !ruby/object:Gem::Version
551
551
  version: '0'
552
552
  requirements: []
553
- rubygems_version: 3.0.3
554
- signing_key:
553
+ rubygems_version: 3.1.2
554
+ signing_key:
555
555
  specification_version: 4
556
556
  summary: MiGA
557
557
  test_files: []