miga-base 0.7.21.0 → 0.7.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +1 -2
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/init.rb +41 -93
  14. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  15. data/lib/miga/cli/action/init/files_helper.rb +118 -0
  16. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  17. data/lib/miga/cli/action/new.rb +15 -9
  18. data/lib/miga/cli/action/option.rb +44 -0
  19. data/lib/miga/cli/action/quality_wf.rb +3 -3
  20. data/lib/miga/cli/action/tax_dist.rb +1 -1
  21. data/lib/miga/cli/action/tax_test.rb +1 -1
  22. data/lib/miga/cli/action/wf.rb +32 -30
  23. data/lib/miga/cli/base.rb +1 -0
  24. data/lib/miga/cli/objects_helper.rb +23 -18
  25. data/lib/miga/common.rb +4 -2
  26. data/lib/miga/common/net.rb +74 -0
  27. data/lib/miga/common/with_option.rb +83 -0
  28. data/lib/miga/common/with_result.rb +3 -2
  29. data/lib/miga/dataset/base.rb +20 -2
  30. data/lib/miga/dataset/result.rb +3 -2
  31. data/lib/miga/metadata.rb +25 -13
  32. data/lib/miga/project/base.rb +82 -2
  33. data/lib/miga/project/result.rb +4 -4
  34. data/lib/miga/remote_dataset.rb +2 -0
  35. data/lib/miga/result/stats.rb +2 -2
  36. data/lib/miga/version.rb +4 -2
  37. data/scripts/aai_distances.bash +1 -1
  38. data/scripts/ani_distances.bash +1 -1
  39. data/scripts/essential_genes.bash +1 -2
  40. data/scripts/haai_distances.bash +1 -1
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/net_test.rb +34 -0
  47. data/test/with_option_test.rb +115 -0
  48. data/utils/cleanup-databases.rb +2 -3
  49. data/utils/distance/commands.rb +2 -2
  50. data/utils/distance/database.rb +1 -1
  51. data/utils/distance/pipeline.rb +2 -4
  52. data/utils/distance/runner.rb +15 -23
  53. data/utils/index_metadata.rb +1 -2
  54. data/utils/requirements.txt +6 -5
  55. data/utils/subclade/runner.rb +10 -11
  56. metadata +9 -3
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Helper module with files configuration functions for MiGA::Cli::Action::Init
5
module MiGA::Cli::Action::Init::FilesHelper
  ##
  # Opens +.miga_rc+ (resolved against +ENV['HOME']+) for writing and emits
  # the bash header. If the file already exists, the user is asked whether to
  # continue; answering 'no' terminates the process with status 0.
  #
  # Returns the open, writable file handle. The caller is responsible for
  # closing it (see #close_rc_file).
  def open_rc_file
    rc_path = File.expand_path('.miga_rc', ENV['HOME'])
    if File.exist?(rc_path)
      answer = cli.ask_user(
        'I found a previous configuration. Do you want to continue?',
        'yes', %w(yes no)
      )
      if answer == 'no'
        cli.puts 'OK, see you soon!'
        exit(0)
      end
    end
    rc_fh = File.open(rc_path, 'w')
    rc_fh.puts <<~BASH
      #!/bin/bash
      # `miga init` made this on #{Time.now}

    BASH
    rc_fh
  end

  ##
  # Writes the version/date footer to +rc_fh+ (the handle returned by
  # #open_rc_file) and closes it.
  def close_rc_file(rc_fh)
    rc_fh.puts <<~FOOT

      MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
      MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
      MIGA_CONFIG_DATE='#{Time.now}'

    FOOT
    rc_fh.close
  end

  ##
  # Determines the bash startup script stored in +cli[:config]+, asking the
  # user for one when the current value does not point to an existing file.
  # If the resulting file exists, its absolute path is registered in +rc_fh+
  # as +MIGA_STARTUP+ and sourced from there; otherwise +cli[:config]+ is
  # reset to '/dev/null'.
  def check_configuration_script(rc_fh)
    unless File.exist? cli[:config]
      cli[:config] = cli.ask_user(
        'Is there a script I need to load at startup?',
        cli[:config]
      )
    end
    if File.exist? cli[:config]
      cli[:config] = File.expand_path(cli[:config])
      cli.puts "Found bash configuration script: #{cli[:config]}"
      rc_fh.puts "MIGA_STARTUP='#{cli[:config]}'"
      rc_fh.puts '. "$MIGA_STARTUP"'
    else
      cli[:config] = '/dev/null'
    end
    cli.puts ''
  end

  ##
  # Runs the checks for auxiliary files: MyTaxa scores and database when
  # +cli[:mytaxa]+ is set, the RDP classifier when +cli[:rdp]+ is set, and
  # always Phyla Lite. +paths+ is forwarded to the MyTaxa checks, which read
  # its 'MyTaxa' entry.
  def check_additional_files(paths)
    if cli[:mytaxa]
      check_mytaxa_scores(paths)
      check_mytaxa_database(paths)
    end
    check_rdp_classifier if cli[:rdp]
    check_phyla_lite
  end

  ##
  # Checks that a +db+ folder exists next to the path in +paths['MyTaxa']+.
  # Raises a RuntimeError (after printing the suggested download command)
  # when it is missing.
  def check_mytaxa_scores(paths)
    cli.print 'Looking for MyTaxa scores... '
    mt = File.dirname(paths['MyTaxa'])
    unless Dir.exist?(File.join(mt, 'db'))
      cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
      raise 'Incomplete MyTaxa installation'
    end
    cli.puts 'yes'
  end

  ##
  # Makes sure +AllGenomes.faa.dmnd+ is available in +$MIGA_HOME/.miga_db+.
  # If it is not already there, it is sym-linked from the folder containing
  # +paths['MyTaxa']+ when present in that folder, or downloaded otherwise.
  def check_mytaxa_database(paths)
    cli.print 'Looking for MyTaxa DB... '
    mt = File.dirname(paths['MyTaxa'])
    dmnd_db = 'AllGenomes.faa.dmnd'
    # The folder must exist before sym-linking or downloading into it
    home_db = File.join(ensure_miga_db_dir, dmnd_db)
    mt_db = File.join(mt, dmnd_db)
    if File.exist?(home_db)
      cli.puts 'yes'
    elsif File.exist?(mt_db)
      cli.puts 'yes, sym-linking'
      File.symlink(mt_db, home_db)
    else
      cli.puts 'no, downloading'
      MiGA::MiGA.download_file_ftp(:miga_dist, dmnd_db, home_db) do |n, size|
        cli.advance("#{dmnd_db}:", n, size)
      end
      cli.puts
    end
  end

  ##
  # Makes sure a non-empty +classifier.jar+ is available in
  # +$MIGA_HOME/.miga_db+, downloading and unpacking +classifier.tar.gz+
  # when it is not. The archive is removed only if extraction succeeds.
  def check_rdp_classifier
    cli.print 'Looking for RDP classifier... '
    miga_db = ensure_miga_db_dir
    if File.size?(File.join(miga_db, 'classifier.jar'))
      cli.puts 'yes'
    else
      cli.puts 'no, downloading'
      arch = 'classifier.tar.gz'
      arch_path = File.join(miga_db, arch)
      MiGA::MiGA.download_file_ftp(:miga_dist, arch, arch_path) do |n, size|
        cli.advance("#{arch}:", n, size)
      end
      # Argument-vector form: no shell is involved, so quotes or other
      # special characters in the path cannot break or alter the command
      File.delete(arch_path) if system('tar', 'zxf', arch, chdir: miga_db)
      cli.puts
    end
  end

  ##
  # Launches +miga get_db -n Phyla_Lite --no-overwrite+ through a new
  # MiGA::Cli instance.
  def check_phyla_lite
    cli.puts 'Looking for Phyla Lite... '
    cmd = ['get_db', '-n', 'Phyla_Lite', '--no-overwrite']
    MiGA::Cli.new(cmd).launch(true)
  end

  private

  ##
  # Returns the path to +$MIGA_HOME/.miga_db+, creating the folder first if
  # it does not exist yet.
  def ensure_miga_db_dir
    dir = File.join(ENV['MIGA_HOME'], '.miga_db')
    Dir.mkdir(dir) unless Dir.exist?(dir)
    dir
  end
end
@@ -269,7 +269,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
269
269
  else
270
270
  cli.say ' Creating dataset'
271
271
  rd.save_to(p, name, !cli[:query], body[:md])
272
- cli.add_metadata(p.add_dataset(name)).save
272
+ cli.add_metadata(p.add_dataset(name))
273
273
  end
274
274
  end
275
275
  end
@@ -24,6 +24,11 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
24
24
  'Use faster identity engines (Diamond-AAI and FastANI)',
25
25
  'Equivalent to: -m aai_p=diamond,ani_p=fastani'
26
26
  ) { |v| cli[:fast] = v }
27
+ opt.on(
28
+ '--sensitive',
29
+ 'Use more sensitive identity engines (BLAST+)',
30
+ 'Equivalent to: -m aai_p=blast+,ani_p=blast+'
31
+ ) { |v| cli[:sensitive] = v }
27
32
  opt.on(
28
33
  '-m', '--metadata STRING',
29
34
  'Metadata as key-value pairs separated by = and delimited by comma',
@@ -35,20 +40,21 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
35
40
  def perform
36
41
  cli.ensure_type(MiGA::Project)
37
42
  cli.ensure_par(project: '-P')
38
- unless File.exist?(File.join(ENV['HOME'], '.miga_rc')) &&
39
- File.exist?(File.join(ENV['HOME'], '.miga_daemon.json'))
40
- raise "You must initialize MiGA before creating the first project.\n" +
41
- 'Please use "miga init".'
43
+ unless MiGA::MiGA.initialized?
44
+ raise 'MiGA has not been initialized, please use "miga init" first'
42
45
  end
43
46
  cli.say "Creating project: #{cli[:project]}"
44
- raise 'Project already exists, aborting.' if Project.exist? cli[:project]
47
+ raise 'Project already exists, aborting' if Project.exist?(cli[:project])
45
48
 
46
49
  p = Project.new(cli[:project], false)
47
50
  p = cli.add_metadata(p)
48
- if cli[:fast]
49
- p.metadata[:aai_p] = 'diamond'
50
- p.metadata[:ani_p] = 'fastani'
51
+
52
+ if cli[:sensitive]
53
+ p.set_option(:aai_p, 'blast+')
54
+ p.set_option(:ani_p, 'blast+')
55
+ elsif cli[:fast]
56
+ p.set_option(:aai_p, 'diamond')
57
+ p.set_option(:ani_p, 'fastani')
51
58
  end
52
- p.save
53
59
  end
54
60
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ ##
6
+ # CLI: `miga option`
7
class MiGA::Cli::Action::Option < MiGA::Cli::Action
  ##
  # Registers the command-line flags of `miga option`: object selection
  # (project and optional dataset), +--key+, +--value+, and +--output+.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, %i[project dataset_opt])
      opt.on(
        '-k', '--key STRING',
        'Option name to get or set (by default, all options are printed)'
      ) { |v| cli[:key] = v }
      opt.on(
        '--value STRING',
        'Value of the option to set (by default, option value is not changed)',
        'Recognized tokens: nil, true, false'
      ) { |v| cli[:value] = v }
      opt.on(
        '-o', '--output PATH',
        'Create output file instead of returning to STDOUT'
      ) { |v| cli[:output] = v }
    end
  end

  ##
  # Prints every option of the selected project or dataset as a table when no
  # +--key+ is given. Otherwise sets the option first (only if +--value+ was
  # passed) and then prints its current value. +--key+ is mandatory whenever
  # +--value+ is passed.
  def perform
    unless cli[:value].nil?
      cli.ensure_par(
        { key: '-k' },
        '%<name>s is mandatory when --value is set: please provide %<flag>s'
      )
    end
    obj = cli.load_project_or_dataset
    with_output do |io|
      if cli[:key].nil?
        cli.table(%w[Key Value], obj.all_options.to_a, io)
      else
        obj.set_option(cli[:key], cli[:value], true) unless cli[:value].nil?
        io.puts obj.option(cli[:key])
      end
    end
  end

  private

  ##
  # Yields the IO to write results to: +$stdout+ when +--output+ was not
  # given, or the +--output+ file opened for writing. The file is opened in
  # block form so it is closed even if the block raises; +$stdout+ is never
  # closed.
  def with_output(&blk)
    return yield($stdout) if cli[:output].nil?

    File.open(cli[:output], 'w', &blk)
  end
end
@@ -21,10 +21,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
21
21
 
22
22
  def perform
23
23
  # Input data
24
- p_metadata = Hash[
25
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
26
- .map { |i| ["run_#{i}", false] }
24
+ norun = %w[
25
+ project_stats haai_distances aai_distances ani_distances clade_finding
27
26
  ]
27
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
28
28
  d_metadata = { run_distances: false }
29
29
  d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
30
30
  p = create_project(:assembly, p_metadata, d_metadata)
@@ -47,7 +47,7 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
47
47
 
48
48
  def read_distances
49
49
  p = cli.load_project
50
- cli[:metric] ||= p.is_clade? ? 'ani' : 'aai'
50
+ cli[:metric] ||= p.clade? ? 'ani' : 'aai'
51
51
  res_n = "#{cli[:metric]}_distances"
52
52
  cli.say "Reading distances: 1-#{cli[:metric].upcase}"
53
53
  res = p.result(res_n)
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
59
59
  cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
60
60
  p = cli.load_project
61
61
  if cli[:ref_project]
62
- if (ref = p.metadata[:ref_project]).nil?
62
+ if (ref = p.option(:ref_project)).nil?
63
63
  raise '--ref-project requested but no reference project has been set'
64
64
  end
65
65
  if (q = MiGA::Project.load(ref)).nil?
@@ -9,7 +9,7 @@ module MiGA::Cli::Action::Wf
9
9
  cli.defaults = {
10
10
  clean: false, regexp: MiGA::Cli.FILE_REGEXP,
11
11
  project_type: :genomes, dataset_type: :popgenome,
12
- ncbi_draft: true, min_qual: 25.0
12
+ ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
13
13
  }
14
14
  end
15
15
 
@@ -125,32 +125,34 @@ module MiGA::Cli::Action::Wf
125
125
  dataset_type: '--dataset-type'
126
126
  )
127
127
  # Create empty project
128
- call_cli([
129
- 'new',
130
- '-P', cli[:outdir],
131
- '-t', cli[:project_type]
132
- ]) unless MiGA::Project.exist? cli[:outdir]
128
+ call_cli(
129
+ ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
130
+ ) unless MiGA::Project.exist? cli[:outdir]
133
131
  # Define project metadata
134
132
  p = cli.load_project(:outdir, '-o')
135
- %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
136
133
  p_metadata[:type] = cli[:project_type]
137
134
  transfer_metadata(p, p_metadata)
135
+ %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
136
+ p.set_option(i, cli[i])
137
+ end
138
138
  # Download datasets
139
- call_cli([
140
- 'ncbi_get',
141
- '-P', cli[:outdir],
142
- '-T', cli[:ncbi_taxon],
143
- (cli[:ncbi_draft] ? '--all' : '--complete')
144
- ]) unless cli[:ncbi_taxon].nil?
139
+ unless cli[:ncbi_taxon].nil?
140
+ what = cli[:ncbi_draft] ? '--all' : '--complete'
141
+ call_cli(
142
+ ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
143
+ )
144
+ end
145
145
  # Add datasets
146
- call_cli([
147
- 'add',
148
- '--ignore-dups',
149
- '-P', cli[:outdir],
150
- '-t', cli[:dataset_type],
151
- '-i', stage,
152
- '-R', cli[:regexp]
153
- ] + cli.files) unless cli.files.empty?
146
+ call_cli(
147
+ [
148
+ 'add',
149
+ '--ignore-dups',
150
+ '-P', cli[:outdir],
151
+ '-t', cli[:dataset_type],
152
+ '-i', stage,
153
+ '-R', cli[:regexp]
154
+ ] + cli.files
155
+ ) unless cli.files.empty?
154
156
  # Define datasets metadata
155
157
  p.load
156
158
  d_metadata[:type] = cli[:dataset_type]
@@ -161,13 +163,13 @@ module MiGA::Cli::Action::Wf
161
163
  def summarize(which = %w[cds assembly essential_genes ssu])
162
164
  which.each do |r|
163
165
  cli.say "Summary: #{r}"
164
- call_cli([
165
- 'summary',
166
- '-P', cli[:outdir],
167
- '-r', r,
168
- '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
169
- '--tab', '--ref', '--active'
170
- ])
166
+ call_cli(
167
+ [
168
+ 'summary',
169
+ '-P', cli[:outdir], '-r', r, '--tab', '--ref', '--active',
170
+ '-o', File.join(cli[:outdir], "#{r}.tsv")
171
+ ]
172
+ )
171
173
  end
172
174
  call_cli(['browse', '-P', cli[:outdir]])
173
175
  end
@@ -193,14 +195,14 @@ module MiGA::Cli::Action::Wf
193
195
  cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
194
196
  cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
195
197
  cwd = Dir.pwd
196
- call_cli cmd
198
+ call_cli(cmd)
197
199
  Dir.chdir(cwd)
198
200
  end
199
201
 
200
202
  def transfer_metadata(obj, md)
201
203
  # Clear old metadata
202
204
  obj.metadata.each do |k, v|
203
- obj.metadata[k] = nil if k.to_s =~ /^run_/ || k == :ref_project
205
+ obj.metadata[k] = nil if k.to_s =~ /^run_/ || obj.option?(k)
204
206
  end
205
207
  # Transfer and save
206
208
  md.each { |k, v| obj.metadata[k] = v }
data/lib/miga/cli/base.rb CHANGED
@@ -34,6 +34,7 @@ module MiGA::Cli::Base
34
34
  next_step: 'Return the next task to run in a dataset or project',
35
35
  # Objects (Datasets or Projects)
36
36
  edit: 'Edit the metadata of a dataset or project',
37
+ option: 'Get or set options of a dataset or project',
37
38
  # System
38
39
  init: 'Initialize MiGA to process new projects',
39
40
  daemon: 'Control the daemon of a MiGA project',
@@ -57,12 +57,12 @@ module MiGA::Cli::ObjectsHelper
57
57
  ds.select! do |d|
58
58
  advance('Datasets:', k += 1, n, false)
59
59
  o = true
60
- o &&= (d.is_ref? == self[:ref]) unless self[:ref].nil?
61
- o &&= (d.is_active? == self[:active]) unless self[:active].nil?
62
- o &&= (self[:multi] ? d.is_multi? :
63
- d.is_nonmulti?) unless self[:multi].nil?
64
- o &&= (not d.metadata[:tax].nil?) &&
65
- d.metadata[:tax].in?(self[:taxonomy]) unless self[:taxonomy].nil?
60
+ o &&= (d.ref? == self[:ref]) unless self[:ref].nil?
61
+ o &&= (d.active? == self[:active]) unless self[:active].nil?
62
+ o &&= (self[:multi] ? d.multi? : d.nonmulti?) unless self[:multi].nil?
63
+ unless self[:taxonomy].nil?
64
+ o &&= !d.metadata[:tax].nil? && d.metadata[:tax].in?(self[:taxonomy])
65
+ end
66
66
  o
67
67
  end
68
68
  say ''
@@ -90,22 +90,27 @@ module MiGA::Cli::ObjectsHelper
90
90
  def add_metadata(obj, cli = self)
91
91
  raise "Unsupported object: #{obj.class}" unless obj.respond_to? :metadata
92
92
 
93
- cli[:metadata].split(',').each do |pair|
93
+ (cli[:metadata] || '').split(',').each do |pair|
94
94
  (k, v) = pair.split('=')
95
- case v
96
- when 'true'; v = true
97
- when 'false'; v = false
98
- when 'nil'; v = nil
99
- end
100
- if k == '_step'
101
- obj.metadata["_try_#{v}"] ||= 0
102
- obj.metadata["_try_#{v}"] += 1
95
+ if obj.option?(k)
96
+ obj.set_option(k, v, true)
97
+ else
98
+ case v
99
+ when 'true'; v = true
100
+ when 'false'; v = false
101
+ when 'nil'; v = nil
102
+ end
103
+ if k == '_step'
104
+ obj.metadata["_try_#{v}"] ||= 0
105
+ obj.metadata["_try_#{v}"] += 1
106
+ end
107
+ obj.metadata[k] = v
103
108
  end
104
- obj.metadata[k] = v
105
- end unless cli[:metadata].nil?
106
- [:type, :name, :user, :description, :comments].each do |k|
109
+ end
110
+ %i[type name user description comments].each do |k|
107
111
  obj.metadata[k] = cli[k] unless cli[k].nil?
108
112
  end
113
+ obj.save
109
114
  obj
110
115
  end
111
116
  end
data/lib/miga/common.rb CHANGED
@@ -6,6 +6,7 @@ require 'miga/json'
6
6
  require 'miga/common/base'
7
7
  require 'miga/common/path'
8
8
  require 'miga/common/format'
9
+ require 'miga/common/net'
9
10
  require 'stringio'
10
11
 
11
12
  ##
@@ -16,13 +17,14 @@ class MiGA::MiGA
16
17
 
17
18
  extend MiGA::Common::Path
18
19
  extend MiGA::Common::Format
20
+ extend MiGA::Common::Net
19
21
 
20
22
  ENV['MIGA_HOME'] ||= ENV['HOME']
21
23
 
22
24
  ##
23
25
  # Has MiGA been initialized?
24
26
  def self.initialized?
25
- File.exist?(File.expand_path('.miga_rc', ENV['MIGA_HOME'])) and
27
+ File.exist?(File.expand_path('.miga_rc', ENV['MIGA_HOME'])) &&
26
28
  File.exist?(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
27
29
  end
28
30
 
@@ -64,7 +66,7 @@ class MiGA::MiGA
64
66
  adv_n = n - @_advance_time[:n]
65
67
  unless total.nil? || @_advance_time[:last].nil? || adv_n <= 0
66
68
  if adv_n.to_f/n > 0.001
67
- this_time = Time.now - @_advance_time[:last]
69
+ this_time = (Time.now - @_advance_time[:last]).to_f
68
70
  this_avg = this_time / adv_n
69
71
  @_advance_time[:avg] ||= this_avg
70
72
  @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg