miga-base 0.7.21.0 → 0.7.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +1 -2
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/init.rb +41 -93
  14. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  15. data/lib/miga/cli/action/init/files_helper.rb +118 -0
  16. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  17. data/lib/miga/cli/action/new.rb +15 -9
  18. data/lib/miga/cli/action/option.rb +44 -0
  19. data/lib/miga/cli/action/quality_wf.rb +3 -3
  20. data/lib/miga/cli/action/tax_dist.rb +1 -1
  21. data/lib/miga/cli/action/tax_test.rb +1 -1
  22. data/lib/miga/cli/action/wf.rb +32 -30
  23. data/lib/miga/cli/base.rb +1 -0
  24. data/lib/miga/cli/objects_helper.rb +23 -18
  25. data/lib/miga/common.rb +4 -2
  26. data/lib/miga/common/net.rb +74 -0
  27. data/lib/miga/common/with_option.rb +83 -0
  28. data/lib/miga/common/with_result.rb +3 -2
  29. data/lib/miga/dataset/base.rb +20 -2
  30. data/lib/miga/dataset/result.rb +3 -2
  31. data/lib/miga/metadata.rb +25 -13
  32. data/lib/miga/project/base.rb +82 -2
  33. data/lib/miga/project/result.rb +4 -4
  34. data/lib/miga/remote_dataset.rb +2 -0
  35. data/lib/miga/result/stats.rb +2 -2
  36. data/lib/miga/version.rb +4 -2
  37. data/scripts/aai_distances.bash +1 -1
  38. data/scripts/ani_distances.bash +1 -1
  39. data/scripts/essential_genes.bash +1 -2
  40. data/scripts/haai_distances.bash +1 -1
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/net_test.rb +34 -0
  47. data/test/with_option_test.rb +115 -0
  48. data/utils/cleanup-databases.rb +2 -3
  49. data/utils/distance/commands.rb +2 -2
  50. data/utils/distance/database.rb +1 -1
  51. data/utils/distance/pipeline.rb +2 -4
  52. data/utils/distance/runner.rb +15 -23
  53. data/utils/index_metadata.rb +1 -2
  54. data/utils/requirements.txt +6 -5
  55. data/utils/subclade/runner.rb +10 -11
  56. metadata +9 -3
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Helper module with files configuration functions for MiGA::Cli::Action::Init
5
+ module MiGA::Cli::Action::Init::FilesHelper
6
+ def open_rc_file
7
+ rc_path = File.expand_path('.miga_rc', ENV['HOME'])
8
+ if File.exist? rc_path
9
+ if cli.ask_user(
10
+ 'I found a previous configuration. Do you want to continue?',
11
+ 'yes', %w(yes no)
12
+ ) == 'no'
13
+ cli.puts 'OK, see you soon!'
14
+ exit(0)
15
+ end
16
+ end
17
+ rc_fh = File.open(rc_path, 'w')
18
+ rc_fh.puts <<~BASH
19
+ #!/bin/bash
20
+ # `miga init` made this on #{Time.now}
21
+
22
+ BASH
23
+ rc_fh
24
+ end
25
+
26
+ def close_rc_file(rc_fh)
27
+ rc_fh.puts <<~FOOT
28
+
29
+ MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
30
+ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
31
+ MIGA_CONFIG_DATE='#{Time.now}'
32
+
33
+ FOOT
34
+ rc_fh.close
35
+ end
36
+
37
+ def check_configuration_script(rc_fh)
38
+ unless File.exist? cli[:config]
39
+ cli[:config] = cli.ask_user(
40
+ 'Is there a script I need to load at startup?',
41
+ cli[:config]
42
+ )
43
+ end
44
+ if File.exist? cli[:config]
45
+ cli[:config] = File.expand_path(cli[:config])
46
+ cli.puts "Found bash configuration script: #{cli[:config]}"
47
+ rc_fh.puts "MIGA_STARTUP='#{cli[:config]}'"
48
+ rc_fh.puts '. "$MIGA_STARTUP"'
49
+ else
50
+ cli[:config] = '/dev/null'
51
+ end
52
+ cli.puts ''
53
+ end
54
+
55
+ def check_additional_files(paths)
56
+ if cli[:mytaxa]
57
+ check_mytaxa_scores(paths)
58
+ check_mytaxa_database(paths)
59
+ end
60
+ check_rdp_classifier if cli[:rdp]
61
+ check_phyla_lite
62
+ end
63
+
64
+ def check_mytaxa_scores(paths)
65
+ cli.print 'Looking for MyTaxa scores... '
66
+ mt = File.dirname(paths['MyTaxa'])
67
+ unless Dir.exist?(File.join(mt, 'db'))
68
+ cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
69
+ raise 'Incomplete MyTaxa installation'
70
+ end
71
+ cli.puts 'yes'
72
+ end
73
+
74
+ def check_mytaxa_database(paths)
75
+ cli.print 'Looking for MyTaxa DB... '
76
+ mt = File.dirname(paths['MyTaxa'])
77
+ dmnd_db = 'AllGenomes.faa.dmnd'
78
+ miga_db = File.join(ENV['MIGA_HOME'], '.miga_db')
79
+ home_db = File.join(miga_db, dmnd_db)
80
+ mt_db = File.join(mt, 'AllGenomes.faa.dmnd')
81
+ if File.exist?(home_db)
82
+ cli.puts 'yes'
83
+ elsif File.exist?(mt_db)
84
+ cli.puts 'yes, sym-linking'
85
+ File.symlink(mt_db, home_db)
86
+ else
87
+ cli.puts 'no, downloading'
88
+ MiGA::MiGA.download_file_ftp(:miga_dist, dmnd_db, home_db) do |n, size|
89
+ cli.advance("#{dmnd_db}:", n, size)
90
+ end
91
+ cli.puts
92
+ end
93
+ end
94
+
95
+ def check_rdp_classifier
96
+ cli.print 'Looking for RDP classifier... '
97
+ miga_db = File.join(ENV['MIGA_HOME'], '.miga_db')
98
+ file = 'classifier.jar'
99
+ path = File.join(miga_db, file)
100
+ if File.size?(path)
101
+ cli.puts 'yes'
102
+ else
103
+ cli.puts 'no, downloading'
104
+ arch = 'classifier.tar.gz'
105
+ MiGA::MiGA.download_file_ftp(
106
+ :miga_dist, arch, File.join(miga_db, arch)
107
+ ) { |n, size| cli.advance("#{arch}:", n, size) }
108
+ `cd '#{miga_db}' && tar zxf '#{arch}' && rm '#{arch}'`
109
+ cli.puts
110
+ end
111
+ end
112
+
113
+ def check_phyla_lite
114
+ cli.puts 'Looking for Phyla Lite... '
115
+ cmd = ['get_db', '-n', 'Phyla_Lite', '--no-overwrite']
116
+ MiGA::Cli.new(cmd).launch(true)
117
+ end
118
+ end
@@ -269,7 +269,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
269
269
  else
270
270
  cli.say ' Creating dataset'
271
271
  rd.save_to(p, name, !cli[:query], body[:md])
272
- cli.add_metadata(p.add_dataset(name)).save
272
+ cli.add_metadata(p.add_dataset(name))
273
273
  end
274
274
  end
275
275
  end
@@ -24,6 +24,11 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
24
24
  'Use faster identity engines (Diamond-AAI and FastANI)',
25
25
  'Equivalent to: -m aai_p=diamond,ani_p=fastani'
26
26
  ) { |v| cli[:fast] = v }
27
+ opt.on(
28
+ '--sensitive',
29
+ 'Use more sensitive identity engines (BLAST+)',
30
+ 'Equivalent to: -m aai_p=blast+,ani_p=blast+'
31
+ ) { |v| cli[:sensitive] = v }
27
32
  opt.on(
28
33
  '-m', '--metadata STRING',
29
34
  'Metadata as key-value pairs separated by = and delimited by comma',
@@ -35,20 +40,21 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
35
40
  def perform
36
41
  cli.ensure_type(MiGA::Project)
37
42
  cli.ensure_par(project: '-P')
38
- unless File.exist?(File.join(ENV['HOME'], '.miga_rc')) &&
39
- File.exist?(File.join(ENV['HOME'], '.miga_daemon.json'))
40
- raise "You must initialize MiGA before creating the first project.\n" +
41
- 'Please use "miga init".'
43
+ unless MiGA::MiGA.initialized?
44
+ raise 'MiGA has not been initialized, please use "miga init" first'
42
45
  end
43
46
  cli.say "Creating project: #{cli[:project]}"
44
- raise 'Project already exists, aborting.' if Project.exist? cli[:project]
47
+ raise 'Project already exists, aborting' if Project.exist?(cli[:project])
45
48
 
46
49
  p = Project.new(cli[:project], false)
47
50
  p = cli.add_metadata(p)
48
- if cli[:fast]
49
- p.metadata[:aai_p] = 'diamond'
50
- p.metadata[:ani_p] = 'fastani'
51
+
52
+ if cli[:sensitive]
53
+ p.set_option(:aai_p, 'blast+')
54
+ p.set_option(:ani_p, 'blast+')
55
+ elsif cli[:fast]
56
+ p.set_option(:aai_p, 'diamond')
57
+ p.set_option(:ani_p, 'fastani')
51
58
  end
52
- p.save
53
59
  end
54
60
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ ##
6
+ # CLI: `miga option`
7
+ class MiGA::Cli::Action::Option < MiGA::Cli::Action
8
+ def parse_cli
9
+ cli.parse do |opt|
10
+ cli.opt_object(opt, %i[project dataset_opt])
11
+ opt.on(
12
+ '-k', '--key STRING',
13
+ 'Option name to get or set (by default, all options are printed)'
14
+ ) { |v| cli[:key] = v }
15
+ opt.on(
16
+ '--value STRING',
17
+ 'Value of the option to set (by default, option value is not changed)',
18
+ 'Recognized tokens: nil, true, false'
19
+ ) { |v| cli[:value] = v }
20
+ opt.on(
21
+ '-o', '--output PATH',
22
+ 'Create output file instead of returning to STDOUT'
23
+ ) { |v| cli[:output] = v }
24
+ end
25
+ end
26
+
27
+ def perform
28
+ unless cli[:value].nil?
29
+ cli.ensure_par(
30
+ { key: '-k' },
31
+ '%<name>s is mandatory when --value is set: please provide %<flag>s'
32
+ )
33
+ end
34
+ obj = cli.load_project_or_dataset
35
+ io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
36
+ if cli[:key].nil?
37
+ cli.table(%w[Key Value], obj.all_options.to_a, io)
38
+ else
39
+ obj.set_option(cli[:key], cli[:value], true) unless cli[:value].nil?
40
+ io.puts obj.option(cli[:key])
41
+ end
42
+ io.close unless cli[:output].nil?
43
+ end
44
+ end
@@ -21,10 +21,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
21
21
 
22
22
  def perform
23
23
  # Input data
24
- p_metadata = Hash[
25
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
26
- .map { |i| ["run_#{i}", false] }
24
+ norun = %w[
25
+ project_stats haai_distances aai_distances ani_distances clade_finding
27
26
  ]
27
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
28
28
  d_metadata = { run_distances: false }
29
29
  d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
30
30
  p = create_project(:assembly, p_metadata, d_metadata)
@@ -47,7 +47,7 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
47
47
 
48
48
  def read_distances
49
49
  p = cli.load_project
50
- cli[:metric] ||= p.is_clade? ? 'ani' : 'aai'
50
+ cli[:metric] ||= p.clade? ? 'ani' : 'aai'
51
51
  res_n = "#{cli[:metric]}_distances"
52
52
  cli.say "Reading distances: 1-#{cli[:metric].upcase}"
53
53
  res = p.result(res_n)
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
59
59
  cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
60
60
  p = cli.load_project
61
61
  if cli[:ref_project]
62
- if (ref = p.metadata[:ref_project]).nil?
62
+ if (ref = p.option(:ref_project)).nil?
63
63
  raise '--ref-project requested but no reference project has been set'
64
64
  end
65
65
  if (q = MiGA::Project.load(ref)).nil?
@@ -9,7 +9,7 @@ module MiGA::Cli::Action::Wf
9
9
  cli.defaults = {
10
10
  clean: false, regexp: MiGA::Cli.FILE_REGEXP,
11
11
  project_type: :genomes, dataset_type: :popgenome,
12
- ncbi_draft: true, min_qual: 25.0
12
+ ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
13
13
  }
14
14
  end
15
15
 
@@ -125,32 +125,34 @@ module MiGA::Cli::Action::Wf
125
125
  dataset_type: '--dataset-type'
126
126
  )
127
127
  # Create empty project
128
- call_cli([
129
- 'new',
130
- '-P', cli[:outdir],
131
- '-t', cli[:project_type]
132
- ]) unless MiGA::Project.exist? cli[:outdir]
128
+ call_cli(
129
+ ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
130
+ ) unless MiGA::Project.exist? cli[:outdir]
133
131
  # Define project metadata
134
132
  p = cli.load_project(:outdir, '-o')
135
- %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
136
133
  p_metadata[:type] = cli[:project_type]
137
134
  transfer_metadata(p, p_metadata)
135
+ %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
136
+ p.set_option(i, cli[i])
137
+ end
138
138
  # Download datasets
139
- call_cli([
140
- 'ncbi_get',
141
- '-P', cli[:outdir],
142
- '-T', cli[:ncbi_taxon],
143
- (cli[:ncbi_draft] ? '--all' : '--complete')
144
- ]) unless cli[:ncbi_taxon].nil?
139
+ unless cli[:ncbi_taxon].nil?
140
+ what = cli[:ncbi_draft] ? '--all' : '--complete'
141
+ call_cli(
142
+ ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
143
+ )
144
+ end
145
145
  # Add datasets
146
- call_cli([
147
- 'add',
148
- '--ignore-dups',
149
- '-P', cli[:outdir],
150
- '-t', cli[:dataset_type],
151
- '-i', stage,
152
- '-R', cli[:regexp]
153
- ] + cli.files) unless cli.files.empty?
146
+ call_cli(
147
+ [
148
+ 'add',
149
+ '--ignore-dups',
150
+ '-P', cli[:outdir],
151
+ '-t', cli[:dataset_type],
152
+ '-i', stage,
153
+ '-R', cli[:regexp]
154
+ ] + cli.files
155
+ ) unless cli.files.empty?
154
156
  # Define datasets metadata
155
157
  p.load
156
158
  d_metadata[:type] = cli[:dataset_type]
@@ -161,13 +163,13 @@ module MiGA::Cli::Action::Wf
161
163
  def summarize(which = %w[cds assembly essential_genes ssu])
162
164
  which.each do |r|
163
165
  cli.say "Summary: #{r}"
164
- call_cli([
165
- 'summary',
166
- '-P', cli[:outdir],
167
- '-r', r,
168
- '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
169
- '--tab', '--ref', '--active'
170
- ])
166
+ call_cli(
167
+ [
168
+ 'summary',
169
+ '-P', cli[:outdir], '-r', r, '--tab', '--ref', '--active',
170
+ '-o', File.join(cli[:outdir], "#{r}.tsv")
171
+ ]
172
+ )
171
173
  end
172
174
  call_cli(['browse', '-P', cli[:outdir]])
173
175
  end
@@ -193,14 +195,14 @@ module MiGA::Cli::Action::Wf
193
195
  cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
194
196
  cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
195
197
  cwd = Dir.pwd
196
- call_cli cmd
198
+ call_cli(cmd)
197
199
  Dir.chdir(cwd)
198
200
  end
199
201
 
200
202
  def transfer_metadata(obj, md)
201
203
  # Clear old metadata
202
204
  obj.metadata.each do |k, v|
203
- obj.metadata[k] = nil if k.to_s =~ /^run_/ || k == :ref_project
205
+ obj.metadata[k] = nil if k.to_s =~ /^run_/ || obj.option?(k)
204
206
  end
205
207
  # Transfer and save
206
208
  md.each { |k, v| obj.metadata[k] = v }
data/lib/miga/cli/base.rb CHANGED
@@ -34,6 +34,7 @@ module MiGA::Cli::Base
34
34
  next_step: 'Return the next task to run in a dataset or project',
35
35
  # Objects (Datasets or Projects)
36
36
  edit: 'Edit the metadata of a dataset or project',
37
+ option: 'Get or set options of a dataset or project',
37
38
  # System
38
39
  init: 'Initialize MiGA to process new projects',
39
40
  daemon: 'Control the daemon of a MiGA project',
@@ -57,12 +57,12 @@ module MiGA::Cli::ObjectsHelper
57
57
  ds.select! do |d|
58
58
  advance('Datasets:', k += 1, n, false)
59
59
  o = true
60
- o &&= (d.is_ref? == self[:ref]) unless self[:ref].nil?
61
- o &&= (d.is_active? == self[:active]) unless self[:active].nil?
62
- o &&= (self[:multi] ? d.is_multi? :
63
- d.is_nonmulti?) unless self[:multi].nil?
64
- o &&= (not d.metadata[:tax].nil?) &&
65
- d.metadata[:tax].in?(self[:taxonomy]) unless self[:taxonomy].nil?
60
+ o &&= (d.ref? == self[:ref]) unless self[:ref].nil?
61
+ o &&= (d.active? == self[:active]) unless self[:active].nil?
62
+ o &&= (self[:multi] ? d.multi? : d.nonmulti?) unless self[:multi].nil?
63
+ unless self[:taxonomy].nil?
64
+ o &&= !d.metadata[:tax].nil? && d.metadata[:tax].in?(self[:taxonomy])
65
+ end
66
66
  o
67
67
  end
68
68
  say ''
@@ -90,22 +90,27 @@ module MiGA::Cli::ObjectsHelper
90
90
  def add_metadata(obj, cli = self)
91
91
  raise "Unsupported object: #{obj.class}" unless obj.respond_to? :metadata
92
92
 
93
- cli[:metadata].split(',').each do |pair|
93
+ (cli[:metadata] || '').split(',').each do |pair|
94
94
  (k, v) = pair.split('=')
95
- case v
96
- when 'true'; v = true
97
- when 'false'; v = false
98
- when 'nil'; v = nil
99
- end
100
- if k == '_step'
101
- obj.metadata["_try_#{v}"] ||= 0
102
- obj.metadata["_try_#{v}"] += 1
95
+ if obj.option?(k)
96
+ obj.set_option(k, v, true)
97
+ else
98
+ case v
99
+ when 'true'; v = true
100
+ when 'false'; v = false
101
+ when 'nil'; v = nil
102
+ end
103
+ if k == '_step'
104
+ obj.metadata["_try_#{v}"] ||= 0
105
+ obj.metadata["_try_#{v}"] += 1
106
+ end
107
+ obj.metadata[k] = v
103
108
  end
104
- obj.metadata[k] = v
105
- end unless cli[:metadata].nil?
106
- [:type, :name, :user, :description, :comments].each do |k|
109
+ end
110
+ %i[type name user description comments].each do |k|
107
111
  obj.metadata[k] = cli[k] unless cli[k].nil?
108
112
  end
113
+ obj.save
109
114
  obj
110
115
  end
111
116
  end
data/lib/miga/common.rb CHANGED
@@ -6,6 +6,7 @@ require 'miga/json'
6
6
  require 'miga/common/base'
7
7
  require 'miga/common/path'
8
8
  require 'miga/common/format'
9
+ require 'miga/common/net'
9
10
  require 'stringio'
10
11
 
11
12
  ##
@@ -16,13 +17,14 @@ class MiGA::MiGA
16
17
 
17
18
  extend MiGA::Common::Path
18
19
  extend MiGA::Common::Format
20
+ extend MiGA::Common::Net
19
21
 
20
22
  ENV['MIGA_HOME'] ||= ENV['HOME']
21
23
 
22
24
  ##
23
25
  # Has MiGA been initialized?
24
26
  def self.initialized?
25
- File.exist?(File.expand_path('.miga_rc', ENV['MIGA_HOME'])) and
27
+ File.exist?(File.expand_path('.miga_rc', ENV['MIGA_HOME'])) &&
26
28
  File.exist?(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
27
29
  end
28
30
 
@@ -64,7 +66,7 @@ class MiGA::MiGA
64
66
  adv_n = n - @_advance_time[:n]
65
67
  unless total.nil? || @_advance_time[:last].nil? || adv_n <= 0
66
68
  if adv_n.to_f/n > 0.001
67
- this_time = Time.now - @_advance_time[:last]
69
+ this_time = (Time.now - @_advance_time[:last]).to_f
68
70
  this_avg = this_time / adv_n
69
71
  @_advance_time[:avg] ||= this_avg
70
72
  @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg