miga-base 0.7.22.0 → 0.7.25.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +10 -8
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/index_wf.rb +4 -2
  14. data/lib/miga/cli/action/init.rb +93 -144
  15. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  16. data/lib/miga/cli/action/init/files_helper.rb +119 -0
  17. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  18. data/lib/miga/cli/action/new.rb +15 -9
  19. data/lib/miga/cli/action/option.rb +44 -0
  20. data/lib/miga/cli/action/preproc_wf.rb +7 -5
  21. data/lib/miga/cli/action/quality_wf.rb +3 -3
  22. data/lib/miga/cli/action/tax_dist.rb +1 -1
  23. data/lib/miga/cli/action/tax_test.rb +1 -1
  24. data/lib/miga/cli/action/wf.rb +71 -53
  25. data/lib/miga/cli/base.rb +17 -5
  26. data/lib/miga/cli/objects_helper.rb +23 -18
  27. data/lib/miga/common.rb +4 -2
  28. data/lib/miga/common/net.rb +74 -0
  29. data/lib/miga/common/with_option.rb +83 -0
  30. data/lib/miga/common/with_result.rb +3 -2
  31. data/lib/miga/dataset/base.rb +20 -2
  32. data/lib/miga/dataset/result.rb +5 -3
  33. data/lib/miga/metadata.rb +25 -13
  34. data/lib/miga/project/base.rb +82 -2
  35. data/lib/miga/project/result.rb +4 -4
  36. data/lib/miga/remote_dataset.rb +2 -0
  37. data/lib/miga/result/stats.rb +2 -2
  38. data/lib/miga/version.rb +4 -2
  39. data/scripts/essential_genes.bash +18 -3
  40. data/scripts/miga.bash +8 -2
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/lair_test.rb +1 -2
  47. data/test/net_test.rb +34 -0
  48. data/test/with_option_test.rb +115 -0
  49. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  50. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  51. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  52. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  53. data/utils/FastAAI/README.md +84 -0
  54. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  55. data/utils/cleanup-databases.rb +2 -3
  56. data/utils/distance/base.rb +9 -0
  57. data/utils/distance/commands.rb +183 -81
  58. data/utils/distance/database.rb +69 -10
  59. data/utils/distance/pipeline.rb +15 -21
  60. data/utils/distance/runner.rb +27 -49
  61. data/utils/distance/temporal.rb +4 -2
  62. data/utils/distances.rb +2 -2
  63. data/utils/index_metadata.rb +1 -2
  64. data/utils/requirements.txt +6 -5
  65. data/utils/subclade/runner.rb +10 -11
  66. metadata +18 -6
@@ -1,5 +1,4 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
3
2
 
4
3
  ##
5
4
  # Helper module with daemon configuration functions for MiGA::Cli::Action::Init
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Helper module with files configuration functions for MiGA::Cli::Action::Init
5
+ module MiGA::Cli::Action::Init::FilesHelper
6
+ def open_rc_file
7
+ rc_path = File.expand_path('.miga_rc', ENV['HOME'])
8
+ if File.exist? rc_path
9
+ if cli.ask_user(
10
+ 'I found a previous configuration. Do you want to continue?',
11
+ 'yes', %w(yes no)
12
+ ) == 'no'
13
+ cli.puts 'OK, see you soon!'
14
+ exit(0)
15
+ end
16
+ end
17
+ rc_fh = File.open(rc_path, 'w')
18
+ rc_fh.puts <<~BASH
19
+ #!/bin/bash
20
+ # `miga init` made this on #{Time.now}
21
+
22
+ BASH
23
+ rc_fh
24
+ end
25
+
26
+ def close_rc_file(rc_fh)
27
+ rc_fh.puts <<~FOOT
28
+
29
+ MIGA_CONFIG_VERSION='#{MiGA::MiGA.FULL_VERSION}'
30
+ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
31
+ MIGA_CONFIG_DATE='#{Time.now}'
32
+
33
+ FOOT
34
+ rc_fh.close
35
+ end
36
+
37
+ def check_configuration_script(rc_fh)
38
+ unless File.exist? cli[:config]
39
+ cli[:config] = cli.ask_user(
40
+ 'Is there a script I need to load at startup?',
41
+ cli[:config]
42
+ )
43
+ end
44
+ if File.exist? cli[:config]
45
+ cli[:config] = File.expand_path(cli[:config])
46
+ cli.puts "Found bash configuration script: #{cli[:config]}"
47
+ rc_fh.puts "MIGA_STARTUP='#{cli[:config]}'"
48
+ rc_fh.puts '. "$MIGA_STARTUP"'
49
+ else
50
+ cli[:config] = '/dev/null'
51
+ end
52
+ cli.puts ''
53
+ end
54
+
55
+ def check_additional_files(paths)
56
+ if cli[:mytaxa]
57
+ check_mytaxa_scores(paths)
58
+ check_mytaxa_database(paths)
59
+ end
60
+ check_rdp_classifier if cli[:rdp]
61
+ check_phyla_lite
62
+ cli.puts ''
63
+ end
64
+
65
+ def check_mytaxa_scores(paths)
66
+ cli.print 'Looking for MyTaxa scores... '
67
+ mt = File.dirname(paths['MyTaxa'])
68
+ unless Dir.exist?(File.join(mt, 'db'))
69
+ cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
70
+ raise 'Incomplete MyTaxa installation'
71
+ end
72
+ cli.puts 'yes'
73
+ end
74
+
75
+ def check_mytaxa_database(paths)
76
+ cli.print 'Looking for MyTaxa DB... '
77
+ mt = File.dirname(paths['MyTaxa'])
78
+ dmnd_db = 'AllGenomes.faa.dmnd'
79
+ miga_db = File.join(ENV['MIGA_HOME'], '.miga_db')
80
+ home_db = File.join(miga_db, dmnd_db)
81
+ mt_db = File.join(mt, 'AllGenomes.faa.dmnd')
82
+ if File.exist?(home_db)
83
+ cli.puts 'yes'
84
+ elsif File.exist?(mt_db)
85
+ cli.puts 'yes, sym-linking'
86
+ File.symlink(mt_db, home_db)
87
+ else
88
+ cli.puts 'no, downloading'
89
+ MiGA::MiGA.download_file_ftp(:miga_dist, dmnd_db, home_db) do |n, size|
90
+ cli.advance("#{dmnd_db}:", n, size)
91
+ end
92
+ cli.puts
93
+ end
94
+ end
95
+
96
+ def check_rdp_classifier
97
+ cli.print 'Looking for RDP classifier... '
98
+ miga_db = File.join(ENV['MIGA_HOME'], '.miga_db')
99
+ file = 'classifier.jar'
100
+ path = File.join(miga_db, file)
101
+ if File.size?(path)
102
+ cli.puts 'yes'
103
+ else
104
+ cli.puts 'no, downloading'
105
+ arch = 'classifier.tar.gz'
106
+ MiGA::MiGA.download_file_ftp(
107
+ :miga_dist, arch, File.join(miga_db, arch)
108
+ ) { |n, size| cli.advance("#{arch}:", n, size) }
109
+ `cd '#{miga_db}' && tar zxf '#{arch}' && rm '#{arch}'`
110
+ cli.puts
111
+ end
112
+ end
113
+
114
+ def check_phyla_lite
115
+ cli.puts 'Looking for Phyla Lite... '
116
+ cmd = ['get_db', '-n', 'Phyla_Lite', '--no-overwrite']
117
+ MiGA::Cli.new(cmd).launch(true)
118
+ end
119
+ end
@@ -269,7 +269,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
269
269
  else
270
270
  cli.say ' Creating dataset'
271
271
  rd.save_to(p, name, !cli[:query], body[:md])
272
- cli.add_metadata(p.add_dataset(name)).save
272
+ cli.add_metadata(p.add_dataset(name))
273
273
  end
274
274
  end
275
275
  end
@@ -24,6 +24,11 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
24
24
  'Use faster identity engines (Diamond-AAI and FastANI)',
25
25
  'Equivalent to: -m aai_p=diamond,ani_p=fastani'
26
26
  ) { |v| cli[:fast] = v }
27
+ opt.on(
28
+ '--sensitive',
29
+ 'Use more sensitive identity engines (BLAST+)',
30
+ 'Equivalent to: -m aai_p=blast+,ani_p=blast+'
31
+ ) { |v| cli[:sensitive] = v }
27
32
  opt.on(
28
33
  '-m', '--metadata STRING',
29
34
  'Metadata as key-value pairs separated by = and delimited by comma',
@@ -35,20 +40,21 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
35
40
  def perform
36
41
  cli.ensure_type(MiGA::Project)
37
42
  cli.ensure_par(project: '-P')
38
- unless File.exist?(File.join(ENV['HOME'], '.miga_rc')) &&
39
- File.exist?(File.join(ENV['HOME'], '.miga_daemon.json'))
40
- raise "You must initialize MiGA before creating the first project.\n" +
41
- 'Please use "miga init".'
43
+ unless MiGA::MiGA.initialized?
44
+ raise 'MiGA has not been initialized, please use "miga init" first'
42
45
  end
43
46
  cli.say "Creating project: #{cli[:project]}"
44
- raise 'Project already exists, aborting.' if Project.exist? cli[:project]
47
+ raise 'Project already exists, aborting' if Project.exist?(cli[:project])
45
48
 
46
49
  p = Project.new(cli[:project], false)
47
50
  p = cli.add_metadata(p)
48
- if cli[:fast]
49
- p.metadata[:aai_p] = 'diamond'
50
- p.metadata[:ani_p] = 'fastani'
51
+
52
+ if cli[:sensitive]
53
+ p.set_option(:aai_p, 'blast+')
54
+ p.set_option(:ani_p, 'blast+')
55
+ elsif cli[:fast]
56
+ p.set_option(:aai_p, 'diamond')
57
+ p.set_option(:ani_p, 'fastani')
51
58
  end
52
- p.save
53
59
  end
54
60
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ ##
6
+ # CLI: `miga option`
7
+ class MiGA::Cli::Action::Option < MiGA::Cli::Action
8
+ def parse_cli
9
+ cli.parse do |opt|
10
+ cli.opt_object(opt, %i[project dataset_opt])
11
+ opt.on(
12
+ '-k', '--key STRING',
13
+ 'Option name to get or set (by default, all options are printed)'
14
+ ) { |v| cli[:key] = v }
15
+ opt.on(
16
+ '--value STRING',
17
+ 'Value of the option to set (by default, option value is not changed)',
18
+ 'Recognized tokens: nil, true, false'
19
+ ) { |v| cli[:value] = v }
20
+ opt.on(
21
+ '-o', '--output PATH',
22
+ 'Create output file instead of returning to STDOUT'
23
+ ) { |v| cli[:output] = v }
24
+ end
25
+ end
26
+
27
+ def perform
28
+ unless cli[:value].nil?
29
+ cli.ensure_par(
30
+ { key: '-k' },
31
+ '%<name>s is mandatory when --value is set: please provide %<flag>s'
32
+ )
33
+ end
34
+ obj = cli.load_project_or_dataset
35
+ io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
36
+ if cli[:key].nil?
37
+ cli.table(%w[Key Value], obj.all_options.to_a, io)
38
+ else
39
+ obj.set_option(cli[:key], cli[:value], true) unless cli[:value].nil?
40
+ io.puts obj.option(cli[:key])
41
+ end
42
+ io.close unless cli[:output].nil?
43
+ end
44
+ end
@@ -21,18 +21,20 @@ class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
21
21
  '-m', '--mytaxa_scan',
22
22
  'Perform MyTaxa scan analysis'
23
23
  ) { |v| cli[:mytaxa] = v }
24
- opts_for_wf(opt, 'Input files as defined by --input-type',
25
- multi: true, cleanup: false, ncbi: false)
24
+ opts_for_wf(
25
+ opt, 'Input files as defined by --input-type',
26
+ multi: true, cleanup: false, ncbi: false, project_type: true
27
+ )
26
28
  end
27
29
  end
28
30
 
29
31
  def perform
30
32
  # Input data
31
33
  cli.ensure_par(input_type: '-i')
32
- p_metadata = Hash[
33
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
34
- .map { |i| ["run_#{i}", false] }
34
+ norun = %w[
35
+ project_stats haai_distances aai_distances ani_distances clade_finding
35
36
  ]
37
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
36
38
  d_metadata = { run_distances: false }
37
39
  unless cli[:mytaxa]
38
40
  d_metadata[:run_mytaxa_scan] = false
@@ -21,10 +21,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
21
21
 
22
22
  def perform
23
23
  # Input data
24
- p_metadata = Hash[
25
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
26
- .map { |i| ["run_#{i}", false] }
24
+ norun = %w[
25
+ project_stats haai_distances aai_distances ani_distances clade_finding
27
26
  ]
27
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
28
28
  d_metadata = { run_distances: false }
29
29
  d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
30
30
  p = create_project(:assembly, p_metadata, d_metadata)
@@ -47,7 +47,7 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
47
47
 
48
48
  def read_distances
49
49
  p = cli.load_project
50
- cli[:metric] ||= p.is_clade? ? 'ani' : 'aai'
50
+ cli[:metric] ||= p.clade? ? 'ani' : 'aai'
51
51
  res_n = "#{cli[:metric]}_distances"
52
52
  cli.say "Reading distances: 1-#{cli[:metric].upcase}"
53
53
  res = p.result(res_n)
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
59
59
  cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
60
60
  p = cli.load_project
61
61
  if cli[:ref_project]
62
- if (ref = p.metadata[:ref_project]).nil?
62
+ if (ref = p.option(:ref_project)).nil?
63
63
  raise '--ref-project requested but no reference project has been set'
64
64
  end
65
65
  if (q = MiGA::Project.load(ref)).nil?
@@ -7,9 +7,8 @@ module MiGA::Cli::Action::Wf
7
7
  def default_opts_for_wf
8
8
  cli.expect_files = true
9
9
  cli.defaults = {
10
- clean: false, regexp: MiGA::Cli.FILE_REGEXP,
11
- project_type: :genomes, dataset_type: :popgenome,
12
- ncbi_draft: true, min_qual: 25.0
10
+ clean: false, project_type: :genomes, dataset_type: :popgenome,
11
+ ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
13
12
  }
14
13
  end
15
14
 
@@ -56,28 +55,10 @@ module MiGA::Cli::Action::Wf
56
55
  opt.on(
57
56
  '-R', '--name-regexp REGEXP', Regexp,
58
57
  'Regular expression indicating how to extract the name from the path',
59
- "By default: '#{cli[:regexp]}'"
58
+ "By default: '#{MiGA::Cli.FILE_REGEXP}'"
60
59
  ) { |v| cli[:regexp] = v }
61
- opt.on(
62
- '--type STRING',
63
- "Type of datasets. By default: #{cli[:dataset_type]}",
64
- 'Recognized types:',
65
- *MiGA::Dataset.KNOWN_TYPES
66
- .map do |k, v|
67
- "~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
68
- end.compact
69
- ) { |v| cli[:dataset_type] = v.downcase.to_sym }
70
- if params[:project_type]
71
- opt.on(
72
- '--project-type STRING',
73
- "Type of project. By default: #{cli[:project_type]}",
74
- 'Recognized types:',
75
- *MiGA::Project.KNOWN_TYPES
76
- .map do |k, v|
77
- "~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
78
- end.compact
79
- ) { |v| cli[:project_type] = v.downcase.to_sym }
80
- end
60
+ opt_object_type(opt, :dataset, params[:multi])
61
+ opt_object_type(opt, :project, params[:multi]) if params[:project_type]
81
62
  opt.on(
82
63
  '--daemon PATH',
83
64
  'Use custom daemon configuration in JSON format',
@@ -124,33 +105,42 @@ module MiGA::Cli::Action::Wf
124
105
  project_type: '--project-type',
125
106
  dataset_type: '--dataset-type'
126
107
  )
108
+ paired = cli[:input_type].to_s.include?('_paired')
109
+ cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
110
+
127
111
  # Create empty project
128
- call_cli([
129
- 'new',
130
- '-P', cli[:outdir],
131
- '-t', cli[:project_type]
132
- ]) unless MiGA::Project.exist? cli[:outdir]
112
+ call_cli(
113
+ ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
114
+ ) unless MiGA::Project.exist? cli[:outdir]
115
+
133
116
  # Define project metadata
134
117
  p = cli.load_project(:outdir, '-o')
135
- %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
136
118
  p_metadata[:type] = cli[:project_type]
137
119
  transfer_metadata(p, p_metadata)
120
+ %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
121
+ p.set_option(i, cli[i])
122
+ end
123
+
138
124
  # Download datasets
139
- call_cli([
140
- 'ncbi_get',
141
- '-P', cli[:outdir],
142
- '-T', cli[:ncbi_taxon],
143
- (cli[:ncbi_draft] ? '--all' : '--complete')
144
- ]) unless cli[:ncbi_taxon].nil?
125
+ unless cli[:ncbi_taxon].nil?
126
+ what = cli[:ncbi_draft] ? '--all' : '--complete'
127
+ call_cli(
128
+ ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
129
+ )
130
+ end
131
+
145
132
  # Add datasets
146
- call_cli([
147
- 'add',
148
- '--ignore-dups',
149
- '-P', cli[:outdir],
150
- '-t', cli[:dataset_type],
151
- '-i', stage,
152
- '-R', cli[:regexp]
153
- ] + cli.files) unless cli.files.empty?
133
+ call_cli(
134
+ [
135
+ 'add',
136
+ '--ignore-dups',
137
+ '-P', cli[:outdir],
138
+ '-t', cli[:dataset_type],
139
+ '-i', stage,
140
+ '-R', cli[:regexp]
141
+ ] + cli.files
142
+ ) unless cli.files.empty?
143
+
154
144
  # Define datasets metadata
155
145
  p.load
156
146
  d_metadata[:type] = cli[:dataset_type]
@@ -161,13 +151,13 @@ module MiGA::Cli::Action::Wf
161
151
  def summarize(which = %w[cds assembly essential_genes ssu])
162
152
  which.each do |r|
163
153
  cli.say "Summary: #{r}"
164
- call_cli([
165
- 'summary',
166
- '-P', cli[:outdir],
167
- '-r', r,
168
- '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
169
- '--tab', '--ref', '--active'
170
- ])
154
+ call_cli(
155
+ [
156
+ 'summary',
157
+ '-P', cli[:outdir], '-r', r, '--tab', '--ref', '--active',
158
+ '-o', File.join(cli[:outdir], "#{r}.tsv")
159
+ ]
160
+ )
171
161
  end
172
162
  call_cli(['browse', '-P', cli[:outdir]])
173
163
  end
@@ -193,17 +183,45 @@ module MiGA::Cli::Action::Wf
193
183
  cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
194
184
  cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
195
185
  cwd = Dir.pwd
196
- call_cli cmd
186
+ call_cli(cmd)
197
187
  Dir.chdir(cwd)
198
188
  end
199
189
 
200
190
  def transfer_metadata(obj, md)
201
191
  # Clear old metadata
202
192
  obj.metadata.each do |k, v|
203
- obj.metadata[k] = nil if k.to_s =~ /^run_/ || k == :ref_project
193
+ obj.metadata[k] = nil if k.to_s =~ /^run_/ || obj.option?(k)
204
194
  end
205
195
  # Transfer and save
206
196
  md.each { |k, v| obj.metadata[k] = v }
207
197
  obj.save
208
198
  end
199
+
200
+ private
201
+
202
+ ##
203
+ # Add option --type or --project-type to +opt+
204
+ def opt_object_type(opt, obj, multi)
205
+ conf =
206
+ case obj
207
+ when :dataset
208
+ ['type', 'datasets', :dataset_type, MiGA::Dataset]
209
+ when :project
210
+ ['project-type', 'project', :project_type, MiGA::Project]
211
+ else
212
+ raise "Unrecognized object type: #{obj}"
213
+ end
214
+
215
+ options =
216
+ conf[3].KNOWN_TYPES.map do |k, v|
217
+ "~ #{k}: #{v[:description]}" unless !multi && v[:multi]
218
+ end.compact
219
+
220
+ opt.on(
221
+ "--#{conf[0]} STRING",
222
+ "Type of #{conf[1]}. By default: #{cli[conf[2]]}",
223
+ 'Recognized types:',
224
+ *options
225
+ ) { |v| cli[conf[2]] = v.downcase.to_sym }
226
+ end
209
227
  end