miga-base 0.7.22.0 → 0.7.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +10 -8
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/index_wf.rb +4 -2
  14. data/lib/miga/cli/action/init.rb +93 -144
  15. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  16. data/lib/miga/cli/action/init/files_helper.rb +119 -0
  17. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  18. data/lib/miga/cli/action/new.rb +15 -9
  19. data/lib/miga/cli/action/option.rb +44 -0
  20. data/lib/miga/cli/action/preproc_wf.rb +7 -5
  21. data/lib/miga/cli/action/quality_wf.rb +3 -3
  22. data/lib/miga/cli/action/tax_dist.rb +1 -1
  23. data/lib/miga/cli/action/tax_test.rb +1 -1
  24. data/lib/miga/cli/action/wf.rb +71 -53
  25. data/lib/miga/cli/base.rb +17 -5
  26. data/lib/miga/cli/objects_helper.rb +23 -18
  27. data/lib/miga/common.rb +4 -2
  28. data/lib/miga/common/net.rb +74 -0
  29. data/lib/miga/common/with_option.rb +83 -0
  30. data/lib/miga/common/with_result.rb +3 -2
  31. data/lib/miga/dataset/base.rb +20 -2
  32. data/lib/miga/dataset/result.rb +5 -3
  33. data/lib/miga/metadata.rb +25 -13
  34. data/lib/miga/project/base.rb +82 -2
  35. data/lib/miga/project/result.rb +4 -4
  36. data/lib/miga/remote_dataset.rb +2 -0
  37. data/lib/miga/result/stats.rb +2 -2
  38. data/lib/miga/version.rb +4 -2
  39. data/scripts/essential_genes.bash +18 -3
  40. data/scripts/miga.bash +8 -2
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/lair_test.rb +1 -2
  47. data/test/net_test.rb +34 -0
  48. data/test/with_option_test.rb +115 -0
  49. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  50. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  51. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  52. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  53. data/utils/FastAAI/README.md +84 -0
  54. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  55. data/utils/cleanup-databases.rb +2 -3
  56. data/utils/distance/base.rb +9 -0
  57. data/utils/distance/commands.rb +183 -81
  58. data/utils/distance/database.rb +69 -10
  59. data/utils/distance/pipeline.rb +15 -21
  60. data/utils/distance/runner.rb +27 -49
  61. data/utils/distance/temporal.rb +4 -2
  62. data/utils/distances.rb +2 -2
  63. data/utils/index_metadata.rb +1 -2
  64. data/utils/requirements.txt +6 -5
  65. data/utils/subclade/runner.rb +10 -11
  66. metadata +18 -6
@@ -1,5 +1,4 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
3
2
 
4
3
  ##
5
4
  # Helper module with daemon configuration functions for MiGA::Cli::Action::Init
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Helper module with files configuration functions for MiGA::Cli::Action::Init
5
module MiGA::Cli::Action::Init::FilesHelper
  ##
  # Opens +.miga_rc+ under +ENV['HOME']+ for writing, writes the bash header,
  # and returns the open file handle (the caller is expected to hand it to
  # #close_rc_file). If the file already exists the user is asked whether to
  # continue; on a 'no' answer the process exits with status 0.
  def open_rc_file
    rc_path = File.expand_path('.miga_rc', ENV['HOME'])
    if File.exist?(rc_path)
      answer = cli.ask_user(
        'I found a previous configuration. Do you want to continue?',
        'yes', %w(yes no)
      )
      if answer == 'no'
        cli.puts 'OK, see you soon!'
        exit(0)
      end
    end
    rc_fh = File.open(rc_path, 'w')
    rc_fh.puts <<~BASH
      #!/bin/bash
      # `miga init` made this on #{Time.now}

    BASH
    rc_fh
  end

  ##
  # Writes the version/date footer to +rc_fh+ and closes it. The handle is
  # closed even if writing the footer fails.
  def close_rc_file(rc_fh)
    rc_fh.puts <<~FOOT

      MIGA_CONFIG_VERSION='#{MiGA::MiGA.FULL_VERSION}'
      MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
      MIGA_CONFIG_DATE='#{Time.now}'

    FOOT
  ensure
    rc_fh.close
  end

  ##
  # Determines the startup script stored in +cli[:config]+, asking the user
  # when the current value does not point to an existing file. If a script is
  # found, its absolute path is recorded in +rc_fh+ as +MIGA_STARTUP+ together
  # with a line sourcing it; otherwise +cli[:config]+ is set to '/dev/null'.
  def check_configuration_script(rc_fh)
    # +to_s+ keeps File.exist? from raising TypeError on a nil value
    unless File.exist?(cli[:config].to_s)
      cli[:config] = cli.ask_user(
        'Is there a script I need to load at startup?',
        cli[:config]
      )
    end
    if File.exist?(cli[:config].to_s)
      cli[:config] = File.expand_path(cli[:config])
      cli.puts "Found bash configuration script: #{cli[:config]}"
      # Close the quote, emit an escaped quote, and reopen it, so that a path
      # containing a single quote still yields a valid bash assignment
      escaped = cli[:config].gsub("'") { "'\\''" }
      rc_fh.puts "MIGA_STARTUP='#{escaped}'"
      rc_fh.puts '. "$MIGA_STARTUP"'
    else
      cli[:config] = '/dev/null'
    end
    cli.puts ''
  end

  ##
  # Runs the checks for auxiliary files: MyTaxa scores and database when
  # +cli[:mytaxa]+ is set, the RDP classifier when +cli[:rdp]+ is set, and
  # always Phyla Lite. +paths+ is forwarded to the MyTaxa checks.
  def check_additional_files(paths)
    if cli[:mytaxa]
      check_mytaxa_scores(paths)
      check_mytaxa_database(paths)
    end
    check_rdp_classifier if cli[:rdp]
    check_phyla_lite
    cli.puts ''
  end

  ##
  # Checks that a +db+ folder exists next to the path in +paths['MyTaxa']+.
  # Raises a RuntimeError (after printing the download command) if it doesn't.
  def check_mytaxa_scores(paths)
    cli.print 'Looking for MyTaxa scores... '
    mt = File.dirname(paths['MyTaxa'])
    unless Dir.exist?(File.join(mt, 'db'))
      cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
      raise 'Incomplete MyTaxa installation'
    end
    cli.puts 'yes'
  end

  ##
  # Makes sure the MyTaxa Diamond database is available in
  # +$MIGA_HOME/.miga_db+: uses the existing file if present, symlinks the copy
  # located next to +paths['MyTaxa']+ if there is one, or downloads it.
  def check_mytaxa_database(paths)
    cli.print 'Looking for MyTaxa DB... '
    dmnd_db = 'AllGenomes.faa.dmnd'
    miga_db = File.join(ENV['MIGA_HOME'], '.miga_db')
    home_db = File.join(miga_db, dmnd_db)
    mt_db = File.join(File.dirname(paths['MyTaxa']), dmnd_db)
    if File.exist?(home_db)
      cli.puts 'yes'
      return
    end

    # File.exist? follows symlinks, so a dangling link from a previous setup
    # gets here; remove it, or File.symlink below would fail with EEXIST
    File.unlink(home_db) if File.symlink?(home_db)

    if File.exist?(mt_db)
      cli.puts 'yes, sym-linking'
      File.symlink(mt_db, home_db)
    else
      cli.puts 'no, downloading'
      MiGA::MiGA.download_file_ftp(:miga_dist, dmnd_db, home_db) do |n, size|
        cli.advance("#{dmnd_db}:", n, size)
      end
      cli.puts
    end
  end

  ##
  # Makes sure a non-empty +classifier.jar+ exists in +$MIGA_HOME/.miga_db+,
  # downloading and extracting +classifier.tar.gz+ when it doesn't. Raises a
  # RuntimeError if the archive cannot be extracted.
  def check_rdp_classifier
    cli.print 'Looking for RDP classifier... '
    miga_db = File.join(ENV['MIGA_HOME'], '.miga_db')
    if File.size?(File.join(miga_db, 'classifier.jar'))
      cli.puts 'yes'
      return
    end

    cli.puts 'no, downloading'
    arch = 'classifier.tar.gz'
    arch_path = File.join(miga_db, arch)
    MiGA::MiGA.download_file_ftp(:miga_dist, arch, arch_path) do |n, size|
      cli.advance("#{arch}:", n, size)
    end
    # Argument-list form: no shell is involved, so paths need no quoting, and
    # the exit status is checked instead of being silently discarded
    unless system('tar', 'zxf', arch, chdir: miga_db)
      raise "Cannot extract #{arch_path}"
    end
    File.unlink(arch_path)
    cli.puts
  end

  ##
  # Launches `miga get_db -n Phyla_Lite --no-overwrite` through a new
  # MiGA::Cli instance.
  def check_phyla_lite
    cli.puts 'Looking for Phyla Lite... '
    cmd = ['get_db', '-n', 'Phyla_Lite', '--no-overwrite']
    MiGA::Cli.new(cmd).launch(true)
  end
end
@@ -269,7 +269,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
269
269
  else
270
270
  cli.say ' Creating dataset'
271
271
  rd.save_to(p, name, !cli[:query], body[:md])
272
- cli.add_metadata(p.add_dataset(name)).save
272
+ cli.add_metadata(p.add_dataset(name))
273
273
  end
274
274
  end
275
275
  end
@@ -24,6 +24,11 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
24
24
  'Use faster identity engines (Diamond-AAI and FastANI)',
25
25
  'Equivalent to: -m aai_p=diamond,ani_p=fastani'
26
26
  ) { |v| cli[:fast] = v }
27
+ opt.on(
28
+ '--sensitive',
29
+ 'Use more sensitive identity engines (BLAST+)',
30
+ 'Equivalent to: -m aai_p=blast+,ani_p=blast+'
31
+ ) { |v| cli[:sensitive] = v }
27
32
  opt.on(
28
33
  '-m', '--metadata STRING',
29
34
  'Metadata as key-value pairs separated by = and delimited by comma',
@@ -35,20 +40,21 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
35
40
  def perform
36
41
  cli.ensure_type(MiGA::Project)
37
42
  cli.ensure_par(project: '-P')
38
- unless File.exist?(File.join(ENV['HOME'], '.miga_rc')) &&
39
- File.exist?(File.join(ENV['HOME'], '.miga_daemon.json'))
40
- raise "You must initialize MiGA before creating the first project.\n" +
41
- 'Please use "miga init".'
43
+ unless MiGA::MiGA.initialized?
44
+ raise 'MiGA has not been initialized, please use "miga init" first'
42
45
  end
43
46
  cli.say "Creating project: #{cli[:project]}"
44
- raise 'Project already exists, aborting.' if Project.exist? cli[:project]
47
+ raise 'Project already exists, aborting' if Project.exist?(cli[:project])
45
48
 
46
49
  p = Project.new(cli[:project], false)
47
50
  p = cli.add_metadata(p)
48
- if cli[:fast]
49
- p.metadata[:aai_p] = 'diamond'
50
- p.metadata[:ani_p] = 'fastani'
51
+
52
+ if cli[:sensitive]
53
+ p.set_option(:aai_p, 'blast+')
54
+ p.set_option(:ani_p, 'blast+')
55
+ elsif cli[:fast]
56
+ p.set_option(:aai_p, 'diamond')
57
+ p.set_option(:ani_p, 'fastani')
51
58
  end
52
- p.save
53
59
  end
54
60
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ ##
6
+ # CLI: `miga option`
7
class MiGA::Cli::Action::Option < MiGA::Cli::Action
  ##
  # Registers the command-line flags: the project/dataset selectors plus
  # +--key+, +--value+, and +--output+, stored under +cli[:key]+,
  # +cli[:value]+, and +cli[:output]+ respectively.
  def parse_cli
    cli.parse do |opt|
      cli.opt_object(opt, %i[project dataset_opt])
      opt.on(
        '-k', '--key STRING',
        'Option name to get or set (by default, all options are printed)'
      ) { |v| cli[:key] = v }
      opt.on(
        '--value STRING',
        'Value of the option to set (by default, option value is not changed)',
        'Recognized tokens: nil, true, false'
      ) { |v| cli[:value] = v }
      opt.on(
        '-o', '--output PATH',
        'Create output file instead of returning to STDOUT'
      ) { |v| cli[:output] = v }
    end
  end

  ##
  # Prints all options of the loaded project or dataset as a table, or, when
  # +--key+ is given, prints that option's value (after setting it first if
  # +--value+ is also given). Output goes to +cli[:output]+ if set, otherwise
  # to +$stdout+.
  def perform
    unless cli[:value].nil?
      cli.ensure_par(
        { key: '-k' },
        '%<name>s is mandatory when --value is set: please provide %<flag>s'
      )
    end
    obj = cli.load_project_or_dataset

    # Apply the change before touching the output file, so that a failure
    # while setting the option doesn't leave a truncated file behind.
    # NOTE(review): the meaning of the third argument (+true+) is defined by
    # +set_option+, which is not visible here
    unless cli[:key].nil? || cli[:value].nil?
      obj.set_option(cli[:key], cli[:value], true)
    end

    with_output_io do |io|
      if cli[:key].nil?
        cli.table(%w[Key Value], obj.all_options.to_a, io)
      else
        io.puts obj.option(cli[:key])
      end
    end
  end

  private

  ##
  # Yields the IO to write to: +$stdout+ (left open) when no output path was
  # requested, otherwise the file at +cli[:output]+ opened for writing, which
  # is closed when the block finishes or raises.
  def with_output_io
    return yield($stdout) if cli[:output].nil?

    File.open(cli[:output], 'w') { |fh| yield(fh) }
  end
end
@@ -21,18 +21,20 @@ class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
21
21
  '-m', '--mytaxa_scan',
22
22
  'Perform MyTaxa scan analysis'
23
23
  ) { |v| cli[:mytaxa] = v }
24
- opts_for_wf(opt, 'Input files as defined by --input-type',
25
- multi: true, cleanup: false, ncbi: false)
24
+ opts_for_wf(
25
+ opt, 'Input files as defined by --input-type',
26
+ multi: true, cleanup: false, ncbi: false, project_type: true
27
+ )
26
28
  end
27
29
  end
28
30
 
29
31
  def perform
30
32
  # Input data
31
33
  cli.ensure_par(input_type: '-i')
32
- p_metadata = Hash[
33
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
34
- .map { |i| ["run_#{i}", false] }
34
+ norun = %w[
35
+ project_stats haai_distances aai_distances ani_distances clade_finding
35
36
  ]
37
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
36
38
  d_metadata = { run_distances: false }
37
39
  unless cli[:mytaxa]
38
40
  d_metadata[:run_mytaxa_scan] = false
@@ -21,10 +21,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
21
21
 
22
22
  def perform
23
23
  # Input data
24
- p_metadata = Hash[
25
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
26
- .map { |i| ["run_#{i}", false] }
24
+ norun = %w[
25
+ project_stats haai_distances aai_distances ani_distances clade_finding
27
26
  ]
27
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
28
28
  d_metadata = { run_distances: false }
29
29
  d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
30
30
  p = create_project(:assembly, p_metadata, d_metadata)
@@ -47,7 +47,7 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
47
47
 
48
48
  def read_distances
49
49
  p = cli.load_project
50
- cli[:metric] ||= p.is_clade? ? 'ani' : 'aai'
50
+ cli[:metric] ||= p.clade? ? 'ani' : 'aai'
51
51
  res_n = "#{cli[:metric]}_distances"
52
52
  cli.say "Reading distances: 1-#{cli[:metric].upcase}"
53
53
  res = p.result(res_n)
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
59
59
  cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
60
60
  p = cli.load_project
61
61
  if cli[:ref_project]
62
- if (ref = p.metadata[:ref_project]).nil?
62
+ if (ref = p.option(:ref_project)).nil?
63
63
  raise '--ref-project requested but no reference project has been set'
64
64
  end
65
65
  if (q = MiGA::Project.load(ref)).nil?
@@ -7,9 +7,8 @@ module MiGA::Cli::Action::Wf
7
7
  def default_opts_for_wf
8
8
  cli.expect_files = true
9
9
  cli.defaults = {
10
- clean: false, regexp: MiGA::Cli.FILE_REGEXP,
11
- project_type: :genomes, dataset_type: :popgenome,
12
- ncbi_draft: true, min_qual: 25.0
10
+ clean: false, project_type: :genomes, dataset_type: :popgenome,
11
+ ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
13
12
  }
14
13
  end
15
14
 
@@ -56,28 +55,10 @@ module MiGA::Cli::Action::Wf
56
55
  opt.on(
57
56
  '-R', '--name-regexp REGEXP', Regexp,
58
57
  'Regular expression indicating how to extract the name from the path',
59
- "By default: '#{cli[:regexp]}'"
58
+ "By default: '#{MiGA::Cli.FILE_REGEXP}'"
60
59
  ) { |v| cli[:regexp] = v }
61
- opt.on(
62
- '--type STRING',
63
- "Type of datasets. By default: #{cli[:dataset_type]}",
64
- 'Recognized types:',
65
- *MiGA::Dataset.KNOWN_TYPES
66
- .map do |k, v|
67
- "~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
68
- end.compact
69
- ) { |v| cli[:dataset_type] = v.downcase.to_sym }
70
- if params[:project_type]
71
- opt.on(
72
- '--project-type STRING',
73
- "Type of project. By default: #{cli[:project_type]}",
74
- 'Recognized types:',
75
- *MiGA::Project.KNOWN_TYPES
76
- .map do |k, v|
77
- "~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
78
- end.compact
79
- ) { |v| cli[:project_type] = v.downcase.to_sym }
80
- end
60
+ opt_object_type(opt, :dataset, params[:multi])
61
+ opt_object_type(opt, :project, params[:multi]) if params[:project_type]
81
62
  opt.on(
82
63
  '--daemon PATH',
83
64
  'Use custom daemon configuration in JSON format',
@@ -124,33 +105,42 @@ module MiGA::Cli::Action::Wf
124
105
  project_type: '--project-type',
125
106
  dataset_type: '--dataset-type'
126
107
  )
108
+ paired = cli[:input_type].to_s.include?('_paired')
109
+ cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
110
+
127
111
  # Create empty project
128
- call_cli([
129
- 'new',
130
- '-P', cli[:outdir],
131
- '-t', cli[:project_type]
132
- ]) unless MiGA::Project.exist? cli[:outdir]
112
+ call_cli(
113
+ ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
114
+ ) unless MiGA::Project.exist? cli[:outdir]
115
+
133
116
  # Define project metadata
134
117
  p = cli.load_project(:outdir, '-o')
135
- %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
136
118
  p_metadata[:type] = cli[:project_type]
137
119
  transfer_metadata(p, p_metadata)
120
+ %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
121
+ p.set_option(i, cli[i])
122
+ end
123
+
138
124
  # Download datasets
139
- call_cli([
140
- 'ncbi_get',
141
- '-P', cli[:outdir],
142
- '-T', cli[:ncbi_taxon],
143
- (cli[:ncbi_draft] ? '--all' : '--complete')
144
- ]) unless cli[:ncbi_taxon].nil?
125
+ unless cli[:ncbi_taxon].nil?
126
+ what = cli[:ncbi_draft] ? '--all' : '--complete'
127
+ call_cli(
128
+ ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
129
+ )
130
+ end
131
+
145
132
  # Add datasets
146
- call_cli([
147
- 'add',
148
- '--ignore-dups',
149
- '-P', cli[:outdir],
150
- '-t', cli[:dataset_type],
151
- '-i', stage,
152
- '-R', cli[:regexp]
153
- ] + cli.files) unless cli.files.empty?
133
+ call_cli(
134
+ [
135
+ 'add',
136
+ '--ignore-dups',
137
+ '-P', cli[:outdir],
138
+ '-t', cli[:dataset_type],
139
+ '-i', stage,
140
+ '-R', cli[:regexp]
141
+ ] + cli.files
142
+ ) unless cli.files.empty?
143
+
154
144
  # Define datasets metadata
155
145
  p.load
156
146
  d_metadata[:type] = cli[:dataset_type]
@@ -161,13 +151,13 @@ module MiGA::Cli::Action::Wf
161
151
  def summarize(which = %w[cds assembly essential_genes ssu])
162
152
  which.each do |r|
163
153
  cli.say "Summary: #{r}"
164
- call_cli([
165
- 'summary',
166
- '-P', cli[:outdir],
167
- '-r', r,
168
- '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
169
- '--tab', '--ref', '--active'
170
- ])
154
+ call_cli(
155
+ [
156
+ 'summary',
157
+ '-P', cli[:outdir], '-r', r, '--tab', '--ref', '--active',
158
+ '-o', File.join(cli[:outdir], "#{r}.tsv")
159
+ ]
160
+ )
171
161
  end
172
162
  call_cli(['browse', '-P', cli[:outdir]])
173
163
  end
@@ -193,17 +183,45 @@ module MiGA::Cli::Action::Wf
193
183
  cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
194
184
  cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
195
185
  cwd = Dir.pwd
196
- call_cli cmd
186
+ call_cli(cmd)
197
187
  Dir.chdir(cwd)
198
188
  end
199
189
 
200
190
  def transfer_metadata(obj, md)
201
191
  # Clear old metadata
202
192
  obj.metadata.each do |k, v|
203
- obj.metadata[k] = nil if k.to_s =~ /^run_/ || k == :ref_project
193
+ obj.metadata[k] = nil if k.to_s =~ /^run_/ || obj.option?(k)
204
194
  end
205
195
  # Transfer and save
206
196
  md.each { |k, v| obj.metadata[k] = v }
207
197
  obj.save
208
198
  end
199
+
200
+ private
201
+
202
+ ##
203
+ # Add option --type or --project-type to +opt+
204
def opt_object_type(opt, obj, multi)
  # Settings per object kind (+:dataset+ or +:project+): flag name, noun used
  # in the help text, key under which the parsed value is stored in +cli+,
  # and the class whose KNOWN_TYPES are listed. Any other +obj+ raises.
  flag, noun, key, klass =
    case obj
    when :dataset
      ['type', 'datasets', :dataset_type, MiGA::Dataset]
    when :project
      ['project-type', 'project', :project_type, MiGA::Project]
    else
      raise "Unrecognized object type: #{obj}"
    end

  # Types flagged as multi are only listed when +multi+ is truthy
  listed = klass.KNOWN_TYPES.reject { |_, type| !multi && type[:multi] }
  descriptions = listed.map { |name, type| "~ #{name}: #{type[:description]}" }

  # The parsed value is stored as a downcased symbol
  opt.on(
    "--#{flag} STRING",
    "Type of #{noun}. By default: #{cli[key]}",
    'Recognized types:',
    *descriptions
  ) { |v| cli[key] = v.downcase.to_sym }
end
209
227
  end