miga-base 0.7.24.0 → 0.7.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82ca84b468df712ab38eba74071c26e8dcb1d87335764dde2ba86075abc8c8d5
4
- data.tar.gz: 36583e298fc020faa5cc16af2f6a3fcc88c30619a4f529fd6f03772f6c437bb9
3
+ metadata.gz: fefd5ed0ec84f2b49d25243ecaec7e567000c04598605ba8a78d0a866c35e5b1
4
+ data.tar.gz: b0f473401ccf64d31fcfc0ed19b54a2b84d29c138e3f983a69083659fcfe547e
5
5
  SHA512:
6
- metadata.gz: d78a3709fecba4594d8dc9a2351f217b3605022237e424eb1a194ccf62ddf2059eb1df274f68a15cda74936dfece3f2123485483773172cc62cfba9ff454c5d2
7
- data.tar.gz: fbf11b04c062701b204f4ba93425dea6f8ff1492fe48f7182472e65b16fccbde98e86d396a79cf51c6e410a246c3b9e9509043eaf952b320191ec086e2efa573
6
+ metadata.gz: 2520f7169e91e38aed0027e679d0cbc126271a6948ce27916bef9a34299eb50df249568d06bbdb09c5ff13754ec7628d48d7154a18c5bf363628b69d241757c2
7
+ data.tar.gz: 0e684e321ec13e022f95d0499c2c83fea6230bbcadbaacce226e20423cda9ea7466fac929b579e29b6a50745b243da53b9d1697a5e08876b2d423d577205e269
@@ -6,10 +6,7 @@ require 'miga/cli/action'
6
6
  class MiGA::Cli::Action::Add < MiGA::Cli::Action
7
7
  def parse_cli
8
8
  cli.expect_files = true
9
- cli.defaults = {
10
- ref: true, ignore_dups: false,
11
- regexp: MiGA::Cli.FILE_REGEXP
12
- }
9
+ cli.defaults = { ref: true, ignore_dups: false }
13
10
  cli.parse do |opt|
14
11
  opt.separator 'You can create multiple datasets with a single command; ' \
15
12
  'simply pass all the files at the end: {FILES...}'
@@ -37,7 +34,10 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
37
34
  opt.on(
38
35
  '-R', '--name-regexp REGEXP', Regexp,
39
36
  'Regular expression indicating how to extract the name from the path',
40
- "By default: '#{cli[:regexp]}'"
37
+ 'By default for paired files:',
38
+ "'#{MiGA::Cli.FILE_REGEXP(true)}'",
39
+ 'By default for other files:',
40
+ "'#{MiGA::Cli.FILE_REGEXP}'"
41
41
  ) { |v| cli[:regexp] = v }
42
42
  opt.on(
43
43
  '--prefix STRING',
@@ -59,6 +59,9 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
59
59
  p = cli.load_project
60
60
  files, file_type = get_files_and_type
61
61
 
62
+ paired = cli[:input_type].to_s.include?('_paired')
63
+ cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
64
+
62
65
  cli.say 'Creating datasets:'
63
66
  files.each do |file|
64
67
  d = create_dataset(file, p)
@@ -166,7 +169,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
166
169
  file_type[2].each_with_index do |ext, i|
167
170
  gz = file[i] =~ /\.gz/ ? '.gz' : ''
168
171
  FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
169
- cli.say " file: #{file[i]}"
172
+ cli.say " file: #{File.basename(file[i])}"
170
173
  end
171
174
  File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
172
175
  end
@@ -55,7 +55,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
55
55
  { run_project_stats: false, run_clades: false },
56
56
  { run_mytaxa_scan: false, run_ssu: false }
57
57
  )
58
- p.set_option(:gsp_metric, cli[:metric])
58
+ p.set_option(:gsp_metric, cli[:metric].to_s)
59
59
  p.set_option(:"gsp_#{cli[:metric]}", cli[:threshold])
60
60
 
61
61
  # Run
@@ -16,8 +16,10 @@ class MiGA::Cli::Action::IndexWf < MiGA::Cli::Action
16
16
  'Perform MyTaxa scan analysis'
17
17
  ) { |v| cli[:mytaxa] = v }
18
18
  opts_for_wf_distances(opt)
19
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)',
20
- cleanup: false, project_type: true)
19
+ opts_for_wf(
20
+ opt, 'Input genome assemblies (nucleotides, FastA)',
21
+ cleanup: false, project_type: true
22
+ )
21
23
  end
22
24
  end
23
25
 
@@ -61,8 +61,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
61
61
  check_configuration_script(rc_fh)
62
62
  paths = check_software_requirements(rc_fh)
63
63
  check_additional_files(paths)
64
- check_r_packages(paths)
65
- check_ruby_gems(paths)
64
+ check_libraries(paths)
66
65
  configure_daemon
67
66
  close_rc_file(rc_fh)
68
67
  cli.puts 'Configuration complete. MiGA is ready to work!'
@@ -83,34 +82,6 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
83
82
  )
84
83
  end
85
84
 
86
- def test_r_package(cli, paths, pkg)
87
- run_r_cmd(cli, paths, "library('#{pkg}')")
88
- $?.success?
89
- end
90
-
91
- def install_r_package(cli, paths, pkg)
92
- r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
93
- run_r_cmd(cli, paths, r_cmd)
94
- end
95
-
96
- def test_ruby_gem(cli, paths, pkg)
97
- run_cmd(
98
- cli,
99
- "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
100
- )
101
- $?.success?
102
- end
103
-
104
- def install_ruby_gem(cli, paths, pkg)
105
- gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
106
- run_cmd(
107
- cli,
108
- "#{paths['ruby'].shellescape} \
109
- -r rubygems -r rubygems/gem_runner \
110
- -e #{gem_cmd.shellescape} 2>&1"
111
- )
112
- end
113
-
114
85
  def list_requirements
115
86
  if cli.ask_user(
116
87
  'Would you like to see all the requirements before starting?',
@@ -189,40 +160,70 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
189
160
  path
190
161
  end
191
162
 
192
- def check_r_packages(paths)
193
- cli.puts 'Looking for R packages:'
194
- %w(ape cluster vegan).each do |pkg|
195
- cli.print "Testing #{pkg}... "
196
- if test_r_package(cli, paths, pkg)
197
- cli.puts 'yes'
198
- else
199
- cli.puts 'no, installing'
200
- cli.print '' + install_r_package(cli, paths, pkg)
201
- unless test_r_package(cli, paths, pkg)
202
- raise "Unable to auto-install R package: #{pkg}"
203
- end
163
+ def check_libraries(paths)
164
+ req_libraries = {
165
+ r: %w[ape cluster vegan],
166
+ ruby: %w[sqlite3 daemons json],
167
+ python: %w[numpy]
168
+ }
169
+
170
+ req_libraries.each do |language, libraries|
171
+ cli.puts "Looking for #{language.to_s.capitalize} libraries:"
172
+ libraries.each do |lib|
173
+ check_and_install_library(paths, language, lib)
204
174
  end
175
+ cli.puts ''
205
176
  end
206
- cli.puts ''
207
177
  end
208
178
 
209
- def check_ruby_gems(paths)
210
- cli.puts 'Looking for Ruby gems:'
211
- %w(sqlite3 daemons json).each do |pkg|
212
- cli.print "Testing #{pkg}... "
213
- if test_ruby_gem(cli, paths, pkg)
214
- cli.puts 'yes'
215
- else
216
- cli.puts 'no, installing'
217
- # This hackey mess is meant to ensure the test and installation are done
218
- # on the configuration Ruby, not on the Ruby currently executing the
219
- # init action
220
- cli.print install_ruby_gem(cli, paths, pkg)
221
- unless test_ruby_gem(cli, paths, pkg)
222
- raise "Unable to auto-install Ruby gem: #{pkg}"
223
- end
179
+ def check_and_install_library(paths, language, library)
180
+ cli.print "Testing #{library}... "
181
+ if test_library(cli, paths, language, library)
182
+ cli.puts 'yes'
183
+ else
184
+ cli.puts 'no, installing'
185
+ cli.print '' + install_library(cli, paths, language, library)
186
+ unless test_library(cli, paths, language, library)
187
+ raise "Cannot install #{language.to_s.capitalize} library: #{library}"
224
188
  end
225
189
  end
226
- cli.puts ''
190
+ end
191
+
192
+ def test_library(cli, paths, language, pkg)
193
+ case language
194
+ when :r
195
+ run_r_cmd(cli, paths, "library('#{pkg}')")
196
+ when :ruby
197
+ x = "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
198
+ run_cmd(cli, x)
199
+ when :python
200
+ x = "#{paths['python3'].shellescape} -c 'import #{pkg}' 2>/dev/null"
201
+ run_cmd(cli, x)
202
+ else
203
+ raise "Unrecognized language: #{language}"
204
+ end
205
+ $?.success?
206
+ end
207
+
208
+ def install_library(cli, paths, language, pkg)
209
+ case language
210
+ when :r
211
+ r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
212
+ run_r_cmd(cli, paths, r_cmd)
213
+ when :ruby
214
+ # This hackey mess is meant to ensure the test and installation are done
215
+ # on the configuration Ruby, not on the Ruby currently executing the
216
+ # init action
217
+ gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
218
+ x = "#{paths['ruby'].shellescape} -r rubygems -r rubygems/gem_runner \
219
+ -e #{gem_cmd.shellescape} 2>&1"
220
+ run_cmd(cli, x)
221
+ when :python
222
+ x = "#{paths['python3'].shellescape} \
223
+ -m pip install #{pkg.shellescape} 2>&1"
224
+ run_cmd(cli, x)
225
+ else
226
+ raise "Unrecognized language: #{language}"
227
+ end
227
228
  end
228
229
  end
@@ -26,7 +26,7 @@ module MiGA::Cli::Action::Init::FilesHelper
26
26
  def close_rc_file(rc_fh)
27
27
  rc_fh.puts <<~FOOT
28
28
 
29
- MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
29
+ MIGA_CONFIG_VERSION='#{MiGA::MiGA.FULL_VERSION}'
30
30
  MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
31
31
  MIGA_CONFIG_DATE='#{Time.now}'
32
32
 
@@ -59,6 +59,7 @@ module MiGA::Cli::Action::Init::FilesHelper
59
59
  end
60
60
  check_rdp_classifier if cli[:rdp]
61
61
  check_phyla_lite
62
+ cli.puts ''
62
63
  end
63
64
 
64
65
  def check_mytaxa_scores(paths)
@@ -21,18 +21,20 @@ class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
21
21
  '-m', '--mytaxa_scan',
22
22
  'Perform MyTaxa scan analysis'
23
23
  ) { |v| cli[:mytaxa] = v }
24
- opts_for_wf(opt, 'Input files as defined by --input-type',
25
- multi: true, cleanup: false, ncbi: false)
24
+ opts_for_wf(
25
+ opt, 'Input files as defined by --input-type',
26
+ multi: true, cleanup: false, ncbi: false, project_type: true
27
+ )
26
28
  end
27
29
  end
28
30
 
29
31
  def perform
30
32
  # Input data
31
33
  cli.ensure_par(input_type: '-i')
32
- p_metadata = Hash[
33
- %w[project_stats haai_distances aai_distances ani_distances clade_finding]
34
- .map { |i| ["run_#{i}", false] }
34
+ norun = %w[
35
+ project_stats haai_distances aai_distances ani_distances clade_finding
35
36
  ]
37
+ p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
36
38
  d_metadata = { run_distances: false }
37
39
  unless cli[:mytaxa]
38
40
  d_metadata[:run_mytaxa_scan] = false
@@ -7,8 +7,7 @@ module MiGA::Cli::Action::Wf
7
7
  def default_opts_for_wf
8
8
  cli.expect_files = true
9
9
  cli.defaults = {
10
- clean: false, regexp: MiGA::Cli.FILE_REGEXP,
11
- project_type: :genomes, dataset_type: :popgenome,
10
+ clean: false, project_type: :genomes, dataset_type: :popgenome,
12
11
  ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
13
12
  }
14
13
  end
@@ -56,28 +55,10 @@ module MiGA::Cli::Action::Wf
56
55
  opt.on(
57
56
  '-R', '--name-regexp REGEXP', Regexp,
58
57
  'Regular expression indicating how to extract the name from the path',
59
- "By default: '#{cli[:regexp]}'"
58
+ "By default: '#{MiGA::Cli.FILE_REGEXP}'"
60
59
  ) { |v| cli[:regexp] = v }
61
- opt.on(
62
- '--type STRING',
63
- "Type of datasets. By default: #{cli[:dataset_type]}",
64
- 'Recognized types:',
65
- *MiGA::Dataset.KNOWN_TYPES
66
- .map do |k, v|
67
- "~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
68
- end.compact
69
- ) { |v| cli[:dataset_type] = v.downcase.to_sym }
70
- if params[:project_type]
71
- opt.on(
72
- '--project-type STRING',
73
- "Type of project. By default: #{cli[:project_type]}",
74
- 'Recognized types:',
75
- *MiGA::Project.KNOWN_TYPES
76
- .map do |k, v|
77
- "~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
78
- end.compact
79
- ) { |v| cli[:project_type] = v.downcase.to_sym }
80
- end
60
+ opt_object_type(opt, :dataset, params[:multi])
61
+ opt_object_type(opt, :project, params[:multi]) if params[:project_type]
81
62
  opt.on(
82
63
  '--daemon PATH',
83
64
  'Use custom daemon configuration in JSON format',
@@ -124,10 +105,14 @@ module MiGA::Cli::Action::Wf
124
105
  project_type: '--project-type',
125
106
  dataset_type: '--dataset-type'
126
107
  )
108
+ paired = cli[:input_type].to_s.include?('_paired')
109
+ cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
110
+
127
111
  # Create empty project
128
112
  call_cli(
129
113
  ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
130
114
  ) unless MiGA::Project.exist? cli[:outdir]
115
+
131
116
  # Define project metadata
132
117
  p = cli.load_project(:outdir, '-o')
133
118
  p_metadata[:type] = cli[:project_type]
@@ -135,6 +120,7 @@ module MiGA::Cli::Action::Wf
135
120
  %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
136
121
  p.set_option(i, cli[i])
137
122
  end
123
+
138
124
  # Download datasets
139
125
  unless cli[:ncbi_taxon].nil?
140
126
  what = cli[:ncbi_draft] ? '--all' : '--complete'
@@ -142,6 +128,7 @@ module MiGA::Cli::Action::Wf
142
128
  ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
143
129
  )
144
130
  end
131
+
145
132
  # Add datasets
146
133
  call_cli(
147
134
  [
@@ -153,6 +140,7 @@ module MiGA::Cli::Action::Wf
153
140
  '-R', cli[:regexp]
154
141
  ] + cli.files
155
142
  ) unless cli.files.empty?
143
+
156
144
  # Define datasets metadata
157
145
  p.load
158
146
  d_metadata[:type] = cli[:dataset_type]
@@ -208,4 +196,32 @@ module MiGA::Cli::Action::Wf
208
196
  md.each { |k, v| obj.metadata[k] = v }
209
197
  obj.save
210
198
  end
199
+
200
+ private
201
+
202
+ ##
203
+ # Add option --type or --project-type to +opt+
204
+ def opt_object_type(opt, obj, multi)
205
+ conf =
206
+ case obj
207
+ when :dataset
208
+ ['type', 'datasets', :dataset_type, MiGA::Dataset]
209
+ when :project
210
+ ['project-type', 'project', :project_type, MiGA::Project]
211
+ else
212
+ raise "Unrecognized object type: #{obj}"
213
+ end
214
+
215
+ options =
216
+ conf[3].KNOWN_TYPES.map do |k, v|
217
+ "~ #{k}: #{v[:description]}" unless !multi && v[:multi]
218
+ end.compact
219
+
220
+ opt.on(
221
+ "--#{conf[0]} STRING",
222
+ "Type of #{conf[1]}. By default: #{cli[conf[2]]}",
223
+ 'Recognized types:',
224
+ *options
225
+ ) { |v| cli[conf[2]] = v.downcase.to_sym }
226
+ end
211
227
  end
data/lib/miga/cli/base.rb CHANGED
@@ -88,19 +88,30 @@ module MiGA::Cli::Base
88
88
  @@EXECS = @@TASK_DESC.keys
89
89
 
90
90
  @@FILE_REGEXP =
91
- %r{^(?:.*/)?(.+?)(\.[A-Z]*([12]|Reads|Contigs))?(\.f[nastq]+)?$}i
91
+ %r{^(?:.*/)?(.+?)(\.[A-Z]*(Reads|Contigs))?(\.f[nastq]+)?(\.gz)?$}i
92
+
93
+ @@PAIRED_FILE_REGEXP =
94
+ %r{^(?:.*/)?(.+?)(\.[A-Z]*([12]|Reads))?(\.f[nastq]+)?(\.gz)?$}i
92
95
  end
93
96
 
94
97
  class MiGA::Cli < MiGA::MiGA
95
98
  include MiGA::Cli::Base
96
99
 
97
100
  class << self
98
- def TASK_DESC; @@TASK_DESC end
101
+ def TASK_DESC
102
+ @@TASK_DESC
103
+ end
99
104
 
100
- def TASK_ALIAS; @@TASK_ALIAS end
105
+ def TASK_ALIAS
106
+ @@TASK_ALIAS
107
+ end
101
108
 
102
- def EXECS; @@EXECS end
109
+ def EXECS
110
+ @@EXECS
111
+ end
103
112
 
104
- def FILE_REGEXP; @@FILE_REGEXP end
113
+ def FILE_REGEXP(paired = false)
114
+ paired ? @@PAIRED_FILE_REGEXP : @@FILE_REGEXP
115
+ end
105
116
  end
106
117
  end
@@ -53,7 +53,7 @@ module MiGA::Common::WithOption
53
53
  end
54
54
 
55
55
  if opt[:in] && !opt[:in].include?(value)
56
- raise "Value out of range for #{key}: #{value}, not #{opt[:in]}"
56
+ raise "Value out of range for #{key}: #{value}, not in #{opt[:in]}"
57
57
  end
58
58
 
59
59
  value
@@ -276,7 +276,8 @@ module MiGA::Dataset::Result
276
276
  ess_genes: '.ess.faa',
277
277
  collection: '.ess',
278
278
  report: '.ess/log',
279
- alignments: '.ess/proteins.aln'
279
+ alignments: '.ess/proteins.aln',
280
+ fastaai_index: '.faix.db.gz'
280
281
  )
281
282
  end
282
283
 
@@ -132,7 +132,7 @@ module MiGA::Project::Base
132
132
  haai_p: {
133
133
  desc: 'Value of aai.rb -p on hAAI', type: String,
134
134
  default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
- in: %w[blast+ blast blat diamond no]
135
+ in: %w[fastaai blast+ blast blat diamond no]
136
136
  },
137
137
  aai_p: {
138
138
  desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,