miga-base 0.7.21.0 → 0.7.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -0
  5. data/lib/miga/cli/action/add.rb +1 -2
  6. data/lib/miga/cli/action/classify_wf.rb +12 -11
  7. data/lib/miga/cli/action/derep_wf.rb +3 -9
  8. data/lib/miga/cli/action/edit.rb +0 -1
  9. data/lib/miga/cli/action/find.rb +1 -1
  10. data/lib/miga/cli/action/generic.rb +1 -1
  11. data/lib/miga/cli/action/get.rb +7 -2
  12. data/lib/miga/cli/action/get_db.rb +16 -21
  13. data/lib/miga/cli/action/init.rb +41 -93
  14. data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
  15. data/lib/miga/cli/action/init/files_helper.rb +118 -0
  16. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  17. data/lib/miga/cli/action/new.rb +15 -9
  18. data/lib/miga/cli/action/option.rb +44 -0
  19. data/lib/miga/cli/action/quality_wf.rb +3 -3
  20. data/lib/miga/cli/action/tax_dist.rb +1 -1
  21. data/lib/miga/cli/action/tax_test.rb +1 -1
  22. data/lib/miga/cli/action/wf.rb +32 -30
  23. data/lib/miga/cli/base.rb +1 -0
  24. data/lib/miga/cli/objects_helper.rb +23 -18
  25. data/lib/miga/common.rb +4 -2
  26. data/lib/miga/common/net.rb +74 -0
  27. data/lib/miga/common/with_option.rb +83 -0
  28. data/lib/miga/common/with_result.rb +3 -2
  29. data/lib/miga/dataset/base.rb +20 -2
  30. data/lib/miga/dataset/result.rb +3 -2
  31. data/lib/miga/metadata.rb +25 -13
  32. data/lib/miga/project/base.rb +82 -2
  33. data/lib/miga/project/result.rb +4 -4
  34. data/lib/miga/remote_dataset.rb +2 -0
  35. data/lib/miga/result/stats.rb +2 -2
  36. data/lib/miga/version.rb +4 -2
  37. data/scripts/aai_distances.bash +1 -1
  38. data/scripts/ani_distances.bash +1 -1
  39. data/scripts/essential_genes.bash +1 -2
  40. data/scripts/haai_distances.bash +1 -1
  41. data/scripts/mytaxa.bash +6 -5
  42. data/scripts/mytaxa_scan.bash +8 -7
  43. data/scripts/ogs.bash +2 -3
  44. data/scripts/ssu.bash +16 -2
  45. data/test/dataset_test.rb +5 -5
  46. data/test/net_test.rb +34 -0
  47. data/test/with_option_test.rb +115 -0
  48. data/utils/cleanup-databases.rb +2 -3
  49. data/utils/distance/commands.rb +2 -2
  50. data/utils/distance/database.rb +1 -1
  51. data/utils/distance/pipeline.rb +2 -4
  52. data/utils/distance/runner.rb +15 -23
  53. data/utils/index_metadata.rb +1 -2
  54. data/utils/requirements.txt +6 -5
  55. data/utils/subclade/runner.rb +10 -11
  56. metadata +9 -3
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/ftp'
4
+ require 'open-uri'
5
+ require 'fileutils'
6
+
7
+ Net::FTP.send(:remove_const, 'FTP_PORT') # just to avoid warnings
8
+ Net::FTP.const_set('FTP_PORT', 21)
9
+
10
+ ##
11
+ # General web-access functions shared throughout MiGA.
12
+ module MiGA::Common::Net
13
+ ##
14
+ # Returns the URL of the host +name+ (Symbol)
15
+ def known_hosts(name)
16
+ case name.to_sym
17
+ when :miga_online_ftp
18
+ 'ftp://microbial-genomes.org//' # <- // to simplify chdir in connection
19
+ when :miga_db
20
+ 'ftp://microbial-genomes.org/db'
21
+ when :miga_dist
22
+ 'ftp://microbial-genomes.org/dist'
23
+ else
24
+ raise "Unrecognized server name: #{host}"
25
+ end
26
+ end
27
+
28
+ ##
29
+ # Connect to an FTP +host+ (String) or a known host name (Symbol, see
30
+ # +.known_hosts+)
31
+ def remote_connection(host)
32
+ host = known_hosts(host) if host.is_a?(Symbol)
33
+ uri = URI.parse(host)
34
+ raise 'Only FTP hosts are currently supported' unless uri.scheme == 'ftp'
35
+
36
+ ftp = Net::FTP.new(uri.host)
37
+ ftp.passive = true
38
+ ftp.login
39
+ ftp.chdir(uri.path)
40
+ ftp
41
+ end
42
+
43
+ ##
44
+ # Download a file via FTP using the +connection+ (returned by
45
+ # +.remote_connection+) with remote name +file+ into local +target+.
46
+ #
47
+ # Alternatively, +connection+ can simply be the host (String) or a recognized
48
+ # Symbol (see +.remote_connection+), in which case the function opens the
49
+ # connection automatically
50
+ #
51
+ # Reports progress to the function block with two arguments: the
52
+ # currently transferred size and the total file size
53
+ def download_file_ftp(connection, file, target)
54
+ # Open connection unless passed
55
+ close_conn = false
56
+ if connection.is_a?(String) || connection.is_a?(Symbol)
57
+ connection = remote_connection(connection)
58
+ close_conn = true
59
+ end
60
+
61
+ # Prepare download
62
+ FileUtils.mkdir_p(File.dirname(target))
63
+ filesize = connection.size(file)
64
+ transferred = 0
65
+
66
+ # Get in chunks of 1KiB
67
+ connection.getbinaryfile(file, target, 1024) do |data|
68
+ yield(transferred += data.size, filesize) if block_given?
69
+ end
70
+
71
+ # Close connection if automatically opened
72
+ connection.close if close_conn
73
+ end
74
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Helper module including specific functions to handle objects that
5
+ # have configurable options. The class including this module must implement
6
+ # the methods +.OPTIONS+, +#metadata+, and +#save+.
7
+ module MiGA::Common::WithOption
8
+ def option(key)
9
+ assert_has_option(key)
10
+ opt = option_by_metadata(key)
11
+ value = opt.nil? ? option_by_default(key) : opt
12
+ value = value[self] if value.is_a?(Proc)
13
+ value
14
+ end
15
+
16
+ def set_option(key, value, from_string = false)
17
+ metadata[key] = assert_valid_option_value(key, value, from_string)
18
+ save
19
+ option(key)
20
+ end
21
+
22
+ def all_options
23
+ Hash[self.class.OPTIONS.each_key.map { |key| [key, option(key)] }]
24
+ end
25
+
26
+ def option?(key)
27
+ !self.class.OPTIONS[key.to_sym].nil?
28
+ end
29
+
30
+ def option_by_metadata(key)
31
+ metadata[key]
32
+ end
33
+
34
+ def option_by_default(key)
35
+ self.class.OPTIONS[key.to_sym][:default]
36
+ end
37
+
38
+ def assert_has_option(key)
39
+ opt = self.class.OPTIONS[key.to_sym]
40
+ raise "Unrecognized option: #{key}" if opt.nil?
41
+ opt
42
+ end
43
+
44
+ def assert_valid_option_value(key, value, from_string = false)
45
+ opt = assert_has_option(key)
46
+ value = option_from_string(key, value) if from_string
47
+
48
+ # nil is always valid, and so are supported tokens
49
+ return value if value.nil? || opt[:tokens]&.include?(value)
50
+
51
+ if opt[:type] && !value.is_a?(opt[:type])
52
+ raise "Invalid value type for #{key}: #{value.class}, not #{opt[:type]}"
53
+ end
54
+
55
+ if opt[:in] && !opt[:in].include?(value)
56
+ raise "Value out of range for #{key}: #{value}, not #{opt[:in]}"
57
+ end
58
+
59
+ value
60
+ end
61
+
62
+ def option_from_string(key, value)
63
+ opt = assert_has_option(key)
64
+
65
+ if ['', 'nil'].include?(value)
66
+ nil
67
+ elsif opt[:tokens]&.include?(value)
68
+ value
69
+ elsif opt[:type]&.equal?(Float)
70
+ raise "Not a float: #{value}" unless value =~ /^-?\.?\d/
71
+ value.to_f
72
+ elsif opt[:type]&.equal?(Integer)
73
+ raise "Not an integer: #{value}" unless value =~ /^-?\d/
74
+ value.to_i
75
+ elsif opt[:in]&.include?(true) && value == 'true'
76
+ true
77
+ elsif opt[:in]&.include?(false) && value == 'false'
78
+ false
79
+ else
80
+ value
81
+ end
82
+ end
83
+ end
@@ -86,7 +86,8 @@ module MiGA::Common::WithResult
86
86
  if res.nil?
87
87
  # Run if the step has not been calculated,
88
88
  # unless too many attempts were already made
89
- if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
89
+ cur_try = metadata["_try_#{t}"] || 0
90
+ if cur_try > project.option(:max_try)
90
91
  inactivate! "Too many errors in step #{t}"
91
92
  false
92
93
  else
@@ -103,7 +104,7 @@ module MiGA::Common::WithResult
103
104
  ##
104
105
  # Mark all results for recalculation
105
106
  def recalculate_tasks(reason = nil)
106
- each_result { |res| res.recalculate!(reason).save }
107
+ each_result { |_k, res| res.recalculate!(reason).save }
107
108
  end
108
109
 
109
110
  end
@@ -1,7 +1,10 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/common/with_option'
3
4
 
4
5
  class MiGA::Dataset < MiGA::MiGA
6
+ include MiGA::Common::WithOption
7
+
5
8
  # Class-level
6
9
  class << self
7
10
  def RESULT_DIRS
@@ -15,6 +18,10 @@ class MiGA::Dataset < MiGA::MiGA
15
18
  def PREPROCESSING_TASKS
16
19
  @@PREPROCESSING_TASKS
17
20
  end
21
+
22
+ def OPTIONS
23
+ @@OPTIONS
24
+ end
18
25
  end
19
26
  end
20
27
 
@@ -85,4 +92,15 @@ module MiGA::Dataset::Base
85
92
  # tasks are ignored for single-organism datasets or for unknown types.
86
93
  @@ONLY_MULTI_TASKS = [:mytaxa]
87
94
  @@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map { |i| [i, true] }]
95
+
96
+ ##
97
+ # Options supported by datasets
98
+ @@OPTIONS = {
99
+ db_project: {
100
+ desc: 'Project to use as database', type: String
101
+ },
102
+ dist_req: {
103
+ desc: 'Run distances against these datasets', type: Array, default: []
104
+ }
105
+ }
88
106
  end
@@ -50,7 +50,7 @@ module MiGA::Dataset::Result
50
50
  :upstream
51
51
  elsif !metadata["run_#{task}"].nil?
52
52
  metadata["run_#{task}"] ? :execute : :force
53
- elsif task == :taxonomy && project.metadata[:ref_project].nil?
53
+ elsif task == :taxonomy && project.option(:ref_project).nil?
54
54
  :project
55
55
  elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !ref?
56
56
  :noref
@@ -290,7 +290,8 @@ module MiGA::Dataset::Result
290
290
  MiGA::Result.new("#{base}.json"), name,
291
291
  longest_ssu_gene: '.ssu.fa',
292
292
  gff: '.ssu.gff',
293
- all_ssu_genes: '.ssu.all.fa'
293
+ all_ssu_genes: '.ssu.all.fa',
294
+ classification: '.rdp.tsv'
294
295
  )
295
296
  opts[:is_clean] ||= false
296
297
  r.clean! if opts[:is_clean]
data/lib/miga/metadata.rb CHANGED
@@ -56,24 +56,20 @@ class MiGA::Metadata < MiGA::MiGA
56
56
  ##
57
57
  # Save the metadata into #path
58
58
  def save
59
- MiGA.DEBUG "Metadata.save #{path}"
59
+ return if self[:never_save]
60
+
61
+ MiGA::MiGA.DEBUG "Metadata.save #{path}"
60
62
  self[:updated] = Time.now.to_s
61
63
  json = to_json
62
- sleeper = 0.0
63
- slept = 0
64
- while File.exist?(lock_file)
65
- MiGA::MiGA.DEBUG "Waiting for lock: #{lock_file}"
66
- sleeper += 0.1 if sleeper <= 10.0
67
- sleep(sleeper.to_i)
68
- slept += sleeper.to_i
69
- raise "Lock detected for over 10 minutes: #{lock_file}" if slept > 600
70
- end
71
- FileUtils.touch lock_file
64
+ wait_for_lock
65
+ FileUtils.touch(lock_file)
72
66
  ofh = File.open("#{path}.tmp", 'w')
73
67
  ofh.puts json
74
68
  ofh.close
75
- raise "Lock-racing detected for #{path}" unless
76
- File.exist?("#{path}.tmp") and File.exist?(lock_file)
69
+
70
+ unless File.exist?("#{path}.tmp") && File.exist?(lock_file)
71
+ raise "Lock-racing detected for #{path}"
72
+ end
77
73
 
78
74
  File.rename("#{path}.tmp", path)
79
75
  File.unlink(lock_file)
@@ -154,4 +150,20 @@ class MiGA::Metadata < MiGA::MiGA
154
150
  def to_json
155
151
  MiGA::Json.generate(data)
156
152
  end
153
+
154
+ private
155
+
156
+ ##
157
+ # Wait for the lock to go away
158
+ def wait_for_lock
159
+ sleeper = 0.0
160
+ slept = 0.0
161
+ while File.exist?(lock_file)
162
+ MiGA::MiGA.DEBUG "Waiting for lock: #{lock_file}"
163
+ sleeper += 0.1 if sleeper <= 10.0
164
+ sleep(sleeper)
165
+ slept += sleeper
166
+ raise "Lock detected for over 10 minutes: #{lock_file}" if slept > 600
167
+ end
168
+ end
157
169
  end
@@ -1,7 +1,10 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/common/with_option'
3
4
 
4
5
  class MiGA::Project < MiGA::MiGA
6
+ include MiGA::Common::WithOption
7
+
5
8
  class << self
6
9
  ##
7
10
  # Does the project at +path+ exist?
@@ -33,6 +36,10 @@ class MiGA::Project < MiGA::MiGA
33
36
  def RESULT_DIRS
34
37
  @@RESULT_DIRS
35
38
  end
39
+
40
+ def OPTIONS
41
+ @@OPTIONS
42
+ end
36
43
  end
37
44
  end
38
45
 
@@ -108,4 +115,77 @@ module MiGA::Project::Base
108
115
  ##
109
116
  # Project-wide tasks for :clade projects
110
117
  @@INCLADE_TASKS = [:subclades, :ogs]
118
+
119
+ ##
120
+ # Options supported by projects
121
+ @@OPTIONS = {
122
+ ref_project: {
123
+ desc: 'Project with reference taxonomy', type: String
124
+ },
125
+ db_proj_dir: {
126
+ desc: 'Directory containing database projects', type: String
127
+ },
128
+ tax_pvalue: {
129
+ desc: 'Maximum p-value to transfer taxonomy', default: 0.05, type: Float,
130
+ in: 0.0..1.0
131
+ },
132
+ haai_p: {
133
+ desc: 'Value of aai.rb -p on hAAI', type: String,
134
+ default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
+ in: %w[blast+ blast blat diamond no]
136
+ },
137
+ aai_p: {
138
+ desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
139
+ in: %w[blast+ blast blat diamond]
140
+ },
141
+ ani_p: {
142
+ desc: 'Value of ani.rb -p on ANI', default: 'blast+', type: String,
143
+ in: %w[blast+ blast blat fastani]
144
+ },
145
+ max_try: {
146
+ desc: 'Maximum number of task attempts', default: 10, type: Integer,
147
+ in: (0..1000)
148
+ },
149
+ aai_save_rbm: {
150
+ desc: 'Should RBMs be saved for OGS analysis?',
151
+ default: proc { |project| project.clade? },
152
+ in: [true, false]
153
+ },
154
+ ogs_identity: {
155
+ desc: 'Min RBM identity for OGS', default: 80.0, type: Float,
156
+ in: (0.0..100.0)
157
+ },
158
+ clean_ogs: {
159
+ desc: 'If false, keeps ABC files (clades only)', default: true,
160
+ in: [true, false]
161
+ },
162
+ run_clades: {
163
+ desc: 'Should clades be estimated from distances?', default: true,
164
+ in: [true, false]
165
+ },
166
+ gsp_ani: {
167
+ desc: 'ANI limit to propose gsp clades', default: 95.0, type: Float,
168
+ in: (0.0..100.0)
169
+ },
170
+ gsp_aai: {
171
+ desc: 'AAI limit to propose gsp clades', default: 90.0, type: Float,
172
+ in: (0.0..100.0)
173
+ },
174
+ gsp_metric: {
175
+ desc: 'Metric to propose clades', default: 'ani', type: String,
176
+ in: %w[ani aai]
177
+ },
178
+ ess_coll: {
179
+ desc: 'Collection of essential genes to use', default: 'dupont_2012',
180
+ type: String, in: %w[dupont_2012 lee_2019]
181
+ },
182
+ min_qual: {
183
+ desc: 'Minimum genome quality', default: 25.0, type: Float,
184
+ in: -Float::INFINITY..100.0, tokens: %w[no]
185
+ },
186
+ distances_checkpoint: {
187
+ desc: 'Number of comparisons before storing data', default: 10,
188
+ type: Integer, in: 1...Float::INFINITY
189
+ }
190
+ }
111
191
  end
@@ -31,9 +31,9 @@ module MiGA::Project::Result
31
31
  ##
32
32
  # Is this +task+ to be bypassed?
33
33
  def ignore_task?(task)
34
- metadata["run_#{task}"] == false ||
35
- (!is_clade? && @@INCLADE_TASKS.include?(task) &&
36
- metadata["run_#{task}"] != true)
34
+ return true if metadata["run_#{task}"] == false
35
+
36
+ !clade? && @@INCLADE_TASKS.include?(task) && metadata["run_#{task}"] != true
37
37
  end
38
38
 
39
39
  ##
@@ -74,7 +74,7 @@ module MiGA::Project::Result
74
74
  return r
75
75
  end
76
76
  return nil unless result_files_exist?(base, %w[.proposed-clades])
77
- unless is_clade? ||
77
+ unless clade? ||
78
78
  result_files_exist?(
79
79
  base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
80
80
  )
@@ -162,6 +162,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
162
162
  txt.empty? ? sleep(1) : break
163
163
  end
164
164
  doc = MiGA::Json.parse(txt, symbolize: false, contents: true)
165
+ return if doc.nil? || doc['result'].nil? || doc['result'].empty?
166
+
165
167
  @_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
166
168
  end
167
169
 
@@ -118,7 +118,7 @@ module MiGA::Result::Stats
118
118
 
119
119
  def compute_stats_essential_genes
120
120
  stats = {}
121
- if source.is_multi?
121
+ if source.multi?
122
122
  stats = { median_copies: 0, mean_copies: 0 }
123
123
  File.open(file_path(:report), 'r') do |fh|
124
124
  fh.each_line do |ln|
@@ -151,7 +151,7 @@ module MiGA::Result::Stats
151
151
  source.save
152
152
 
153
153
  # Inactivate low-quality datasets
154
- min_qual = (project.metadata[:min_qual] || 25)
154
+ min_qual = project.option(:min_qual)
155
155
  if min_qual != 'no' && stats[:quality] < min_qual
156
156
  source.inactivate! 'Low quality genome'
157
157
  end
data/lib/miga/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
 
3
5
  ##
@@ -8,7 +10,7 @@ module MiGA
8
10
  # - Float representing the major.minor version.
9
11
  # - Integer representing gem releases of the current version.
10
12
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 21, 0]
13
+ VERSION = [0.7, 24, 0].freeze
12
14
 
13
15
  ##
14
16
  # Nickname for the current major.minor version.
@@ -16,7 +18,7 @@ module MiGA
16
18
 
17
19
  ##
18
20
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2021, 1, 13)
21
+ VERSION_DATE = Date.new(2021, 2, 16)
20
22
 
21
23
  ##
22
24
  # Reference of MiGA.