miga-base 0.7.23.0 → 0.7.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/Rakefile +1 -0
  4. data/lib/miga/cli/action/add.rb +1 -2
  5. data/lib/miga/cli/action/classify_wf.rb +12 -11
  6. data/lib/miga/cli/action/derep_wf.rb +3 -9
  7. data/lib/miga/cli/action/edit.rb +0 -1
  8. data/lib/miga/cli/action/find.rb +1 -1
  9. data/lib/miga/cli/action/generic.rb +1 -1
  10. data/lib/miga/cli/action/get.rb +7 -2
  11. data/lib/miga/cli/action/ncbi_get.rb +1 -1
  12. data/lib/miga/cli/action/new.rb +15 -9
  13. data/lib/miga/cli/action/option.rb +44 -0
  14. data/lib/miga/cli/action/quality_wf.rb +3 -3
  15. data/lib/miga/cli/action/tax_dist.rb +1 -1
  16. data/lib/miga/cli/action/tax_test.rb +1 -1
  17. data/lib/miga/cli/action/wf.rb +32 -30
  18. data/lib/miga/cli/base.rb +1 -0
  19. data/lib/miga/cli/objects_helper.rb +23 -18
  20. data/lib/miga/common.rb +1 -1
  21. data/lib/miga/common/with_option.rb +83 -0
  22. data/lib/miga/common/with_result.rb +2 -1
  23. data/lib/miga/dataset/base.rb +20 -2
  24. data/lib/miga/dataset/result.rb +1 -1
  25. data/lib/miga/metadata.rb +25 -13
  26. data/lib/miga/project/base.rb +82 -2
  27. data/lib/miga/project/result.rb +4 -4
  28. data/lib/miga/result/stats.rb +2 -2
  29. data/lib/miga/version.rb +2 -2
  30. data/scripts/essential_genes.bash +1 -2
  31. data/scripts/ogs.bash +2 -3
  32. data/test/dataset_test.rb +5 -5
  33. data/test/with_option_test.rb +115 -0
  34. data/utils/cleanup-databases.rb +1 -2
  35. data/utils/distance/commands.rb +2 -2
  36. data/utils/distance/database.rb +1 -1
  37. data/utils/distance/pipeline.rb +2 -4
  38. data/utils/distance/runner.rb +15 -23
  39. data/utils/index_metadata.rb +1 -2
  40. data/utils/subclade/runner.rb +9 -10
  41. metadata +6 -3
data/lib/miga/common.rb CHANGED
@@ -24,7 +24,7 @@ class MiGA::MiGA
24
24
  ##
25
25
  # Has MiGA been initialized?
26
26
  def self.initialized?
27
- File.exist?(File.expand_path('.miga_rc', ENV['MIGA_HOME'])) and
27
+ File.exist?(File.expand_path('.miga_rc', ENV['MIGA_HOME'])) &&
28
28
  File.exist?(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
29
29
  end
30
30
 
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Helper module including specific functions to handle objects that
5
+ # have configurable options. The class including this module must implement
6
+ # the methods +.OPTIONS+, +#metadata+, and +#save+.
7
+ module MiGA::Common::WithOption
8
##
# Current value of the option +key+.
#
# The value stored in #metadata is used when it is not nil; otherwise the
# default declared in +.OPTIONS+ applies. If the resulting value is a Proc,
# it is called with +self+ and the result of that call is returned.
# Raises if +key+ is not a recognized option.
def option(key)
  assert_has_option(key)
  stored = option_by_metadata(key)
  resolved = stored.nil? ? option_by_default(key) : stored
  return resolved unless resolved.is_a?(Proc)

  resolved[self]
end
15
+
16
##
# Validate +value+ for the option +key+, store it in #metadata, call #save,
# and return the resulting value of the option as reported by #option.
# When +from_string+ is true, +value+ is first parsed from its String form.
# Raises (before anything is stored or saved) if the value is not valid.
def set_option(key, value, from_string = false)
  checked = assert_valid_option_value(key, value, from_string)
  metadata[key] = checked
  save
  option(key)
end
21
+
22
##
# Hash with every option key declared in +.OPTIONS+ mapped to its current
# value, as returned by #option.
def all_options
  self.class.OPTIONS.each_key.with_object({}) do |name, values|
    values[name] = option(name)
  end
end
25
+
26
##
# Is +key+ (String or Symbol) one of the options declared in +.OPTIONS+?
def option?(key)
  definition = self.class.OPTIONS[key.to_sym]
  !definition.nil?
end
29
+
30
##
# Value registered for +key+ in #metadata, without consulting defaults.
# The key is passed to #metadata exactly as received.
def option_by_metadata(key)
  stored = metadata[key]
  stored
end
33
+
34
##
# Default value declared in +.OPTIONS+ for the option +key+ (String or
# Symbol), or nil if the option declares no default. Proc defaults are
# returned as-is (not called).
#
# Raises "Unrecognized option" if +key+ is not a declared option.
def option_by_default(key)
  default = assert_has_option(key)[:default]

  # Array and Hash defaults live in the shared option table; return a copy
  # so that callers mutating the result cannot alter the declared default
  mutable = default.is_a?(Array) || default.is_a?(Hash)
  mutable ? default.dup : default
end
37
+
38
##
# Returns the definition Hash registered in +.OPTIONS+ for +key+ (String or
# Symbol). Raises "Unrecognized option" if there is no such option.
def assert_has_option(key)
  self.class.OPTIONS[key.to_sym].tap do |definition|
    raise "Unrecognized option: #{key}" if definition.nil?
  end
end
43
+
44
##
# Check that +value+ is acceptable for the option +key+ and return it.
# When +from_string+ is true, +value+ is first converted with
# #option_from_string and the converted value is the one checked and
# returned.
#
# nil and any value listed in the option's +:tokens+ are always accepted.
# Otherwise the value must be an instance of +:type+ (if declared) and be
# included in +:in+ (if declared). Raises if the option is not recognized or
# the value fails either check.
def assert_valid_option_value(key, value, from_string = false)
  opt = assert_has_option(key)
  value = option_from_string(key, value) if from_string

  return value if value.nil?
  return value if opt[:tokens]&.include?(value)

  expected = opt[:type]
  if expected && !value.is_a?(expected)
    raise "Invalid value type for #{key}: #{value.class}, not #{expected}"
  end

  allowed = opt[:in]
  if allowed && !allowed.include?(value)
    raise "Value out of range for #{key}: #{value}, not #{allowed}"
  end

  value
end
61
+
62
##
# Convert the String +value+ into the Ruby value expected by option +key+.
#
# - '' and 'nil' become nil.
# - Values listed in the option's +:tokens+ are returned unchanged.
# - For +type: Float+ and +type: Integer+ options the whole string must be a
#   number (optionally negative); it is returned as Float or Integer.
# - 'true' and 'false' become booleans when the option's +:in+ includes them.
# - Anything else is returned unchanged.
#
# Raises if +key+ is not a recognized option or if a numeric option receives
# a string that is not entirely a number of the expected kind.
def option_from_string(key, value)
  opt = assert_has_option(key)

  if ['', 'nil'].include?(value)
    nil
  elsif opt[:tokens]&.include?(value)
    value
  elsif opt[:type]&.equal?(Float)
    # Anchor on the whole string (\A..\z, not per-line ^) so that inputs such
    # as "1abc" or "abc\n5" are rejected instead of being partially parsed.
    # Accepted forms: 5, 5., 0.5, .5, 1.5e3, 1e5 (each optionally negative)
    unless value =~ /\A-?(?:\d+\.?|\d*\.\d+(?:[eE][-+]?\d+)?|\d+[eE][-+]?\d+)\z/
      raise "Not a float: #{value}"
    end

    value.to_f
  elsif opt[:type]&.equal?(Integer)
    # Digits only: "12.5" or "3 apples" must not be silently truncated
    raise "Not an integer: #{value}" unless value =~ /\A-?\d+\z/

    value.to_i
  elsif opt[:in]&.include?(true) && value == 'true'
    true
  elsif opt[:in]&.include?(false) && value == 'false'
    false
  else
    value
  end
end
83
+ end
@@ -86,7 +86,8 @@ module MiGA::Common::WithResult
86
86
  if res.nil?
87
87
  # Run if the step has not been calculated,
88
88
  # unless too many attempts were already made
89
- if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
89
+ cur_try = metadata["_try_#{t}"] || 0
90
+ if cur_try > project.option(:max_try)
90
91
  inactivate! "Too many errors in step #{t}"
91
92
  false
92
93
  else
@@ -1,7 +1,10 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/common/with_option'
3
4
 
4
5
  class MiGA::Dataset < MiGA::MiGA
6
+ include MiGA::Common::WithOption
7
+
5
8
  # Class-level
6
9
  class << self
7
10
  def RESULT_DIRS
@@ -15,6 +18,10 @@ class MiGA::Dataset < MiGA::MiGA
15
18
  def PREPROCESSING_TASKS
16
19
  @@PREPROCESSING_TASKS
17
20
  end
21
+
22
+ def OPTIONS
23
+ @@OPTIONS
24
+ end
18
25
  end
19
26
  end
20
27
 
@@ -85,4 +92,15 @@ module MiGA::Dataset::Base
85
92
  # tasks are ignored for single-organism datasets or for unknown types.
86
93
  @@ONLY_MULTI_TASKS = [:mytaxa]
87
94
  @@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map { |i| [i, true] }]
95
+
96
+ ##
97
+ # Options supported by datasets
98
+ @@OPTIONS = {
99
+ db_project: {
100
+ desc: 'Project to use as database', type: String
101
+ },
102
+ dist_req: {
103
+ desc: 'Run distances against these datasets', type: Array, default: []
104
+ }
105
+ }
88
106
  end
@@ -50,7 +50,7 @@ module MiGA::Dataset::Result
50
50
  :upstream
51
51
  elsif !metadata["run_#{task}"].nil?
52
52
  metadata["run_#{task}"] ? :execute : :force
53
- elsif task == :taxonomy && project.metadata[:ref_project].nil?
53
+ elsif task == :taxonomy && project.option(:ref_project).nil?
54
54
  :project
55
55
  elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !ref?
56
56
  :noref
data/lib/miga/metadata.rb CHANGED
@@ -56,24 +56,20 @@ class MiGA::Metadata < MiGA::MiGA
56
56
  ##
57
57
  # Save the metadata into #path
58
58
  def save
59
- MiGA.DEBUG "Metadata.save #{path}"
59
+ return if self[:never_save]
60
+
61
+ MiGA::MiGA.DEBUG "Metadata.save #{path}"
60
62
  self[:updated] = Time.now.to_s
61
63
  json = to_json
62
- sleeper = 0.0
63
- slept = 0
64
- while File.exist?(lock_file)
65
- MiGA::MiGA.DEBUG "Waiting for lock: #{lock_file}"
66
- sleeper += 0.1 if sleeper <= 10.0
67
- sleep(sleeper.to_i)
68
- slept += sleeper.to_i
69
- raise "Lock detected for over 10 minutes: #{lock_file}" if slept > 600
70
- end
71
- FileUtils.touch lock_file
64
+ wait_for_lock
65
+ FileUtils.touch(lock_file)
72
66
  ofh = File.open("#{path}.tmp", 'w')
73
67
  ofh.puts json
74
68
  ofh.close
75
- raise "Lock-racing detected for #{path}" unless
76
- File.exist?("#{path}.tmp") and File.exist?(lock_file)
69
+
70
+ unless File.exist?("#{path}.tmp") && File.exist?(lock_file)
71
+ raise "Lock-racing detected for #{path}"
72
+ end
77
73
 
78
74
  File.rename("#{path}.tmp", path)
79
75
  File.unlink(lock_file)
@@ -154,4 +150,20 @@ class MiGA::Metadata < MiGA::MiGA
154
150
  def to_json
155
151
  MiGA::Json.generate(data)
156
152
  end
153
+
154
+ private
155
+
156
+ ##
157
+ # Wait for the lock to go away
158
def wait_for_lock
  pause = 0.0  # seconds to sleep on the next iteration
  waited = 0.0 # total seconds slept so far
  loop do
    break unless File.exist?(lock_file)

    MiGA::MiGA.DEBUG "Waiting for lock: #{lock_file}"
    # Each wait is 0.1 s longer than the previous one; the increment stops
    # once the pause has gone past 10 s
    pause += 0.1 if pause <= 10.0
    sleep(pause)
    waited += pause
    raise "Lock detected for over 10 minutes: #{lock_file}" if waited > 600
  end
end
157
169
  end
@@ -1,7 +1,10 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/common/with_option'
3
4
 
4
5
  class MiGA::Project < MiGA::MiGA
6
+ include MiGA::Common::WithOption
7
+
5
8
  class << self
6
9
  ##
7
10
  # Does the project at +path+ exist?
@@ -33,6 +36,10 @@ class MiGA::Project < MiGA::MiGA
33
36
  def RESULT_DIRS
34
37
  @@RESULT_DIRS
35
38
  end
39
+
40
+ def OPTIONS
41
+ @@OPTIONS
42
+ end
36
43
  end
37
44
  end
38
45
 
@@ -108,4 +115,77 @@ module MiGA::Project::Base
108
115
  ##
109
116
  # Project-wide tasks for :clade projects
110
117
  @@INCLADE_TASKS = [:subclades, :ogs]
118
+
119
+ ##
120
+ # Options supported by projects
121
+ @@OPTIONS = {
122
+ ref_project: {
123
+ desc: 'Project with reference taxonomy', type: String
124
+ },
125
+ db_proj_dir: {
126
+ desc: 'Directory containing database projects', type: String
127
+ },
128
+ tax_pvalue: {
129
+ desc: 'Maximum p-value to transfer taxonomy', default: 0.05, type: Float,
130
+ in: 0.0..1.0
131
+ },
132
+ haai_p: {
133
+ desc: 'Value of aai.rb -p on hAAI', type: String,
134
+ default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
+ in: %w[blast+ blast blat diamond no]
136
+ },
137
+ aai_p: {
138
+ desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
139
+ in: %w[blast+ blast blat diamond]
140
+ },
141
+ ani_p: {
142
+ desc: 'Value of ani.rb -p on ANI', default: 'blast+', type: String,
143
+ in: %w[blast+ blast blat fastani]
144
+ },
145
+ max_try: {
146
+ desc: 'Maximum number of task attempts', default: 10, type: Integer,
147
+ in: (0..1000)
148
+ },
149
+ aai_save_rbm: {
150
+ desc: 'Should RBMs be saved for OGS analysis?',
151
+ default: proc { |project| project.clade? },
152
+ in: [true, false]
153
+ },
154
+ ogs_identity: {
155
+ desc: 'Min RBM identity for OGS', default: 80.0, type: Float,
156
+ in: (0.0..100.0)
157
+ },
158
+ clean_ogs: {
159
+ desc: 'If false, keeps ABC files (clades only)', default: true,
160
+ in: [true, false]
161
+ },
162
+ run_clades: {
163
+ desc: 'Should clades be estimated from distances?', default: true,
164
+ in: [true, false]
165
+ },
166
+ gsp_ani: {
167
+ desc: 'ANI limit to propose gsp clades', default: 95.0, type: Float,
168
+ in: (0.0..100.0)
169
+ },
170
+ gsp_aai: {
171
+ desc: 'AAI limit to propose gsp clades', default: 90.0, type: Float,
172
+ in: (0.0..100.0)
173
+ },
174
+ gsp_metric: {
175
+ desc: 'Metric to propose clades', default: 'ani', type: String,
176
+ in: %w[ani aai]
177
+ },
178
+ ess_coll: {
179
+ desc: 'Collection of essential genes to use', default: 'dupont_2012',
180
+ type: String, in: %w[dupont_2012 lee_2019]
181
+ },
182
+ min_qual: {
183
+ desc: 'Minimum genome quality', default: 25.0, type: Float,
184
+ in: -Float::INFINITY..100.0, tokens: %w[no]
185
+ },
186
+ distances_checkpoint: {
187
+ desc: 'Number of comparisons before storing data', default: 10,
188
+ type: Integer, in: 1...Float::INFINITY
189
+ }
190
+ }
111
191
  end
@@ -31,9 +31,9 @@ module MiGA::Project::Result
31
31
  ##
32
32
  # Is this +task+ to be bypassed?
33
33
  def ignore_task?(task)
34
- metadata["run_#{task}"] == false ||
35
- (!is_clade? && @@INCLADE_TASKS.include?(task) &&
36
- metadata["run_#{task}"] != true)
34
+ return true if metadata["run_#{task}"] == false
35
+
36
+ !clade? && @@INCLADE_TASKS.include?(task) && metadata["run_#{task}"] != true
37
37
  end
38
38
 
39
39
  ##
@@ -74,7 +74,7 @@ module MiGA::Project::Result
74
74
  return r
75
75
  end
76
76
  return nil unless result_files_exist?(base, %w[.proposed-clades])
77
- unless is_clade? ||
77
+ unless clade? ||
78
78
  result_files_exist?(
79
79
  base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
80
80
  )
@@ -118,7 +118,7 @@ module MiGA::Result::Stats
118
118
 
119
119
  def compute_stats_essential_genes
120
120
  stats = {}
121
- if source.is_multi?
121
+ if source.multi?
122
122
  stats = { median_copies: 0, mean_copies: 0 }
123
123
  File.open(file_path(:report), 'r') do |fh|
124
124
  fh.each_line do |ln|
@@ -151,7 +151,7 @@ module MiGA::Result::Stats
151
151
  source.save
152
152
 
153
153
  # Inactivate low-quality datasets
154
- min_qual = (project.metadata[:min_qual] || 25)
154
+ min_qual = project.option(:min_qual)
155
155
  if min_qual != 'no' && stats[:quality] < min_qual
156
156
  source.inactivate! 'Low quality genome'
157
157
  end
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.7, 23, 0].freeze
13
+ VERSION = [0.7, 24, 0].freeze
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2021, 2, 9)
21
+ VERSION_DATE = Date.new(2021, 2, 16)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -24,8 +24,7 @@ fi
24
24
  mkdir "${DATASET}.ess"
25
25
  TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" \
26
26
  --metadata "type" | awk '{print $2}')
27
- COLL=$(miga about -P "$PROJECT" -m ess_coll)
28
- [[ "$COLL" == "?" ]] && COLL=dupont_2012
27
+ COLL=$(miga option -P "$PROJECT" --key ess_coll)
29
28
  if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
30
29
  FLAGS="--metagenome"
31
30
  else
data/scripts/ogs.bash CHANGED
@@ -12,8 +12,7 @@ miga_start_project_step "$DIR"
12
12
  DS=$(miga ls -P "$PROJECT" --ref --no-multi)
13
13
 
14
14
  if [[ -n $DS ]] ; then
15
- MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
16
- [[ $MIN_ID == "?" ]] && MIN_ID=80
15
+ MIN_ID=$(miga option -P "$PROJECT" --key ogs_identity)
17
16
  if [[ ! -s miga-project.ogs ]] ; then
18
17
  # Extract RBMs
19
18
  if [[ ! -s miga-project.abc ]] ; then
@@ -34,7 +33,7 @@ if [[ -n $DS ]] ; then
34
33
 
35
34
  # Estimate OGs and Clean RBMs
36
35
  ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
37
- if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
36
+ if [[ $(miga option -P "$PROJECT" --key clean_ogs) == "false" ]] ; then
38
37
  gzip -9 miga-project.abc
39
38
  else
40
39
  rm miga-project.abc
data/test/dataset_test.rb CHANGED
@@ -29,7 +29,7 @@ class DatasetTest < Test::Unit::TestCase
29
29
  assert_raise { MiGA::Dataset.new(project, 'dataset-1') }
30
30
  assert_equal(project, dataset.project)
31
31
  assert_equal('dataset0', dataset.name)
32
- assert_predicate(dataset, :is_ref?)
32
+ assert_predicate(dataset, :ref?)
33
33
  assert_equal(MiGA::Metadata, dataset.metadata.class)
34
34
  assert_equal(:incomplete, dataset.status)
35
35
  end
@@ -38,14 +38,14 @@ class DatasetTest < Test::Unit::TestCase
38
38
  d2 = project.add_dataset('ds_save')
39
39
  assert_respond_to(d2, :save)
40
40
  d2.save
41
- assert_not_predicate(d2, :is_multi?)
42
- assert_not_predicate(d2, :is_nonmulti?)
41
+ assert_not_predicate(d2, :multi?)
42
+ assert_not_predicate(d2, :nonmulti?)
43
43
  assert_nil(d2.metadata[:type])
44
44
  d2.metadata[:type] = :metagenome
45
45
  d2.save
46
46
  assert_equal(:metagenome, d2.metadata[:type])
47
- assert_predicate(d2, :is_multi?)
48
- assert_not_predicate(d2, :is_nonmulti?)
47
+ assert_predicate(d2, :multi?)
48
+ assert_not_predicate(d2, :nonmulti?)
49
49
  end
50
50
 
51
51
  def test_remove
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+ require 'miga/common/with_option'
5
+
6
##
# Unit tests for MiGA::Common::WithOption.
#
# Previously declared as WithDaemonTest, which does not match the module
# under test (with_option) and would reopen any other test case of that
# name loaded in the same run.
class WithOptionTest < Test::Unit::TestCase
  include TestHelper

  ##
  # Minimal host class for the module: metadata is a plain Hash and #save
  # only records that it was called
  class TestWithOption < MiGA::MiGA
    include MiGA::Common::WithOption

    attr_reader :metadata, :saved

    def initialize
      @metadata = { range: 0.9 }
      @saved = false
    end

    def self.OPTIONS
      {
        empty: {},
        float: { type: Float },
        range: { default: 1.0, in: -5.5..5.5, type: Float },
        default: { default: 9, type: Integer },
        token: { type: Integer, tokens: %w[yes no 0] },
        proc: { default: proc { Date.today } },
        bool: { in: [true, false] }
      }
    end

    def save
      @saved = true
    end
  end

  def test_with_option
    o = TestWithOption.new
    assert_respond_to(o, :option)
    assert_equal(1, o.metadata.size)
  end

  def test_option
    o = TestWithOption.new
    assert_equal(9, o.option(:default))
    assert_nil(o.option(:bool))
    assert_raise { o.option(:not_an_option) }
    assert_nil(o.option(:empty))
  end

  def test_set_bool
    o = TestWithOption.new
    assert_nil(o.option(:bool))
    assert(!o.saved)
    # The String 'true' is only valid when parsed from string
    assert_raise { o.set_option(:bool, 'true') }
    assert_nil(o.option(:bool))
    assert(!o.saved)
    assert_equal(true, o.set_option(:bool, 'true', true))
    assert(o.saved)
    assert_equal(false, o.set_option(:bool, false))
    assert_equal(false, o.set_option(:bool, 'false', true))
    assert_nil(o.set_option(:bool, nil))
  end

  def test_set_empty
    o = TestWithOption.new
    assert_nil(o.option(:empty))
    assert_equal('a', o.set_option(:empty, 'a'))
    assert_equal('1', o.set_option(:empty, '1', true))
  end

  def test_all_options
    o = TestWithOption.new
    assert(o.all_options.is_a?(Hash))
    assert_include(o.all_options.keys, :bool)
    assert_nil(o.all_options[:bool])
  end

  def test_option?
    o = TestWithOption.new
    assert(o.option?(:range))
    assert(!o.option?(:not_an_option))
  end

  def test_option_metadata
    o = TestWithOption.new
    assert_equal(0.9, o.option(:range))
    # Setting nil falls back to the declared default
    assert_equal(1.0, o.set_option(:range, nil))
    assert_equal(2.0, o.set_option(:range, 2.0))
    assert_equal(3.0, o.set_option(:range, '3', true))
  end

  def test_option_range
    o = TestWithOption.new
    assert_raise { o.set_option(:range, 9.0) }
    assert_raise { o.set_option(:range, 3) }
    assert_raise { o.set_option(:range, true) }
  end

  def test_option_proc
    o = TestWithOption.new
    assert(o.option(:proc).is_a?(Date))
    assert(o.set_option(:proc, 1).is_a?(Integer))
    assert(o.set_option(:proc, nil).is_a?(Date))
  end

  def test_token
    o = TestWithOption.new
    assert_nil(o.option(:token))
    assert_equal(1, o.set_option(:token, 1))
    assert_equal(-2, o.set_option(:token, '-2', true))
    assert_equal('yes', o.set_option(:token, 'yes'))
    assert_equal('0', o.set_option(:token, '0', true))
    assert_raise { o.set_option(:token, 'maybe') }
  end
end