miga-base 0.7.9.0 → 0.7.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d699b0ec0f4b7097439d64083fec6f3187662f9bcfa5d9921aebb27cdfd1c263
4
- data.tar.gz: 7ebffb85482969f3304e643a638fdc6f45439da8d2f11d9b8cffe27713c6e286
3
+ metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
4
+ data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
5
5
  SHA512:
6
- metadata.gz: d6517ec344ff45a1ec79db56250ad0c15423b6eac0bf4cad07def7e17dde238c3c7064a70df9f50a2411ec3b2dae27756483ba92c192553cf8fdcc9104acf730
7
- data.tar.gz: 8858ed1fed6861deff6137950c42c6f8a872c9406dc6ef3969676642f3c945de21395c4c5975e6885ede2059f9ea9c11620ec3c764774dc0f5deeb9ac058bb7f
6
+ metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
7
+ data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
data/README.md CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
41
41
  collaboration between [Kostas Lab][kostas] at the Georgia Institute of
42
42
  Technology and [RDP][rdp] at Michigan State University.
43
43
 
44
+ See also the [complete list of contributors](manual/part1/contributors.md).
44
45
 
45
46
  # License
46
47
 
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
42
42
  '--no-summaries',
43
43
  'Do not generate intermediate step summaries'
44
44
  ) { |v| cli[:summaries] = v }
45
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
45
+ opts_for_wf(
46
+ opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
47
+ )
46
48
  end
47
49
  end
48
50
 
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
17
17
  'Activate dataset; requires -D'
18
18
  ) { |v| cli[:activate] = v }
19
19
  opt.on(
20
- '--inactivate',
21
- 'Inactivate dataset; requires -D'
22
- ) { |v| cli[:activate] = !v }
20
+ '--inactivate [reason]',
21
+ 'Inactivate dataset; requires -D',
22
+ 'The argument is optional: reason to inactivate dataset'
23
+ ) { |v| cli[:activate] = false ; cli[:reason] = v }
23
24
  end
24
25
  end
25
26
 
26
27
  def perform
27
28
  obj = cli.load_project_or_dataset
28
29
  unless cli[:activate].nil?
29
- cli.ensure_par({ dataset: '-D' },
30
- '%<name>s is mandatory with --[in-]activate: please provide %<flag>s')
31
- cli[:activate] ? obj.activate! : obj.inactivate!
30
+ cli.ensure_par(
31
+ { dataset: '-D' },
32
+ '%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
33
+ )
34
+ cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
32
35
  end
33
36
  cli.add_metadata(obj)
34
37
  obj.save
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
15
15
  '-m', '--mytaxa-scan',
16
16
  'Perform MyTaxa scan analysis'
17
17
  ) { |v| cli[:mytaxa] = v }
18
- opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
18
+ opts_for_wf(
19
+ opt, 'Input genome assemblies (nucleotides, FastA)',
20
+ qual: false
21
+ )
19
22
  end
20
23
  end
21
24
 
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
15
15
 
16
16
  def opts_for_wf(opt, files_desc, params = {})
17
17
  {
18
- multi: false, cleanup: true, project_type: false, ncbi: true
18
+ multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
19
19
  }.each { |k, v| params[k] = v if params[k].nil? }
20
20
  opt.on(
21
21
  '-o', '--out_dir PATH',
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
40
40
  'Only download complete genomes, not drafts'
41
41
  ) { |v| cli[:ncbi_draft] = v }
42
42
  end
43
+ if params[:qual]
44
+ opt.on(
45
+ '--min-qual FLOAT', Float,
46
+ 'Minimum genome quality to include in analysis',
47
+ 'By default: 50.0'
48
+ ) { |v| cli[:min_qual] = v }
49
+ end
43
50
  if params[:cleanup]
44
51
  opt.on(
45
52
  '-c', '--clean',
@@ -125,7 +132,7 @@ module MiGA::Cli::Action::Wf
125
132
  ]) unless MiGA::Project.exist? cli[:outdir]
126
133
  # Define project metadata
127
134
  p = cli.load_project(:outdir, '-o')
128
- [:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
135
+ %i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
129
136
  p_metadata[:type] = cli[:project_type]
130
137
  transfer_metadata(p, p_metadata)
131
138
  # Download datasets
@@ -159,7 +166,7 @@ module MiGA::Cli::Action::Wf
159
166
  '-P', cli[:outdir],
160
167
  '-r', r,
161
168
  '-o', File.expand_path("#{r}.tsv", cli[:outdir]),
162
- '--tab'
169
+ '--tab', '--ref', '--active'
163
170
  ])
164
171
  end
165
172
  end
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
72
72
  say '-----------------------------------'
73
73
  say 'MiGA:%s launched' % project.name
74
74
  say '-----------------------------------'
75
+ recalculate_status!
75
76
  load_status
76
77
  say 'Configuration options:'
77
78
  say @runopts.to_s
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
99
100
  end
100
101
 
101
102
  def recalculate_status!
103
+ say 'Recalculating status for all datasets'
102
104
  project.each_dataset(&:recalculate_status)
103
105
  end
104
106
 
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
158
160
  end
159
161
 
160
162
  ##
161
- # Traverse datasets, and returns boolean indicating if at any datasets
162
- # are incomplete
163
+ # Traverse datasets, and return a boolean indicating whether any reference
164
+ # datasets are incomplete
163
165
  def check_datasets
164
166
  l_say(2, 'Checking datasets')
165
167
  o = false
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
167
169
  next unless ds.status == :incomplete
168
170
  next if ds.next_preprocessing(false).nil?
169
171
 
170
- o = true
172
+ o = true if ds.ref?
171
173
  queue_job(:d, ds)
172
174
  end
173
175
  o
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
97
97
 
98
98
  ##
99
99
  # Inactivate a dataset. This halts automated processing by the daemon
100
- def inactivate!
100
+ #
101
+ # If given, the +reason+ string is saved as a metadata +:warn+ entry
102
+ def inactivate!(reason = nil)
103
+ metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
101
104
  metadata[:inactive] = true
102
105
  metadata.save
103
106
  pull_hook :on_inactivate
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
107
110
  # Activate a dataset. This removes the +:inactive+ flag
108
111
  def activate!
109
112
  metadata[:inactive] = nil
113
+ metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
110
114
  metadata.save
111
115
  pull_hook :on_activate
112
116
  end
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
35
35
  mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
36
36
  mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
37
37
  # Distances (for single-species datasets)
38
- distances: '09.distances',
39
38
  taxonomy: '09.distances/05.taxonomy',
39
+ distances: '09.distances',
40
40
  # General statistics
41
41
  stats: '90.stats'
42
42
  }
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
52
52
  end
53
53
 
54
54
  ##
55
- # Run +cmd+ in the command-line with {{variables}}: dataset, project, miga,
56
- # object (as defined for the event, if any)
55
+ # Run +cmd+ in the command-line with {{variables}}:
56
+ # dataset, project, project_name, miga, object (if defined for the event)
57
57
  # - +hook_args+: +[cmd]+
58
58
  # - +event_args+: +[object (optional)]+
59
59
  def hook_run_cmd(hook_args, event_args)
60
60
  Process.wait(
61
61
  spawn hook_args.first.miga_variables(
62
- dataset: name, project: project.path, miga: MiGA::MiGA.root_path,
63
- object: event_args.first
62
+ dataset: name, project: project.path, project_name: project.name,
63
+ miga: MiGA::MiGA.root_path, object: event_args.first
64
64
  )
65
65
  )
66
66
  end
@@ -70,7 +70,7 @@ module MiGA::Dataset::Result
70
70
  false
71
71
  elsif add_result(t, save).nil?
72
72
  if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
73
- inactivate!
73
+ inactivate! "Too many errors in step #{t}"
74
74
  false
75
75
  else
76
76
  true
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
26
26
  end
27
27
 
28
28
  ##
29
- # Run +cmd+ in the command-line with {{variables}}: project, miga,
30
- # object (as defined by the event, if any)
29
+ # Run +cmd+ in the command-line with {{variables}}:
30
+ # project, project_name, miga, object (if defined by the event)
31
31
  # - +hook_args+: +[cmd]+
32
32
  # - +event_args+: +[object (optional)]+
33
33
  def hook_run_cmd(hook_args, event_args)
34
34
  Process.wait(
35
35
  spawn hook_args.first.miga_variables(
36
- project: path, miga: MiGA::MiGA.root_path, object: event_args.first
36
+ project: path, project_name: name,
37
+ miga: MiGA::MiGA.root_path, object: event_args.first
37
38
  )
38
39
  )
39
40
  end
@@ -94,12 +94,13 @@ class MiGA::RemoteDataset
94
94
  @timeout_try = 0
95
95
  begin
96
96
  DEBUG 'GET: ' + url
97
- open(url, read_timeout: 600) { |f| doc = f.read }
97
+ URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
98
98
  rescue => e
99
99
  @timeout_try += 1
100
100
  raise e if @timeout_try >= 3
101
101
 
102
102
  sleep 5 # <- For: 429 Too Many Requests
103
+ DEBUG "RETRYING after: #{e}"
103
104
  retry
104
105
  end
105
106
  doc
@@ -164,7 +164,9 @@ class MiGA::Result < MiGA::MiGA
164
164
  # Unlink result by removing the .done and .start timestamps and the
165
165
  # .json descriptor, but don't remove any other associated files
166
166
  def unlink
167
- %i(start done).each { |i| f = path(i) and File.unlink(f) }
167
+ %i(start done).each do |i|
168
+ f = path(i) and File.exists?(f) and File.unlink(f)
169
+ end
168
170
  File.unlink path
169
171
  end
170
172
 
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
8
8
  # (Re-)calculate and save the statistics for the result
9
9
  def compute_stats
10
10
  method = :"compute_stats_#{key}"
11
+ MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
11
12
  stats = self.respond_to?(method, true) ? send(method) : nil
12
13
  unless stats.nil?
13
14
  self[:stats] = stats
@@ -109,20 +110,8 @@ module MiGA::Result::Stats
109
110
  end
110
111
  end
111
112
  else
112
- # Fix estimate by domain
113
- if !(tax = source.metadata[:tax]).nil? &&
114
- %w[Archaea Bacteria].include?(tax[:d]) &&
115
- file_path(:raw_report).nil?
116
- scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
117
- rep = file_path(:report)
118
- rc_p = File.expand_path('.miga_rc', ENV['HOME'])
119
- rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
120
- $stderr.print `#{rc} ruby '#{scr}' \
121
- '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
122
- add_file(:raw_report, "#{source.name}.ess/log")
123
- add_file(:report, "#{source.name}.ess/log.domain")
124
- end
125
- # Extract/compute quality values
113
+ # Estimate quality metrics
114
+ fix_essential_genes_by_domain
126
115
  stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
127
116
  File.open(file_path(:report), 'r') do |fh|
128
117
  fh.each_line do |ln|
@@ -131,6 +120,8 @@ module MiGA::Result::Stats
131
120
  end
132
121
  end
133
122
  end
123
+
124
+ # Determine qualitative range
134
125
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
135
126
  source.metadata[:quality] =
136
127
  case stats[:quality]
@@ -140,6 +131,12 @@ module MiGA::Result::Stats
140
131
  else; :low
141
132
  end
142
133
  source.save
134
+
135
+ # Inactivate low-quality datasets
136
+ min_qual = (project.metadata[:min_qual] || 50)
137
+ if min_qual != 'no' && stats[:quality] < min_qual
138
+ source.inactivate! 'Low genome quality'
139
+ end
143
140
  end
144
141
  stats
145
142
  end
@@ -175,4 +172,21 @@ module MiGA::Result::Stats
175
172
  end
176
173
  stats
177
174
  end
175
+
176
+ # Fix the essential-genes estimates according to the taxonomic domain
177
+ def fix_essential_genes_by_domain
178
+ return if (tax = source.metadata[:tax]).nil? ||
179
+ !%w[Archaea Bacteria].include?(tax[:d]) ||
180
+ file_path(:raw_report)
181
+
182
+ MiGA::MiGA.DEBUG "Fixing essential genes by domain"
183
+ scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
184
+ rep = file_path(:report)
185
+ rc_p = File.expand_path('.miga_rc', ENV['HOME'])
186
+ rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
187
+ $stderr.print `#{rc} ruby '#{scr}' \
188
+ '#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
189
+ add_file(:raw_report, "#{source.name}.ess/log")
190
+ add_file(:report, "#{source.name}.ess/log.domain")
191
+ end
178
192
  end
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 9, 0]
11
+ VERSION = [0.7, 10, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 8)
19
+ VERSION_DATE = Date.new(2020, 6, 29)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
9
9
  # Initialize
10
10
  miga date > "$DATASET.start"
11
11
 
12
- # Run
12
+ # Check quality first
13
+ miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
14
+ inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
15
+ [[ "$inactive" == "true" ]] && exit
16
+
17
+ # Run distances
13
18
  ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
14
19
 
15
20
  # Finalize
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
93
93
  0 => /-{20}\n/,
94
94
  1 => /MiGA:#{p.name} launched/,
95
95
  2 => /-{20}\n/,
96
- 5 => /Probing running jobs\n/
96
+ 6 => /Probing running jobs\n/
97
97
  }.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
98
98
  ensure
99
99
  begin
@@ -185,11 +185,13 @@ class DatasetTest < Test::Unit::TestCase
185
185
  d = dataset
186
186
  assert_equal(:incomplete, d.status)
187
187
  assert_predicate(d, :active?)
188
- d.inactivate!
188
+ d.inactivate! 'Too annoying'
189
189
  assert_equal(:inactive, d.status)
190
+ assert_equal('Inactive: Too annoying', d.metadata[:warn])
190
191
  assert_not_predicate(d, :active?)
191
192
  d.activate!
192
193
  assert_equal(:incomplete, d.status)
194
+ assert_nil(d.metadata[:warn])
193
195
  assert_predicate(d, :active?)
194
196
  end
195
197
 
@@ -101,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
101
101
 
102
102
  def test_ref_type_status
103
103
  declare_remote_access
104
- rd = MiGA::RemoteDataset.new('GCA_002849345', :assembly, :ncbi)
104
+ rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
105
105
  assert { !rd.get_metadata[:is_type] }
106
106
  assert { rd.get_metadata[:is_ref_type] }
107
107
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.9.0
4
+ version: 0.7.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-08 00:00:00.000000000 Z
11
+ date: 2020-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -529,7 +529,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
529
529
  licenses:
530
530
  - Artistic-2.0
531
531
  metadata: {}
532
- post_install_message:
532
+ post_install_message:
533
533
  rdoc_options:
534
534
  - lib
535
535
  - README.md
@@ -550,8 +550,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
550
550
  - !ruby/object:Gem::Version
551
551
  version: '0'
552
552
  requirements: []
553
- rubygems_version: 3.0.3
554
- signing_key:
553
+ rubygems_version: 3.1.2
554
+ signing_key:
555
555
  specification_version: 4
556
556
  summary: MiGA
557
557
  test_files: []