miga-base 0.7.3.1 → 0.7.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +71 -82
  14. data/lib/miga/cli/action/doctor/base.rb +102 -0
  15. data/lib/miga/cli/action/edit.rb +14 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +8 -4
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +25 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +93 -44
  53. data/lib/miga/daemon/base.rb +30 -11
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/dataset/status.rb +6 -5
  59. data/lib/miga/json.rb +5 -7
  60. data/lib/miga/lair.rb +4 -0
  61. data/lib/miga/metadata.rb +4 -3
  62. data/lib/miga/project.rb +29 -20
  63. data/lib/miga/project/base.rb +52 -37
  64. data/lib/miga/project/dataset.rb +33 -26
  65. data/lib/miga/project/hooks.rb +0 -3
  66. data/lib/miga/project/result.rb +14 -5
  67. data/lib/miga/remote_dataset.rb +85 -72
  68. data/lib/miga/remote_dataset/base.rb +11 -13
  69. data/lib/miga/remote_dataset/download.rb +34 -12
  70. data/lib/miga/result.rb +48 -53
  71. data/lib/miga/result/base.rb +0 -2
  72. data/lib/miga/result/dates.rb +1 -3
  73. data/lib/miga/result/source.rb +15 -16
  74. data/lib/miga/result/stats.rb +37 -27
  75. data/lib/miga/tax_dist.rb +6 -3
  76. data/lib/miga/tax_index.rb +17 -17
  77. data/lib/miga/taxonomy.rb +6 -1
  78. data/lib/miga/taxonomy/base.rb +19 -15
  79. data/lib/miga/version.rb +19 -16
  80. data/scripts/project_stats.bash +3 -0
  81. data/scripts/stats.bash +1 -1
  82. data/test/common_test.rb +3 -11
  83. data/test/daemon_helper.rb +38 -0
  84. data/test/daemon_test.rb +91 -99
  85. data/test/dataset_test.rb +63 -59
  86. data/test/format_test.rb +3 -11
  87. data/test/hook_test.rb +50 -55
  88. data/test/json_test.rb +7 -8
  89. data/test/lair_test.rb +22 -28
  90. data/test/metadata_test.rb +6 -14
  91. data/test/project_test.rb +33 -40
  92. data/test/remote_dataset_test.rb +26 -32
  93. data/test/result_stats_test.rb +17 -27
  94. data/test/result_test.rb +41 -34
  95. data/test/tax_dist_test.rb +2 -4
  96. data/test/tax_index_test.rb +4 -10
  97. data/test/taxonomy_test.rb +7 -9
  98. data/test/test_helper.rb +42 -1
  99. data/test/with_daemon_test.rb +14 -22
  100. data/utils/adapters.fa +13 -0
  101. data/utils/cleanup-databases.rb +6 -5
  102. data/utils/distance/base.rb +0 -1
  103. data/utils/distance/commands.rb +19 -12
  104. data/utils/distance/database.rb +24 -21
  105. data/utils/distance/pipeline.rb +23 -10
  106. data/utils/distance/runner.rb +20 -16
  107. data/utils/distance/temporal.rb +1 -3
  108. data/utils/distances.rb +1 -1
  109. data/utils/domain-ess-genes.rb +7 -7
  110. data/utils/index_metadata.rb +5 -4
  111. data/utils/mytaxa_scan.rb +18 -16
  112. data/utils/representatives.rb +5 -4
  113. data/utils/requirements.txt +1 -1
  114. data/utils/subclade/base.rb +0 -1
  115. data/utils/subclade/pipeline.rb +7 -6
  116. data/utils/subclade/runner.rb +9 -9
  117. data/utils/subclade/temporal.rb +0 -2
  118. data/utils/subclades-compile.rb +39 -37
  119. data/utils/subclades.rb +1 -1
  120. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e3f943800139b48778d227b3f0680bd71cac91774037f9f9e115842ccb3e6f4
4
- data.tar.gz: 351042f7e77d3b2a3189a27fc37ead6c4fdb02ad519ba72a760d8e75a50dddf3
3
+ metadata.gz: 5cce1edf29f5f41ad7a53225978513597f2234abd781c5f179c9f45f9d6f1ec2
4
+ data.tar.gz: 846a570b68efe9dba8acbc1434c55752373d9864df2cea2ff467330bf7e8b1ac
5
5
  SHA512:
6
- metadata.gz: 95f0728ad6f0d4359c32ed37deb9b215e6ecbd15fd5807796a8927eadb5ebfc1466d88e94afec179b33df9009d53e2f109166c5db5bf82424d6c31c6289cf780
7
- data.tar.gz: 75a5c9f95150c0177f1cf29a2bac58547bb51e60f75fe1d3c42acc3a3945aee161d51e1af479187459312b39c9164a6955d28232cbeb923cb8dbe5b748b590d8
6
+ metadata.gz: 123117d43c4ef4200ebe4a51cb14d19e48b89d249e66fa27b6e0834cf1c3b738535c3f514856e373259428c42543a25886744b4a8a6775f75b587d0a557e4d48
7
+ data.tar.gz: fa178d1e899da25212983bf7640cc391f3922e560d5d5ee4ff072781b6301013d928212868c9db1590dfcd1f41449e07b5019765dc866d02295226c9ed8a3e2d
@@ -7,7 +7,6 @@ require 'optparse'
7
7
  ##
8
8
  # MiGA Command Line Interface API.
9
9
  class MiGA::Cli < MiGA::MiGA
10
-
11
10
  require 'miga/cli/base'
12
11
  require 'miga/cli/opt_helper'
13
12
  require 'miga/cli/objects_helper'
@@ -61,7 +60,7 @@ class MiGA::Cli < MiGA::MiGA
61
60
 
62
61
  def initialize(argv)
63
62
  @data = {}
64
- @defaults = {verbose: false, tabular: false}
63
+ @defaults = { verbose: false, tabular: false }
65
64
  @opt_common = true
66
65
  @objects = {}
67
66
  if argv[0].nil? or argv[0].to_s[0] == '-'
@@ -106,6 +105,7 @@ class MiGA::Cli < MiGA::MiGA
106
105
  # otherwise it's sent to +$stderr+
107
106
  def say(*par)
108
107
  return unless self[:verbose]
108
+
109
109
  super(*par)
110
110
  end
111
111
 
@@ -116,16 +116,17 @@ class MiGA::Cli < MiGA::MiGA
116
116
  # The report goes to $stderr iff --verbose
117
117
  def advance(step, n = 0, total = nil, bin = true)
118
118
  return unless self[:verbose]
119
+
119
120
  adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
120
121
  ('%.1f%% (%s/%s)' % [100.0 * n / total,
121
- num_suffix(n, bin), num_suffix(total, bin)])
122
+ num_suffix(n, bin), num_suffix(total, bin)])
122
123
  $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
123
124
  end
124
125
 
125
126
  def num_suffix(n, bin = false)
126
127
  p = ''
127
- {T: 4, G: 3, M: 2, K: 1}.each do |k,x|
128
- v = (bin ? 1024 : 1e3) ** x
128
+ { T: 4, G: 3, M: 2, K: 1 }.each do |k, x|
129
+ v = (bin ? 1024 : 1e3)**x
129
130
  if n > v
130
131
  n = '%.1f' % (n / v)
131
132
  p = k
@@ -160,7 +161,7 @@ class MiGA::Cli < MiGA::MiGA
160
161
  ##
161
162
  # Set default values in the Hash +hsh+
162
163
  def defaults=(hsh)
163
- hsh.each{ |k,v| @defaults[k] = v }
164
+ hsh.each { |k, v| @defaults[k] = v }
164
165
  end
165
166
 
166
167
  ##
@@ -192,6 +193,7 @@ class MiGA::Cli < MiGA::MiGA
192
193
  def launch
193
194
  begin
194
195
  raise "See `miga -h`" if action.nil?
196
+
195
197
  action.launch
196
198
  rescue => err
197
199
  $stderr.puts "Exception: #{err}"
@@ -222,8 +224,8 @@ class MiGA::Cli < MiGA::MiGA
222
224
  # +par+, a Hash with object names as keys and parameter flag as values.
223
225
  # If missing, raise an error with message +msg+
224
226
  def ensure_par(req, msg = '%<name>s is mandatory: please provide %<flag>s')
225
- req.each do |k,v|
226
- raise (msg % {name: k, flag: v}) if self[k].nil?
227
+ req.each do |k, v|
228
+ raise (msg % { name: k, flag: v }) if self[k].nil?
227
229
  end
228
230
  end
229
231
 
@@ -8,7 +8,6 @@ require 'miga/cli'
8
8
  # by MiGA::Cli::Action::* classes. Do not attempt creating directly with +new+,
9
9
  # use instead the MiGA::Cli::Action.load interface.
10
10
  class MiGA::Cli::Action < MiGA::MiGA
11
-
12
11
  class << self
13
12
  def load(task, cli)
14
13
  require "miga/cli/action/#{task}"
@@ -55,8 +54,8 @@ class MiGA::Cli::Action < MiGA::MiGA
55
54
  ##
56
55
  # Name of the action, as referred to by the CLI
57
56
  def name
58
- camel = self.class.to_s.gsub(/.*::/,'')
59
- camel.gsub(/(\S)([A-Z])/,'\1_\2').downcase
57
+ camel = self.class.to_s.gsub(/.*::/, '')
58
+ camel.gsub(/(\S)([A-Z])/, '\1_\2').downcase
60
59
  end
61
60
 
62
61
  ##
@@ -4,23 +4,22 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::About < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
- cli.defaults = {info: false, processing: false, tabular: false}
8
+ cli.defaults = { info: false, processing: false, tabular: false }
10
9
  cli.parse do |opt|
11
10
  cli.opt_object(opt, [:project])
12
11
  opt.on(
13
12
  '-p', '--processing',
14
13
  'Print information on processing advance'
15
- ){ |v| cli[:processing] = v }
14
+ ) { |v| cli[:processing] = v }
16
15
  opt.on(
17
16
  '-m', '--metadata STRING',
18
17
  'Print name and metadata field only'
19
- ){ |v| cli[:datum] = v }
18
+ ) { |v| cli[:datum] = v }
20
19
  opt.on(
21
20
  '--tab',
22
21
  'Return a tab-delimited table'
23
- ){ |v| cli[:tabular] = v }
22
+ ) { |v| cli[:tabular] = v }
24
23
  end
25
24
  end
26
25
 
@@ -37,7 +36,7 @@ class MiGA::Cli::Action::About < MiGA::Cli::Action
37
36
  else
38
37
  cli.puts MiGA.tabulate([:key, :value], p.metadata.data.keys.map do |k|
39
38
  v = p.metadata[k]
40
- [k, k==:datasets ? v.size : v]
39
+ [k, k == :datasets ? v.size : v]
41
40
  end, cli[:tabular])
42
41
  end
43
42
  end
@@ -4,7 +4,6 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::Add < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
8
  cli.expect_files = true
10
9
  cli.defaults = {
@@ -47,7 +46,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
47
46
  opt.on(
48
47
  '-i', '--input-type STRING',
49
48
  'Type of input data, one of the following:',
50
- *self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
49
+ *self.class.INPUT_TYPES.map { |k, v| "~ #{k}: #{v[0]}" }
51
50
  ) { |v| cli[:input_type] = v.downcase.to_sym }
52
51
  opt.on(
53
52
  '--ignore-dups',
@@ -64,6 +63,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
64
63
  files.each do |file|
65
64
  d = create_dataset(file, p)
66
65
  next if d.nil?
66
+
67
67
  copy_file_to_project(file, file_type, d, p)
68
68
  d = cli.add_metadata(d)
69
69
  d.save
@@ -76,22 +76,22 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
76
76
  @@INPUT_TYPES = {
77
77
  raw_reads_single:
78
78
  ['Single raw reads in a single FastQ file',
79
- :raw_reads, %w[.1.fastq]],
79
+ :raw_reads, %w[.1.fastq]],
80
80
  raw_reads_paired:
81
81
  ['Paired raw reads in two FastQ files',
82
- :raw_reads, %w[.1.fastq .2.fastq]],
82
+ :raw_reads, %w[.1.fastq .2.fastq]],
83
83
  trimmed_reads_single:
84
84
  ['Single trimmed reads in a single FastA file',
85
- :trimmed_fasta, %w[.SingleReads.fa]],
85
+ :trimmed_fasta, %w[.SingleReads.fa]],
86
86
  trimmed_reads_paired:
87
87
  ['Paired trimmed reads in two FastA files',
88
- :trimmed_fasta, %w[.1.fasta .2.fasta]],
88
+ :trimmed_fasta, %w[.1.fasta .2.fasta]],
89
89
  trimmed_reads_interleaved:
90
90
  ['Paired trimmed reads in a single FastA file',
91
- :trimmed_fasta, %w[.CoupledReads.fa]],
91
+ :trimmed_fasta, %w[.CoupledReads.fa]],
92
92
  assembly:
93
93
  ['Assembled contigs or scaffolds in FastA format',
94
- :assembly, %w[.LargeContigs.fna]]
94
+ :assembly, %w[.LargeContigs.fna]]
95
95
  }
96
96
 
97
97
  class << self
@@ -106,23 +106,26 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
106
106
  files = cli.files
107
107
  file_type = nil
108
108
  if files.empty?
109
- cli.ensure_par({dataset: '-D'},
110
- 'dataset is mandatory (-D) unless files are provided')
109
+ cli.ensure_par({ dataset: '-D' },
110
+ 'dataset is mandatory (-D) unless files are provided')
111
111
  cli.ensure_type(Dataset)
112
112
  files = [nil]
113
113
  else
114
114
  raise 'Please specify input type (-i).' if cli[:input_type].nil?
115
+
115
116
  file_type = self.class.INPUT_TYPES[cli[:input_type]]
116
117
  raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
117
118
  raise 'Some files are duplicated, files must be unique.' if
118
119
  files.size != files.uniq.size
120
+
119
121
  if cli[:input_type].to_s =~ /_paired$/
120
122
  if files.size.odd?
121
123
  raise 'Odd number of files incompatible with input type.'
122
124
  end
125
+
123
126
  files = Hash[*files].to_a
124
127
  else
125
- files = files.map{ |i| [i] }
128
+ files = files.map { |i| [i] }
126
129
  end
127
130
  if files.size > 1 && !cli[:dataset].nil?
128
131
  raise 'The dataset name (-D) can only be specified with one input file.'
@@ -137,6 +140,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
137
140
  ref_file = file.is_a?(Array) ? file.first : file
138
141
  m = cli[:regexp].match(ref_file)
139
142
  raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
143
+
140
144
  name = cli[:prefix].to_s + m[1].miga_name
141
145
  end
142
146
  if Dataset.exist?(p, name)
@@ -151,12 +155,14 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
151
155
  cli.say "o #{name}"
152
156
  d = Dataset.new(p, name, cli[:ref])
153
157
  raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
158
+
154
159
  d
155
160
  end
156
161
 
157
162
  def copy_file_to_project(file, file_type, d, p)
158
163
  return if file.nil?
159
- r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
164
+
165
+ r_dir = Dataset.RESULT_DIRS[file_type[1]]
160
166
  r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
161
167
  file_type[2].each_with_index do |ext, i|
162
168
  gz = file[i] =~ /\.gz/ ? '.gz' : ''
@@ -4,15 +4,14 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::AddResult < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
- cli.defaults = {force: false}
8
+ cli.defaults = { force: false }
10
9
  cli.parse do |opt|
11
10
  cli.opt_object(opt, [:project, :dataset_opt, :result])
12
11
  opt.on(
13
12
  '-f', '--force',
14
13
  'Force re-indexing of the result even if it\'s already registered'
15
- ){ |v| cli[:force] = v }
14
+ ) { |v| cli[:force] = v }
16
15
  end
17
16
  end
18
17
 
@@ -5,7 +5,6 @@ require 'miga/cli/action'
5
5
  require 'rubygems/package'
6
6
 
7
7
  class MiGA::Cli::Action::Archive < MiGA::Cli::Action
8
-
9
8
  def parse_cli
10
9
  cli.parse do |opt|
11
10
  opt.on(
@@ -26,6 +25,7 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
26
25
  unless cli[:tarball] =~ /\.tar\.gz$/
27
26
  raise 'The tarball path (-o) must have .tar.gz extension'
28
27
  end
28
+
29
29
  cli[:folder] ||= cli.load_project.name
30
30
  ds = cli.load_and_filter_datasets
31
31
 
@@ -95,5 +95,4 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
95
95
  in_tar = File.join(cli[:folder], rel_path)
96
96
  tar.add_file_simple(in_tar, 0666, string.size) { |fh| fh.write(string) }
97
97
  end
98
-
99
98
  end
@@ -51,24 +51,24 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
51
51
  ref_db = reference_db
52
52
  p_metadata = Hash[
53
53
  %w[project_stats haai_distances aai_distances ani_distances clade_finding]
54
- .map { |i| ["run_#{i}", false] }
54
+ .map { |i| ["run_#{i}", false] }
55
55
  ]
56
56
  p_metadata[:ref_project] = ref_db.path
57
57
  p_metadata[:tax_pvalue] = cli[:pvalue]
58
58
  p = create_project(:assembly, p_metadata,
59
- run_ssu: false, run_mytaxa_scan: false, run_distances: false)
59
+ run_ssu: false, run_mytaxa_scan: false, run_distances: false)
60
60
  # Run
61
61
  run_daemon
62
62
  summarize(%w[cds assembly essential_genes]) if cli[:summaries]
63
63
  summarize(['taxonomy'])
64
64
  cli.say "Summary: classification"
65
65
  call_cli([
66
- 'ls', '-P', cli[:outdir], '-m', 'tax', '--tab',
67
- '-o', File.expand_path('classification.tsv', cli[:outdir])
68
- ])
66
+ 'ls', '-P', cli[:outdir], '-m', 'tax', '--tab',
67
+ '-o', File.expand_path('classification.tsv', cli[:outdir])
68
+ ])
69
69
  cleanup
70
70
  end
71
-
71
+
72
72
  private
73
73
 
74
74
  def reference_db
@@ -85,12 +85,14 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
85
85
  unless File.size? lm_f
86
86
  raise 'No locally listed databases, call "miga get_db" first'
87
87
  end
88
+
88
89
  cli[:database] = MiGA::Json.parse(lm_f)[:databases].keys.first
89
90
  end
90
91
  ref_db_path = File.expand_path(cli[:database].to_s, cli[:local])
91
92
  end
92
93
  ref_db = MiGA::Project.load(ref_db_path)
93
94
  raise "Cannot locate reference database: #{ref_db_path}" if ref_db.nil?
95
+
94
96
  cli.say "Reference database: #{ref_db.name}"
95
97
  ref_db
96
98
  end
@@ -4,7 +4,6 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::Console < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
8
  end
10
9
 
@@ -5,17 +5,17 @@ require 'miga/cli/action'
5
5
  require 'miga/daemon'
6
6
 
7
7
  class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
8
-
9
8
  def parse_cli
10
- cli.defaults = {daemon_opts: []}
9
+ cli.defaults = { daemon_opts: [] }
11
10
  cli.expect_operation = true
12
11
  cli.parse do |opt|
13
12
  opt.separator 'Available operations:'
14
- { start: 'Start an instance of the application',
15
- stop: 'Start an instance of the application',
16
- run: 'Start the application and stay on top',
17
- status: 'Show status (PID) of application instances'
18
- }.each { |k,v| opt.separator sprintf ' %*s%s', -33, k, v }
13
+ {
14
+ start: 'Start an instance of the application',
15
+ stop: 'Start an instance of the application',
16
+ run: 'Start the application and stay on top',
17
+ status: 'Show status (PID) of application instances'
18
+ }.each { |k, v| opt.separator sprintf(' %*s%s', -33, k, v) }
19
19
  opt.separator ''
20
20
 
21
21
  opt.separator 'MiGA options:'
@@ -4,7 +4,6 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::Date < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
8
  cli.parse { |_| }
10
9
  end
@@ -44,12 +44,13 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
44
44
  def perform
45
45
  # Input data
46
46
  p = create_project(:assembly,
47
- { run_project_stats: false, run_clades: false,
48
- gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
49
- { run_mytaxa_scan: false, run_ssu: false })
47
+ { run_project_stats: false, run_clades: false,
48
+ gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
49
+ { run_mytaxa_scan: false, run_ssu: false })
50
50
  unless cli[:threshold] >= 0.0 && cli[:threshold] <= 100.0
51
51
  raise "The threshold of identity must be in the range [0,100]"
52
52
  end
53
+
53
54
  # Run
54
55
  run_daemon
55
56
  dereplicate(p)
@@ -68,7 +69,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
68
69
  File.open(File.expand_path('genomospecies.tsv', cli[:outdir]), 'w') do |fh|
69
70
  fh.puts "Clade\tRepresentative\tMembers"
70
71
  clades.each_with_index do |i, k|
71
- fh.puts ["gsp_#{k+1}", rep[k], i.join(',')].join("\t")
72
+ fh.puts ["gsp_#{k + 1}", rep[k], i.join(',')].join("\t")
72
73
  end
73
74
  end
74
75
  if cli[:collection]
@@ -1,15 +1,15 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'miga/cli/action'
5
- require 'sqlite3'
4
+ require 'miga/cli/action/doctor/base'
6
5
 
7
6
  class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
7
+ include MiGA::Cli::Action::Doctor::Base
8
8
 
9
9
  def parse_cli
10
- @@OPERATIONS.keys.each { |i| cli.defaults = { i => true } }
10
+ cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
11
11
  cli.parse do |opt|
12
- operation_n = Hash[@@OPERATIONS.map { |k,v| [v[0], k] }]
12
+ operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
13
13
  cli.opt_object(opt, [:project])
14
14
  opt.on(
15
15
  '--ignore TASK1,TASK2', Array,
@@ -21,20 +21,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
21
21
  'Perform only the specified task (see --ignore)'
22
22
  ) do |v|
23
23
  op_k = @@OPERATIONS.find { |_, i| i[0] == v.downcase }.first
24
- @@OPERATIONS.keys.each { |i| cli[i] = false }
24
+ @@OPERATIONS.each_key { |i| cli[i] = false }
25
25
  cli[op_k] = true
26
26
  end
27
27
  end
28
28
  end
29
29
 
30
- def check_sqlite3_database(db_file, metric)
31
- SQLite3::Database.new(db_file) do |conn|
32
- conn.execute("select count(*) from #{metric}").first
33
- end
34
- rescue SQLite3::SQLException
35
- yield
36
- end
37
-
38
30
  def perform
39
31
  p = cli.load_project
40
32
  @@OPERATIONS.keys.each do |k|
@@ -43,7 +35,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
43
35
  end
44
36
 
45
37
  @@OPERATIONS = {
46
- db: ['databases', 'Check database files integrity'],
38
+ status: ['status', 'Update metadata status of all datasets'],
39
+ db: ['databases', 'Check integrity of database files'],
47
40
  dist: ['distances', 'Check distance summary tables'],
48
41
  files: ['files', 'Check for outdated files'],
49
42
  cds: ['cds', 'Check for gzipped genes and proteins'],
@@ -52,36 +45,54 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
52
45
  start: ['start', 'Check for lingering .start files'],
53
46
  tax: ['taxonomy', 'Check for taxonomy consistency (not yet implemented)']
54
47
  }
48
+
55
49
  class << self
50
+ ##
51
+ # All supported operations
56
52
  def OPERATIONS
57
53
  @@OPERATIONS
58
54
  end
59
55
  end
60
56
 
57
+ ##
58
+ # Perform status operation with MiGA::Cli +cli+
59
+ def check_status(cli)
60
+ cli.say 'Updating metadata status'
61
+ n, k = cli.load_project.dataset_names.size, 0
62
+ cli.load_project.each_dataset do |d|
63
+ cli.advance('Datasets:', k += 1, n, false)
64
+ d.recalculate_status
65
+ end
66
+ cli.say
67
+ end
68
+
69
+ ##
70
+ # Perform databases operation with MiGA::Cli +cli+
61
71
  def check_db(cli)
62
- cli.say 'Checking databases integrity'
72
+ cli.say 'Checking integrity of databases'
73
+ n, k = cli.load_project.dataset_names.size, 0
63
74
  cli.load_project.each_dataset do |d|
64
- [:distances, :taxonomy].each do |r_key|
65
- r = d.result(r_key) or next
66
- {haai_db: :aai, aai_db: :aai, ani_db: :ani}.each do |db_key, metric|
67
- db_file = r.file_path(db_key) or next
68
- check_sqlite3_database(db_file, metric) do
69
- cli.say(
70
- " > Removing #{db_key} #{r_key} table for #{d.name}")
71
- [db_file, r.path(:done), r.path].each do |f|
72
- File.unlink(f) if File.exist? f
73
- end # each |f|
74
- end # check_sqlite3_database
75
- end # each |db_key, metric|
76
- end # each |r_key|
77
- end # each |d|
75
+ cli.advance('Datasets:', k += 1, n, false)
76
+ each_database_file(d) do |db_file, metric, result|
77
+ check_sqlite3_database(db_file, metric) do
78
+ cli.say(" > Removing malformed database from #{d.name}:#{result} ")
79
+ File.unlink(db_file)
80
+ r = d.result(result) or next
81
+ [r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
82
+ end
83
+ end
84
+ end
85
+ cli.say
78
86
  end
79
87
 
88
+ ##
89
+ # Perform distances operation with MiGA::Cli +cli+
80
90
  def check_dist(cli)
81
91
  p = cli.load_project
82
- [:ani, :aai].each do |dist|
92
+ %i[ani aai].each do |dist|
83
93
  res = p.result("#{dist}_distances")
84
94
  next if res.nil?
95
+
85
96
  cli.say "Checking #{dist} table for consistent datasets"
86
97
  notok, fix = check_dist_eval(cli, p, res)
87
98
  check_dist_fix(cli, p, fix)
@@ -89,10 +100,13 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
89
100
  end
90
101
  end
91
102
 
103
+ ##
104
+ # Perform files operation with MiGA::Cli +cli+
92
105
  def check_files(cli)
93
106
  cli.say 'Looking for outdated files in results'
94
- p = cli.load_project
95
- p.each_dataset do |d|
107
+ n, k = cli.load_project.dataset_names.size, 0
108
+ cli.load_project.each_dataset do |d|
109
+ cli.advance('Datasets:', k += 1, n, false)
96
110
  d.each_result do |r_k, r|
97
111
  ok = true
98
112
  r.each_file do |_f_sym, _f_rel, f_abs|
@@ -102,23 +116,28 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
102
116
  end
103
117
  end
104
118
  unless ok
105
- cli.say " > Registering again #{d.name}:#{r_k}"
119
+ cli.say " > Registering again #{d.name}:#{r_k} "
106
120
  d.add_result(r_k, true, force: true)
107
121
  sr = d.result(:stats) and sr.remove!
108
122
  end
109
123
  end
110
124
  end
125
+ cli.say
111
126
  end
112
127
 
128
+ ##
129
+ # Perform cds operation with MiGA::Cli +cli+
113
130
  def check_cds(cli)
114
131
  cli.say 'Looking for unzipped genes or proteins'
132
+ n, k = cli.load_project.dataset_names.size, 0
115
133
  cli.load_project.each_dataset do |d|
134
+ cli.advance('Datasets:', k += 1, n, false)
116
135
  res = d.result(:cds) or next
117
136
  changed = false
118
- [:genes, :proteins, :gff3, :gff2, :tab].each do |f|
137
+ %i[genes proteins gff3 gff2 tab].each do |f|
119
138
  file = res.file_path(f) or next
120
139
  if file !~ /\.gz/
121
- cli.say " > Gzipping #{d.name} #{f}"
140
+ cli.say " > Gzipping #{d.name} #{f} "
122
141
  cmdo = `gzip -9 '#{file}'`.chomp
123
142
  warn(cmdo) unless cmdo.empty?
124
143
  changed = true
@@ -129,13 +148,17 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
129
148
  sr = d.result(:stats) and sr.remove!
130
149
  end
131
150
  end
151
+ cli.say
132
152
  end
133
153
 
154
+ ##
155
+ # Perform essential-genes operation with MiGA::Cli +cli+
134
156
  def check_ess(cli)
135
157
  cli.say 'Looking for unarchived essential genes'
136
158
  cli.load_project.each_dataset do |d|
137
159
  res = d.result(:essential_genes)
138
160
  next if res.nil?
161
+
139
162
  dir = res.file_path(:collection)
140
163
  if dir.nil?
141
164
  cli.say " > Removing #{d.name}:essential_genes"
@@ -144,17 +167,21 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
144
167
  next
145
168
  end
146
169
  next if Dir["#{dir}/*.faa"].empty?
170
+
147
171
  cli.say " > Fixing #{d.name}"
148
172
  cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
149
173
  warn(cmdo) unless cmdo.empty?
150
174
  end
151
175
  end
152
176
 
177
+ ##
178
+ # Perform mytaxa-scan operation with MiGA::Cli +cli+
153
179
  def check_mts(cli)
154
180
  cli.say 'Looking for unarchived MyTaxa Scan runs'
155
181
  cli.load_project.each_dataset do |d|
156
182
  res = d.result(:mytaxa_scan)
157
183
  next if res.nil?
184
+
158
185
  dir = res.file_path(:regions)
159
186
  fix = false
160
187
  unless dir.nil?
@@ -166,8 +193,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
166
193
  end
167
194
  fix = true
168
195
  end
169
- %w[blast mytaxain wintax gene_ids region_ids].each do |ext|
170
- file = res.file_path(ext.to_sym)
196
+ %i[blast mytaxain wintax gene_ids region_ids].each do |ext|
197
+ file = res.file_path(ext)
171
198
  unless file.nil?
172
199
  FileUtils.rm(file) if File.exist? file
173
200
  fix = true
@@ -180,6 +207,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
180
207
  end
181
208
  end
182
209
 
210
+ ##
211
+ # Perform start operation with MiGA::Cli +cli+
183
212
  def check_start(cli)
184
213
  cli.say 'Looking for legacy .start files lingering'
185
214
  cli.load_project.each_dataset do |d|
@@ -192,52 +221,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
192
221
  end
193
222
  end
194
223
 
224
+ ##
225
+ # Perform taxonomy operation with MiGA::Cli +cli+
195
226
  def check_tax(cli)
196
- #cli.say 'o Checking for taxonomy/distances consistency'
227
+ # cli.say 'o Checking for taxonomy/distances consistency'
197
228
  # TODO: Find 95%ANI clusters with entries from different species
198
- end
199
-
200
- private
201
-
202
- def check_dist_eval(cli, p, res)
203
- notok = {}
204
- fix = {}
205
- Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
206
- lineno = 0
207
- fh.each_line do |ln|
208
- next if (lineno += 1) == 1
209
- r = ln.split("\t")
210
- next unless [1, 2].map { |i| p.dataset(r[i]).nil? }.any?
211
- [1, 2].each do |i|
212
- if p.dataset(r[i]).nil?
213
- notok[r[i]] = true
214
- else
215
- fix[r[i]] = true
216
- end
217
- end
218
- end
219
- end
220
- [notok, fix]
221
- end
222
-
223
- def check_dist_fix(cli, p, fix)
224
- return if fix.empty?
225
- cli.say("- Fixing #{fix.size} datasets")
226
- fix.keys.each do |d_n|
227
- cli.say " > Fixing #{d_n}."
228
- p.dataset(d_n).cleanup_distances!
229
- end
230
- end
231
-
232
- def check_dist_recompute(cli, p, notok)
233
- return if notok.empty?
234
- cli.say '- Unregistered datasets detected: '
235
- if notok.size <= 5
236
- notok.keys.each { |i| cli.say " > #{i}" }
237
- else
238
- cli.say " > #{notok.size}, including #{notok.keys.first}"
239
- end
240
- cli.say '- Removing tables, recompute'
241
- res.remove!
229
+ # TODO: Find different 95%ANI clusters with genomes from the same species
230
+ # TODO: Find AAI values too high or too low for each LCA rank
242
231
  end
243
232
  end