miga-base 0.7.3.0 → 0.7.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +71 -82
  14. data/lib/miga/cli/action/doctor/base.rb +102 -0
  15. data/lib/miga/cli/action/edit.rb +14 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +8 -4
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +21 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +6 -3
  50. data/lib/miga/common/with_daemon_class.rb +3 -2
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +93 -44
  53. data/lib/miga/daemon/base.rb +30 -11
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/dataset/status.rb +6 -5
  59. data/lib/miga/json.rb +5 -7
  60. data/lib/miga/lair.rb +4 -0
  61. data/lib/miga/metadata.rb +4 -3
  62. data/lib/miga/project.rb +29 -20
  63. data/lib/miga/project/base.rb +52 -37
  64. data/lib/miga/project/dataset.rb +33 -26
  65. data/lib/miga/project/hooks.rb +0 -3
  66. data/lib/miga/project/result.rb +14 -5
  67. data/lib/miga/remote_dataset.rb +85 -72
  68. data/lib/miga/remote_dataset/base.rb +11 -13
  69. data/lib/miga/remote_dataset/download.rb +34 -12
  70. data/lib/miga/result.rb +34 -25
  71. data/lib/miga/result/base.rb +0 -2
  72. data/lib/miga/result/dates.rb +1 -3
  73. data/lib/miga/result/source.rb +15 -16
  74. data/lib/miga/result/stats.rb +37 -27
  75. data/lib/miga/tax_dist.rb +6 -4
  76. data/lib/miga/tax_index.rb +17 -17
  77. data/lib/miga/taxonomy.rb +6 -1
  78. data/lib/miga/taxonomy/base.rb +19 -15
  79. data/lib/miga/version.rb +19 -16
  80. data/scripts/project_stats.bash +3 -0
  81. data/scripts/stats.bash +1 -1
  82. data/test/common_test.rb +3 -11
  83. data/test/daemon_helper.rb +38 -0
  84. data/test/daemon_test.rb +91 -99
  85. data/test/dataset_test.rb +63 -59
  86. data/test/format_test.rb +3 -11
  87. data/test/hook_test.rb +50 -55
  88. data/test/json_test.rb +7 -8
  89. data/test/lair_test.rb +22 -28
  90. data/test/metadata_test.rb +6 -14
  91. data/test/project_test.rb +33 -40
  92. data/test/remote_dataset_test.rb +26 -32
  93. data/test/result_stats_test.rb +17 -27
  94. data/test/result_test.rb +41 -34
  95. data/test/tax_dist_test.rb +2 -4
  96. data/test/tax_index_test.rb +4 -10
  97. data/test/taxonomy_test.rb +7 -9
  98. data/test/test_helper.rb +42 -1
  99. data/test/with_daemon_test.rb +14 -22
  100. data/utils/adapters.fa +13 -0
  101. data/utils/cleanup-databases.rb +6 -5
  102. data/utils/distance/base.rb +0 -1
  103. data/utils/distance/commands.rb +19 -12
  104. data/utils/distance/database.rb +25 -21
  105. data/utils/distance/pipeline.rb +16 -10
  106. data/utils/distance/runner.rb +19 -13
  107. data/utils/distance/temporal.rb +7 -4
  108. data/utils/distances.rb +1 -1
  109. data/utils/domain-ess-genes.rb +7 -7
  110. data/utils/index_metadata.rb +5 -4
  111. data/utils/mytaxa_scan.rb +18 -16
  112. data/utils/representatives.rb +5 -4
  113. data/utils/requirements.txt +1 -1
  114. data/utils/subclade/base.rb +0 -1
  115. data/utils/subclade/pipeline.rb +7 -6
  116. data/utils/subclade/runner.rb +9 -9
  117. data/utils/subclade/temporal.rb +0 -2
  118. data/utils/subclades-compile.rb +39 -37
  119. data/utils/subclades.rb +1 -1
  120. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b233f892ba1294bd0959433c443944f267ff9b8c7ec4d220dc4bbacaca985a6
4
- data.tar.gz: bdc51401c6680d63872e7aab594eab50dbc500e6662d244a6fa04f6b6ea2587d
3
+ metadata.gz: 3682f50e3efe936ce751cd83cc7945edddb8e1c3ea6e654c4d54f8ea79efbfcb
4
+ data.tar.gz: a5bc821d8f1b6f55baf495eea28e8783c86c61ffaca2d486e4589b818a60038f
5
5
  SHA512:
6
- metadata.gz: 96bc61749ae2964656a9d82a2b5b0c74691513af237837960dfe146482a5691dadf7ea8aa958fb5ff35abd1e8ac829c447e219fdf330095e151efd4448470d73
7
- data.tar.gz: 299a4806eea3364a0a64d86aa0eabfa5798f27801d7892241e13bc8ec36e0d3325cddc4eb321ec5b0826b498bb86bdf98494e1384e42cfa6441d97940c690b4f
6
+ metadata.gz: 23e986949f97ae31498b7310eba666f0fc4b5f3e4ab9d38a135b2934db901449dca70ce74830e4353bb60f2196ce2c195b1bfb20400f884494e9766e58ea5214
7
+ data.tar.gz: 3857008111b8a65b1fbf09442eb3a657789ebf964769c7805485be57298141c363296b7d2491ef3379726344f07892211d10d4c72fc74a8095bc0eaf00d4e873
@@ -7,7 +7,6 @@ require 'optparse'
7
7
  ##
8
8
  # MiGA Command Line Interface API.
9
9
  class MiGA::Cli < MiGA::MiGA
10
-
11
10
  require 'miga/cli/base'
12
11
  require 'miga/cli/opt_helper'
13
12
  require 'miga/cli/objects_helper'
@@ -61,7 +60,7 @@ class MiGA::Cli < MiGA::MiGA
61
60
 
62
61
  def initialize(argv)
63
62
  @data = {}
64
- @defaults = {verbose: false, tabular: false}
63
+ @defaults = { verbose: false, tabular: false }
65
64
  @opt_common = true
66
65
  @objects = {}
67
66
  if argv[0].nil? or argv[0].to_s[0] == '-'
@@ -106,6 +105,7 @@ class MiGA::Cli < MiGA::MiGA
106
105
  # otherwise it's sent to +$stderr+
107
106
  def say(*par)
108
107
  return unless self[:verbose]
108
+
109
109
  super(*par)
110
110
  end
111
111
 
@@ -116,16 +116,17 @@ class MiGA::Cli < MiGA::MiGA
116
116
  # The report goes to $stderr iff --verborse
117
117
  def advance(step, n = 0, total = nil, bin = true)
118
118
  return unless self[:verbose]
119
+
119
120
  adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
120
121
  ('%.1f%% (%s/%s)' % [100.0 * n / total,
121
- num_suffix(n, bin), num_suffix(total, bin)])
122
+ num_suffix(n, bin), num_suffix(total, bin)])
122
123
  $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
123
124
  end
124
125
 
125
126
  def num_suffix(n, bin = false)
126
127
  p = ''
127
- {T: 4, G: 3, M: 2, K: 1}.each do |k,x|
128
- v = (bin ? 1024 : 1e3) ** x
128
+ { T: 4, G: 3, M: 2, K: 1 }.each do |k, x|
129
+ v = (bin ? 1024 : 1e3)**x
129
130
  if n > v
130
131
  n = '%.1f' % (n / v)
131
132
  p = k
@@ -160,7 +161,7 @@ class MiGA::Cli < MiGA::MiGA
160
161
  ##
161
162
  # Set default values in the Hash +hsh+
162
163
  def defaults=(hsh)
163
- hsh.each{ |k,v| @defaults[k] = v }
164
+ hsh.each { |k, v| @defaults[k] = v }
164
165
  end
165
166
 
166
167
  ##
@@ -192,6 +193,7 @@ class MiGA::Cli < MiGA::MiGA
192
193
  def launch
193
194
  begin
194
195
  raise "See `miga -h`" if action.nil?
196
+
195
197
  action.launch
196
198
  rescue => err
197
199
  $stderr.puts "Exception: #{err}"
@@ -222,8 +224,8 @@ class MiGA::Cli < MiGA::MiGA
222
224
  # +par+, a Hash with object names as keys and parameter flag as values.
223
225
  # If missing, raise an error with message +msg+
224
226
  def ensure_par(req, msg = '%<name>s is mandatory: please provide %<flag>s')
225
- req.each do |k,v|
226
- raise (msg % {name: k, flag: v}) if self[k].nil?
227
+ req.each do |k, v|
228
+ raise (msg % { name: k, flag: v }) if self[k].nil?
227
229
  end
228
230
  end
229
231
 
@@ -8,7 +8,6 @@ require 'miga/cli'
8
8
  # by MiGA::Cli::Action::* classes. Do not attempt creating directly with +new+,
9
9
  # use instead the MiGA::Cli::Action.load interface.
10
10
  class MiGA::Cli::Action < MiGA::MiGA
11
-
12
11
  class << self
13
12
  def load(task, cli)
14
13
  require "miga/cli/action/#{task}"
@@ -55,8 +54,8 @@ class MiGA::Cli::Action < MiGA::MiGA
55
54
  ##
56
55
  # Name of the action, as referred to by the CLI
57
56
  def name
58
- camel = self.class.to_s.gsub(/.*::/,'')
59
- camel.gsub(/(\S)([A-Z])/,'\1_\2').downcase
57
+ camel = self.class.to_s.gsub(/.*::/, '')
58
+ camel.gsub(/(\S)([A-Z])/, '\1_\2').downcase
60
59
  end
61
60
 
62
61
  ##
@@ -4,23 +4,22 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::About < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
- cli.defaults = {info: false, processing: false, tabular: false}
8
+ cli.defaults = { info: false, processing: false, tabular: false }
10
9
  cli.parse do |opt|
11
10
  cli.opt_object(opt, [:project])
12
11
  opt.on(
13
12
  '-p', '--processing',
14
13
  'Print information on processing advance'
15
- ){ |v| cli[:processing] = v }
14
+ ) { |v| cli[:processing] = v }
16
15
  opt.on(
17
16
  '-m', '--metadata STRING',
18
17
  'Print name and metadata field only'
19
- ){ |v| cli[:datum] = v }
18
+ ) { |v| cli[:datum] = v }
20
19
  opt.on(
21
20
  '--tab',
22
21
  'Return a tab-delimited table'
23
- ){ |v| cli[:tabular] = v }
22
+ ) { |v| cli[:tabular] = v }
24
23
  end
25
24
  end
26
25
 
@@ -37,7 +36,7 @@ class MiGA::Cli::Action::About < MiGA::Cli::Action
37
36
  else
38
37
  cli.puts MiGA.tabulate([:key, :value], p.metadata.data.keys.map do |k|
39
38
  v = p.metadata[k]
40
- [k, k==:datasets ? v.size : v]
39
+ [k, k == :datasets ? v.size : v]
41
40
  end, cli[:tabular])
42
41
  end
43
42
  end
@@ -4,7 +4,6 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::Add < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
8
  cli.expect_files = true
10
9
  cli.defaults = {
@@ -47,7 +46,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
47
46
  opt.on(
48
47
  '-i', '--input-type STRING',
49
48
  'Type of input data, one of the following:',
50
- *self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
49
+ *self.class.INPUT_TYPES.map { |k, v| "~ #{k}: #{v[0]}" }
51
50
  ) { |v| cli[:input_type] = v.downcase.to_sym }
52
51
  opt.on(
53
52
  '--ignore-dups',
@@ -64,6 +63,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
64
63
  files.each do |file|
65
64
  d = create_dataset(file, p)
66
65
  next if d.nil?
66
+
67
67
  copy_file_to_project(file, file_type, d, p)
68
68
  d = cli.add_metadata(d)
69
69
  d.save
@@ -76,22 +76,22 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
76
76
  @@INPUT_TYPES = {
77
77
  raw_reads_single:
78
78
  ['Single raw reads in a single FastQ file',
79
- :raw_reads, %w[.1.fastq]],
79
+ :raw_reads, %w[.1.fastq]],
80
80
  raw_reads_paired:
81
81
  ['Paired raw reads in two FastQ files',
82
- :raw_reads, %w[.1.fastq .2.fastq]],
82
+ :raw_reads, %w[.1.fastq .2.fastq]],
83
83
  trimmed_reads_single:
84
84
  ['Single trimmed reads in a single FastA file',
85
- :trimmed_fasta, %w[.SingleReads.fa]],
85
+ :trimmed_fasta, %w[.SingleReads.fa]],
86
86
  trimmed_reads_paired:
87
87
  ['Paired trimmed reads in two FastA files',
88
- :trimmed_fasta, %w[.1.fasta .2.fasta]],
88
+ :trimmed_fasta, %w[.1.fasta .2.fasta]],
89
89
  trimmed_reads_interleaved:
90
90
  ['Paired trimmed reads in a single FastA file',
91
- :trimmed_fasta, %w[.CoupledReads.fa]],
91
+ :trimmed_fasta, %w[.CoupledReads.fa]],
92
92
  assembly:
93
93
  ['Assembled contigs or scaffolds in FastA format',
94
- :assembly, %w[.LargeContigs.fna]]
94
+ :assembly, %w[.LargeContigs.fna]]
95
95
  }
96
96
 
97
97
  class << self
@@ -106,23 +106,26 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
106
106
  files = cli.files
107
107
  file_type = nil
108
108
  if files.empty?
109
- cli.ensure_par({dataset: '-D'},
110
- 'dataset is mandatory (-D) unless files are provided')
109
+ cli.ensure_par({ dataset: '-D' },
110
+ 'dataset is mandatory (-D) unless files are provided')
111
111
  cli.ensure_type(Dataset)
112
112
  files = [nil]
113
113
  else
114
114
  raise 'Please specify input type (-i).' if cli[:input_type].nil?
115
+
115
116
  file_type = self.class.INPUT_TYPES[cli[:input_type]]
116
117
  raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
117
118
  raise 'Some files are duplicated, files must be unique.' if
118
119
  files.size != files.uniq.size
120
+
119
121
  if cli[:input_type].to_s =~ /_paired$/
120
122
  if files.size.odd?
121
123
  raise 'Odd number of files incompatible with input type.'
122
124
  end
125
+
123
126
  files = Hash[*files].to_a
124
127
  else
125
- files = files.map{ |i| [i] }
128
+ files = files.map { |i| [i] }
126
129
  end
127
130
  if files.size > 1 && !cli[:dataset].nil?
128
131
  raise 'The dataset name (-D) can only be specified with one input file.'
@@ -137,6 +140,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
137
140
  ref_file = file.is_a?(Array) ? file.first : file
138
141
  m = cli[:regexp].match(ref_file)
139
142
  raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
143
+
140
144
  name = cli[:prefix].to_s + m[1].miga_name
141
145
  end
142
146
  if Dataset.exist?(p, name)
@@ -151,12 +155,14 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
151
155
  cli.say "o #{name}"
152
156
  d = Dataset.new(p, name, cli[:ref])
153
157
  raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
158
+
154
159
  d
155
160
  end
156
161
 
157
162
  def copy_file_to_project(file, file_type, d, p)
158
163
  return if file.nil?
159
- r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
164
+
165
+ r_dir = Dataset.RESULT_DIRS[file_type[1]]
160
166
  r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
161
167
  file_type[2].each_with_index do |ext, i|
162
168
  gz = file[i] =~ /\.gz/ ? '.gz' : ''
@@ -4,15 +4,14 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::AddResult < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
- cli.defaults = {force: false}
8
+ cli.defaults = { force: false }
10
9
  cli.parse do |opt|
11
10
  cli.opt_object(opt, [:project, :dataset_opt, :result])
12
11
  opt.on(
13
12
  '-f', '--force',
14
13
  'Force re-indexing of the result even if it\'s already registered'
15
- ){ |v| cli[:force] = v }
14
+ ) { |v| cli[:force] = v }
16
15
  end
17
16
  end
18
17
 
@@ -5,7 +5,6 @@ require 'miga/cli/action'
5
5
  require 'rubygems/package'
6
6
 
7
7
  class MiGA::Cli::Action::Archive < MiGA::Cli::Action
8
-
9
8
  def parse_cli
10
9
  cli.parse do |opt|
11
10
  opt.on(
@@ -26,6 +25,7 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
26
25
  unless cli[:tarball] =~ /\.tar\.gz$/
27
26
  raise 'The tarball path (-o) must have .tar.gz extension'
28
27
  end
28
+
29
29
  cli[:folder] ||= cli.load_project.name
30
30
  ds = cli.load_and_filter_datasets
31
31
 
@@ -95,5 +95,4 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
95
95
  in_tar = File.join(cli[:folder], rel_path)
96
96
  tar.add_file_simple(in_tar, 0666, string.size) { |fh| fh.write(string) }
97
97
  end
98
-
99
98
  end
@@ -51,24 +51,24 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
51
51
  ref_db = reference_db
52
52
  p_metadata = Hash[
53
53
  %w[project_stats haai_distances aai_distances ani_distances clade_finding]
54
- .map { |i| ["run_#{i}", false] }
54
+ .map { |i| ["run_#{i}", false] }
55
55
  ]
56
56
  p_metadata[:ref_project] = ref_db.path
57
57
  p_metadata[:tax_pvalue] = cli[:pvalue]
58
58
  p = create_project(:assembly, p_metadata,
59
- run_ssu: false, run_mytaxa_scan: false, run_distances: false)
59
+ run_ssu: false, run_mytaxa_scan: false, run_distances: false)
60
60
  # Run
61
61
  run_daemon
62
62
  summarize(%w[cds assembly essential_genes]) if cli[:summaries]
63
63
  summarize(['taxonomy'])
64
64
  cli.say "Summary: classification"
65
65
  call_cli([
66
- 'ls', '-P', cli[:outdir], '-m', 'tax', '--tab',
67
- '-o', File.expand_path('classification.tsv', cli[:outdir])
68
- ])
66
+ 'ls', '-P', cli[:outdir], '-m', 'tax', '--tab',
67
+ '-o', File.expand_path('classification.tsv', cli[:outdir])
68
+ ])
69
69
  cleanup
70
70
  end
71
-
71
+
72
72
  private
73
73
 
74
74
  def reference_db
@@ -85,12 +85,14 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
85
85
  unless File.size? lm_f
86
86
  raise 'No locally listed databases, call "miga get_db" first'
87
87
  end
88
+
88
89
  cli[:database] = MiGA::Json.parse(lm_f)[:databases].keys.first
89
90
  end
90
91
  ref_db_path = File.expand_path(cli[:database].to_s, cli[:local])
91
92
  end
92
93
  ref_db = MiGA::Project.load(ref_db_path)
93
94
  raise "Cannot locate reference database: #{ref_db_path}" if ref_db.nil?
95
+
94
96
  cli.say "Reference database: #{ref_db.name}"
95
97
  ref_db
96
98
  end
@@ -4,7 +4,6 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::Console < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
8
  end
10
9
 
@@ -5,17 +5,17 @@ require 'miga/cli/action'
5
5
  require 'miga/daemon'
6
6
 
7
7
  class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
8
-
9
8
  def parse_cli
10
- cli.defaults = {daemon_opts: []}
9
+ cli.defaults = { daemon_opts: [] }
11
10
  cli.expect_operation = true
12
11
  cli.parse do |opt|
13
12
  opt.separator 'Available operations:'
14
- { start: 'Start an instance of the application',
15
- stop: 'Start an instance of the application',
16
- run: 'Start the application and stay on top',
17
- status: 'Show status (PID) of application instances'
18
- }.each { |k,v| opt.separator sprintf ' %*s%s', -33, k, v }
13
+ {
14
+ start: 'Start an instance of the application',
15
+ stop: 'Start an instance of the application',
16
+ run: 'Start the application and stay on top',
17
+ status: 'Show status (PID) of application instances'
18
+ }.each { |k, v| opt.separator sprintf(' %*s%s', -33, k, v) }
19
19
  opt.separator ''
20
20
 
21
21
  opt.separator 'MiGA options:'
@@ -4,7 +4,6 @@
4
4
  require 'miga/cli/action'
5
5
 
6
6
  class MiGA::Cli::Action::Date < MiGA::Cli::Action
7
-
8
7
  def parse_cli
9
8
  cli.parse { |_| }
10
9
  end
@@ -44,12 +44,13 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
44
44
  def perform
45
45
  # Input data
46
46
  p = create_project(:assembly,
47
- { run_project_stats: false, run_clades: false,
48
- gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
49
- { run_mytaxa_scan: false, run_ssu: false })
47
+ { run_project_stats: false, run_clades: false,
48
+ gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
49
+ { run_mytaxa_scan: false, run_ssu: false })
50
50
  unless cli[:threshold] >= 0.0 && cli[:threshold] <= 100.0
51
51
  raise "The threshold of identity must be in the range [0,100]"
52
52
  end
53
+
53
54
  # Run
54
55
  run_daemon
55
56
  dereplicate(p)
@@ -68,7 +69,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
68
69
  File.open(File.expand_path('genomospecies.tsv', cli[:outdir]), 'w') do |fh|
69
70
  fh.puts "Clade\tRepresentative\tMembers"
70
71
  clades.each_with_index do |i, k|
71
- fh.puts ["gsp_#{k+1}", rep[k], i.join(',')].join("\t")
72
+ fh.puts ["gsp_#{k + 1}", rep[k], i.join(',')].join("\t")
72
73
  end
73
74
  end
74
75
  if cli[:collection]
@@ -1,15 +1,15 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'miga/cli/action'
5
- require 'sqlite3'
4
+ require 'miga/cli/action/doctor/base'
6
5
 
7
6
  class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
7
+ include MiGA::Cli::Action::Doctor::Base
8
8
 
9
9
  def parse_cli
10
- @@OPERATIONS.keys.each { |i| cli.defaults = { i => true } }
10
+ cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
11
11
  cli.parse do |opt|
12
- operation_n = Hash[@@OPERATIONS.map { |k,v| [v[0], k] }]
12
+ operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
13
13
  cli.opt_object(opt, [:project])
14
14
  opt.on(
15
15
  '--ignore TASK1,TASK2', Array,
@@ -21,20 +21,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
21
21
  'Perform only the specified task (see --ignore)'
22
22
  ) do |v|
23
23
  op_k = @@OPERATIONS.find { |_, i| i[0] == v.downcase }.first
24
- @@OPERATIONS.keys.each { |i| cli[i] = false }
24
+ @@OPERATIONS.each_key { |i| cli[i] = false }
25
25
  cli[op_k] = true
26
26
  end
27
27
  end
28
28
  end
29
29
 
30
- def check_sqlite3_database(db_file, metric)
31
- SQLite3::Database.new(db_file) do |conn|
32
- conn.execute("select count(*) from #{metric}").first
33
- end
34
- rescue SQLite3::SQLException
35
- yield
36
- end
37
-
38
30
  def perform
39
31
  p = cli.load_project
40
32
  @@OPERATIONS.keys.each do |k|
@@ -43,7 +35,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
43
35
  end
44
36
 
45
37
  @@OPERATIONS = {
46
- db: ['databases', 'Check database files integrity'],
38
+ status: ['status', 'Update metadata status of all datasets'],
39
+ db: ['databases', 'Check integrity of database files'],
47
40
  dist: ['distances', 'Check distance summary tables'],
48
41
  files: ['files', 'Check for outdated files'],
49
42
  cds: ['cds', 'Check for gzipped genes and proteins'],
@@ -52,36 +45,54 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
52
45
  start: ['start', 'Check for lingering .start files'],
53
46
  tax: ['taxonomy', 'Check for taxonomy consistency (not yet implemented)']
54
47
  }
48
+
55
49
  class << self
50
+ ##
51
+ # All supported operations
56
52
  def OPERATIONS
57
53
  @@OPERATIONS
58
54
  end
59
55
  end
60
56
 
57
+ ##
58
+ # Perform status operation with MiGA::Cli +cli+
59
+ def check_status(cli)
60
+ cli.say 'Updating metadata status'
61
+ n, k = cli.load_project.dataset_names.size, 0
62
+ cli.load_project.each_dataset do |d|
63
+ cli.advance('Datasets:', k += 1, n, false)
64
+ d.recalculate_status
65
+ end
66
+ cli.say
67
+ end
68
+
69
+ ##
70
+ # Perform databases operation with MiGA::Cli +cli+
61
71
  def check_db(cli)
62
- cli.say 'Checking databases integrity'
72
+ cli.say 'Checking integrity of databases'
73
+ n, k = cli.load_project.dataset_names.size, 0
63
74
  cli.load_project.each_dataset do |d|
64
- [:distances, :taxonomy].each do |r_key|
65
- r = d.result(r_key) or next
66
- {haai_db: :aai, aai_db: :aai, ani_db: :ani}.each do |db_key, metric|
67
- db_file = r.file_path(db_key) or next
68
- check_sqlite3_database(db_file, metric) do
69
- cli.say(
70
- " > Removing #{db_key} #{r_key} table for #{d.name}")
71
- [db_file, r.path(:done), r.path].each do |f|
72
- File.unlink(f) if File.exist? f
73
- end # each |f|
74
- end # check_sqlite3_database
75
- end # each |db_key, metric|
76
- end # each |r_key|
77
- end # each |d|
75
+ cli.advance('Datasets:', k += 1, n, false)
76
+ each_database_file(d) do |db_file, metric, result|
77
+ check_sqlite3_database(db_file, metric) do
78
+ cli.say(" > Removing malformed database from #{d.name}:#{result} ")
79
+ File.unlink(db_file)
80
+ r = d.result(result) or next
81
+ [r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
82
+ end
83
+ end
84
+ end
85
+ cli.say
78
86
  end
79
87
 
88
+ ##
89
+ # Perform distances operation with MiGA::Cli +cli+
80
90
  def check_dist(cli)
81
91
  p = cli.load_project
82
- [:ani, :aai].each do |dist|
92
+ %i[ani aai].each do |dist|
83
93
  res = p.result("#{dist}_distances")
84
94
  next if res.nil?
95
+
85
96
  cli.say "Checking #{dist} table for consistent datasets"
86
97
  notok, fix = check_dist_eval(cli, p, res)
87
98
  check_dist_fix(cli, p, fix)
@@ -89,10 +100,13 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
89
100
  end
90
101
  end
91
102
 
103
+ ##
104
+ # Perform files operation with MiGA::Cli +cli+
92
105
  def check_files(cli)
93
106
  cli.say 'Looking for outdated files in results'
94
- p = cli.load_project
95
- p.each_dataset do |d|
107
+ n, k = cli.load_project.dataset_names.size, 0
108
+ cli.load_project.each_dataset do |d|
109
+ cli.advance('Datasets:', k += 1, n, false)
96
110
  d.each_result do |r_k, r|
97
111
  ok = true
98
112
  r.each_file do |_f_sym, _f_rel, f_abs|
@@ -102,23 +116,28 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
102
116
  end
103
117
  end
104
118
  unless ok
105
- cli.say " > Registering again #{d.name}:#{r_k}"
119
+ cli.say " > Registering again #{d.name}:#{r_k} "
106
120
  d.add_result(r_k, true, force: true)
107
121
  sr = d.result(:stats) and sr.remove!
108
122
  end
109
123
  end
110
124
  end
125
+ cli.say
111
126
  end
112
127
 
128
+ ##
129
+ # Perform cds operation with MiGA::Cli +cli+
113
130
  def check_cds(cli)
114
131
  cli.say 'Looking for unzipped genes or proteins'
132
+ n, k = cli.load_project.dataset_names.size, 0
115
133
  cli.load_project.each_dataset do |d|
134
+ cli.advance('Datasets:', k += 1, n, false)
116
135
  res = d.result(:cds) or next
117
136
  changed = false
118
- [:genes, :proteins, :gff3, :gff2, :tab].each do |f|
137
+ %i[genes proteins gff3 gff2 tab].each do |f|
119
138
  file = res.file_path(f) or next
120
139
  if file !~ /\.gz/
121
- cli.say " > Gzipping #{d.name} #{f}"
140
+ cli.say " > Gzipping #{d.name} #{f} "
122
141
  cmdo = `gzip -9 '#{file}'`.chomp
123
142
  warn(cmdo) unless cmdo.empty?
124
143
  changed = true
@@ -129,13 +148,17 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
129
148
  sr = d.result(:stats) and sr.remove!
130
149
  end
131
150
  end
151
+ cli.say
132
152
  end
133
153
 
154
+ ##
155
+ # Perform essential-genes operation with MiGA::Cli +cli+
134
156
  def check_ess(cli)
135
157
  cli.say 'Looking for unarchived essential genes'
136
158
  cli.load_project.each_dataset do |d|
137
159
  res = d.result(:essential_genes)
138
160
  next if res.nil?
161
+
139
162
  dir = res.file_path(:collection)
140
163
  if dir.nil?
141
164
  cli.say " > Removing #{d.name}:essential_genes"
@@ -144,17 +167,21 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
144
167
  next
145
168
  end
146
169
  next if Dir["#{dir}/*.faa"].empty?
170
+
147
171
  cli.say " > Fixing #{d.name}"
148
172
  cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
149
173
  warn(cmdo) unless cmdo.empty?
150
174
  end
151
175
  end
152
176
 
177
+ ##
178
+ # Perform mytaxa-scan operation with MiGA::Cli +cli+
153
179
  def check_mts(cli)
154
180
  cli.say 'Looking for unarchived MyTaxa Scan runs'
155
181
  cli.load_project.each_dataset do |d|
156
182
  res = d.result(:mytaxa_scan)
157
183
  next if res.nil?
184
+
158
185
  dir = res.file_path(:regions)
159
186
  fix = false
160
187
  unless dir.nil?
@@ -166,8 +193,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
166
193
  end
167
194
  fix = true
168
195
  end
169
- %w[blast mytaxain wintax gene_ids region_ids].each do |ext|
170
- file = res.file_path(ext.to_sym)
196
+ %i[blast mytaxain wintax gene_ids region_ids].each do |ext|
197
+ file = res.file_path(ext)
171
198
  unless file.nil?
172
199
  FileUtils.rm(file) if File.exist? file
173
200
  fix = true
@@ -180,6 +207,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
180
207
  end
181
208
  end
182
209
 
210
+ ##
211
+ # Perform start operation with MiGA::Cli +cli+
183
212
  def check_start(cli)
184
213
  cli.say 'Looking for legacy .start files lingering'
185
214
  cli.load_project.each_dataset do |d|
@@ -192,52 +221,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
192
221
  end
193
222
  end
194
223
 
224
+ ##
225
+ # Perform taxonomy operation with MiGA::Cli +cli+
195
226
  def check_tax(cli)
196
- #cli.say 'o Checking for taxonomy/distances consistency'
227
+ # cli.say 'o Checking for taxonomy/distances consistency'
197
228
  # TODO: Find 95%ANI clusters with entries from different species
198
- end
199
-
200
- private
201
-
202
- def check_dist_eval(cli, p, res)
203
- notok = {}
204
- fix = {}
205
- Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
206
- lineno = 0
207
- fh.each_line do |ln|
208
- next if (lineno += 1) == 1
209
- r = ln.split("\t")
210
- next unless [1, 2].map { |i| p.dataset(r[i]).nil? }.any?
211
- [1, 2].each do |i|
212
- if p.dataset(r[i]).nil?
213
- notok[r[i]] = true
214
- else
215
- fix[r[i]] = true
216
- end
217
- end
218
- end
219
- end
220
- [notok, fix]
221
- end
222
-
223
- def check_dist_fix(cli, p, fix)
224
- return if fix.empty?
225
- cli.say("- Fixing #{fix.size} datasets")
226
- fix.keys.each do |d_n|
227
- cli.say " > Fixing #{d_n}."
228
- p.dataset(d_n).cleanup_distances!
229
- end
230
- end
231
-
232
- def check_dist_recompute(cli, p, notok)
233
- return if notok.empty?
234
- cli.say '- Unregistered datasets detected: '
235
- if notok.size <= 5
236
- notok.keys.each { |i| cli.say " > #{i}" }
237
- else
238
- cli.say " > #{notok.size}, including #{notok.keys.first}"
239
- end
240
- cli.say '- Removing tables, recompute'
241
- res.remove!
229
+ # TODO: Find different 95%ANI clusters with genomes from the same species
230
+ # TODO: Find AAI values too high or too low for each LCA rank
242
231
  end
243
232
  end