miga-base 0.4.1.0 → 0.4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/bin/miga +2 -244
  3. data/lib/miga/cli/action/about.rb +44 -0
  4. data/lib/miga/cli/action/add.rb +139 -0
  5. data/lib/miga/cli/action/add_result.rb +26 -0
  6. data/lib/miga/cli/action/console.rb +19 -0
  7. data/lib/miga/cli/action/daemon.rb +74 -0
  8. data/lib/miga/cli/action/date.rb +18 -0
  9. data/lib/miga/cli/action/doctor.rb +210 -0
  10. data/lib/miga/cli/action/edit.rb +24 -0
  11. data/lib/miga/cli/action/files.rb +31 -0
  12. data/lib/miga/cli/action/find.rb +48 -0
  13. data/lib/miga/cli/action/generic.rb +44 -0
  14. data/lib/miga/cli/action/get.rb +132 -0
  15. data/lib/miga/cli/action/init.rb +343 -0
  16. data/lib/miga/cli/action/ln.rb +42 -0
  17. data/lib/miga/cli/action/ls.rb +55 -0
  18. data/lib/miga/cli/action/ncbi_get.rb +218 -0
  19. data/lib/miga/cli/action/new.rb +45 -0
  20. data/lib/miga/cli/action/next_step.rb +27 -0
  21. data/lib/miga/cli/action/plugins.rb +28 -0
  22. data/lib/miga/cli/action/rm.rb +25 -0
  23. data/lib/miga/cli/action/run.rb +39 -0
  24. data/lib/miga/cli/action/stats.rb +140 -0
  25. data/lib/miga/cli/action/summary.rb +49 -0
  26. data/lib/miga/cli/action/tax_dist.rb +102 -0
  27. data/lib/miga/cli/action/tax_index.rb +47 -0
  28. data/lib/miga/cli/action/tax_set.rb +59 -0
  29. data/lib/miga/cli/action/tax_test.rb +77 -0
  30. data/lib/miga/cli/action.rb +66 -0
  31. data/lib/miga/cli/base.rb +90 -0
  32. data/lib/miga/cli.rb +426 -0
  33. data/lib/miga/project/result.rb +14 -6
  34. data/lib/miga/remote_dataset.rb +1 -1
  35. data/lib/miga/tax_index.rb +5 -4
  36. data/lib/miga/taxonomy/base.rb +63 -0
  37. data/lib/miga/taxonomy.rb +87 -92
  38. data/lib/miga/version.rb +6 -6
  39. data/test/taxonomy_test.rb +49 -9
  40. data/utils/distance/commands.rb +11 -11
  41. data/utils/distance/pipeline.rb +5 -5
  42. metadata +43 -49
  43. data/actions/about.rb +0 -43
  44. data/actions/add.rb +0 -129
  45. data/actions/add_result.rb +0 -30
  46. data/actions/daemon.rb +0 -55
  47. data/actions/date.rb +0 -14
  48. data/actions/doctor.rb +0 -201
  49. data/actions/edit.rb +0 -33
  50. data/actions/files.rb +0 -43
  51. data/actions/find.rb +0 -41
  52. data/actions/get.rb +0 -105
  53. data/actions/init.rb +0 -301
  54. data/actions/ln.rb +0 -47
  55. data/actions/ls.rb +0 -61
  56. data/actions/ncbi_get.rb +0 -192
  57. data/actions/new.rb +0 -44
  58. data/actions/next_step.rb +0 -33
  59. data/actions/plugins.rb +0 -25
  60. data/actions/rm.rb +0 -29
  61. data/actions/run.rb +0 -45
  62. data/actions/stats.rb +0 -149
  63. data/actions/summary.rb +0 -57
  64. data/actions/tax_dist.rb +0 -106
  65. data/actions/tax_index.rb +0 -46
  66. data/actions/tax_set.rb +0 -63
  67. data/actions/tax_test.rb +0 -80
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1.0
4
+ version: 0.4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-17 00:00:00.000000000 Z
11
+ date: 2019-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -16,68 +16,56 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.2'
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.2.4
19
+ version: '1.3'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
24
  - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: '1.2'
30
- - - ">="
31
- - !ruby/object:Gem::Version
32
- version: 1.2.4
26
+ version: '1.3'
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: json
35
29
  requirement: !ruby/object:Gem::Requirement
36
30
  requirements:
37
- - - ">"
38
- - !ruby/object:Gem::Version
39
- version: '1.8'
40
- - - "<"
31
+ - - "~>"
41
32
  - !ruby/object:Gem::Version
42
- version: '3'
33
+ version: '2'
43
34
  type: :runtime
44
35
  prerelease: false
45
36
  version_requirements: !ruby/object:Gem::Requirement
46
37
  requirements:
47
- - - ">"
48
- - !ruby/object:Gem::Version
49
- version: '1.8'
50
- - - "<"
38
+ - - "~>"
51
39
  - !ruby/object:Gem::Version
52
- version: '3'
40
+ version: '2'
53
41
  - !ruby/object:Gem::Dependency
54
42
  name: sqlite3
55
43
  requirement: !ruby/object:Gem::Requirement
56
44
  requirements:
57
45
  - - "~>"
58
46
  - !ruby/object:Gem::Version
59
- version: '1.3'
47
+ version: '1.4'
60
48
  type: :runtime
61
49
  prerelease: false
62
50
  version_requirements: !ruby/object:Gem::Requirement
63
51
  requirements:
64
52
  - - "~>"
65
53
  - !ruby/object:Gem::Version
66
- version: '1.3'
54
+ version: '1.4'
67
55
  - !ruby/object:Gem::Dependency
68
56
  name: rake
69
57
  requirement: !ruby/object:Gem::Requirement
70
58
  requirements:
71
59
  - - "~>"
72
60
  - !ruby/object:Gem::Version
73
- version: '11'
61
+ version: '12'
74
62
  type: :development
75
63
  prerelease: false
76
64
  version_requirements: !ruby/object:Gem::Requirement
77
65
  requirements:
78
66
  - - "~>"
79
67
  - !ruby/object:Gem::Version
80
- version: '11'
68
+ version: '12'
81
69
  - !ruby/object:Gem::Dependency
82
70
  name: test-unit
83
71
  requirement: !ruby/object:Gem::Requirement
@@ -104,35 +92,40 @@ files:
104
92
  - LICENSE
105
93
  - README.md
106
94
  - Rakefile
107
- - actions/about.rb
108
- - actions/add.rb
109
- - actions/add_result.rb
110
- - actions/daemon.rb
111
- - actions/date.rb
112
- - actions/doctor.rb
113
- - actions/edit.rb
114
- - actions/files.rb
115
- - actions/find.rb
116
- - actions/get.rb
117
- - actions/init.rb
118
- - actions/ln.rb
119
- - actions/ls.rb
120
- - actions/ncbi_get.rb
121
- - actions/new.rb
122
- - actions/next_step.rb
123
- - actions/plugins.rb
124
- - actions/rm.rb
125
- - actions/run.rb
126
- - actions/stats.rb
127
- - actions/summary.rb
128
- - actions/tax_dist.rb
129
- - actions/tax_index.rb
130
- - actions/tax_set.rb
131
- - actions/tax_test.rb
132
95
  - bin/miga
133
96
  - lib/miga.rb
134
97
  - lib/miga/_data/aai-intax.tsv.gz
135
98
  - lib/miga/_data/aai-novel.tsv.gz
99
+ - lib/miga/cli.rb
100
+ - lib/miga/cli/action.rb
101
+ - lib/miga/cli/action/about.rb
102
+ - lib/miga/cli/action/add.rb
103
+ - lib/miga/cli/action/add_result.rb
104
+ - lib/miga/cli/action/console.rb
105
+ - lib/miga/cli/action/daemon.rb
106
+ - lib/miga/cli/action/date.rb
107
+ - lib/miga/cli/action/doctor.rb
108
+ - lib/miga/cli/action/edit.rb
109
+ - lib/miga/cli/action/files.rb
110
+ - lib/miga/cli/action/find.rb
111
+ - lib/miga/cli/action/generic.rb
112
+ - lib/miga/cli/action/get.rb
113
+ - lib/miga/cli/action/init.rb
114
+ - lib/miga/cli/action/ln.rb
115
+ - lib/miga/cli/action/ls.rb
116
+ - lib/miga/cli/action/ncbi_get.rb
117
+ - lib/miga/cli/action/new.rb
118
+ - lib/miga/cli/action/next_step.rb
119
+ - lib/miga/cli/action/plugins.rb
120
+ - lib/miga/cli/action/rm.rb
121
+ - lib/miga/cli/action/run.rb
122
+ - lib/miga/cli/action/stats.rb
123
+ - lib/miga/cli/action/summary.rb
124
+ - lib/miga/cli/action/tax_dist.rb
125
+ - lib/miga/cli/action/tax_index.rb
126
+ - lib/miga/cli/action/tax_set.rb
127
+ - lib/miga/cli/action/tax_test.rb
128
+ - lib/miga/cli/base.rb
136
129
  - lib/miga/common.rb
137
130
  - lib/miga/common/base.rb
138
131
  - lib/miga/common/format.rb
@@ -158,6 +151,7 @@ files:
158
151
  - lib/miga/tax_dist.rb
159
152
  - lib/miga/tax_index.rb
160
153
  - lib/miga/taxonomy.rb
154
+ - lib/miga/taxonomy/base.rb
161
155
  - lib/miga/version.rb
162
156
  - scripts/aai_distances.bash
163
157
  - scripts/ani_distances.bash
data/actions/about.rb DELETED
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, info:false, processing:false, tabular:false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project])
10
- opt.on("-p", "--processing",
11
- "Print information on processing advance."){ |v| o[:processing]=v }
12
- opt.on("-m", "--metadata STRING",
13
- "Print name and metadata field only."
14
- ){ |v| o[:datum]=v }
15
- opt.on("--tab",
16
- "Returns a tab-delimited table."){ |v| o[:tabular] = v }
17
- opt_common(opt, o)
18
- end.parse!
19
-
20
-
21
- ##=> Main <=
22
- opt_require(o, project:"-P")
23
-
24
- $stderr.puts "Loading project." unless o[:q]
25
- p = MiGA::Project.load(o[:project])
26
- raise "Impossible to load project: #{o[:project]}" if p.nil?
27
-
28
- if not o[:datum].nil?
29
- v = p.metadata[ o[:datum] ]
30
- puts v.nil? ? '?' : v
31
- elsif o[:processing]
32
- keys = MiGA::Project.DISTANCE_TASKS + MiGA::Project.INCLADE_TASKS
33
- puts MiGA::MiGA.tabulate([:task, :status], keys.map do |k|
34
- [k, p.add_result(k, false).nil? ? "queued" : "done"]
35
- end, o[:tabular])
36
- else
37
- puts MiGA::MiGA.tabulate([:key, :value], p.metadata.data.keys.map do |k|
38
- v = p.metadata[k]
39
- [k, k==:datasets ? v.size : v]
40
- end, o[:tabular])
41
- end
42
-
43
- $stderr.puts "Done." unless o[:q]
data/actions/add.rb DELETED
@@ -1,129 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- input_types = {
7
- raw_reads_single:
8
- ['Single raw reads in a single FastQ file',
9
- :raw_reads, %w[.1.fastq]],
10
- raw_reads_paired:
11
- ['Paired raw reads in two FastQ files',
12
- :raw_reads, %w[.1.fastq .2.fastq]],
13
- trimmed_reads_single:
14
- ['Single trimmed reads in a single FastA file',
15
- :trimmed_fasta, %w[.SingleReads.fa]],
16
- trimmed_reads_paired:
17
- ['Paired trimmed reads in two FastA files',
18
- :trimmed_fasta, %w[.1.fasta .2.fasta]],
19
- trimmed_reads_interleaved:
20
- ['Paired trimmed reads in a single FastA file',
21
- :trimmed_fasta, %w[.CoupledReads.fa]],
22
- assembly:
23
- ['Assembled contigs or scaffolds in FastA format',
24
- :assembly, %w[.LargeContigs.fna]]
25
- }
26
-
27
- o = {q: true, ref: true, ignore_dups: false,
28
- regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
29
- OptionParser.new do |opt|
30
- opt_banner(opt, true)
31
- opt_object(opt, o, [:project, :dataset_opt, :dataset_type_req])
32
- opt.on('-q', '--query',
33
- 'If set, the dataset is registered as a query, not a reference dataset.'
34
- ){ |v| o[:ref] = !v }
35
- opt.on('-d', '--description STRING',
36
- 'Description of the dataset.'){ |v| o[:description] = v }
37
- opt.on('-c', '--comments STRING',
38
- 'Comments on the dataset.'){ |v| o[:comments] = v }
39
- opt.on('-m', '--metadata STRING',
40
- 'Metadata as key-value pairs separated by = and delimited by comma.',
41
- 'Values are saved as strings except for booleans (true / false) or nil.'
42
- ){ |v| o[:metadata] = v }
43
- opt.on('-r', '--name-regexp REGEXP', Regexp,
44
- 'Regular expression indicating how to extract the name from the file path.',
45
- "By default: '#{o[:regexp]}'"){ |v| o[:regexp] = v }
46
- opt.on('-i', '--input-type STRING',
47
- 'Type of input data, one of the following:',
48
- *input_types.map{ |k,v| "~ #{k}: #{v[0]}." }
49
- ){ |v| o[:input_type] = v.downcase.to_sym }
50
- opt.on('--ignore-dups', 'Continue with a warning if a dataset already exists.'
51
- ){ |v| o[:ignore_dups] = v }
52
- opt_common(opt, o)
53
-
54
- opt.separator 'You can create multiple datasets with a single command, ' \
55
- 'simply pass all the files at the end (FILES...).'
56
- opt.separator 'If -D is passed, only one dataset will be added. ' \
57
- 'Otherwise, dataset names will be determined by the file paths (-r).'
58
- opt.separator ''
59
- end.parse!
60
-
61
- ##=> Main <=
62
- opt_require(o, project: '-P')
63
- files = ARGV
64
- file_type = nil
65
- if files.empty?
66
- opt_require_type(o, MiGA::Dataset)
67
- files = [nil]
68
- else
69
- raise 'Please specify input type (-i).' if o[:input_type].nil?
70
- file_type = input_types[o[:input_type]]
71
- raise "Unrecognized input type: #{o[:input_type]}." if file_type.nil?
72
- raise 'Some files are duplicated, files must be unique.' if
73
- files.size != files.uniq.size
74
- if o[:input_type].to_s =~ /_paired$/
75
- raise 'Odd number of files incompatible with input type.' if files.size.odd?
76
- files = Hash[*files].to_a
77
- else
78
- files = files.map{ |i| [i] }
79
- end
80
- raise 'The dataset name (-D) can only be specified with one input file.' if
81
- files.size > 1 and not o[:dataset].nil?
82
- end
83
-
84
- $stderr.puts 'Loading project.' unless o[:q]
85
- p = MiGA::Project.load(o[:project])
86
- raise "Impossible to load project: #{o[:project]}" if p.nil?
87
-
88
- $stderr.puts 'Creating datasets:' unless o[:q]
89
- files.each do |file|
90
- name = o[:dataset]
91
- if name.nil?
92
- ref_file = file.is_a?(Array) ? file.first : file
93
- m = o[:regexp].match(ref_file)
94
- raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
95
- name = m[1].miga_name
96
- end
97
-
98
- if MiGA::Dataset.exist?(p, name)
99
- if o[:ignore_dups]
100
- warn "Dataset already exists: #{name}."
101
- next
102
- else
103
- raise "Dataset already exists: #{name}."
104
- end
105
- end
106
-
107
- $stderr.puts "o #{name}" unless o[:q]
108
- d = MiGA::Dataset.new(p, name, o[:ref])
109
- raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
110
-
111
- unless file.nil?
112
- r_dir = MiGA::Dataset.RESULT_DIRS[ file_type[1] ]
113
- r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
114
- file_type[2].each_with_index do |ext, i|
115
- gz = file[i] =~ /\.gz/ ? '.gz' : ''
116
- FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
117
- $stderr.puts " file: #{file[i]}" unless o[:q]
118
- end
119
- File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
120
- end
121
-
122
- d = add_metadata(o, d)
123
- d.save
124
- p.add_dataset(name)
125
- res = d.first_preprocessing(true)
126
- $stderr.puts " result: #{res}" unless o[:q]
127
- end
128
-
129
- $stderr.puts 'Done.' unless o[:q]
data/actions/add_result.rb DELETED
@@ -1,30 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, force:false}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt, :result])
10
- opt.on("-f", "--force",
11
- "Forces re-indexing of the result even if it's already registered."
12
- ){ |v| o[:force]=v }
13
- opt_common(opt, o)
14
- end.parse!
15
-
16
- ##=> Main <=
17
- opts.parse!
18
- opt_require(o, project:"-P", name:"-r")
19
-
20
- $stderr.puts "Loading project." unless o[:q]
21
- p = MiGA::Project.load(o[:project])
22
- raise "Impossible to load project: #{o[:project]}" if p.nil?
23
-
24
- $stderr.puts "Registering result." unless o[:q]
25
- obj = o[:dataset].nil? ? p : p.dataset(o[:dataset])
26
- r = obj.add_result(o[:name].to_sym, true, force: o[:force])
27
-
28
- raise "Cannot add result, incomplete expected files." if r.nil?
29
-
30
- $stderr.puts "Done." unless o[:q]
data/actions/daemon.rb DELETED
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'miga/daemon'
7
-
8
- task = ARGV.shift unless %w[-h --help].include? ARGV.first
9
- ARGV << '-h' if ARGV.empty?
10
- o = {q: true, daemon_opts: []}
11
- OptionParser.new do |opt|
12
- opt_banner(opt)
13
- opt.separator 'task:'
14
- { start: 'Start an instance of the application.',
15
- stop: 'Start an instance of the application.',
16
- restart: 'Stop all instances and restart them afterwards.',
17
- reload: 'Send a SIGHUP to all instances of the application.',
18
- run: 'Start the application and stay on top.',
19
- zap: 'Set the application to a stopped state.',
20
- status: 'Show status (PID) of application instances.'
21
- }.each{ |k,v| opt.separator sprintf ' %*s%s', -33, k, v }
22
- opt.separator ''
23
- opt.separator 'MiGA options:'
24
- opt_object(opt, o, [:project])
25
- opt.on('--shutdown-when-done',
26
- 'If passed, the daemon will exit when all processing is done.',
27
- 'Otherwise (default), it will stay idle awaiting for new data.'
28
- ){ |v| o[:shutdown_when_done] = v }
29
- opt.on('--latency INT',
30
- 'Number of seconds the daemon will be sleeping.'
31
- ){ |v| o[:latency]=v.to_i }
32
- opt.on('--max-jobs INT',
33
- 'Maximum number of jobs to use simultaneously.'){ |v| o[:maxjobs]=v.to_i }
34
- opt.on('--ppn INT',
35
- 'Maximum number of cores to use in a single job.'){ |v| o[:ppn]=v.to_i }
36
- opt_common(opt, o)
37
- opt.separator 'Daemon options:'
38
- opt.on('-t', '--ontop',
39
- 'Stay on top (does not daemonize).'){ o[:daemon_opts] << '-t' }
40
- opt.on('-f', '--force', 'Force operation.'){ o[:daemon_opts] << '-f' }
41
- opt.on('-n', '--no_wait',
42
- 'Do not wait for processes to stop.'){ o[:daemon_opts] << '-n' }
43
- opt.on('--shush', 'Silence the daemon.'){ o[:daemon_opts] << '--shush' }
44
- end.parse!
45
-
46
- ##=> Main <=
47
- opt_require(o, project: '-P')
48
-
49
- p = MiGA::Project.load(o[:project]) or raise 'Project doesn\'t exist, aborting.'
50
- d = MiGA::Daemon.new(p)
51
- [:latency, :maxjobs, :ppn, :shutdown_when_done].each do |k|
52
- d.runopts(k, o[k]) unless o[k].nil?
53
- end
54
- d.daemon(task, o[:daemon_opts])
55
-
data/actions/date.rb DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_common(opt, o)
10
- end.parse!
11
-
12
- ##=> Main <=
13
- opts.parse!
14
- puts Time.now.to_s
data/actions/doctor.rb DELETED
@@ -1,201 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require "sqlite3"
7
-
8
- tasks = {
9
- db: ['databases', 'Check database files integrity'],
10
- dist: ['distances', 'Check distance summary tables.'],
11
- files: ['files', 'Check for outdated files.'],
12
- ess: ['essential-genes', 'Check for unarchived essential genes'],
13
- mts: ['mytaxa-scan', 'Check for unarchived MyTaxa scan'],
14
- start: ['start', 'Check for lingering .start files'],
15
- tax: ['taxonomy', 'Check for taxonomy consistency (not implemented)']
16
- }
17
- o = {q: true, ld: false}
18
- tasks.keys.each{ |i| o[i] = true }
19
- tasks_n = Hash[tasks.map{ |k,v| [v[0], k] }]
20
-
21
- OptionParser.new do |opt|
22
- opt_banner(opt)
23
- opt_object(opt, o, [:project])
24
- opt.on('-l', '--list-datasets',
25
- 'List all fixed datasets on advance.'){ |v| o[:ld]=v }
26
- opt.on('--ignore TASK1,TASK2', Array,
27
- 'Do not perform the task(s) listed. Available tasks are:',
28
- * tasks.values.map{ |v| "#{v[0]}: #{v[1]}" }
29
- ){ |v| v.map{ |i| o[tasks_n[i]] = false } }
30
- opt.on('--only TASK',
31
- 'Perform only the specified task (see --ignore).'
32
- ){ |v| tasks.keys.each{ |i| o[i] = false }; o[v] = true }
33
- opt_common(opt, o)
34
- end.parse!
35
-
36
- ##=> Main <=
37
- opt_require(o, project: '-P')
38
-
39
- $stderr.puts 'Loading project' unless o[:q]
40
- p = MiGA::Project.load(o[:project])
41
- raise "Impossible to load project: #{o[:project]}" if p.nil?
42
-
43
- def check_sqlite3_database(db_file, metric)
44
- begin
45
- SQLite3::Database.new(db_file) do |conn|
46
- conn.execute("select count(*) from #{metric}").first
47
- end
48
- rescue SQLite3::SQLException
49
- yield
50
- end
51
- end
52
-
53
- if o[:db]
54
- $stderr.puts 'o Checking databases integrity' unless o[:q]
55
- p.each_dataset do |d|
56
- [:distances, :taxonomy].each do |r_key|
57
- r = d.result(r_key) or next
58
- {haai_db: :aai, aai_db: :aai, ani_db: :ani}.each do |db_key, metric|
59
- db_file = r.file_path(db_key) or next
60
- check_sqlite3_database(db_file, metric) do
61
- $stderr.puts(
62
- " > Removing #{db_key} #{r_key} table for #{d.name}.") if o[:ld]
63
- [db_file, r.path(:done), r.path].each do |f|
64
- File.unlink f if File.exist? f
65
- end # each |f|
66
- end # check_sqlite3_database
67
- end # each |db_key, metric|
68
- end # each |r_key|
69
- end # each |d|
70
- end
71
-
72
- [:ani, :aai].each do |dist|
73
- res = p.result("#{dist}_distances")
74
- next if res.nil?
75
- $stderr.puts "o Checking #{dist} table for consistent datasets" unless o[:q]
76
- notok = {}
77
- fix = {}
78
- Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
79
- lineno = 0
80
- fh.each_line do |ln|
81
- next if (lineno+=1)==1
82
- r = ln.split("\t")
83
- if [1,2].map{ |i| p.dataset(r[i]).nil? }.any?
84
- [1,2].each do |i|
85
- if p.dataset(r[i]).nil?
86
- notok[r[i]] = true
87
- else
88
- fix[r[i]] = true
89
- end
90
- end
91
- end
92
- end
93
- end
94
-
95
- $stderr.puts " - Fixing #{fix.size} datasets" unless fix.empty? or o[:q]
96
- fix.keys.each do |d_n|
97
- $stderr.puts " > Fixing #{d_n}." if o[:ld]
98
- p.dataset(d_n).cleanup_distances!
99
- end
100
-
101
- unless notok.empty?
102
- unless o[:q]
103
- $stderr.puts ' - Unregistered datasets detected: '
104
- if notok.size < 3
105
- $stderr.puts " - #{notok.keys.join(', ')}"
106
- else
107
- $stderr.puts " - #{notok.size}, including #{notok.keys.first}"
108
- end
109
- $stderr.puts ' - Removing tables, recompute'
110
- end
111
- res.remove!
112
- end
113
- end if o[:dist]
114
-
115
- if o[:files]
116
- $stderr.puts 'o Looking for outdated files in results' unless o[:q]
117
- p.each_dataset do |d|
118
- d.each_result do |r_k, r|
119
- ok = true
120
- r.each_file do |_f_sym, _f_rel, f_abs|
121
- unless File.exist? f_abs
122
- ok = false
123
- break
124
- end
125
- end
126
- unless ok
127
- $stderr.puts " > Registering again #{d.name}:#{r_k}" if o[:ld]
128
- d.add_result(r_k, true, force: true)
129
- end
130
- end
131
- end
132
- end
133
-
134
- if o[:ess]
135
- $stderr.puts 'o Looking for unarchived essential genes.' unless o[:q]
136
- p.each_dataset do |d|
137
- res = d.result(:essential_genes)
138
- next if res.nil?
139
- dir = res.file_path(:collection)
140
- if dir.nil?
141
- $stderr.puts " > Removing #{d.name}:essential_genes" if o[:ld]
142
- res.remove!
143
- next
144
- end
145
- unless Dir["#{dir}/*.faa"].empty?
146
- $stderr.puts " > Fixing #{d.name}." if o[:ld]
147
- cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
148
- warn cmdo unless cmdo.empty?
149
- end
150
- end
151
- end
152
-
153
- if o[:mts]
154
- $stderr.puts 'o Looking for unarchived MyTaxa Scan runs.' unless o[:q]
155
- p.each_dataset do |d|
156
- res = d.result(:mytaxa_scan)
157
- next if res.nil?
158
- dir = res.file_path(:regions)
159
- fix = false
160
- unless dir.nil?
161
- if Dir.exist? dir
162
- cmdo = `cd '#{dir}/..' \
163
- && tar -zcf '#{d.name}.reg.tar.gz' '#{d.name}.reg' \
164
- && rm -r '#{d.name}.reg'`.chomp
165
- warn cmdo unless cmdo.empty?
166
- end
167
- fix = true
168
- end
169
- %w[blast mytaxain wintax gene_ids region_ids].each do |ext|
170
- file = res.file_path(ext.to_sym)
171
- unless file.nil?
172
- FileUtils.rm(file) if File.exist? file
173
- fix = true
174
- end
175
- end
176
- if fix
177
- $stderr.puts " > Fixing #{d.name}." if o[:ld]
178
- d.add_result(:mytaxa_scan, true, force: true)
179
- end
180
- end
181
- end
182
-
183
- if o[:start]
184
- $stderr.puts 'o Looking for legacy .start files lingering.' unless o[:q]
185
- p.each_dataset do |d|
186
- d.each_result do |r_k, r|
187
- if File.exist? r.path(:start)
188
- $stderr.puts " > Registering again #{d.name}:#{r_k}" if o[:ld]
189
- r.save
190
- end
191
- end
192
- end
193
- end
194
-
195
- if o[:tax]
196
- #$stderr.puts "o Checking for taxonomy/distances consistency" unless o[:q]
197
- # TODO: Find 95%ANI clusters with entries from different species
198
- end
199
-
200
- $stderr.puts 'Done' unless o[:q]
201
-
data/actions/edit.rb DELETED
@@ -1,33 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt.on('-m', '--metadata STRING',
11
- 'Metadata as key-value pairs separated by = and delimited by comma.',
12
- 'Values are saved as strings except for booleans (true / false) or nil.'
13
- ){ |v| o[:metadata] = v }
14
- opt_common(opt, o)
15
- end.parse!
16
-
17
- ##=> Main <=
18
- opt_require(o, project: '-P')
19
-
20
- $stderr.puts 'Loading project.' unless o[:q]
21
- p = MiGA::Project.load(o[:project])
22
- raise "Impossible to load project: #{o[:project]}" if p.nil?
23
-
24
- obj = p
25
- if o[:dataset]
26
- $stderr.puts 'Loading dataset.' unless o[:q]
27
- obj = p.dataset(o[:dataset])
28
- raise 'Dataset does not exist.' if obj.nil?
29
- end
30
- obj = add_metadata(o, obj)
31
- obj.save
32
-
33
- $stderr.puts 'Done.' unless o[:q]