miga-base 0.4.1.0 → 0.4.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/bin/miga +2 -244
  3. data/lib/miga/cli/action/about.rb +44 -0
  4. data/lib/miga/cli/action/add.rb +139 -0
  5. data/lib/miga/cli/action/add_result.rb +26 -0
  6. data/lib/miga/cli/action/console.rb +19 -0
  7. data/lib/miga/cli/action/daemon.rb +74 -0
  8. data/lib/miga/cli/action/date.rb +18 -0
  9. data/lib/miga/cli/action/doctor.rb +210 -0
  10. data/lib/miga/cli/action/edit.rb +24 -0
  11. data/lib/miga/cli/action/files.rb +31 -0
  12. data/lib/miga/cli/action/find.rb +48 -0
  13. data/lib/miga/cli/action/generic.rb +44 -0
  14. data/lib/miga/cli/action/get.rb +132 -0
  15. data/lib/miga/cli/action/init.rb +343 -0
  16. data/lib/miga/cli/action/ln.rb +42 -0
  17. data/lib/miga/cli/action/ls.rb +55 -0
  18. data/lib/miga/cli/action/ncbi_get.rb +218 -0
  19. data/lib/miga/cli/action/new.rb +45 -0
  20. data/lib/miga/cli/action/next_step.rb +27 -0
  21. data/lib/miga/cli/action/plugins.rb +28 -0
  22. data/lib/miga/cli/action/rm.rb +25 -0
  23. data/lib/miga/cli/action/run.rb +39 -0
  24. data/lib/miga/cli/action/stats.rb +140 -0
  25. data/lib/miga/cli/action/summary.rb +49 -0
  26. data/lib/miga/cli/action/tax_dist.rb +102 -0
  27. data/lib/miga/cli/action/tax_index.rb +47 -0
  28. data/lib/miga/cli/action/tax_set.rb +59 -0
  29. data/lib/miga/cli/action/tax_test.rb +77 -0
  30. data/lib/miga/cli/action.rb +66 -0
  31. data/lib/miga/cli/base.rb +90 -0
  32. data/lib/miga/cli.rb +426 -0
  33. data/lib/miga/project/result.rb +14 -6
  34. data/lib/miga/remote_dataset.rb +1 -1
  35. data/lib/miga/tax_index.rb +5 -4
  36. data/lib/miga/taxonomy/base.rb +63 -0
  37. data/lib/miga/taxonomy.rb +87 -92
  38. data/lib/miga/version.rb +6 -6
  39. data/test/taxonomy_test.rb +49 -9
  40. data/utils/distance/commands.rb +11 -11
  41. data/utils/distance/pipeline.rb +5 -5
  42. metadata +43 -49
  43. data/actions/about.rb +0 -43
  44. data/actions/add.rb +0 -129
  45. data/actions/add_result.rb +0 -30
  46. data/actions/daemon.rb +0 -55
  47. data/actions/date.rb +0 -14
  48. data/actions/doctor.rb +0 -201
  49. data/actions/edit.rb +0 -33
  50. data/actions/files.rb +0 -43
  51. data/actions/find.rb +0 -41
  52. data/actions/get.rb +0 -105
  53. data/actions/init.rb +0 -301
  54. data/actions/ln.rb +0 -47
  55. data/actions/ls.rb +0 -61
  56. data/actions/ncbi_get.rb +0 -192
  57. data/actions/new.rb +0 -44
  58. data/actions/next_step.rb +0 -33
  59. data/actions/plugins.rb +0 -25
  60. data/actions/rm.rb +0 -29
  61. data/actions/run.rb +0 -45
  62. data/actions/stats.rb +0 -149
  63. data/actions/summary.rb +0 -57
  64. data/actions/tax_dist.rb +0 -106
  65. data/actions/tax_index.rb +0 -46
  66. data/actions/tax_set.rb +0 -63
  67. data/actions/tax_test.rb +0 -80
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1.0
4
+ version: 0.4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-17 00:00:00.000000000 Z
11
+ date: 2019-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -16,68 +16,56 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.2'
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.2.4
19
+ version: '1.3'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
24
  - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: '1.2'
30
- - - ">="
31
- - !ruby/object:Gem::Version
32
- version: 1.2.4
26
+ version: '1.3'
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: json
35
29
  requirement: !ruby/object:Gem::Requirement
36
30
  requirements:
37
- - - ">"
38
- - !ruby/object:Gem::Version
39
- version: '1.8'
40
- - - "<"
31
+ - - "~>"
41
32
  - !ruby/object:Gem::Version
42
- version: '3'
33
+ version: '2'
43
34
  type: :runtime
44
35
  prerelease: false
45
36
  version_requirements: !ruby/object:Gem::Requirement
46
37
  requirements:
47
- - - ">"
48
- - !ruby/object:Gem::Version
49
- version: '1.8'
50
- - - "<"
38
+ - - "~>"
51
39
  - !ruby/object:Gem::Version
52
- version: '3'
40
+ version: '2'
53
41
  - !ruby/object:Gem::Dependency
54
42
  name: sqlite3
55
43
  requirement: !ruby/object:Gem::Requirement
56
44
  requirements:
57
45
  - - "~>"
58
46
  - !ruby/object:Gem::Version
59
- version: '1.3'
47
+ version: '1.4'
60
48
  type: :runtime
61
49
  prerelease: false
62
50
  version_requirements: !ruby/object:Gem::Requirement
63
51
  requirements:
64
52
  - - "~>"
65
53
  - !ruby/object:Gem::Version
66
- version: '1.3'
54
+ version: '1.4'
67
55
  - !ruby/object:Gem::Dependency
68
56
  name: rake
69
57
  requirement: !ruby/object:Gem::Requirement
70
58
  requirements:
71
59
  - - "~>"
72
60
  - !ruby/object:Gem::Version
73
- version: '11'
61
+ version: '12'
74
62
  type: :development
75
63
  prerelease: false
76
64
  version_requirements: !ruby/object:Gem::Requirement
77
65
  requirements:
78
66
  - - "~>"
79
67
  - !ruby/object:Gem::Version
80
- version: '11'
68
+ version: '12'
81
69
  - !ruby/object:Gem::Dependency
82
70
  name: test-unit
83
71
  requirement: !ruby/object:Gem::Requirement
@@ -104,35 +92,40 @@ files:
104
92
  - LICENSE
105
93
  - README.md
106
94
  - Rakefile
107
- - actions/about.rb
108
- - actions/add.rb
109
- - actions/add_result.rb
110
- - actions/daemon.rb
111
- - actions/date.rb
112
- - actions/doctor.rb
113
- - actions/edit.rb
114
- - actions/files.rb
115
- - actions/find.rb
116
- - actions/get.rb
117
- - actions/init.rb
118
- - actions/ln.rb
119
- - actions/ls.rb
120
- - actions/ncbi_get.rb
121
- - actions/new.rb
122
- - actions/next_step.rb
123
- - actions/plugins.rb
124
- - actions/rm.rb
125
- - actions/run.rb
126
- - actions/stats.rb
127
- - actions/summary.rb
128
- - actions/tax_dist.rb
129
- - actions/tax_index.rb
130
- - actions/tax_set.rb
131
- - actions/tax_test.rb
132
95
  - bin/miga
133
96
  - lib/miga.rb
134
97
  - lib/miga/_data/aai-intax.tsv.gz
135
98
  - lib/miga/_data/aai-novel.tsv.gz
99
+ - lib/miga/cli.rb
100
+ - lib/miga/cli/action.rb
101
+ - lib/miga/cli/action/about.rb
102
+ - lib/miga/cli/action/add.rb
103
+ - lib/miga/cli/action/add_result.rb
104
+ - lib/miga/cli/action/console.rb
105
+ - lib/miga/cli/action/daemon.rb
106
+ - lib/miga/cli/action/date.rb
107
+ - lib/miga/cli/action/doctor.rb
108
+ - lib/miga/cli/action/edit.rb
109
+ - lib/miga/cli/action/files.rb
110
+ - lib/miga/cli/action/find.rb
111
+ - lib/miga/cli/action/generic.rb
112
+ - lib/miga/cli/action/get.rb
113
+ - lib/miga/cli/action/init.rb
114
+ - lib/miga/cli/action/ln.rb
115
+ - lib/miga/cli/action/ls.rb
116
+ - lib/miga/cli/action/ncbi_get.rb
117
+ - lib/miga/cli/action/new.rb
118
+ - lib/miga/cli/action/next_step.rb
119
+ - lib/miga/cli/action/plugins.rb
120
+ - lib/miga/cli/action/rm.rb
121
+ - lib/miga/cli/action/run.rb
122
+ - lib/miga/cli/action/stats.rb
123
+ - lib/miga/cli/action/summary.rb
124
+ - lib/miga/cli/action/tax_dist.rb
125
+ - lib/miga/cli/action/tax_index.rb
126
+ - lib/miga/cli/action/tax_set.rb
127
+ - lib/miga/cli/action/tax_test.rb
128
+ - lib/miga/cli/base.rb
136
129
  - lib/miga/common.rb
137
130
  - lib/miga/common/base.rb
138
131
  - lib/miga/common/format.rb
@@ -158,6 +151,7 @@ files:
158
151
  - lib/miga/tax_dist.rb
159
152
  - lib/miga/tax_index.rb
160
153
  - lib/miga/taxonomy.rb
154
+ - lib/miga/taxonomy/base.rb
161
155
  - lib/miga/version.rb
162
156
  - scripts/aai_distances.bash
163
157
  - scripts/ani_distances.bash
data/actions/about.rb DELETED
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, info:false, processing:false, tabular:false}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project])
10
- opt.on("-p", "--processing",
11
- "Print information on processing advance."){ |v| o[:processing]=v }
12
- opt.on("-m", "--metadata STRING",
13
- "Print name and metadata field only."
14
- ){ |v| o[:datum]=v }
15
- opt.on("--tab",
16
- "Returns a tab-delimited table."){ |v| o[:tabular] = v }
17
- opt_common(opt, o)
18
- end.parse!
19
-
20
-
21
- ##=> Main <=
22
- opt_require(o, project:"-P")
23
-
24
- $stderr.puts "Loading project." unless o[:q]
25
- p = MiGA::Project.load(o[:project])
26
- raise "Impossible to load project: #{o[:project]}" if p.nil?
27
-
28
- if not o[:datum].nil?
29
- v = p.metadata[ o[:datum] ]
30
- puts v.nil? ? '?' : v
31
- elsif o[:processing]
32
- keys = MiGA::Project.DISTANCE_TASKS + MiGA::Project.INCLADE_TASKS
33
- puts MiGA::MiGA.tabulate([:task, :status], keys.map do |k|
34
- [k, p.add_result(k, false).nil? ? "queued" : "done"]
35
- end, o[:tabular])
36
- else
37
- puts MiGA::MiGA.tabulate([:key, :value], p.metadata.data.keys.map do |k|
38
- v = p.metadata[k]
39
- [k, k==:datasets ? v.size : v]
40
- end, o[:tabular])
41
- end
42
-
43
- $stderr.puts "Done." unless o[:q]
data/actions/add.rb DELETED
@@ -1,129 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- input_types = {
7
- raw_reads_single:
8
- ['Single raw reads in a single FastQ file',
9
- :raw_reads, %w[.1.fastq]],
10
- raw_reads_paired:
11
- ['Paired raw reads in two FastQ files',
12
- :raw_reads, %w[.1.fastq .2.fastq]],
13
- trimmed_reads_single:
14
- ['Single trimmed reads in a single FastA file',
15
- :trimmed_fasta, %w[.SingleReads.fa]],
16
- trimmed_reads_paired:
17
- ['Paired trimmed reads in two FastA files',
18
- :trimmed_fasta, %w[.1.fasta .2.fasta]],
19
- trimmed_reads_interleaved:
20
- ['Paired trimmed reads in a single FastA file',
21
- :trimmed_fasta, %w[.CoupledReads.fa]],
22
- assembly:
23
- ['Assembled contigs or scaffolds in FastA format',
24
- :assembly, %w[.LargeContigs.fna]]
25
- }
26
-
27
- o = {q: true, ref: true, ignore_dups: false,
28
- regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
29
- OptionParser.new do |opt|
30
- opt_banner(opt, true)
31
- opt_object(opt, o, [:project, :dataset_opt, :dataset_type_req])
32
- opt.on('-q', '--query',
33
- 'If set, the dataset is registered as a query, not a reference dataset.'
34
- ){ |v| o[:ref] = !v }
35
- opt.on('-d', '--description STRING',
36
- 'Description of the dataset.'){ |v| o[:description] = v }
37
- opt.on('-c', '--comments STRING',
38
- 'Comments on the dataset.'){ |v| o[:comments] = v }
39
- opt.on('-m', '--metadata STRING',
40
- 'Metadata as key-value pairs separated by = and delimited by comma.',
41
- 'Values are saved as strings except for booleans (true / false) or nil.'
42
- ){ |v| o[:metadata] = v }
43
- opt.on('-r', '--name-regexp REGEXP', Regexp,
44
- 'Regular expression indicating how to extract the name from the file path.',
45
- "By default: '#{o[:regexp]}'"){ |v| o[:regexp] = v }
46
- opt.on('-i', '--input-type STRING',
47
- 'Type of input data, one of the following:',
48
- *input_types.map{ |k,v| "~ #{k}: #{v[0]}." }
49
- ){ |v| o[:input_type] = v.downcase.to_sym }
50
- opt.on('--ignore-dups', 'Continue with a warning if a dataset already exists.'
51
- ){ |v| o[:ignore_dups] = v }
52
- opt_common(opt, o)
53
-
54
- opt.separator 'You can create multiple datasets with a single command, ' \
55
- 'simply pass all the files at the end (FILES...).'
56
- opt.separator 'If -D is passed, only one dataset will be added. ' \
57
- 'Otherwise, dataset names will be determined by the file paths (-r).'
58
- opt.separator ''
59
- end.parse!
60
-
61
- ##=> Main <=
62
- opt_require(o, project: '-P')
63
- files = ARGV
64
- file_type = nil
65
- if files.empty?
66
- opt_require_type(o, MiGA::Dataset)
67
- files = [nil]
68
- else
69
- raise 'Please specify input type (-i).' if o[:input_type].nil?
70
- file_type = input_types[o[:input_type]]
71
- raise "Unrecognized input type: #{o[:input_type]}." if file_type.nil?
72
- raise 'Some files are duplicated, files must be unique.' if
73
- files.size != files.uniq.size
74
- if o[:input_type].to_s =~ /_paired$/
75
- raise 'Odd number of files incompatible with input type.' if files.size.odd?
76
- files = Hash[*files].to_a
77
- else
78
- files = files.map{ |i| [i] }
79
- end
80
- raise 'The dataset name (-D) can only be specified with one input file.' if
81
- files.size > 1 and not o[:dataset].nil?
82
- end
83
-
84
- $stderr.puts 'Loading project.' unless o[:q]
85
- p = MiGA::Project.load(o[:project])
86
- raise "Impossible to load project: #{o[:project]}" if p.nil?
87
-
88
- $stderr.puts 'Creating datasets:' unless o[:q]
89
- files.each do |file|
90
- name = o[:dataset]
91
- if name.nil?
92
- ref_file = file.is_a?(Array) ? file.first : file
93
- m = o[:regexp].match(ref_file)
94
- raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
95
- name = m[1].miga_name
96
- end
97
-
98
- if MiGA::Dataset.exist?(p, name)
99
- if o[:ignore_dups]
100
- warn "Dataset already exists: #{name}."
101
- next
102
- else
103
- raise "Dataset already exists: #{name}."
104
- end
105
- end
106
-
107
- $stderr.puts "o #{name}" unless o[:q]
108
- d = MiGA::Dataset.new(p, name, o[:ref])
109
- raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
110
-
111
- unless file.nil?
112
- r_dir = MiGA::Dataset.RESULT_DIRS[ file_type[1] ]
113
- r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
114
- file_type[2].each_with_index do |ext, i|
115
- gz = file[i] =~ /\.gz/ ? '.gz' : ''
116
- FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
117
- $stderr.puts " file: #{file[i]}" unless o[:q]
118
- end
119
- File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
120
- end
121
-
122
- d = add_metadata(o, d)
123
- d.save
124
- p.add_dataset(name)
125
- res = d.first_preprocessing(true)
126
- $stderr.puts " result: #{res}" unless o[:q]
127
- end
128
-
129
- $stderr.puts 'Done.' unless o[:q]
data/actions/add_result.rb DELETED
@@ -1,30 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, force:false}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt, :result])
10
- opt.on("-f", "--force",
11
- "Forces re-indexing of the result even if it's already registered."
12
- ){ |v| o[:force]=v }
13
- opt_common(opt, o)
14
- end.parse!
15
-
16
- ##=> Main <=
17
- opts.parse!
18
- opt_require(o, project:"-P", name:"-r")
19
-
20
- $stderr.puts "Loading project." unless o[:q]
21
- p = MiGA::Project.load(o[:project])
22
- raise "Impossible to load project: #{o[:project]}" if p.nil?
23
-
24
- $stderr.puts "Registering result." unless o[:q]
25
- obj = o[:dataset].nil? ? p : p.dataset(o[:dataset])
26
- r = obj.add_result(o[:name].to_sym, true, force: o[:force])
27
-
28
- raise "Cannot add result, incomplete expected files." if r.nil?
29
-
30
- $stderr.puts "Done." unless o[:q]
data/actions/daemon.rb DELETED
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require 'miga/daemon'
7
-
8
- task = ARGV.shift unless %w[-h --help].include? ARGV.first
9
- ARGV << '-h' if ARGV.empty?
10
- o = {q: true, daemon_opts: []}
11
- OptionParser.new do |opt|
12
- opt_banner(opt)
13
- opt.separator 'task:'
14
- { start: 'Start an instance of the application.',
15
- stop: 'Start an instance of the application.',
16
- restart: 'Stop all instances and restart them afterwards.',
17
- reload: 'Send a SIGHUP to all instances of the application.',
18
- run: 'Start the application and stay on top.',
19
- zap: 'Set the application to a stopped state.',
20
- status: 'Show status (PID) of application instances.'
21
- }.each{ |k,v| opt.separator sprintf ' %*s%s', -33, k, v }
22
- opt.separator ''
23
- opt.separator 'MiGA options:'
24
- opt_object(opt, o, [:project])
25
- opt.on('--shutdown-when-done',
26
- 'If passed, the daemon will exit when all processing is done.',
27
- 'Otherwise (default), it will stay idle awaiting for new data.'
28
- ){ |v| o[:shutdown_when_done] = v }
29
- opt.on('--latency INT',
30
- 'Number of seconds the daemon will be sleeping.'
31
- ){ |v| o[:latency]=v.to_i }
32
- opt.on('--max-jobs INT',
33
- 'Maximum number of jobs to use simultaneously.'){ |v| o[:maxjobs]=v.to_i }
34
- opt.on('--ppn INT',
35
- 'Maximum number of cores to use in a single job.'){ |v| o[:ppn]=v.to_i }
36
- opt_common(opt, o)
37
- opt.separator 'Daemon options:'
38
- opt.on('-t', '--ontop',
39
- 'Stay on top (does not daemonize).'){ o[:daemon_opts] << '-t' }
40
- opt.on('-f', '--force', 'Force operation.'){ o[:daemon_opts] << '-f' }
41
- opt.on('-n', '--no_wait',
42
- 'Do not wait for processes to stop.'){ o[:daemon_opts] << '-n' }
43
- opt.on('--shush', 'Silence the daemon.'){ o[:daemon_opts] << '--shush' }
44
- end.parse!
45
-
46
- ##=> Main <=
47
- opt_require(o, project: '-P')
48
-
49
- p = MiGA::Project.load(o[:project]) or raise 'Project doesn\'t exist, aborting.'
50
- d = MiGA::Daemon.new(p)
51
- [:latency, :maxjobs, :ppn, :shutdown_when_done].each do |k|
52
- d.runopts(k, o[k]) unless o[k].nil?
53
- end
54
- d.daemon(task, o[:daemon_opts])
55
-
data/actions/date.rb DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {}
7
- opts = OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_common(opt, o)
10
- end.parse!
11
-
12
- ##=> Main <=
13
- opts.parse!
14
- puts Time.now.to_s
data/actions/doctor.rb DELETED
@@ -1,201 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require "sqlite3"
7
-
8
- tasks = {
9
- db: ['databases', 'Check database files integrity'],
10
- dist: ['distances', 'Check distance summary tables.'],
11
- files: ['files', 'Check for outdated files.'],
12
- ess: ['essential-genes', 'Check for unarchived essential genes'],
13
- mts: ['mytaxa-scan', 'Check for unarchived MyTaxa scan'],
14
- start: ['start', 'Check for lingering .start files'],
15
- tax: ['taxonomy', 'Check for taxonomy consistency (not implemented)']
16
- }
17
- o = {q: true, ld: false}
18
- tasks.keys.each{ |i| o[i] = true }
19
- tasks_n = Hash[tasks.map{ |k,v| [v[0], k] }]
20
-
21
- OptionParser.new do |opt|
22
- opt_banner(opt)
23
- opt_object(opt, o, [:project])
24
- opt.on('-l', '--list-datasets',
25
- 'List all fixed datasets on advance.'){ |v| o[:ld]=v }
26
- opt.on('--ignore TASK1,TASK2', Array,
27
- 'Do not perform the task(s) listed. Available tasks are:',
28
- * tasks.values.map{ |v| "#{v[0]}: #{v[1]}" }
29
- ){ |v| v.map{ |i| o[tasks_n[i]] = false } }
30
- opt.on('--only TASK',
31
- 'Perform only the specified task (see --ignore).'
32
- ){ |v| tasks.keys.each{ |i| o[i] = false }; o[v] = true }
33
- opt_common(opt, o)
34
- end.parse!
35
-
36
- ##=> Main <=
37
- opt_require(o, project: '-P')
38
-
39
- $stderr.puts 'Loading project' unless o[:q]
40
- p = MiGA::Project.load(o[:project])
41
- raise "Impossible to load project: #{o[:project]}" if p.nil?
42
-
43
- def check_sqlite3_database(db_file, metric)
44
- begin
45
- SQLite3::Database.new(db_file) do |conn|
46
- conn.execute("select count(*) from #{metric}").first
47
- end
48
- rescue SQLite3::SQLException
49
- yield
50
- end
51
- end
52
-
53
- if o[:db]
54
- $stderr.puts 'o Checking databases integrity' unless o[:q]
55
- p.each_dataset do |d|
56
- [:distances, :taxonomy].each do |r_key|
57
- r = d.result(r_key) or next
58
- {haai_db: :aai, aai_db: :aai, ani_db: :ani}.each do |db_key, metric|
59
- db_file = r.file_path(db_key) or next
60
- check_sqlite3_database(db_file, metric) do
61
- $stderr.puts(
62
- " > Removing #{db_key} #{r_key} table for #{d.name}.") if o[:ld]
63
- [db_file, r.path(:done), r.path].each do |f|
64
- File.unlink f if File.exist? f
65
- end # each |f|
66
- end # check_sqlite3_database
67
- end # each |db_key, metric|
68
- end # each |r_key|
69
- end # each |d|
70
- end
71
-
72
- [:ani, :aai].each do |dist|
73
- res = p.result("#{dist}_distances")
74
- next if res.nil?
75
- $stderr.puts "o Checking #{dist} table for consistent datasets" unless o[:q]
76
- notok = {}
77
- fix = {}
78
- Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
79
- lineno = 0
80
- fh.each_line do |ln|
81
- next if (lineno+=1)==1
82
- r = ln.split("\t")
83
- if [1,2].map{ |i| p.dataset(r[i]).nil? }.any?
84
- [1,2].each do |i|
85
- if p.dataset(r[i]).nil?
86
- notok[r[i]] = true
87
- else
88
- fix[r[i]] = true
89
- end
90
- end
91
- end
92
- end
93
- end
94
-
95
- $stderr.puts " - Fixing #{fix.size} datasets" unless fix.empty? or o[:q]
96
- fix.keys.each do |d_n|
97
- $stderr.puts " > Fixing #{d_n}." if o[:ld]
98
- p.dataset(d_n).cleanup_distances!
99
- end
100
-
101
- unless notok.empty?
102
- unless o[:q]
103
- $stderr.puts ' - Unregistered datasets detected: '
104
- if notok.size < 3
105
- $stderr.puts " - #{notok.keys.join(', ')}"
106
- else
107
- $stderr.puts " - #{notok.size}, including #{notok.keys.first}"
108
- end
109
- $stderr.puts ' - Removing tables, recompute'
110
- end
111
- res.remove!
112
- end
113
- end if o[:dist]
114
-
115
- if o[:files]
116
- $stderr.puts 'o Looking for outdated files in results' unless o[:q]
117
- p.each_dataset do |d|
118
- d.each_result do |r_k, r|
119
- ok = true
120
- r.each_file do |_f_sym, _f_rel, f_abs|
121
- unless File.exist? f_abs
122
- ok = false
123
- break
124
- end
125
- end
126
- unless ok
127
- $stderr.puts " > Registering again #{d.name}:#{r_k}" if o[:ld]
128
- d.add_result(r_k, true, force: true)
129
- end
130
- end
131
- end
132
- end
133
-
134
- if o[:ess]
135
- $stderr.puts 'o Looking for unarchived essential genes.' unless o[:q]
136
- p.each_dataset do |d|
137
- res = d.result(:essential_genes)
138
- next if res.nil?
139
- dir = res.file_path(:collection)
140
- if dir.nil?
141
- $stderr.puts " > Removing #{d.name}:essential_genes" if o[:ld]
142
- res.remove!
143
- next
144
- end
145
- unless Dir["#{dir}/*.faa"].empty?
146
- $stderr.puts " > Fixing #{d.name}." if o[:ld]
147
- cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
148
- warn cmdo unless cmdo.empty?
149
- end
150
- end
151
- end
152
-
153
- if o[:mts]
154
- $stderr.puts 'o Looking for unarchived MyTaxa Scan runs.' unless o[:q]
155
- p.each_dataset do |d|
156
- res = d.result(:mytaxa_scan)
157
- next if res.nil?
158
- dir = res.file_path(:regions)
159
- fix = false
160
- unless dir.nil?
161
- if Dir.exist? dir
162
- cmdo = `cd '#{dir}/..' \
163
- && tar -zcf '#{d.name}.reg.tar.gz' '#{d.name}.reg' \
164
- && rm -r '#{d.name}.reg'`.chomp
165
- warn cmdo unless cmdo.empty?
166
- end
167
- fix = true
168
- end
169
- %w[blast mytaxain wintax gene_ids region_ids].each do |ext|
170
- file = res.file_path(ext.to_sym)
171
- unless file.nil?
172
- FileUtils.rm(file) if File.exist? file
173
- fix = true
174
- end
175
- end
176
- if fix
177
- $stderr.puts " > Fixing #{d.name}." if o[:ld]
178
- d.add_result(:mytaxa_scan, true, force: true)
179
- end
180
- end
181
- end
182
-
183
- if o[:start]
184
- $stderr.puts 'o Looking for legacy .start files lingering.' unless o[:q]
185
- p.each_dataset do |d|
186
- d.each_result do |r_k, r|
187
- if File.exist? r.path(:start)
188
- $stderr.puts " > Registering again #{d.name}:#{r_k}" if o[:ld]
189
- r.save
190
- end
191
- end
192
- end
193
- end
194
-
195
- if o[:tax]
196
- #$stderr.puts "o Checking for taxonomy/distances consistency" unless o[:q]
197
- # TODO: Find 95%ANI clusters with entries from different species
198
- end
199
-
200
- $stderr.puts 'Done' unless o[:q]
201
-
data/actions/edit.rb DELETED
@@ -1,33 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q: true}
7
- OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt.on('-m', '--metadata STRING',
11
- 'Metadata as key-value pairs separated by = and delimited by comma.',
12
- 'Values are saved as strings except for booleans (true / false) or nil.'
13
- ){ |v| o[:metadata] = v }
14
- opt_common(opt, o)
15
- end.parse!
16
-
17
- ##=> Main <=
18
- opt_require(o, project: '-P')
19
-
20
- $stderr.puts 'Loading project.' unless o[:q]
21
- p = MiGA::Project.load(o[:project])
22
- raise "Impossible to load project: #{o[:project]}" if p.nil?
23
-
24
- obj = p
25
- if o[:dataset]
26
- $stderr.puts 'Loading dataset.' unless o[:q]
27
- obj = p.dataset(o[:dataset])
28
- raise 'Dataset does not exist.' if obj.nil?
29
- end
30
- obj = add_metadata(o, obj)
31
- obj.save
32
-
33
- $stderr.puts 'Done.' unless o[:q]