miga-base 0.3.1.7 → 0.3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/actions/ncbi_get.rb +8 -0
  3. data/lib/miga/common.rb +9 -215
  4. data/lib/miga/common/base.rb +49 -0
  5. data/lib/miga/common/format.rb +135 -0
  6. data/lib/miga/common/path.rb +49 -0
  7. data/lib/miga/daemon.rb +3 -60
  8. data/lib/miga/daemon/base.rb +69 -0
  9. data/lib/miga/dataset.rb +3 -3
  10. data/lib/miga/dataset/result.rb +5 -5
  11. data/lib/miga/result.rb +5 -0
  12. data/lib/miga/version.rb +7 -5
  13. data/scripts/distances.bash +2 -19
  14. data/scripts/taxonomy.bash +2 -21
  15. data/test/common_test.rb +9 -0
  16. data/utils/distance/base.rb +6 -0
  17. data/utils/distance/commands.rb +82 -0
  18. data/utils/distance/database.rb +86 -0
  19. data/utils/distance/pipeline.rb +98 -0
  20. data/utils/distance/runner.rb +104 -0
  21. data/utils/distance/temporal.rb +37 -0
  22. data/utils/distances.rb +9 -0
  23. data/utils/enveomics/Docs/recplot2.md +233 -0
  24. data/utils/enveomics/Makefile +1 -1
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
  26. data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
  27. data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
  28. data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
  29. data/utils/enveomics/Manifest/categories.json +11 -1
  30. data/utils/enveomics/Manifest/examples.json +2 -2
  31. data/utils/enveomics/README.md +2 -0
  32. data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
  33. data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
  34. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  35. data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
  36. data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
  37. data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
  38. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  39. data/utils/enveomics/Scripts/SRA.download.bash +28 -21
  40. data/utils/enveomics/Scripts/Table.barplot.R +1 -0
  41. data/utils/enveomics/Scripts/aai.rb +4 -2
  42. data/utils/enveomics/build_enveomics_r.bash +5 -5
  43. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
  44. data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
  45. data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
  46. data/utils/enveomics/enveomics.R/README.md +26 -17
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
  48. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
  49. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
  50. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
  51. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
  52. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
  53. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
  54. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
  55. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
  56. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
  57. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
  58. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
  59. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
  60. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
  61. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
  64. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
  65. data/utils/requirements.txt +1 -1
  66. metadata +28 -4
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
  68. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b53d716162f9aedbc64f1e54e02ffc293b16a7e7
4
- data.tar.gz: a5c46555329c2da1ba1fd165d423e513a27562ef
3
+ metadata.gz: c395d2565cacafe425c91a277c03cdeaa3ac9ece
4
+ data.tar.gz: 1978e8a2df4d0646bc884ce9921bf8de2bc13a25
5
5
  SHA512:
6
- metadata.gz: c94add412b17de6a932ee247e90ef5682afdf5b61cf09a3b6b9baa64d401da09d29915f6b7a6f39a9e8e6e67ba6e7afb5ed2a982e488805e22f16974cedc9ad7
7
- data.tar.gz: 2b8e6fcbdc0b4f1b72e43bb02d47e3fb4618773ef4e86c9644b77926429b6f117cc2d965437704c2540fffd26a8576bdfc299fdd927f7fe2c0f2f33a4c961727
6
+ metadata.gz: fff79ff971c5fc9e0e0684585b9b35abe5377cff0742c772fc1a7c58005b224ea18b36e7fe489b78c6103f6c4b9ffc90be75e7a385d5fce59a917e2d68c3765a
7
+ data.tar.gz: cda85047eabd8ba1c76bb5fe690247a3e71583e034f3c5dbef9178776d4c5da02bd3075cfc0e3154e1e97aa69da8ddea85a00ef99efa065e867950c10a860a71
@@ -34,6 +34,8 @@ OptionParser.new do |opt|
34
34
  opt.on('--no-version-name',
35
35
  'Do not add sequence version to the dataset name.',
36
36
  'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
37
+ opt.on('--blacklist PATH',
38
+ 'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
37
39
  opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
38
40
  opt.on('-q', '--query',
39
41
  'Register the datasets as queries, not reference datasets.'
@@ -135,6 +137,12 @@ if o[:scaffold] or o[:contig]
135
137
  end
136
138
  end
137
139
 
140
+ # Discard blacklisted
141
+ unless o[:blacklist].nil?
142
+ $stderr.puts "Discarding datasets in #{o[:blacklist]}." unless o[:q]
143
+ File.readlines(o[:blacklist]).map(&:chomp).each{ |i| ds.delete i }
144
+ end
145
+
138
146
  # Download entries
139
147
  $stderr.puts "Downloading #{ds.size} #{ds.size>1 ? "entries" : "entry"}." unless o[:q]
140
148
  ds.each do |name,body|
@@ -1,62 +1,24 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'miga/version'
5
4
  require 'json'
6
- require 'tempfile'
7
- require 'zlib'
5
+ require 'miga/version'
6
+ require 'miga/common/base'
7
+ require 'miga/common/path'
8
+ require 'miga/common/format'
8
9
 
9
10
  ##
10
11
  # Generic class used to handle system-wide information and methods, and parent
11
12
  # of all other MiGA::* classes.
12
13
  class MiGA::MiGA
14
+
15
+ include MiGA::Common
16
+
17
+ extend MiGA::Common::Path
18
+ extend MiGA::Common::Format
13
19
 
14
20
  ENV['MIGA_HOME'] ||= ENV['HOME']
15
21
 
16
- ##
17
- # Root path to MiGA (as estimated from the location of the current file).
18
- def self.root_path
19
- File.expand_path('../../..', __FILE__)
20
- end
21
-
22
- ##
23
- # Should debugging information be reported?
24
- @@DEBUG = false
25
-
26
- ##
27
- # Should the trace of debugging information be reported?
28
- @@DEBUG_TRACE = false
29
-
30
- ##
31
- # Turn on debugging.
32
- def self.DEBUG_ON() @@DEBUG=true end
33
-
34
- ##
35
- # Turn off debugging.
36
- def self.DEBUG_OFF() @@DEBUG=false end
37
-
38
- ##
39
- # Turn on debug tracing (and debugging).
40
- def self.DEBUG_TRACE_ON
41
- @@DEBUG_TRACE=true
42
- self.DEBUG_ON
43
- end
44
-
45
- ##
46
- # Turn off debug tracing (but not debugging).
47
- def self.DEBUG_TRACE_OFF
48
- @@DEBUG_TRACE=false
49
- end
50
-
51
- ##
52
- # Send debug message.
53
- def self.DEBUG(*args)
54
- $stderr.puts(*args) if @@DEBUG
55
- if @@DEBUG_TRACE
56
- $stderr.puts caller.map{ |v| v.gsub(/^/,' ') }.join("\n")
57
- end
58
- end
59
-
60
22
  ##
61
23
  # Has MiGA been initialized?
62
24
  def self.initialized?
@@ -64,121 +26,6 @@ class MiGA::MiGA
64
26
  File.exist?(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
65
27
  end
66
28
 
67
- ##
68
- # Tabulates an +values+, and Array of Arrays, all with the same number of
69
- # entries as +header+. Returns an Array of String, one per line.
70
- def self.tabulate(header, values)
71
- fields = [header.map(&:to_s)]
72
- fields << fields.first.map{ |h| h.gsub(/\S/, '-') }
73
- fields += values.map{ |row| row.map{ |cell| cell.nil? ? '?' : cell.to_s } }
74
- clen = fields.map{ |row| row.map(&:length) }.transpose.map(&:max)
75
- fields.map do |row|
76
- (0 .. clen.size-1).map do |col_n|
77
- col_n==0 ? row[col_n].rjust(clen[col_n]) : row[col_n].ljust(clen[col_n])
78
- end.join(' ')
79
- end
80
- end
81
-
82
- ##
83
- # Cleans a FastA file in place.
84
- def self.clean_fasta_file(file)
85
- tmp_fh = nil
86
- begin
87
- if file =~ /\.gz/
88
- tmp_path = Tempfile.new('MiGA.gz').tap(&:close).path
89
- tmp_fh = Zlib::GzipWriter.open(tmp_path)
90
- fh = Zlib::GzipReader.open(file)
91
- else
92
- tmp_fh = Tempfile.new('MiGA')
93
- tmp_path = tmp_fh.path
94
- fh = File.open(file, 'r')
95
- end
96
- buffer = ''
97
- fh.each_line do |ln|
98
- ln.chomp!
99
- if ln =~ /^>\s*(\S+)(.*)/
100
- (id, df) = [$1, $2]
101
- tmp_fh.print buffer.wrap_width(80)
102
- buffer = ''
103
- tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
104
- else
105
- buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
106
- end
107
- end
108
- tmp_fh.print buffer.wrap_width(80)
109
- tmp_fh.close
110
- fh.close
111
- FileUtils.cp(tmp_path, file)
112
- ensure
113
- begin
114
- tmp_fh.close unless tmp_fh.nil?
115
- File.unlink(tmp_path) unless tmp_path.nil?
116
- rescue
117
- end
118
- end
119
- end
120
-
121
- ##
122
- # Calculates the average and standard deviation of the sequence lengths in
123
- # a FastA or FastQ file (supports gzipped files). The +format+ must be a
124
- # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
125
- # controlled via the +opts+ Hash. Supported options include:
126
- # - +:n50+: If true, it also returns the N50 and the median (in bp).
127
- # - +gc+: If true, it also returns the G+C content (in %).
128
- def self.seqs_length(file, format, opts={})
129
- fh = (file =~ /\.gz/) ? Zlib::GzipReader.open(file) : File.open(file, 'r')
130
- l = []
131
- gc = 0
132
- i = 0 # <- Zlib::GzipReader doesn't set $.
133
- fh.each_line do |ln|
134
- i += 1
135
- if (format==:fasta and ln =~ /^>/) or (format==:fastq and (i % 4)==1)
136
- l << 0
137
- elsif format==:fasta or (i % 4)==2
138
- l[l.size-1] += ln.chomp.size
139
- gc += ln.scan(/[GCgc]/).count if opts[:gc]
140
- end
141
- end
142
- fh.close
143
-
144
- o = { n: l.size, tot: l.inject(:+) }
145
- o[:avg] = o[:tot].to_f/l.size
146
- o[:var] = l.map{ |a| a ** 2 }.inject(:+).to_f/l.size - o[:avg]**2
147
- o[:sd] = Math.sqrt o[:var]
148
- o[:gc] = 100.0*gc/o[:tot] if opts[:gc]
149
- if opts[:n50]
150
- l.sort!
151
- thr = o[:tot]/2
152
- pos = 0
153
- l.each do |a|
154
- pos += a
155
- o[:n50] = a
156
- break if pos >= thr
157
- end
158
- o[:med] = o[:n].even? ?
159
- 0.5*l[o[:n]/2-1,2].inject(:+) : l[(o[:n]-1)/2]
160
- end
161
- o
162
- end
163
-
164
- ##
165
- # Path to a script to be executed for +task+. Supported +opts+ are:
166
- # - +:miga+ Path to the MiGA home to use. If not passed, the home of the
167
- # library is used).
168
- # - +:project+ MiGA::Project object to check within plugins. If not passed,
169
- # only core scripts are supported.
170
- def self.script_path(task, opts={})
171
- opts[:miga] ||= root_path
172
- unless opts[:project].nil?
173
- opts[:project].plugins.each do |pl|
174
- if File.exist? File.expand_path("scripts/#{task}.bash", pl)
175
- opts[:miga] = pl
176
- end
177
- end
178
- end
179
- File.expand_path("scripts/#{task}.bash", opts[:miga])
180
- end
181
-
182
29
  ##
183
30
  # Check if the result files exist with +base+ name (String) followed by the
184
31
  # +ext+ values (Array of String).
@@ -188,59 +35,6 @@ class MiGA::MiGA
188
35
  File.exist?(base + f) or File.exist?("#{base}#{f}.gz")
189
36
  end
190
37
  end
191
- end
192
-
193
- ##
194
- # MiGA extensions to the File class.
195
- class File
196
38
 
197
- ##
198
- # Method to transfer a file from +old_name+ to +new_name+, using a +method+
199
- # that can be one of :symlink for File#symlink, :hardlink for File#link, or
200
- # :copy for FileUtils#cp_r.
201
- def self.generic_transfer(old_name, new_name, method)
202
- return nil if exist? new_name
203
- case method
204
- when :symlink
205
- File.symlink(old_name, new_name)
206
- when :hardlink
207
- File.link(old_name, new_name)
208
- when :copy
209
- FileUtils.cp_r(old_name, new_name)
210
- else
211
- raise "Unknown transfer method: #{method}."
212
- end
213
- end
214
- end
215
-
216
- ##
217
- # MiGA extensions to the String class.
218
- class String
219
-
220
- ##
221
- # Replace any character not allowed in a MiGA name for underscore (_). This
222
- # results in a MiGA-compliant name EXCEPT for empty strings, that results in
223
- # empty strings.
224
- def miga_name
225
- gsub(/[^A-Za-z0-9_]/, '_')
226
- end
227
-
228
- ##
229
- # Is the string a MiGA-compliant name?
230
- def miga_name?
231
- !(self !~ /^[A-Za-z0-9_]+$/)
232
- end
233
-
234
- ##
235
- # Replace underscores by spaces or dots (depending on context).
236
- def unmiga_name
237
- gsub(/_(str|sp|subsp|pv)__/,"_\\1._").tr('_', ' ')
238
- end
239
-
240
- ##
241
- # Wraps the string with fixed Integer +width+.
242
- def wrap_width(width)
243
- gsub(/([^\n\r]{1,#{width}})/,"\\1\n")
244
- end
245
39
  end
246
40
 
@@ -0,0 +1,49 @@
1
+
2
+ class MiGA::MiGA
3
+
4
+ # Class-level
5
+ class << self
6
+ ##
7
+ # Turn on debugging.
8
+ def DEBUG_ON ; @@DEBUG=true end
9
+
10
+ ##
11
+ # Turn off debugging.
12
+ def DEBUG_OFF ; @@DEBUG=false end
13
+
14
+ ##
15
+ # Turn on debug tracing (and debugging).
16
+ def DEBUG_TRACE_ON
17
+ @@DEBUG_TRACE=true
18
+ DEBUG_ON()
19
+ end
20
+
21
+ ##
22
+ # Turn off debug tracing (but not debugging).
23
+ def DEBUG_TRACE_OFF
24
+ @@DEBUG_TRACE=false
25
+ end
26
+
27
+ ##
28
+ # Send debug message.
29
+ def DEBUG(*args)
30
+ $stderr.puts(*args) if @@DEBUG
31
+ $stderr.puts(
32
+ caller.map{ |v| v.gsub(/^/,' ') }.join("\n") ) if @@DEBUG_TRACE
33
+ end
34
+ end
35
+
36
+ end
37
+
38
+ module MiGA::Common
39
+
40
+ ##
41
+ # Should debugging information be reported?
42
+ @@DEBUG = false
43
+
44
+ ##
45
+ # Should the trace of debugging information be reported?
46
+ @@DEBUG_TRACE = false
47
+
48
+ end
49
+
@@ -0,0 +1,135 @@
1
+
2
+ require 'tempfile'
3
+ require 'zlib'
4
+
5
+ module MiGA::Common::Format
6
+
7
+ ##
8
+ # Tabulates +values+, an Array of Arrays, all with the same number of
9
+ # entries as +header+. Returns an Array of String, one per line.
10
+ def tabulate(header, values)
11
+ fields = [header.map(&:to_s)]
12
+ fields << fields.first.map{ |h| h.gsub(/\S/, '-') }
13
+ fields += values.map{ |row| row.map{ |cell| cell.nil? ? '?' : cell.to_s } }
14
+ clen = fields.map{ |row| row.map(&:length) }.transpose.map(&:max)
15
+ fields.map do |row|
16
+ (0 .. clen.size-1).map do |col_n|
17
+ col_n==0 ? row[col_n].rjust(clen[col_n]) : row[col_n].ljust(clen[col_n])
18
+ end.join(' ')
19
+ end
20
+ end
21
+
22
+ ##
23
+ # Cleans a FastA file in place.
24
+ def clean_fasta_file(file)
25
+ tmp_fh = nil
26
+ begin
27
+ if file =~ /\.gz/
28
+ tmp_path = Tempfile.new('MiGA.gz').tap(&:close).path
29
+ tmp_fh = Zlib::GzipWriter.open(tmp_path)
30
+ fh = Zlib::GzipReader.open(file)
31
+ else
32
+ tmp_fh = Tempfile.new('MiGA')
33
+ tmp_path = tmp_fh.path
34
+ fh = File.open(file, 'r')
35
+ end
36
+ buffer = ''
37
+ fh.each_line do |ln|
38
+ ln.chomp!
39
+ if ln =~ /^>\s*(\S+)(.*)/
40
+ (id, df) = [$1, $2]
41
+ tmp_fh.print buffer.wrap_width(80)
42
+ buffer = ''
43
+ tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
44
+ else
45
+ buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
46
+ end
47
+ end
48
+ tmp_fh.print buffer.wrap_width(80)
49
+ tmp_fh.close
50
+ fh.close
51
+ FileUtils.cp(tmp_path, file)
52
+ ensure
53
+ begin
54
+ tmp_fh.close unless tmp_fh.nil?
55
+ File.unlink(tmp_path) unless tmp_path.nil?
56
+ rescue
57
+ end
58
+ end
59
+ end
60
+
61
+ ##
62
+ # Calculates the average and standard deviation of the sequence lengths in
63
+ # a FastA or FastQ file (supports gzipped files). The +format+ must be a
64
+ # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
65
+ # controlled via the +opts+ Hash. Supported options include:
66
+ # - +:n50+: If true, it also returns the N50 and the median (in bp).
67
+ # - +:gc+: If true, it also returns the G+C content (in %).
68
+ def seqs_length(file, format, opts={})
69
+ fh = (file =~ /\.gz/) ? Zlib::GzipReader.open(file) : File.open(file, 'r')
70
+ l = []
71
+ gc = 0
72
+ i = 0 # <- Zlib::GzipReader doesn't set $.
73
+ fh.each_line do |ln|
74
+ i += 1
75
+ if (format==:fasta and ln =~ /^>/) or (format==:fastq and (i % 4)==1)
76
+ l << 0
77
+ elsif format==:fasta or (i % 4)==2
78
+ l[l.size-1] += ln.chomp.size
79
+ gc += ln.scan(/[GCgc]/).count if opts[:gc]
80
+ end
81
+ end
82
+ fh.close
83
+
84
+ o = { n: l.size, tot: l.inject(:+) }
85
+ o[:avg] = o[:tot].to_f/l.size
86
+ o[:var] = l.map{ |a| a ** 2 }.inject(:+).to_f/l.size - o[:avg]**2
87
+ o[:sd] = Math.sqrt o[:var]
88
+ o[:gc] = 100.0*gc/o[:tot] if opts[:gc]
89
+ if opts[:n50]
90
+ l.sort!
91
+ thr = o[:tot]/2
92
+ pos = 0
93
+ l.each do |a|
94
+ pos += a
95
+ o[:n50] = a
96
+ break if pos >= thr
97
+ end
98
+ o[:med] = o[:n].even? ?
99
+ 0.5*l[o[:n]/2-1,2].inject(:+) : l[(o[:n]-1)/2]
100
+ end
101
+ o
102
+ end
103
+ end
104
+
105
+ ##
106
+ # MiGA extensions to the String class.
107
+ class String
108
+
109
+ ##
110
+ # Replace any character not allowed in a MiGA name with an underscore (_). This
111
+ # results in a MiGA-compliant name EXCEPT for empty strings, which result in
112
+ # empty strings.
113
+ def miga_name
114
+ gsub(/[^A-Za-z0-9_]/, '_')
115
+ end
116
+
117
+ ##
118
+ # Is the string a MiGA-compliant name?
119
+ def miga_name?
120
+ !(self !~ /^[A-Za-z0-9_]+$/)
121
+ end
122
+
123
+ ##
124
+ # Replace underscores by spaces or dots (depending on context).
125
+ def unmiga_name
126
+ gsub(/_(str|sp|subsp|pv)__/,"_\\1._").tr('_', ' ')
127
+ end
128
+
129
+ ##
130
+ # Wraps the string with fixed Integer +width+.
131
+ def wrap_width(width)
132
+ gsub(/([^\n\r]{1,#{width}})/,"\\1\n")
133
+ end
134
+ end
135
+