miga-base 0.3.1.7 → 0.3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/actions/ncbi_get.rb +8 -0
  3. data/lib/miga/common.rb +9 -215
  4. data/lib/miga/common/base.rb +49 -0
  5. data/lib/miga/common/format.rb +135 -0
  6. data/lib/miga/common/path.rb +49 -0
  7. data/lib/miga/daemon.rb +3 -60
  8. data/lib/miga/daemon/base.rb +69 -0
  9. data/lib/miga/dataset.rb +3 -3
  10. data/lib/miga/dataset/result.rb +5 -5
  11. data/lib/miga/result.rb +5 -0
  12. data/lib/miga/version.rb +7 -5
  13. data/scripts/distances.bash +2 -19
  14. data/scripts/taxonomy.bash +2 -21
  15. data/test/common_test.rb +9 -0
  16. data/utils/distance/base.rb +6 -0
  17. data/utils/distance/commands.rb +82 -0
  18. data/utils/distance/database.rb +86 -0
  19. data/utils/distance/pipeline.rb +98 -0
  20. data/utils/distance/runner.rb +104 -0
  21. data/utils/distance/temporal.rb +37 -0
  22. data/utils/distances.rb +9 -0
  23. data/utils/enveomics/Docs/recplot2.md +233 -0
  24. data/utils/enveomics/Makefile +1 -1
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
  26. data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
  27. data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
  28. data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
  29. data/utils/enveomics/Manifest/categories.json +11 -1
  30. data/utils/enveomics/Manifest/examples.json +2 -2
  31. data/utils/enveomics/README.md +2 -0
  32. data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
  33. data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
  34. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  35. data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
  36. data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
  37. data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
  38. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  39. data/utils/enveomics/Scripts/SRA.download.bash +28 -21
  40. data/utils/enveomics/Scripts/Table.barplot.R +1 -0
  41. data/utils/enveomics/Scripts/aai.rb +4 -2
  42. data/utils/enveomics/build_enveomics_r.bash +5 -5
  43. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
  44. data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
  45. data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
  46. data/utils/enveomics/enveomics.R/README.md +26 -17
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
  48. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
  49. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
  50. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
  51. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
  52. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
  53. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
  54. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
  55. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
  56. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
  57. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
  58. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
  59. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
  60. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
  61. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
  64. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
  65. data/utils/requirements.txt +1 -1
  66. metadata +28 -4
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
  68. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b53d716162f9aedbc64f1e54e02ffc293b16a7e7
4
- data.tar.gz: a5c46555329c2da1ba1fd165d423e513a27562ef
3
+ metadata.gz: c395d2565cacafe425c91a277c03cdeaa3ac9ece
4
+ data.tar.gz: 1978e8a2df4d0646bc884ce9921bf8de2bc13a25
5
5
  SHA512:
6
- metadata.gz: c94add412b17de6a932ee247e90ef5682afdf5b61cf09a3b6b9baa64d401da09d29915f6b7a6f39a9e8e6e67ba6e7afb5ed2a982e488805e22f16974cedc9ad7
7
- data.tar.gz: 2b8e6fcbdc0b4f1b72e43bb02d47e3fb4618773ef4e86c9644b77926429b6f117cc2d965437704c2540fffd26a8576bdfc299fdd927f7fe2c0f2f33a4c961727
6
+ metadata.gz: fff79ff971c5fc9e0e0684585b9b35abe5377cff0742c772fc1a7c58005b224ea18b36e7fe489b78c6103f6c4b9ffc90be75e7a385d5fce59a917e2d68c3765a
7
+ data.tar.gz: cda85047eabd8ba1c76bb5fe690247a3e71583e034f3c5dbef9178776d4c5da02bd3075cfc0e3154e1e97aa69da8ddea85a00ef99efa065e867950c10a860a71
@@ -34,6 +34,8 @@ OptionParser.new do |opt|
34
34
  opt.on('--no-version-name',
35
35
  'Do not add sequence version to the dataset name.',
36
36
  'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
37
+ opt.on('--blacklist PATH',
38
+ 'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
37
39
  opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
38
40
  opt.on('-q', '--query',
39
41
  'Register the datasets as queries, not reference datasets.'
@@ -135,6 +137,12 @@ if o[:scaffold] or o[:contig]
135
137
  end
136
138
  end
137
139
 
140
+ # Discard blacklisted
141
+ unless o[:blacklist].nil?
142
+ $stderr.puts "Discarding datasets in #{o[:blacklist]}." unless o[:q]
143
+ File.readlines(o[:blacklist]).map(&:chomp).each{ |i| ds.delete i }
144
+ end
145
+
138
146
  # Download entries
139
147
  $stderr.puts "Downloading #{ds.size} #{ds.size>1 ? "entries" : "entry"}." unless o[:q]
140
148
  ds.each do |name,body|
@@ -1,62 +1,24 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'miga/version'
5
4
  require 'json'
6
- require 'tempfile'
7
- require 'zlib'
5
+ require 'miga/version'
6
+ require 'miga/common/base'
7
+ require 'miga/common/path'
8
+ require 'miga/common/format'
8
9
 
9
10
  ##
10
11
  # Generic class used to handle system-wide information and methods, and parent
11
12
  # of all other MiGA::* classes.
12
13
  class MiGA::MiGA
14
+
15
+ include MiGA::Common
16
+
17
+ extend MiGA::Common::Path
18
+ extend MiGA::Common::Format
13
19
 
14
20
  ENV['MIGA_HOME'] ||= ENV['HOME']
15
21
 
16
- ##
17
- # Root path to MiGA (as estimated from the location of the current file).
18
- def self.root_path
19
- File.expand_path('../../..', __FILE__)
20
- end
21
-
22
- ##
23
- # Should debugging information be reported?
24
- @@DEBUG = false
25
-
26
- ##
27
- # Should the trace of debugging information be reported?
28
- @@DEBUG_TRACE = false
29
-
30
- ##
31
- # Turn on debugging.
32
- def self.DEBUG_ON() @@DEBUG=true end
33
-
34
- ##
35
- # Turn off debugging.
36
- def self.DEBUG_OFF() @@DEBUG=false end
37
-
38
- ##
39
- # Turn on debug tracing (and debugging).
40
- def self.DEBUG_TRACE_ON
41
- @@DEBUG_TRACE=true
42
- self.DEBUG_ON
43
- end
44
-
45
- ##
46
- # Turn off debug tracing (but not debugging).
47
- def self.DEBUG_TRACE_OFF
48
- @@DEBUG_TRACE=false
49
- end
50
-
51
- ##
52
- # Send debug message.
53
- def self.DEBUG(*args)
54
- $stderr.puts(*args) if @@DEBUG
55
- if @@DEBUG_TRACE
56
- $stderr.puts caller.map{ |v| v.gsub(/^/,' ') }.join("\n")
57
- end
58
- end
59
-
60
22
  ##
61
23
  # Has MiGA been initialized?
62
24
  def self.initialized?
@@ -64,121 +26,6 @@ class MiGA::MiGA
64
26
  File.exist?(File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
65
27
  end
66
28
 
67
- ##
68
- # Tabulates an +values+, and Array of Arrays, all with the same number of
69
- # entries as +header+. Returns an Array of String, one per line.
70
- def self.tabulate(header, values)
71
- fields = [header.map(&:to_s)]
72
- fields << fields.first.map{ |h| h.gsub(/\S/, '-') }
73
- fields += values.map{ |row| row.map{ |cell| cell.nil? ? '?' : cell.to_s } }
74
- clen = fields.map{ |row| row.map(&:length) }.transpose.map(&:max)
75
- fields.map do |row|
76
- (0 .. clen.size-1).map do |col_n|
77
- col_n==0 ? row[col_n].rjust(clen[col_n]) : row[col_n].ljust(clen[col_n])
78
- end.join(' ')
79
- end
80
- end
81
-
82
- ##
83
- # Cleans a FastA file in place.
84
- def self.clean_fasta_file(file)
85
- tmp_fh = nil
86
- begin
87
- if file =~ /\.gz/
88
- tmp_path = Tempfile.new('MiGA.gz').tap(&:close).path
89
- tmp_fh = Zlib::GzipWriter.open(tmp_path)
90
- fh = Zlib::GzipReader.open(file)
91
- else
92
- tmp_fh = Tempfile.new('MiGA')
93
- tmp_path = tmp_fh.path
94
- fh = File.open(file, 'r')
95
- end
96
- buffer = ''
97
- fh.each_line do |ln|
98
- ln.chomp!
99
- if ln =~ /^>\s*(\S+)(.*)/
100
- (id, df) = [$1, $2]
101
- tmp_fh.print buffer.wrap_width(80)
102
- buffer = ''
103
- tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
104
- else
105
- buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
106
- end
107
- end
108
- tmp_fh.print buffer.wrap_width(80)
109
- tmp_fh.close
110
- fh.close
111
- FileUtils.cp(tmp_path, file)
112
- ensure
113
- begin
114
- tmp_fh.close unless tmp_fh.nil?
115
- File.unlink(tmp_path) unless tmp_path.nil?
116
- rescue
117
- end
118
- end
119
- end
120
-
121
- ##
122
- # Calculates the average and standard deviation of the sequence lengths in
123
- # a FastA or FastQ file (supports gzipped files). The +format+ must be a
124
- # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
125
- # controlled via the +opts+ Hash. Supported options include:
126
- # - +:n50+: If true, it also returns the N50 and the median (in bp).
127
- # - +gc+: If true, it also returns the G+C content (in %).
128
- def self.seqs_length(file, format, opts={})
129
- fh = (file =~ /\.gz/) ? Zlib::GzipReader.open(file) : File.open(file, 'r')
130
- l = []
131
- gc = 0
132
- i = 0 # <- Zlib::GzipReader doesn't set $.
133
- fh.each_line do |ln|
134
- i += 1
135
- if (format==:fasta and ln =~ /^>/) or (format==:fastq and (i % 4)==1)
136
- l << 0
137
- elsif format==:fasta or (i % 4)==2
138
- l[l.size-1] += ln.chomp.size
139
- gc += ln.scan(/[GCgc]/).count if opts[:gc]
140
- end
141
- end
142
- fh.close
143
-
144
- o = { n: l.size, tot: l.inject(:+) }
145
- o[:avg] = o[:tot].to_f/l.size
146
- o[:var] = l.map{ |a| a ** 2 }.inject(:+).to_f/l.size - o[:avg]**2
147
- o[:sd] = Math.sqrt o[:var]
148
- o[:gc] = 100.0*gc/o[:tot] if opts[:gc]
149
- if opts[:n50]
150
- l.sort!
151
- thr = o[:tot]/2
152
- pos = 0
153
- l.each do |a|
154
- pos += a
155
- o[:n50] = a
156
- break if pos >= thr
157
- end
158
- o[:med] = o[:n].even? ?
159
- 0.5*l[o[:n]/2-1,2].inject(:+) : l[(o[:n]-1)/2]
160
- end
161
- o
162
- end
163
-
164
- ##
165
- # Path to a script to be executed for +task+. Supported +opts+ are:
166
- # - +:miga+ Path to the MiGA home to use. If not passed, the home of the
167
- # library is used).
168
- # - +:project+ MiGA::Project object to check within plugins. If not passed,
169
- # only core scripts are supported.
170
- def self.script_path(task, opts={})
171
- opts[:miga] ||= root_path
172
- unless opts[:project].nil?
173
- opts[:project].plugins.each do |pl|
174
- if File.exist? File.expand_path("scripts/#{task}.bash", pl)
175
- opts[:miga] = pl
176
- end
177
- end
178
- end
179
- File.expand_path("scripts/#{task}.bash", opts[:miga])
180
- end
181
-
182
29
  ##
183
30
  # Check if the result files exist with +base+ name (String) followed by the
184
31
  # +ext+ values (Array of String).
@@ -188,59 +35,6 @@ class MiGA::MiGA
188
35
  File.exist?(base + f) or File.exist?("#{base}#{f}.gz")
189
36
  end
190
37
  end
191
- end
192
-
193
- ##
194
- # MiGA extensions to the File class.
195
- class File
196
38
 
197
- ##
198
- # Method to transfer a file from +old_name+ to +new_name+, using a +method+
199
- # that can be one of :symlink for File#symlink, :hardlink for File#link, or
200
- # :copy for FileUtils#cp_r.
201
- def self.generic_transfer(old_name, new_name, method)
202
- return nil if exist? new_name
203
- case method
204
- when :symlink
205
- File.symlink(old_name, new_name)
206
- when :hardlink
207
- File.link(old_name, new_name)
208
- when :copy
209
- FileUtils.cp_r(old_name, new_name)
210
- else
211
- raise "Unknown transfer method: #{method}."
212
- end
213
- end
214
- end
215
-
216
- ##
217
- # MiGA extensions to the String class.
218
- class String
219
-
220
- ##
221
- # Replace any character not allowed in a MiGA name for underscore (_). This
222
- # results in a MiGA-compliant name EXCEPT for empty strings, that results in
223
- # empty strings.
224
- def miga_name
225
- gsub(/[^A-Za-z0-9_]/, '_')
226
- end
227
-
228
- ##
229
- # Is the string a MiGA-compliant name?
230
- def miga_name?
231
- !(self !~ /^[A-Za-z0-9_]+$/)
232
- end
233
-
234
- ##
235
- # Replace underscores by spaces or dots (depending on context).
236
- def unmiga_name
237
- gsub(/_(str|sp|subsp|pv)__/,"_\\1._").tr('_', ' ')
238
- end
239
-
240
- ##
241
- # Wraps the string with fixed Integer +width+.
242
- def wrap_width(width)
243
- gsub(/([^\n\r]{1,#{width}})/,"\\1\n")
244
- end
245
39
  end
246
40
 
@@ -0,0 +1,49 @@
1
+
2
+ class MiGA::MiGA
3
+
4
+ # Class-level
5
+ class << self
6
+ ##
7
+ # Turn on debugging.
8
+ def DEBUG_ON ; @@DEBUG=true end
9
+
10
+ ##
11
+ # Turn off debugging.
12
+ def DEBUG_OFF ; @@DEBUG=false end
13
+
14
+ ##
15
+ # Turn on debug tracing (and debugging).
16
+ def DEBUG_TRACE_ON
17
+ @@DEBUG_TRACE=true
18
+ DEBUG_ON()
19
+ end
20
+
21
+ ##
22
+ # Turn off debug tracing (but not debugging).
23
+ def DEBUG_TRACE_OFF
24
+ @@DEBUG_TRACE=false
25
+ end
26
+
27
+ ##
28
+ # Send debug message.
29
+ def DEBUG(*args)
30
+ $stderr.puts(*args) if @@DEBUG
31
+ $stderr.puts(
32
+ caller.map{ |v| v.gsub(/^/,' ') }.join("\n") ) if @@DEBUG_TRACE
33
+ end
34
+ end
35
+
36
+ end
37
+
38
+ module MiGA::Common
39
+
40
+ ##
41
+ # Should debugging information be reported?
42
+ @@DEBUG = false
43
+
44
+ ##
45
+ # Should the trace of debugging information be reported?
46
+ @@DEBUG_TRACE = false
47
+
48
+ end
49
+
@@ -0,0 +1,135 @@
1
+
2
+ require 'tempfile'
3
+ require 'zlib'
4
+
5
+ module MiGA::Common::Format
6
+
7
+ ##
8
+ # Tabulates +values+, an Array of Arrays, all with the same number of
9
+ # entries as +header+. Returns an Array of String, one per line.
10
+ def tabulate(header, values)
11
+ fields = [header.map(&:to_s)]
12
+ fields << fields.first.map{ |h| h.gsub(/\S/, '-') }
13
+ fields += values.map{ |row| row.map{ |cell| cell.nil? ? '?' : cell.to_s } }
14
+ clen = fields.map{ |row| row.map(&:length) }.transpose.map(&:max)
15
+ fields.map do |row|
16
+ (0 .. clen.size-1).map do |col_n|
17
+ col_n==0 ? row[col_n].rjust(clen[col_n]) : row[col_n].ljust(clen[col_n])
18
+ end.join(' ')
19
+ end
20
+ end
21
+
22
+ ##
23
+ # Cleans a FastA file in place.
24
+ def clean_fasta_file(file)
25
+ tmp_fh = nil
26
+ begin
27
+ if file =~ /\.gz/
28
+ tmp_path = Tempfile.new('MiGA.gz').tap(&:close).path
29
+ tmp_fh = Zlib::GzipWriter.open(tmp_path)
30
+ fh = Zlib::GzipReader.open(file)
31
+ else
32
+ tmp_fh = Tempfile.new('MiGA')
33
+ tmp_path = tmp_fh.path
34
+ fh = File.open(file, 'r')
35
+ end
36
+ buffer = ''
37
+ fh.each_line do |ln|
38
+ ln.chomp!
39
+ if ln =~ /^>\s*(\S+)(.*)/
40
+ (id, df) = [$1, $2]
41
+ tmp_fh.print buffer.wrap_width(80)
42
+ buffer = ''
43
+ tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
44
+ else
45
+ buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
46
+ end
47
+ end
48
+ tmp_fh.print buffer.wrap_width(80)
49
+ tmp_fh.close
50
+ fh.close
51
+ FileUtils.cp(tmp_path, file)
52
+ ensure
53
+ begin
54
+ tmp_fh.close unless tmp_fh.nil?
55
+ File.unlink(tmp_path) unless tmp_path.nil?
56
+ rescue
57
+ end
58
+ end
59
+ end
60
+
61
+ ##
62
+ # Calculates the average and standard deviation of the sequence lengths in
63
+ # a FastA or FastQ file (supports gzipped files). The +format+ must be a
64
+ # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
65
+ # controlled via the +opts+ Hash. Supported options include:
66
+ # - +:n50+: If true, it also returns the N50 and the median (in bp).
67
+ # - +:gc+: If true, it also returns the G+C content (in %).
68
+ def seqs_length(file, format, opts={})
69
+ fh = (file =~ /\.gz/) ? Zlib::GzipReader.open(file) : File.open(file, 'r')
70
+ l = []
71
+ gc = 0
72
+ i = 0 # <- Zlib::GzipReader doesn't set $.
73
+ fh.each_line do |ln|
74
+ i += 1
75
+ if (format==:fasta and ln =~ /^>/) or (format==:fastq and (i % 4)==1)
76
+ l << 0
77
+ elsif format==:fasta or (i % 4)==2
78
+ l[l.size-1] += ln.chomp.size
79
+ gc += ln.scan(/[GCgc]/).count if opts[:gc]
80
+ end
81
+ end
82
+ fh.close
83
+
84
+ o = { n: l.size, tot: l.inject(:+) }
85
+ o[:avg] = o[:tot].to_f/l.size
86
+ o[:var] = l.map{ |a| a ** 2 }.inject(:+).to_f/l.size - o[:avg]**2
87
+ o[:sd] = Math.sqrt o[:var]
88
+ o[:gc] = 100.0*gc/o[:tot] if opts[:gc]
89
+ if opts[:n50]
90
+ l.sort!
91
+ thr = o[:tot]/2
92
+ pos = 0
93
+ l.each do |a|
94
+ pos += a
95
+ o[:n50] = a
96
+ break if pos >= thr
97
+ end
98
+ o[:med] = o[:n].even? ?
99
+ 0.5*l[o[:n]/2-1,2].inject(:+) : l[(o[:n]-1)/2]
100
+ end
101
+ o
102
+ end
103
+ end
104
+
105
+ ##
106
+ # MiGA extensions to the String class.
107
+ class String
108
+
109
+ ##
110
+ # Replace any character not allowed in a MiGA name with an underscore (_). This
111
+ # results in a MiGA-compliant name EXCEPT for empty strings, which result in
112
+ # empty strings.
113
+ def miga_name
114
+ gsub(/[^A-Za-z0-9_]/, '_')
115
+ end
116
+
117
+ ##
118
+ # Is the string a MiGA-compliant name?
119
+ def miga_name?
120
+ !(self !~ /^[A-Za-z0-9_]+$/)
121
+ end
122
+
123
+ ##
124
+ # Replace underscores by spaces or dots (depending on context).
125
+ def unmiga_name
126
+ gsub(/_(str|sp|subsp|pv)__/,"_\\1._").tr('_', ' ')
127
+ end
128
+
129
+ ##
130
+ # Wraps the string with fixed Integer +width+.
131
+ def wrap_width(width)
132
+ gsub(/([^\n\r]{1,#{width}})/,"\\1\n")
133
+ end
134
+ end
135
+