bio 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +1712 -0
- data/KNOWN_ISSUES.rdoc +11 -1
- data/README.rdoc +3 -2
- data/RELEASE_NOTES.rdoc +65 -127
- data/bioruby.gemspec +38 -2
- data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
- data/doc/Tutorial.rd +74 -16
- data/doc/Tutorial.rd.html +68 -16
- data/lib/bio.rb +2 -0
- data/lib/bio/appl/clustalw/report.rb +18 -0
- data/lib/bio/appl/paml/codeml/report.rb +579 -21
- data/lib/bio/command.rb +149 -21
- data/lib/bio/db/aaindex.rb +11 -1
- data/lib/bio/db/embl/sptr.rb +1 -1
- data/lib/bio/db/fasta/defline.rb +7 -2
- data/lib/bio/db/fasta/qual.rb +24 -0
- data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
- data/lib/bio/db/fastq.rb +15 -0
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/db/kegg/common.rb +109 -5
- data/lib/bio/db/kegg/genes.rb +61 -15
- data/lib/bio/db/kegg/genome.rb +43 -38
- data/lib/bio/db/kegg/module.rb +158 -0
- data/lib/bio/db/kegg/orthology.rb +40 -1
- data/lib/bio/db/kegg/pathway.rb +254 -0
- data/lib/bio/db/medline.rb +6 -2
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/location.rb +39 -0
- data/lib/bio/reference.rb +24 -0
- data/lib/bio/sequence.rb +2 -0
- data/lib/bio/sequence/adapter.rb +1 -0
- data/lib/bio/sequence/format.rb +14 -0
- data/lib/bio/sequence/sequence_masker.rb +95 -0
- data/lib/bio/tree.rb +4 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
- data/lib/bio/version.rb +1 -1
- data/setup.rb +5 -0
- data/test/data/KEGG/K02338.orthology +180 -52
- data/test/data/KEGG/M00118.module +44 -0
- data/test/data/KEGG/T00005.genome +140 -0
- data/test/data/KEGG/T00070.genome +34 -0
- data/test/data/KEGG/b0529.gene +47 -0
- data/test/data/KEGG/ec00072.pathway +23 -0
- data/test/data/KEGG/hsa00790.pathway +59 -0
- data/test/data/KEGG/ko00312.pathway +16 -0
- data/test/data/KEGG/map00030.pathway +37 -0
- data/test/data/KEGG/map00052.pathway +13 -0
- data/test/data/KEGG/rn00250.pathway +114 -0
- data/test/data/clustalw/example1.aln +58 -0
- data/test/data/go/selected_component.ontology +12 -0
- data/test/data/go/selected_gene_association.sgd +31 -0
- data/test/data/go/selected_wikipedia2go +13 -0
- data/test/data/medline/20146148_modified.medline +54 -0
- data/test/data/paml/codeml/models/aa.aln +26 -0
- data/test/data/paml/codeml/models/aa.dnd +13 -0
- data/test/data/paml/codeml/models/aa.ph +13 -0
- data/test/data/paml/codeml/models/alignment.phy +49 -0
- data/test/data/paml/codeml/models/results0-3.txt +312 -0
- data/test/data/paml/codeml/models/results7-8.txt +340 -0
- data/test/functional/bio/io/test_togows.rb +8 -8
- data/test/functional/bio/test_command.rb +7 -6
- data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
- data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
- data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1 -1
- data/test/unit/bio/db/fasta/test_defline.rb +160 -0
- data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
- data/test/unit/bio/db/kegg/test_genes.rb +281 -1
- data/test/unit/bio/db/kegg/test_genome.rb +408 -0
- data/test/unit/bio/db/kegg/test_module.rb +246 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
- data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
- data/test/unit/bio/db/test_aaindex.rb +8 -7
- data/test/unit/bio/db/test_fastq.rb +36 -0
- data/test/unit/bio/db/test_go.rb +171 -0
- data/test/unit/bio/db/test_medline.rb +148 -0
- data/test/unit/bio/db/test_qual.rb +9 -2
- data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
- data/test/unit/bio/test_tree.rb +260 -1
- data/test/unit/bio/util/test_contingency_table.rb +7 -7
- metadata +53 -6
data/lib/bio/command.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# = bio/command.rb - general methods for external command execution
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2003-
|
4
|
+
# Copyright:: Copyright (C) 2003-2010
|
5
5
|
# Naohisa Goto <ng@bioruby.org>,
|
6
6
|
# Toshiaki Katayama <k@bioruby.org>
|
7
7
|
# License:: The Ruby License
|
@@ -34,6 +34,59 @@ module Command
|
|
34
34
|
|
35
35
|
module_function
|
36
36
|
|
37
|
+
# *CAUTION* Bio::Command INTERNAL USE ONLY.
|
38
|
+
# Users must NOT use the method.
|
39
|
+
# The method will be removed when it is not needed.
|
40
|
+
#
|
41
|
+
# Checks if the program is running on Microsoft Windows.
|
42
|
+
# If Windows, returns true. Otherwise, returns false.
|
43
|
+
# Note that Cygwin is not treated as Windows.
|
44
|
+
#
|
45
|
+
# Known issues:
|
46
|
+
# * It might make a mistake in minor platforms/architectures/interpreters.
|
47
|
+
# * When running JRuby on Cygwin, the result is unknown.
|
48
|
+
# ---
|
49
|
+
# *Returns*:: true or false
|
50
|
+
def windows_platform?
|
51
|
+
case RUBY_PLATFORM
|
52
|
+
when /(?:mswin|bccwin|mingw)(?:32|64)/i
|
53
|
+
true
|
54
|
+
when /java/i
|
55
|
+
# Reference: Redmine's platform.rb
|
56
|
+
# http://www.redmine.org/projects/redmine/repository/revisions/1753/entry/trunk/lib/redmine/platform.rb
|
57
|
+
if /windows/i =~ (ENV['OS'] || ENV['os']).to_s then
|
58
|
+
true
|
59
|
+
else
|
60
|
+
false
|
61
|
+
end
|
62
|
+
else
|
63
|
+
false
|
64
|
+
end
|
65
|
+
end
|
66
|
+
private_class_method :windows_platform?
|
67
|
+
|
68
|
+
# *CAUTION* Bio::Command INTERNAL USE ONLY.
|
69
|
+
# Users must NOT use the method.
|
70
|
+
# The method will be removed when it is not needed.
|
71
|
+
#
|
72
|
+
# Checks if the OS does not support fork(2) system call.
|
73
|
+
# When not supported, it returns true.
|
74
|
+
# When supported or unknown, it returns false or nil.
|
75
|
+
#
|
76
|
+
# Known issues:
|
77
|
+
# * It might make a mistake in minor platforms/architectures/interpreters.
|
78
|
+
# ---
|
79
|
+
# *Returns*:: true, false or nil.
|
80
|
+
def no_fork?
|
81
|
+
if (defined?(@@no_fork) && @@no_fork) or
|
82
|
+
windows_platform? or /java/i =~ RUBY_PLATFORM then
|
83
|
+
true
|
84
|
+
else
|
85
|
+
false
|
86
|
+
end
|
87
|
+
end
|
88
|
+
private_class_method :no_fork?
|
89
|
+
|
37
90
|
# Escape special characters in command line string for cmd.exe on Windows.
|
38
91
|
# ---
|
39
92
|
# *Arguments*:
|
@@ -66,8 +119,7 @@ module Command
|
|
66
119
|
# * (required) _str_: String
|
67
120
|
# *Returns*:: String object
|
68
121
|
def escape_shell(str)
|
69
|
-
|
70
|
-
when /mswin32|bccwin32/
|
122
|
+
if windows_platform? then
|
71
123
|
escape_shell_windows(str)
|
72
124
|
else
|
73
125
|
escape_shell_unix(str)
|
@@ -80,8 +132,7 @@ module Command
|
|
80
132
|
# * (required) _ary_: Array containing String objects
|
81
133
|
# *Returns*:: String object
|
82
134
|
def make_command_line(ary)
|
83
|
-
|
84
|
-
when /mswin32|bccwin32/
|
135
|
+
if windows_platform? then
|
85
136
|
make_command_line_windows(ary)
|
86
137
|
else
|
87
138
|
make_command_line_unix(ary)
|
@@ -130,8 +181,8 @@ module Command
|
|
130
181
|
[ arg0 ]
|
131
182
|
end
|
132
183
|
|
133
|
-
# Executes the program.
|
134
|
-
# environment and fork for the others.
|
184
|
+
# Executes the program. Automatically selects popen for Ruby 1.9 or
|
185
|
+
# Windows environment and fork for the others.
|
135
186
|
# A block must be given. An IO object is passed to the block.
|
136
187
|
#
|
137
188
|
# Available options:
|
@@ -143,27 +194,62 @@ module Command
|
|
143
194
|
# * (optional) _options_: Hash
|
144
195
|
# *Returns*:: (undefined)
|
145
196
|
def call_command(cmd, options = {}, &block) #:yields: io
|
146
|
-
|
147
|
-
|
197
|
+
if RUBY_VERSION >= "1.9.0" then
|
198
|
+
return call_command_popen(cmd, options, &block)
|
199
|
+
elsif no_fork? then
|
148
200
|
call_command_popen(cmd, options, &block)
|
149
201
|
else
|
150
|
-
|
202
|
+
begin
|
203
|
+
call_command_fork(cmd, options, &block)
|
204
|
+
rescue NotImplementedError
|
205
|
+
# fork(2) not implemented
|
206
|
+
@@no_fork = true
|
207
|
+
call_command_popen(cmd, options, &block)
|
208
|
+
end
|
151
209
|
end
|
152
210
|
end
|
153
211
|
|
212
|
+
# This method is internally called from the call_command method.
|
213
|
+
# Normally, use call_command, and do not call this method directly.
|
214
|
+
#
|
154
215
|
# Executes the program via IO.popen for OS which doesn't support fork.
|
155
216
|
# A block must be given. An IO object is passed to the block.
|
217
|
+
#
|
218
|
+
# See the document of call_command for available options.
|
219
|
+
#
|
220
|
+
# Note for Ruby 1.8:
|
221
|
+
# In Ruby 1.8, shell unsafe characters are escaped.
|
222
|
+
# If inescapable characters exist, it raises RuntimeError.
|
223
|
+
# So, call_command_fork is normally recommended.
|
224
|
+
#
|
225
|
+
# Note for Ruby 1.9:
|
226
|
+
# In Ruby 1.9, call_command_popen is safe and robust enough, and is the
|
227
|
+
# recommended way, because IO.popen is improved to get a command-line
|
228
|
+
# as an array without calling shell.
|
229
|
+
#
|
156
230
|
# ---
|
157
231
|
# *Arguments*:
|
158
232
|
# * (required) _cmd_: Array containing String objects
|
159
233
|
# * (optional) _options_: Hash
|
160
234
|
# *Returns*:: (undefined)
|
161
235
|
def call_command_popen(cmd, options = {})
|
236
|
+
if RUBY_VERSION >= "1.9.0" then
|
237
|
+
# For Ruby 1.9 or later, using command line array with options.
|
238
|
+
dir = options[:chdir]
|
239
|
+
cmd = safe_command_line_array(cmd)
|
240
|
+
if dir then
|
241
|
+
cmd = cmd + [ { :chdir => dir } ]
|
242
|
+
end
|
243
|
+
r = IO.popen(cmd, "r+") do |io|
|
244
|
+
yield io
|
245
|
+
end
|
246
|
+
return r
|
247
|
+
end
|
248
|
+
# For Ruby 1.8, using command line string.
|
162
249
|
str = make_command_line(cmd)
|
163
250
|
# processing options
|
164
251
|
if dir = options[:chdir] then
|
165
|
-
|
166
|
-
when /mswin32|bccwin32/
|
252
|
+
if windows_platform?
|
167
253
|
# Unix-like dir separator is changed to Windows dir separator
|
168
254
|
# by using String#gsub.
|
169
255
|
dirstr = dir.gsub(/\//, "\\")
|
@@ -182,11 +268,24 @@ module Command
|
|
182
268
|
end
|
183
269
|
end
|
184
270
|
|
271
|
+
# This method is internally called from the call_command method.
|
272
|
+
# Normally, use call_command, and do not call this method directly.
|
273
|
+
#
|
185
274
|
# Executes the program via fork (by using IO.popen("-")) and exec.
|
186
275
|
# A block must be given. An IO object is passed to the block.
|
187
276
|
#
|
188
|
-
#
|
189
|
-
#
|
277
|
+
# See the document of call_command for available options.
|
278
|
+
#
|
279
|
+
# Note for Ruby 1.8:
|
280
|
+
# In Ruby 1.8, from the view point of security, this method is recommended
|
281
|
+
# rather than call_command_popen. However, this method might have problems
|
282
|
+
# with multi-threads.
|
283
|
+
#
|
284
|
+
# Note for Ruby 1.9:
|
285
|
+
# In Ruby 1.9, this method can not be used, because Thread.critical is
|
286
|
+
# removed. In Ruby 1.9, call_command_popen is safe and robust enough, and
|
287
|
+
# is the recommended way, because IO.popen is improved to get a
|
288
|
+
# command-line as an array without calling shell.
|
190
289
|
#
|
191
290
|
# ---
|
192
291
|
# *Arguments*:
|
@@ -196,12 +295,17 @@ module Command
|
|
196
295
|
def call_command_fork(cmd, options = {})
|
197
296
|
dir = options[:chdir]
|
198
297
|
cmd = safe_command_line_array(cmd)
|
298
|
+
begin
|
299
|
+
tc, Thread.critical, flag0, flag1 = Thread.critical, true, true, true
|
199
300
|
IO.popen("-", "r+") do |io|
|
200
301
|
if io then
|
201
302
|
# parent
|
303
|
+
flag0, Thread.critical, flag1 = false, tc, false
|
202
304
|
yield io
|
203
305
|
else
|
204
306
|
# child
|
307
|
+
Thread.critical = true # for safety, though already true
|
308
|
+
GC.disable
|
205
309
|
# chdir to options[:chdir] if available
|
206
310
|
begin
|
207
311
|
Dir.chdir(dir) if dir
|
@@ -218,6 +322,11 @@ module Command
|
|
218
322
|
Process.exit!(1)
|
219
323
|
end
|
220
324
|
end
|
325
|
+
ensure
|
326
|
+
# When IO.popen("-") raises error, Thread.critical will be set here.
|
327
|
+
Thread.critical = tc if flag0 or flag1
|
328
|
+
#warn 'Thread.critical might have wrong value.' if flag0 != flag1
|
329
|
+
end
|
221
330
|
end
|
222
331
|
|
223
332
|
# Executes the program via Open3.popen3
|
@@ -240,7 +349,8 @@ module Command
|
|
240
349
|
# waits for the program to terminate, and returns the output data printed to the
|
241
350
|
# standard output as a string.
|
242
351
|
#
|
243
|
-
# Automatically select popen for Windows environment and
|
352
|
+
# Automatically selects popen for Ruby 1.9 or Windows environment and
|
353
|
+
# fork for the others.
|
244
354
|
#
|
245
355
|
# Available options:
|
246
356
|
# :chdir => "path" : changes working directory to the specified path.
|
@@ -252,19 +362,32 @@ module Command
|
|
252
362
|
# * (optional) _options_: Hash
|
253
363
|
# *Returns*:: String or nil
|
254
364
|
def query_command(cmd, query = nil, options = {})
|
255
|
-
|
256
|
-
|
365
|
+
if RUBY_VERSION >= "1.9.0" then
|
366
|
+
return query_command_popen(cmd, query, options)
|
367
|
+
elsif no_fork? then
|
257
368
|
query_command_popen(cmd, query, options)
|
258
369
|
else
|
259
|
-
|
370
|
+
begin
|
371
|
+
query_command_fork(cmd, query, options)
|
372
|
+
rescue NotImplementedError
|
373
|
+
# fork(2) not implemented
|
374
|
+
@@no_fork = true
|
375
|
+
query_command_fork(cmd, query, options)
|
376
|
+
end
|
260
377
|
end
|
261
378
|
end
|
262
379
|
|
380
|
+
# This method is internally called from the query_command method.
|
381
|
+
# Normally, use query_command, and do not call this method directly.
|
382
|
+
#
|
263
383
|
# Executes the program with the query (String) given to the standard input,
|
264
384
|
# waits for the program to terminate, and returns the output data printed to the
|
265
385
|
# standard output as a string.
|
266
386
|
#
|
267
|
-
#
|
387
|
+
# See the document of query_command for available options.
|
388
|
+
#
|
389
|
+
# See the document of call_command_popen for the security and Ruby
|
390
|
+
# version specific issues.
|
268
391
|
#
|
269
392
|
# ---
|
270
393
|
# *Arguments*:
|
@@ -283,14 +406,19 @@ module Command
|
|
283
406
|
ret
|
284
407
|
end
|
285
408
|
|
409
|
+
# This method is internally called from the query_command method.
|
410
|
+
# Normally, use query_command, and do not call this method directly.
|
411
|
+
#
|
286
412
|
# Executes the program with the query (String) given to the standard input,
|
287
413
|
# waits for the program to terminate, and returns the output data printed to the
|
288
414
|
# standard output as a string.
|
289
415
|
#
|
290
416
|
# Fork (by using IO.popen("-")) and exec is used to execute the program.
|
291
417
|
#
|
292
|
-
#
|
293
|
-
#
|
418
|
+
# See the document of query_command for available options.
|
419
|
+
#
|
420
|
+
# See the document of call_command_fork for the security and Ruby
|
421
|
+
# version specific issues.
|
294
422
|
#
|
295
423
|
# ---
|
296
424
|
# *Arguments*:
|
data/lib/bio/db/aaindex.rb
CHANGED
@@ -254,7 +254,17 @@ module Bio
|
|
254
254
|
label_data.each_line do |line|
|
255
255
|
ma << line.strip.split(/\s+/).map {|x| x.to_f }
|
256
256
|
end
|
257
|
-
|
257
|
+
ma_len = ma.size
|
258
|
+
ma.each do |row|
|
259
|
+
row_size = row.size
|
260
|
+
if row_size < ma_len
|
261
|
+
(row_size..ma_len-1).each do |i|
|
262
|
+
row[i] = ma[i][row_size-1]
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|
266
|
+
mat = Matrix[*ma]
|
267
|
+
@data['matrix'] = mat
|
258
268
|
end
|
259
269
|
end
|
260
270
|
|
data/lib/bio/db/embl/sptr.rb
CHANGED
data/lib/bio/db/fasta/defline.rb
CHANGED
@@ -126,10 +126,14 @@ module Bio
|
|
126
126
|
# http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
|
127
127
|
#
|
128
128
|
# * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
|
129
|
+
# (Dead link. Please find in http://web.archive.org/ ).
|
129
130
|
# http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
130
131
|
#
|
131
|
-
# *
|
132
|
-
#
|
132
|
+
# * Program Parameters for formatdb and fastacmd (by Tao Tao)
|
133
|
+
# http://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/formatdb_fastacmd.html#t1.1
|
134
|
+
#
|
135
|
+
# * Formatdb README
|
136
|
+
# ftp://ftp.ncbi.nih.gov/blast/documents/formatdb.html
|
133
137
|
#
|
134
138
|
class FastaDefline
|
135
139
|
|
@@ -140,6 +144,7 @@ module Bio
|
|
140
144
|
'emb' => [ 'acc_version', 'locus' ], # EMBL
|
141
145
|
'dbj' => [ 'acc_version', 'locus' ], # DDBJ
|
142
146
|
'sp' => [ 'accession', 'entry_id' ], # SWISS-PROT
|
147
|
+
'tr' => [ 'accession', 'entry_id' ], # TREMBL
|
143
148
|
'pdb' => [ 'entry_id', 'chain' ], # PDB
|
144
149
|
'bbs' => [ 'number' ], # GenInfo Backbone Id
|
145
150
|
'gnl' => [ 'database' , 'entry_id' ], # General database identifier
|
data/lib/bio/db/fasta/qual.rb
CHANGED
@@ -95,6 +95,30 @@ module Bio
|
|
95
95
|
data[n]
|
96
96
|
end
|
97
97
|
|
98
|
+
# Returns the data as a Bio::Sequence object.
|
99
|
+
# In the returned sequence object, the length of the sequence is zero,
|
100
|
+
# and the numeric data is stored to the Bio::Sequence#quality_scores
|
101
|
+
# attribute.
|
102
|
+
#
|
103
|
+
# Because the meaning of the numeric data is unclear,
|
104
|
+
# Bio::Sequence#quality_score_type is not set by default.
|
105
|
+
#
|
106
|
+
# Note: If you modify the returned Bio::Sequence object,
|
107
|
+
# the sequence or definition in this FastaNumericFormat object
|
108
|
+
# might also be changed (but not always)
|
109
|
+
# because of efficiency.
|
110
|
+
#
|
111
|
+
# ---
|
112
|
+
# *Arguments*:
|
113
|
+
# *Returns*:: (Bio::Sequence) sequence object
|
114
|
+
def to_biosequence
|
115
|
+
s = Bio::Sequence.adapter(self,
|
116
|
+
Bio::Sequence::Adapter::FastaNumericFormat)
|
117
|
+
s.seq = Bio::Sequence::Generic.new('')
|
118
|
+
s
|
119
|
+
end
|
120
|
+
alias to_seq to_biosequence
|
121
|
+
|
98
122
|
undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen
|
99
123
|
|
100
124
|
end #class FastaNumericFormat
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/fasta/qual_to_biosequence.rb - Bio::FastaNumericFormat to Bio::Sequence adapter module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2010
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'bio/sequence'
|
10
|
+
require 'bio/sequence/adapter'
|
11
|
+
require 'bio/db/fasta/fasta_to_biosequence'
|
12
|
+
|
13
|
+
# Internal use only. Normal users should not use this module.
|
14
|
+
#
|
15
|
+
# Bio::FastaNumericFormat to Bio::Sequence adapter module.
|
16
|
+
# It is internally used in Bio::FastaNumericFormat#to_biosequence.
|
17
|
+
#
|
18
|
+
module Bio::Sequence::Adapter::FastaNumericFormat
|
19
|
+
|
20
|
+
extend Bio::Sequence::Adapter
|
21
|
+
|
22
|
+
include Bio::Sequence::Adapter::FastaFormat
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def_biosequence_adapter :quality_scores, :data
|
27
|
+
|
28
|
+
end #module Bio::Sequence::Adapter::FastaNumericFormat
|
29
|
+
|
data/lib/bio/db/fastq.rb
CHANGED
@@ -640,6 +640,21 @@ class Fastq
|
|
640
640
|
Bio::Sequence.adapter(self, Bio::Sequence::Adapter::Fastq)
|
641
641
|
end
|
642
642
|
|
643
|
+
# Masks low quality sequence regions.
|
644
|
+
# For each sequence position, if the quality score is smaller than
|
645
|
+
# the threshold, the sequence in the position is replaced with
|
646
|
+
# <em>mask_char</em>.
|
647
|
+
#
|
648
|
+
# Note: This method does not take quality_score_type into account.
|
649
|
+
# ---
|
650
|
+
# *Arguments*:
|
651
|
+
# * (required) <em>threshold</em> : (Numeric) threshold
|
652
|
+
# * (optional) <em>mask_char</em> : (String) character used for masking
|
653
|
+
# *Returns*:: Bio::Sequence object
|
654
|
+
def mask(threshold, mask_char = 'n')
|
655
|
+
to_biosequence.mask_with_quality_score(threshold, mask_char)
|
656
|
+
end
|
657
|
+
|
643
658
|
end #class Fastq
|
644
659
|
|
645
660
|
end #module Bio
|
data/lib/bio/db/go.rb
CHANGED
@@ -293,8 +293,8 @@ class GO
|
|
293
293
|
|
294
294
|
# Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
|
295
295
|
def to_str
|
296
|
-
return [@db, @db_object_id, @db_object_symbol, @
|
297
|
-
@
|
296
|
+
return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
|
297
|
+
@db_reference.join("|"), @evidence, @with.join("|"), @aspect,
|
298
298
|
@db_object_name, @db_object_synonym.join("|"), @db_object_type,
|
299
299
|
@taxon, @date, @assigned_by].join("\t")
|
300
300
|
end
|
data/lib/bio/db/kegg/common.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# = bio/db/kegg/common.rb - Common methods for KEGG database classes
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C)
|
4
|
+
# Copyright:: Copyright (C) 2001-2007 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
|
6
6
|
# Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
|
7
7
|
# License:: The Ruby License
|
@@ -23,6 +23,69 @@ class KEGG
|
|
23
23
|
# Namespace for methods commonly used in the Bio::KEGG::* classes.
|
24
24
|
module Common
|
25
25
|
|
26
|
+
# This module provides the references method.
|
27
|
+
module References
|
28
|
+
# REFERENCE -- Returns contents of the REFERENCE records as an Array of
|
29
|
+
# Bio::Reference objects.
|
30
|
+
def references
|
31
|
+
unless @data['REFERENCE']
|
32
|
+
ary = []
|
33
|
+
toptag2array(get('REFERENCE')).each do |ref|
|
34
|
+
hash = Hash.new
|
35
|
+
subtag2array(ref).each do |field|
|
36
|
+
case tag_get(field)
|
37
|
+
when /REFERENCE/
|
38
|
+
cmnt = tag_cut(field).chomp
|
39
|
+
if /^\s*PMID\:(\d+)\s*/ =~ cmnt then
|
40
|
+
hash['pubmed'] = $1
|
41
|
+
cmnt = $'
|
42
|
+
end
|
43
|
+
if cmnt and !cmnt.empty? then
|
44
|
+
hash['comments'] ||= []
|
45
|
+
hash['comments'].push(cmnt)
|
46
|
+
end
|
47
|
+
when /AUTHORS/
|
48
|
+
authors = truncate(tag_cut(field))
|
49
|
+
authors = authors.split(/\, /)
|
50
|
+
authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
|
51
|
+
authors = authors.flatten.map { |a| a.sub(',', ', ') }
|
52
|
+
hash['authors'] = authors
|
53
|
+
when /TITLE/
|
54
|
+
hash['title'] = truncate(tag_cut(field))
|
55
|
+
when /JOURNAL/
|
56
|
+
journal = truncate(tag_cut(field))
|
57
|
+
case journal
|
58
|
+
# KEGG style
|
59
|
+
when /(.*) (\d*(?:\([^\)]+\))?)\:(\d+\-\d+) \((\d+)\)$/
|
60
|
+
hash['journal'] = $1
|
61
|
+
hash['volume'] = $2
|
62
|
+
hash['pages'] = $3
|
63
|
+
hash['year'] = $4
|
64
|
+
# old KEGG style
|
65
|
+
when /(.*) (\d+):(\d+\-\d+) \((\d+)\) \[UI:(\d+)\]$/
|
66
|
+
hash['journal'] = $1
|
67
|
+
hash['volume'] = $2
|
68
|
+
hash['pages'] = $3
|
69
|
+
hash['year'] = $4
|
70
|
+
hash['medline'] = $5
|
71
|
+
# Only journal name and year are available
|
72
|
+
when /(.*) \((\d+)\)$/
|
73
|
+
hash['journal'] = $1
|
74
|
+
hash['year'] = $2
|
75
|
+
else
|
76
|
+
hash['journal'] = journal
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
ary.push(Reference.new(hash))
|
81
|
+
end
|
82
|
+
@data['REFERENCE'] = ary #.extend(Bio::References::BackwardCompatibility)
|
83
|
+
|
84
|
+
end
|
85
|
+
@data['REFERENCE']
|
86
|
+
end
|
87
|
+
end #module References
|
88
|
+
|
26
89
|
# The module providing dblinks_as_hash methods.
|
27
90
|
#
|
28
91
|
# Bio::KEGG::* internal use only.
|
@@ -54,7 +117,8 @@ class KEGG
|
|
54
117
|
unless defined? @pathways_as_hash then
|
55
118
|
hash = {}
|
56
119
|
pathways_as_strings.each do |line|
|
57
|
-
|
120
|
+
line = line.sub(/\APATH\:\s+/, '')
|
121
|
+
entry_id, name = line.split(/\s+/, 2)
|
58
122
|
hash[entry_id] = name
|
59
123
|
end
|
60
124
|
@pathways_as_hash = hash
|
@@ -72,9 +136,9 @@ class KEGG
|
|
72
136
|
def orthologs_as_hash
|
73
137
|
unless defined? @orthologs_as_hash
|
74
138
|
kos = {}
|
75
|
-
orthologs_as_strings.each do |
|
76
|
-
|
77
|
-
|
139
|
+
orthologs_as_strings.each do |line|
|
140
|
+
ko = line.sub(/\AKO\:\s+/, '')
|
141
|
+
entry_id, definition = ko.split(/\s+/, 2)
|
78
142
|
kos[entry_id] = definition
|
79
143
|
end
|
80
144
|
@orthologs_as_hash = kos
|
@@ -106,6 +170,46 @@ class KEGG
|
|
106
170
|
end
|
107
171
|
end #module GenesAsHash
|
108
172
|
|
173
|
+
# This module provides the modules_as_hash method.
|
174
|
+
#
|
175
|
+
# Bio::KEGG::* internal use only.
|
176
|
+
module ModulesAsHash
|
177
|
+
# Returns MODULE field as a Hash.
|
178
|
+
# Each key of the hash is KEGG MODULE ID,
|
179
|
+
# and each value is the name of the Pathway Module.
|
180
|
+
# ---
|
181
|
+
# *Returns*:: Hash
|
182
|
+
def modules_as_hash
|
183
|
+
unless defined? @modules_s_as_hash then
|
184
|
+
hash = {}
|
185
|
+
modules_as_strings.each do |line|
|
186
|
+
entry_id, name = line.split(/\s+/, 2)
|
187
|
+
hash[entry_id] = name
|
188
|
+
end
|
189
|
+
@modules_as_hash = hash
|
190
|
+
end
|
191
|
+
@modules_as_hash
|
192
|
+
end
|
193
|
+
end #module ModulesAsHash
|
194
|
+
|
195
|
+
# This module provides the private strings_as_hash method.
|
196
|
+
#
|
197
|
+
# Bio::KEGG::* internal use only.
|
198
|
+
module StringsAsHash
|
199
|
+
# (Private) Creates a hash from lines.
|
200
|
+
# Each line consists of two components, ID and description,
|
201
|
+
# separated by spaces. IDs must be unique.
|
202
|
+
def strings_as_hash(lines)
|
203
|
+
hash = {}
|
204
|
+
lines.each do |line|
|
205
|
+
entry_id, definition = line.split(/\s+/, 2)
|
206
|
+
hash[entry_id] = definition
|
207
|
+
end
|
208
|
+
return hash
|
209
|
+
end
|
210
|
+
private :strings_as_hash
|
211
|
+
end #module StringsAsHash
|
212
|
+
|
109
213
|
end #module Common
|
110
214
|
end #class KEGG
|
111
215
|
end #module Bio
|