bio 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +1712 -0
- data/KNOWN_ISSUES.rdoc +11 -1
- data/README.rdoc +3 -2
- data/RELEASE_NOTES.rdoc +65 -127
- data/bioruby.gemspec +38 -2
- data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
- data/doc/Tutorial.rd +74 -16
- data/doc/Tutorial.rd.html +68 -16
- data/lib/bio.rb +2 -0
- data/lib/bio/appl/clustalw/report.rb +18 -0
- data/lib/bio/appl/paml/codeml/report.rb +579 -21
- data/lib/bio/command.rb +149 -21
- data/lib/bio/db/aaindex.rb +11 -1
- data/lib/bio/db/embl/sptr.rb +1 -1
- data/lib/bio/db/fasta/defline.rb +7 -2
- data/lib/bio/db/fasta/qual.rb +24 -0
- data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
- data/lib/bio/db/fastq.rb +15 -0
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/db/kegg/common.rb +109 -5
- data/lib/bio/db/kegg/genes.rb +61 -15
- data/lib/bio/db/kegg/genome.rb +43 -38
- data/lib/bio/db/kegg/module.rb +158 -0
- data/lib/bio/db/kegg/orthology.rb +40 -1
- data/lib/bio/db/kegg/pathway.rb +254 -0
- data/lib/bio/db/medline.rb +6 -2
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/location.rb +39 -0
- data/lib/bio/reference.rb +24 -0
- data/lib/bio/sequence.rb +2 -0
- data/lib/bio/sequence/adapter.rb +1 -0
- data/lib/bio/sequence/format.rb +14 -0
- data/lib/bio/sequence/sequence_masker.rb +95 -0
- data/lib/bio/tree.rb +4 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
- data/lib/bio/version.rb +1 -1
- data/setup.rb +5 -0
- data/test/data/KEGG/K02338.orthology +180 -52
- data/test/data/KEGG/M00118.module +44 -0
- data/test/data/KEGG/T00005.genome +140 -0
- data/test/data/KEGG/T00070.genome +34 -0
- data/test/data/KEGG/b0529.gene +47 -0
- data/test/data/KEGG/ec00072.pathway +23 -0
- data/test/data/KEGG/hsa00790.pathway +59 -0
- data/test/data/KEGG/ko00312.pathway +16 -0
- data/test/data/KEGG/map00030.pathway +37 -0
- data/test/data/KEGG/map00052.pathway +13 -0
- data/test/data/KEGG/rn00250.pathway +114 -0
- data/test/data/clustalw/example1.aln +58 -0
- data/test/data/go/selected_component.ontology +12 -0
- data/test/data/go/selected_gene_association.sgd +31 -0
- data/test/data/go/selected_wikipedia2go +13 -0
- data/test/data/medline/20146148_modified.medline +54 -0
- data/test/data/paml/codeml/models/aa.aln +26 -0
- data/test/data/paml/codeml/models/aa.dnd +13 -0
- data/test/data/paml/codeml/models/aa.ph +13 -0
- data/test/data/paml/codeml/models/alignment.phy +49 -0
- data/test/data/paml/codeml/models/results0-3.txt +312 -0
- data/test/data/paml/codeml/models/results7-8.txt +340 -0
- data/test/functional/bio/io/test_togows.rb +8 -8
- data/test/functional/bio/test_command.rb +7 -6
- data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
- data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
- data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1 -1
- data/test/unit/bio/db/fasta/test_defline.rb +160 -0
- data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
- data/test/unit/bio/db/kegg/test_genes.rb +281 -1
- data/test/unit/bio/db/kegg/test_genome.rb +408 -0
- data/test/unit/bio/db/kegg/test_module.rb +246 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
- data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
- data/test/unit/bio/db/test_aaindex.rb +8 -7
- data/test/unit/bio/db/test_fastq.rb +36 -0
- data/test/unit/bio/db/test_go.rb +171 -0
- data/test/unit/bio/db/test_medline.rb +148 -0
- data/test/unit/bio/db/test_qual.rb +9 -2
- data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
- data/test/unit/bio/test_tree.rb +260 -1
- data/test/unit/bio/util/test_contingency_table.rb +7 -7
- metadata +53 -6
data/lib/bio/command.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# = bio/command.rb - general methods for external command execution
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2003-
|
4
|
+
# Copyright:: Copyright (C) 2003-2010
|
5
5
|
# Naohisa Goto <ng@bioruby.org>,
|
6
6
|
# Toshiaki Katayama <k@bioruby.org>
|
7
7
|
# License:: The Ruby License
|
@@ -34,6 +34,59 @@ module Command
|
|
34
34
|
|
35
35
|
module_function
|
36
36
|
|
37
|
+
# *CAUTION* Bio::Command INTERNAL USE ONLY.
|
38
|
+
# Users must NOT use the method.
|
39
|
+
# The method will be removed when it is not needed.
|
40
|
+
#
|
41
|
+
# Checks if the program is running on Microsoft Windows.
|
42
|
+
# If Windows, returns true. Otherwise, returns false.
|
43
|
+
# Note that Cygwin is not treated as Windows.
|
44
|
+
#
|
45
|
+
# Known issues:
|
46
|
+
# * It might make a mistake in minor platforms/architectures/interpreters.
|
47
|
+
# * When running JRuby on Cygwin, the result is unknown.
|
48
|
+
# ---
|
49
|
+
# *Returns*:: true or false
|
50
|
+
def windows_platform?
  # Native Windows builds of the interpreter (mswin, bccwin, mingw).
  return true if /(?:mswin|bccwin|mingw)(?:32|64)/i =~ RUBY_PLATFORM

  # Every platform other than the Java one is reported as non-Windows.
  return false unless /java/i =~ RUBY_PLATFORM

  # On the Java platform RUBY_PLATFORM does not name the host OS, so the
  # OS environment variable is inspected instead.
  # Reference: Redmine's platform.rb
  # http://www.redmine.org/projects/redmine/repository/revisions/1753/entry/trunk/lib/redmine/platform.rb
  os_name = (ENV['OS'] || ENV['os']).to_s
  /windows/i =~ os_name ? true : false
end
|
66
|
+
private_class_method :windows_platform?
|
67
|
+
|
68
|
+
# *CAUTION* Bio::Command INTERNAL USE ONLY.
|
69
|
+
# Users must NOT use the method.
|
70
|
+
# The method will be removed when it is not needed.
|
71
|
+
#
|
72
|
+
# Checks if the OS does not support fork(2) system call.
|
73
|
+
# When not supported, it returns true.
|
74
|
+
# When supported or unknown, it returns false or nil.
|
75
|
+
#
|
76
|
+
# Known issues:
|
77
|
+
# * It might make a mistake in minor platforms/architectures/interpreters.
|
78
|
+
# ---
|
79
|
+
# *Returns*:: true, false or nil.
|
80
|
+
def no_fork?
  # A previous failure recorded in @@no_fork (if the variable exists).
  return true if defined?(@@no_fork) && @@no_fork
  # Windows is treated as having no fork(2).
  return true if windows_platform?
  # The Java platform is treated as having no fork(2) either.
  return true if /java/i =~ RUBY_PLATFORM
  false
end
|
88
|
+
private_class_method :no_fork?
|
89
|
+
|
37
90
|
# Escape special characters in command line string for cmd.exe on Windows.
|
38
91
|
# ---
|
39
92
|
# *Arguments*:
|
@@ -66,8 +119,7 @@ module Command
|
|
66
119
|
# * (required) _str_: String
|
67
120
|
# *Returns*:: String object
|
68
121
|
def escape_shell(str)
|
69
|
-
|
70
|
-
when /mswin32|bccwin32/
|
122
|
+
if windows_platform? then
|
71
123
|
escape_shell_windows(str)
|
72
124
|
else
|
73
125
|
escape_shell_unix(str)
|
@@ -80,8 +132,7 @@ module Command
|
|
80
132
|
# * (required) _ary_: Array containing String objects
|
81
133
|
# *Returns*:: String object
|
82
134
|
def make_command_line(ary)
|
83
|
-
|
84
|
-
when /mswin32|bccwin32/
|
135
|
+
if windows_platform? then
|
85
136
|
make_command_line_windows(ary)
|
86
137
|
else
|
87
138
|
make_command_line_unix(ary)
|
@@ -130,8 +181,8 @@ module Command
|
|
130
181
|
[ arg0 ]
|
131
182
|
end
|
132
183
|
|
133
|
-
# Executes the program.
|
134
|
-
# environment and fork for the others.
|
184
|
+
# Executes the program. Automatically select popen for Ruby 1.9 or
|
185
|
+
# Windows environment and fork for the others.
|
135
186
|
# A block must be given. An IO object is passed to the block.
|
136
187
|
#
|
137
188
|
# Available options:
|
@@ -143,27 +194,62 @@ module Command
|
|
143
194
|
# * (optional) _options_: Hash
|
144
195
|
# *Returns*:: (undefined)
|
145
196
|
def call_command(cmd, options = {}, &block) #:yields: io
  # IO.popen is used on Ruby 1.9 or later and whenever no_fork? says
  # fork is unavailable; no_fork? is only consulted on older Rubies.
  if RUBY_VERSION >= "1.9.0" || no_fork?
    return call_command_popen(cmd, options, &block)
  end

  begin
    call_command_fork(cmd, options, &block)
  rescue NotImplementedError
    # fork(2) not implemented: record it in @@no_fork and fall back
    # to the popen implementation.
    @@no_fork = true
    call_command_popen(cmd, options, &block)
  end
end
|
153
211
|
|
212
|
+
# This method is internally called from the call_command method.
|
213
|
+
# In normal case, use call_command, and do not call this method directly.
|
214
|
+
#
|
154
215
|
# Executes the program via IO.popen for OS which doesn't support fork.
|
155
216
|
# A block must be given. An IO object is passed to the block.
|
217
|
+
#
|
218
|
+
# See the document of call_command for available options.
|
219
|
+
#
|
220
|
+
# Note for Ruby 1.8:
|
221
|
+
# In Ruby 1.8, shell unsafe characters are escaped.
|
222
|
+
# If inescapable characters exist, it raises RuntimeError.
|
223
|
+
# So, call_command_fork is normally recommended.
|
224
|
+
#
|
225
|
+
# Note for Ruby 1.9:
|
226
|
+
# In Ruby 1.9, call_command_popen is safe and robust enough, and is the
|
227
|
+
# recommended way, because IO.popen is improved to get a command-line
|
228
|
+
# as an array without calling shell.
|
229
|
+
#
|
156
230
|
# ---
|
157
231
|
# *Arguments*:
|
158
232
|
# * (required) _cmd_: Array containing String objects
|
159
233
|
# * (optional) _options_: Hash
|
160
234
|
# *Returns*:: (undefined)
|
161
235
|
def call_command_popen(cmd, options = {})
|
236
|
+
if RUBY_VERSION >= "1.9.0" then
|
237
|
+
# For Ruby 1.9 or later, using command line array with options.
|
238
|
+
dir = options[:chdir]
|
239
|
+
cmd = safe_command_line_array(cmd)
|
240
|
+
if dir then
|
241
|
+
cmd = cmd + [ { :chdir => dir } ]
|
242
|
+
end
|
243
|
+
r = IO.popen(cmd, "r+") do |io|
|
244
|
+
yield io
|
245
|
+
end
|
246
|
+
return r
|
247
|
+
end
|
248
|
+
# For Ruby 1.8, using command line string.
|
162
249
|
str = make_command_line(cmd)
|
163
250
|
# processing options
|
164
251
|
if dir = options[:chdir] then
|
165
|
-
|
166
|
-
when /mswin32|bccwin32/
|
252
|
+
if windows_platform?
|
167
253
|
# Unix-like dir separator is changed to Windows dir separator
|
168
254
|
# by using String#gsub.
|
169
255
|
dirstr = dir.gsub(/\//, "\\")
|
@@ -182,11 +268,24 @@ module Command
|
|
182
268
|
end
|
183
269
|
end
|
184
270
|
|
271
|
+
# This method is internally called from the call_command method.
|
272
|
+
# In normal case, use call_command, and do not call this method directly.
|
273
|
+
#
|
185
274
|
# Executes the program via fork (by using IO.popen("-")) and exec.
|
186
275
|
# A block must be given. An IO object is passed to the block.
|
187
276
|
#
|
188
|
-
#
|
189
|
-
#
|
277
|
+
# See the document of call_command for available options.
|
278
|
+
#
|
279
|
+
# Note for Ruby 1.8:
|
280
|
+
# In Ruby 1.8, from the view point of security, this method is recommended
|
281
|
+
# rather than call_command_popen. However, this method might have problems
|
282
|
+
# with multi-threads.
|
283
|
+
#
|
284
|
+
# Note for Ruby 1.9:
|
285
|
+
# In Ruby 1.9, this method can not be used, because Thread.critical is
|
286
|
+
# removed. In Ruby 1.9, call_command_popen is safe and robust enough, and
|
287
|
+
# is the recommended way, because IO.popen is improved to get a
|
288
|
+
# command-line as an array without calling shell.
|
190
289
|
#
|
191
290
|
# ---
|
192
291
|
# *Arguments*:
|
@@ -196,12 +295,17 @@ module Command
|
|
196
295
|
def call_command_fork(cmd, options = {})
|
197
296
|
dir = options[:chdir]
|
198
297
|
cmd = safe_command_line_array(cmd)
|
298
|
+
begin
|
299
|
+
tc, Thread.critical, flag0, flag1 = Thread.critical, true, true, true
|
199
300
|
IO.popen("-", "r+") do |io|
|
200
301
|
if io then
|
201
302
|
# parent
|
303
|
+
flag0, Thread.critical, flag1 = false, tc, false
|
202
304
|
yield io
|
203
305
|
else
|
204
306
|
# child
|
307
|
+
Thread.critical = true # for safety, though already true
|
308
|
+
GC.disable
|
205
309
|
# chdir to options[:chdir] if available
|
206
310
|
begin
|
207
311
|
Dir.chdir(dir) if dir
|
@@ -218,6 +322,11 @@ module Command
|
|
218
322
|
Process.exit!(1)
|
219
323
|
end
|
220
324
|
end
|
325
|
+
ensure
|
326
|
+
# When IO.popen("-") raises error, Thread.critical will be set here.
|
327
|
+
Thread.critical = tc if flag0 or flag1
|
328
|
+
#warn 'Thread.critical might have wrong value.' if flag0 != flag1
|
329
|
+
end
|
221
330
|
end
|
222
331
|
|
223
332
|
# Executes the program via Open3.popen3
|
@@ -240,7 +349,8 @@ module Command
|
|
240
349
|
# waits the program termination, and returns the output data printed to the
|
241
350
|
# standard output as a string.
|
242
351
|
#
|
243
|
-
# Automatically select popen for Windows environment and
|
352
|
+
# Automatically select popen for Ruby 1.9 or Windows environment and
|
353
|
+
# fork for the others.
|
244
354
|
#
|
245
355
|
# Available options:
|
246
356
|
# :chdir => "path" : changes working directory to the specified path.
|
@@ -252,19 +362,32 @@ module Command
|
|
252
362
|
# * (optional) _options_: Hash
|
253
363
|
# *Returns*:: String or nil
|
254
364
|
def query_command(cmd, query = nil, options = {})
  # Ruby 1.9 or later always goes through the popen implementation.
  if RUBY_VERSION >= "1.9.0" then
    return query_command_popen(cmd, query, options)
  elsif no_fork? then
    # Platforms already known to lack fork(2).
    query_command_popen(cmd, query, options)
  else
    begin
      query_command_fork(cmd, query, options)
    rescue NotImplementedError
      # fork(2) not implemented: record it in @@no_fork and fall back to
      # the popen implementation, as call_command does. Calling the fork
      # variant again here would only raise the same error once more.
      @@no_fork = true
      query_command_popen(cmd, query, options)
    end
  end
end
|
262
379
|
|
380
|
+
# This method is internally called from the query_command method.
|
381
|
+
# In normal case, use query_command, and do not call this method directly.
|
382
|
+
#
|
263
383
|
# Executes the program with the query (String) given to the standard input,
|
264
384
|
# waits the program termination, and returns the output data printed to the
|
265
385
|
# standard output as a string.
|
266
386
|
#
|
267
|
-
#
|
387
|
+
# See the document of query_command for available options.
|
388
|
+
#
|
389
|
+
# See the document of call_command_popen for the security and Ruby
|
390
|
+
# version specific issues.
|
268
391
|
#
|
269
392
|
# ---
|
270
393
|
# *Arguments*:
|
@@ -283,14 +406,19 @@ module Command
|
|
283
406
|
ret
|
284
407
|
end
|
285
408
|
|
409
|
+
# This method is internally called from the query_command method.
|
410
|
+
# In normal case, use query_command, and do not call this method directly.
|
411
|
+
#
|
286
412
|
# Executes the program with the query (String) given to the standard input,
|
287
413
|
# waits the program termination, and returns the output data printed to the
|
288
414
|
# standard output as a string.
|
289
415
|
#
|
290
416
|
# Fork (by using IO.popen("-")) and exec is used to execute the program.
|
291
417
|
#
|
292
|
-
#
|
293
|
-
#
|
418
|
+
# See the document of query_command for available options.
|
419
|
+
#
|
420
|
+
# See the document of call_command_fork for the security and Ruby
|
421
|
+
# version specific issues.
|
294
422
|
#
|
295
423
|
# ---
|
296
424
|
# *Arguments*:
|
data/lib/bio/db/aaindex.rb
CHANGED
@@ -254,7 +254,17 @@ module Bio
|
|
254
254
|
label_data.each_line do |line|
|
255
255
|
ma << line.strip.split(/\s+/).map {|x| x.to_f }
|
256
256
|
end
|
257
|
-
|
257
|
+
ma_len = ma.size
|
258
|
+
ma.each do |row|
|
259
|
+
row_size = row.size
|
260
|
+
if row_size < ma_len
|
261
|
+
(row_size..ma_len-1).each do |i|
|
262
|
+
row[i] = ma[i][row_size-1]
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|
266
|
+
mat = Matrix[*ma]
|
267
|
+
@data['matrix'] = mat
|
258
268
|
end
|
259
269
|
end
|
260
270
|
|
data/lib/bio/db/embl/sptr.rb
CHANGED
data/lib/bio/db/fasta/defline.rb
CHANGED
@@ -126,10 +126,14 @@ module Bio
|
|
126
126
|
# http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
|
127
127
|
#
|
128
128
|
# * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
|
129
|
+
# (Dead link. Please find in http://web.archive.org/ ).
|
129
130
|
# http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
130
131
|
#
|
131
|
-
# *
|
132
|
-
#
|
132
|
+
# * Program Parameters for formatdb and fastacmd (by Tao Tao)
|
133
|
+
# http://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/formatdb_fastacmd.html#t1.1
|
134
|
+
#
|
135
|
+
# * Formatdb README
|
136
|
+
# ftp://ftp.ncbi.nih.gov/blast/documents/formatdb.html
|
133
137
|
#
|
134
138
|
class FastaDefline
|
135
139
|
|
@@ -140,6 +144,7 @@ module Bio
|
|
140
144
|
'emb' => [ 'acc_version', 'locus' ], # EMBL
|
141
145
|
'dbj' => [ 'acc_version', 'locus' ], # DDBJ
|
142
146
|
'sp' => [ 'accession', 'entry_id' ], # SWISS-PROT
|
147
|
+
'tr' => [ 'accession', 'entry_id' ], # TREMBL
|
143
148
|
'pdb' => [ 'entry_id', 'chain' ], # PDB
|
144
149
|
'bbs' => [ 'number' ], # GenInfo Backbone Id
|
145
150
|
'gnl' => [ 'database' , 'entry_id' ], # General database identifier
|
data/lib/bio/db/fasta/qual.rb
CHANGED
@@ -95,6 +95,30 @@ module Bio
|
|
95
95
|
data[n]
|
96
96
|
end
|
97
97
|
|
98
|
+
# Returns the data as a Bio::Sequence object.
|
99
|
+
# In the returned sequence object, the length of the sequence is zero,
|
100
|
+
# and the numeric data is stored to the Bio::Sequence#quality_scores
|
101
|
+
# attribute.
|
102
|
+
#
|
103
|
+
# Because the meaning of the numeric data is unclear,
|
104
|
+
# Bio::Sequence#quality_score_type is not set by default.
|
105
|
+
#
|
106
|
+
# Note: If you modify the returned Bio::Sequence object,
|
107
|
+
# the sequence or definition in this FastaNumericFormat object
|
108
|
+
# might also be changed (but not always be changed)
|
109
|
+
# because of efficiency.
|
110
|
+
#
|
111
|
+
# ---
|
112
|
+
# *Arguments*:
|
113
|
+
# *Returns*:: (Bio::Sequence) sequence object
|
114
|
+
def to_biosequence
  adapter_module = Bio::Sequence::Adapter::FastaNumericFormat
  bioseq = Bio::Sequence.adapter(self, adapter_module)
  # The sequence itself is replaced with an empty generic sequence.
  bioseq.seq = Bio::Sequence::Generic.new('')
  bioseq
end
|
120
|
+
alias to_seq to_biosequence
|
121
|
+
|
98
122
|
undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen
|
99
123
|
|
100
124
|
end #class FastaNumericFormat
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/fasta/qual_to_biosequence.rb - Bio::FastaNumericFormat to Bio::Sequence adapter module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2010
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'bio/sequence'
|
10
|
+
require 'bio/sequence/adapter'
|
11
|
+
require 'bio/db/fasta/fasta_to_biosequence'
|
12
|
+
|
13
|
+
# Internal use only. Normal users should not use this module.
|
14
|
+
#
|
15
|
+
# Bio::FastaNumericFormat to Bio::Sequence adapter module.
|
16
|
+
# It is internally used in Bio::FastaNumericFormat#to_biosequence.
|
17
|
+
#
|
18
|
+
module Bio::Sequence::Adapter::FastaNumericFormat
|
19
|
+
|
20
|
+
extend Bio::Sequence::Adapter
|
21
|
+
|
22
|
+
include Bio::Sequence::Adapter::FastaFormat
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def_biosequence_adapter :quality_scores, :data
|
27
|
+
|
28
|
+
end #module Bio::Sequence::Adapter::FastaNumericFormat
|
29
|
+
|
data/lib/bio/db/fastq.rb
CHANGED
@@ -640,6 +640,21 @@ class Fastq
|
|
640
640
|
Bio::Sequence.adapter(self, Bio::Sequence::Adapter::Fastq)
|
641
641
|
end
|
642
642
|
|
643
|
+
# Masks low quality sequence regions.
|
644
|
+
# For each sequence position, if the quality score is smaller than
|
645
|
+
# the threshold, the sequence in the position is replaced with
|
646
|
+
# <em>mask_char</em>.
|
647
|
+
#
|
648
|
+
# Note: This method does not care about quality_score_type.
|
649
|
+
# ---
|
650
|
+
# *Arguments*:
|
651
|
+
# * (required) <em>threshold</em> : (Numeric) threshold
|
652
|
+
# * (optional) <em>mask_char</em> : (String) character used for masking
|
653
|
+
# *Returns*:: Bio::Sequence object
|
654
|
+
def mask(threshold, mask_char = 'n')
  # Convert first, then let the sequence object do the masking.
  bioseq = to_biosequence
  bioseq.mask_with_quality_score(threshold, mask_char)
end
|
657
|
+
|
643
658
|
end #class Fastq
|
644
659
|
|
645
660
|
end #module Bio
|
data/lib/bio/db/go.rb
CHANGED
@@ -293,8 +293,8 @@ class GO
|
|
293
293
|
|
294
294
|
# Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
|
295
295
|
def to_str
  # Columns in output order; list-valued columns are joined with "|".
  columns = [
    @db, @db_object_id, @db_object_symbol, @qualifier, @goid,
    @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
    @db_object_name, @db_object_synonym.join("|"), @db_object_type,
    @taxon, @date, @assigned_by
  ]
  columns.join("\t")
end
|
data/lib/bio/db/kegg/common.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# = bio/db/kegg/common.rb - Common methods for KEGG database classes
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C)
|
4
|
+
# Copyright:: Copyright (C) 2001-2007 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
|
6
6
|
# Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
|
7
7
|
# License:: The Ruby License
|
@@ -23,6 +23,69 @@ class KEGG
|
|
23
23
|
# Namespace for methods commonly used in the Bio::KEGG::* classes.
|
24
24
|
module Common
|
25
25
|
|
26
|
+
# The module provides references method.
|
27
|
+
module References
  # REFERENCE -- Returns contents of the REFERENCE records as an Array of
  # Bio::Reference objects.
  #
  # The parsed Array is stored in @data['REFERENCE'] and returned as-is
  # by later calls.
  def references
    unless @data['REFERENCE']
      parsed = []
      toptag2array(get('REFERENCE')).each do |record|
        attrs = Hash.new
        subtag2array(record).each do |field|
          case tag_get(field)
          when /REFERENCE/
            note = tag_cut(field).chomp
            # A leading "PMID:<digits>" is taken as the PubMed ID; whatever
            # follows it is kept as a comment.
            if /^\s*PMID\:(\d+)\s*/ =~ note then
              attrs['pubmed'] = Regexp.last_match(1)
              note = Regexp.last_match.post_match
            end
            if note and !note.empty? then
              (attrs['comments'] ||= []).push(note)
            end
          when /AUTHORS/
            names = truncate(tag_cut(field)).split(/\, /)
            # The last item may hold two names joined by "and".
            names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
            attrs['authors'] = names.flatten.map { |name| name.sub(',', ', ') }
          when /TITLE/
            attrs['title'] = truncate(tag_cut(field))
          when /JOURNAL/
            citation = truncate(tag_cut(field))
            case citation
            # KEGG style
            when /(.*) (\d*(?:\([^\)]+\))?)\:(\d+\-\d+) \((\d+)\)$/
              m = Regexp.last_match
              attrs['journal'] = m[1]
              attrs['volume']  = m[2]
              attrs['pages']   = m[3]
              attrs['year']    = m[4]
            # old KEGG style
            when /(.*) (\d+):(\d+\-\d+) \((\d+)\) \[UI:(\d+)\]$/
              m = Regexp.last_match
              attrs['journal'] = m[1]
              attrs['volume']  = m[2]
              attrs['pages']   = m[3]
              attrs['year']    = m[4]
              attrs['medline'] = m[5]
            # Only journal name and year are available
            when /(.*) \((\d+)\)$/
              m = Regexp.last_match
              attrs['journal'] = m[1]
              attrs['year']    = m[2]
            else
              attrs['journal'] = citation
            end
          end
        end
        parsed.push(Reference.new(attrs))
      end
      @data['REFERENCE'] = parsed #.extend(Bio::References::BackwardCompatibility)
    end
    @data['REFERENCE']
  end
end #module References
|
88
|
+
|
26
89
|
# The module providing dblinks_as_hash methods.
|
27
90
|
#
|
28
91
|
# Bio::KEGG::* internal use only.
|
@@ -54,7 +117,8 @@ class KEGG
|
|
54
117
|
unless defined? @pathways_as_hash then
|
55
118
|
hash = {}
|
56
119
|
pathways_as_strings.each do |line|
|
57
|
-
|
120
|
+
line = line.sub(/\APATH\:\s+/, '')
|
121
|
+
entry_id, name = line.split(/\s+/, 2)
|
58
122
|
hash[entry_id] = name
|
59
123
|
end
|
60
124
|
@pathways_as_hash = hash
|
@@ -72,9 +136,9 @@ class KEGG
|
|
72
136
|
def orthologs_as_hash
|
73
137
|
unless defined? @orthologs_as_hash
|
74
138
|
kos = {}
|
75
|
-
orthologs_as_strings.each do |
|
76
|
-
|
77
|
-
|
139
|
+
orthologs_as_strings.each do |line|
|
140
|
+
ko = line.sub(/\AKO\:\s+/, '')
|
141
|
+
entry_id, definition = ko.split(/\s+/, 2)
|
78
142
|
kos[entry_id] = definition
|
79
143
|
end
|
80
144
|
@orthologs_as_hash = kos
|
@@ -106,6 +170,46 @@ class KEGG
|
|
106
170
|
end
|
107
171
|
end #module GenesAsHash
|
108
172
|
|
173
|
+
# This module provides modules_as_hash method.
|
174
|
+
#
|
175
|
+
# Bio::KEGG::* internal use only.
|
176
|
+
module ModulesAsHash
  # Returns MODULE field as a Hash.
  # Each key of the hash is KEGG MODULE ID,
  # and each value is the name of the Pathway Module.
  #
  # The hash is built from modules_as_strings on the first call and kept
  # in @modules_as_hash, so later calls return the same Hash object.
  # ---
  # *Returns*:: Hash
  def modules_as_hash
    # The guard has to test the same instance variable that is assigned
    # below; otherwise the hash would be rebuilt on every call.
    unless defined? @modules_as_hash then
      hash = {}
      modules_as_strings.each do |line|
        # Split at the first run of whitespace: ID first, the rest is the name.
        entry_id, name = line.split(/\s+/, 2)
        hash[entry_id] = name
      end
      @modules_as_hash = hash
    end
    @modules_as_hash
  end
end #module ModulesAsHash
|
194
|
+
|
195
|
+
# This module provides strings_as_hash private method.
|
196
|
+
#
|
197
|
+
# Bio::KEGG::* internal use only.
|
198
|
+
module StringsAsHash
  # (Private) Creates a hash from lines.
  # Each line consists of two components, ID and description,
  # separated with spaces. IDs must be unique with each other.
  def strings_as_hash(lines)
    table = {}
    lines.each do |text|
      # Split at the first run of whitespace only.
      pair = text.split(/\s+/, 2)
      table[pair[0]] = pair[1]
    end
    table
  end
  private :strings_as_hash
end #module StringsAsHash
|
212
|
+
|
109
213
|
end #module Common
|
110
214
|
end #class KEGG
|
111
215
|
end #module Bio
|