bio 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +1712 -0
- data/KNOWN_ISSUES.rdoc +11 -1
- data/README.rdoc +3 -2
- data/RELEASE_NOTES.rdoc +65 -127
- data/bioruby.gemspec +38 -2
- data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
- data/doc/Tutorial.rd +74 -16
- data/doc/Tutorial.rd.html +68 -16
- data/lib/bio.rb +2 -0
- data/lib/bio/appl/clustalw/report.rb +18 -0
- data/lib/bio/appl/paml/codeml/report.rb +579 -21
- data/lib/bio/command.rb +149 -21
- data/lib/bio/db/aaindex.rb +11 -1
- data/lib/bio/db/embl/sptr.rb +1 -1
- data/lib/bio/db/fasta/defline.rb +7 -2
- data/lib/bio/db/fasta/qual.rb +24 -0
- data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
- data/lib/bio/db/fastq.rb +15 -0
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/db/kegg/common.rb +109 -5
- data/lib/bio/db/kegg/genes.rb +61 -15
- data/lib/bio/db/kegg/genome.rb +43 -38
- data/lib/bio/db/kegg/module.rb +158 -0
- data/lib/bio/db/kegg/orthology.rb +40 -1
- data/lib/bio/db/kegg/pathway.rb +254 -0
- data/lib/bio/db/medline.rb +6 -2
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/location.rb +39 -0
- data/lib/bio/reference.rb +24 -0
- data/lib/bio/sequence.rb +2 -0
- data/lib/bio/sequence/adapter.rb +1 -0
- data/lib/bio/sequence/format.rb +14 -0
- data/lib/bio/sequence/sequence_masker.rb +95 -0
- data/lib/bio/tree.rb +4 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
- data/lib/bio/version.rb +1 -1
- data/setup.rb +5 -0
- data/test/data/KEGG/K02338.orthology +180 -52
- data/test/data/KEGG/M00118.module +44 -0
- data/test/data/KEGG/T00005.genome +140 -0
- data/test/data/KEGG/T00070.genome +34 -0
- data/test/data/KEGG/b0529.gene +47 -0
- data/test/data/KEGG/ec00072.pathway +23 -0
- data/test/data/KEGG/hsa00790.pathway +59 -0
- data/test/data/KEGG/ko00312.pathway +16 -0
- data/test/data/KEGG/map00030.pathway +37 -0
- data/test/data/KEGG/map00052.pathway +13 -0
- data/test/data/KEGG/rn00250.pathway +114 -0
- data/test/data/clustalw/example1.aln +58 -0
- data/test/data/go/selected_component.ontology +12 -0
- data/test/data/go/selected_gene_association.sgd +31 -0
- data/test/data/go/selected_wikipedia2go +13 -0
- data/test/data/medline/20146148_modified.medline +54 -0
- data/test/data/paml/codeml/models/aa.aln +26 -0
- data/test/data/paml/codeml/models/aa.dnd +13 -0
- data/test/data/paml/codeml/models/aa.ph +13 -0
- data/test/data/paml/codeml/models/alignment.phy +49 -0
- data/test/data/paml/codeml/models/results0-3.txt +312 -0
- data/test/data/paml/codeml/models/results7-8.txt +340 -0
- data/test/functional/bio/io/test_togows.rb +8 -8
- data/test/functional/bio/test_command.rb +7 -6
- data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
- data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
- data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1 -1
- data/test/unit/bio/db/fasta/test_defline.rb +160 -0
- data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
- data/test/unit/bio/db/kegg/test_genes.rb +281 -1
- data/test/unit/bio/db/kegg/test_genome.rb +408 -0
- data/test/unit/bio/db/kegg/test_module.rb +246 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
- data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
- data/test/unit/bio/db/test_aaindex.rb +8 -7
- data/test/unit/bio/db/test_fastq.rb +36 -0
- data/test/unit/bio/db/test_go.rb +171 -0
- data/test/unit/bio/db/test_medline.rb +148 -0
- data/test/unit/bio/db/test_qual.rb +9 -2
- data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
- data/test/unit/bio/test_tree.rb +260 -1
- data/test/unit/bio/util/test_contingency_table.rb +7 -7
- metadata +53 -6
@@ -0,0 +1,254 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/kegg/pathway.rb - KEGG PATHWAY database class
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2010 Kozo Nishida <kozo-ni@is.naist.jp>
|
5
|
+
# Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'bio/db'
|
12
|
+
require 'bio/db/kegg/common'
|
13
|
+
|
14
|
+
module Bio
class KEGG

# == Description
#
# Bio::KEGG::PATHWAY parses a single KEGG PATHWAY database entry and
# exposes its fields through reader methods.
#
# == References
#
# * http://www.genome.jp/kegg/pathway.html
# * ftp://ftp.genome.jp/pub/kegg/pathway/pathway
#
class PATHWAY < KEGGDB

  # String that terminates one entry in the flat file.
  RS = "\n///\n"
  # Same object as RS.
  DELIMITER = RS
  # Tag size handed to the KEGGDB constructor.
  TAGSIZE = 12

  include Common::DblinksAsHash
  # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
  def dblinks_as_hash; super; end if false #dummy for RDoc
  alias dblinks dblinks_as_hash

  include Common::PathwaysAsHash
  # Returns a Hash of the pathway ID and name in PATHWAY field.
  def pathways_as_hash; super; end if false #dummy for RDoc
  alias pathways pathways_as_hash

  include Common::OrthologsAsHash
  # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
  def orthologs_as_hash; super; end if false #dummy for RDoc
  alias orthologs orthologs_as_hash

  include Common::References
  # REFERENCE -- Returns contents of the REFERENCE records as an Array of
  # Bio::Reference objects.
  # ---
  # *Returns*:: an Array containing Bio::Reference objects
  def references; super; end if false #dummy for RDoc

  include Common::ModulesAsHash
  # Returns MODULE field as a Hash.
  # Each key of the hash is KEGG MODULE ID,
  # and each value is the name of the Pathway Module.
  # ---
  # *Returns*:: Hash
  def modules_as_hash; super; end if false #dummy for RDoc
  alias modules modules_as_hash

  #--
  # Mixed in only for the private helper strings_as_hash.
  #++
  include Common::StringsAsHash

  # Builds a Bio::KEGG::PATHWAY object from the text of one entry.
  # ---
  # *Arguments*:
  # * (required) _entry_: (String) single entry as a string
  # *Returns*:: Bio::KEGG::PATHWAY object
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # Pathway ID: the first run of non-whitespace characters in the
  # ENTRY field.
  # ---
  # *Returns*:: String
  def entry_id
    entry_field = field_fetch('ENTRY')
    entry_field[/\S+/]
  end

  # Pathway name, taken from the NAME field.
  # ---
  # *Returns*:: String
  def name
    field_fetch('NAME')
  end

  # Pathway description, taken from the DESCRIPTION field.
  # Also available as #definition.
  # ---
  # *Returns*:: String
  def description
    field_fetch('DESCRIPTION')
  end
  alias definition description

  # Name of the KEGG class, taken from the CLASS field.
  # ---
  # *Returns*:: String
  def keggclass
    field_fetch('CLASS')
  end

  # Lines of the PATHWAY_MAP field.
  # ---
  # *Returns*:: Array containing String
  def pathways_as_strings
    lines_fetch('PATHWAY_MAP')
  end

  # Lines of the MODULE field.
  # ---
  # *Returns*:: Array containing String objects
  def modules_as_strings
    lines_fetch('MODULE')
  end

  # Lines of the DISEASE field.
  # ---
  # *Returns*:: Array containing String
  def diseases_as_strings
    lines_fetch('DISEASE')
  end

  # DISEASE field converted with strings_as_hash.
  # The result is computed on first use and then cached.
  # Also available as #diseases.
  # ---
  # *Returns*:: Hash of disease ID and its definition
  def diseases_as_hash
    @diseases_as_hash ||= strings_as_hash(diseases_as_strings)
  end
  alias diseases diseases_as_hash

  # Lines of the DBLINKS field (database name followed by entry IDs).
  # ---
  # *Returns*:: Array containing String
  def dblinks_as_strings
    lines_fetch('DBLINKS')
  end

  # Lines of the ORTHOLOGY field.
  # ---
  # *Returns*:: Array containing String
  def orthologs_as_strings
    lines_fetch('ORTHOLOGY')
  end

  # Organism, taken from the ORGANISM field.
  # ---
  # *Returns*:: String
  def organism
    field_fetch('ORGANISM')
  end

  # Lines of the GENE field.
  # ---
  # *Returns*:: Array containing String
  def genes_as_strings
    lines_fetch('GENE')
  end

  # GENE field converted with strings_as_hash.
  # The result is computed on first use and then cached.
  # Also available as #genes.
  # ---
  # *Returns*:: Hash of gene ID and its definition
  def genes_as_hash
    @genes_as_hash ||= strings_as_hash(genes_as_strings)
  end
  alias genes genes_as_hash

  # Lines of the ENZYME field. Also available as #enzymes.
  # ---
  # *Returns*:: Array containing String
  def enzymes_as_strings
    lines_fetch('ENZYME')
  end
  alias enzymes enzymes_as_strings

  # Lines of the REACTION field.
  # ---
  # *Returns*:: Array containing String
  def reactions_as_strings
    lines_fetch('REACTION')
  end

  # REACTION field converted with strings_as_hash.
  # The result is computed on first use and then cached.
  # Also available as #reactions.
  # ---
  # *Returns*:: Hash of reaction ID and its definition
  def reactions_as_hash
    @reactions_as_hash ||= strings_as_hash(reactions_as_strings)
  end
  alias reactions reactions_as_hash

  # Lines of the COMPOUND field.
  # ---
  # *Returns*:: Array containing String
  def compounds_as_strings
    lines_fetch('COMPOUND')
  end

  # COMPOUND field converted with strings_as_hash.
  # The result is computed on first use and then cached.
  # Also available as #compounds.
  # ---
  # *Returns*:: Hash of compound ID and its definition
  def compounds_as_hash
    @compounds_as_hash ||= strings_as_hash(compounds_as_strings)
  end
  alias compounds compounds_as_hash

  # Lines of the REL_PATHWAY field.
  # ---
  # *Returns*:: Array containing String objects
  def rel_pathways_as_strings
    lines_fetch('REL_PATHWAY')
  end

  # REL_PATHWAY field as a Hash. Each line is split at its first run of
  # whitespace: the leading token (pathway ID) becomes the key and the
  # remainder (pathway name) the value. The Hash is built once and cached.
  # Also available as #rel_pathways.
  # ---
  # *Returns*:: Hash
  def rel_pathways_as_hash
    return @rel_pathways_as_hash if defined? @rel_pathways_as_hash
    table = {}
    rel_pathways_as_strings.each do |line|
      pathway_id, pathway_name = line.split(/\s+/, 2)
      table[pathway_id] = pathway_name
    end
    @rel_pathways_as_hash = table
  end
  alias rel_pathways rel_pathways_as_hash

  # KO pathway, taken from the KO_PATHWAY field.
  # ---
  # *Returns*:: String
  def ko_pathway
    field_fetch('KO_PATHWAY')
  end

end # PATHWAY

end # KEGG
end # Bio
|
data/lib/bio/db/medline.rb
CHANGED
@@ -32,8 +32,12 @@ class MEDLINE < NCBIDB
|
|
32
32
|
entry.each_line do |line|
|
33
33
|
if line =~ /^\w/
|
34
34
|
tag = line[0,4].strip
|
35
|
+
else
|
36
|
+
# continuation from previous lines
|
37
|
+
@pubmed[tag] = @pubmed[tag].sub(/(?:\r|\r\n|\n)\z/, ' ')
|
35
38
|
end
|
36
|
-
|
39
|
+
value = line[6..-1]
|
40
|
+
@pubmed[tag] += value if value
|
37
41
|
end
|
38
42
|
end
|
39
43
|
attr_reader :pubmed
|
@@ -41,7 +45,7 @@ class MEDLINE < NCBIDB
|
|
41
45
|
|
42
46
|
# returns a Reference object.
|
43
47
|
def reference
|
44
|
-
hash = Hash.new
|
48
|
+
hash = Hash.new
|
45
49
|
|
46
50
|
hash['authors'] = authors
|
47
51
|
hash['title'] = title
|
@@ -399,6 +399,10 @@ module Bio
|
|
399
399
|
|
400
400
|
litdb = RuleRegexp[ 'Bio::LITDB',
|
401
401
|
/^CODE [0-9]+$/ ],
|
402
|
+
pathway_module = RuleRegexp[ 'Bio::KEGG::MODULE',
|
403
|
+
/^ENTRY .+ Pathway\s+Module\s*/ ],
|
404
|
+
pathway = RuleRegexp[ 'Bio::KEGG::PATHWAY',
|
405
|
+
/^ENTRY .+ Pathway\s*/ ],
|
402
406
|
brite = RuleRegexp[ 'Bio::KEGG::BRITE',
|
403
407
|
/^Entry [A-Z0-9]+/ ],
|
404
408
|
orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
|
@@ -510,6 +514,8 @@ module Bio
|
|
510
514
|
# KEGG
|
511
515
|
#aaindex.is_prior_to litdb
|
512
516
|
#litdb.is_prior_to brite
|
517
|
+
pathway_module.is_prior_to pathway
|
518
|
+
pathway.is_prior_to brite
|
513
519
|
brite.is_prior_to orthology
|
514
520
|
orthology.is_prior_to drug
|
515
521
|
drug.is_prior_to glycan
|
data/lib/bio/location.rb
CHANGED
@@ -179,6 +179,28 @@ class Location
|
|
179
179
|
return 0
|
180
180
|
end
|
181
181
|
|
182
|
+
# If _other_ is equal to self, returns true.
|
183
|
+
# Otherwise, returns false.
|
184
|
+
# ---
|
185
|
+
# *Arguments*:
|
186
|
+
# * (required) _other_: any object
|
187
|
+
# *Returns*:: true or false
|
188
|
+
def ==(other)
  # Anything the superclass already treats as equal is equal here too.
  return true if super(other)
  # Only an object of exactly the same class can be equal.
  return false unless other.instance_of?(self.class)
  # Equal only when every attribute below compares equal. all? stops at
  # the first mismatch and always yields a strict true/false.
  [ :from, :to, :strand, :sequence, :lt, :gt,
    :xref_id, :carat ].all? do |m|
    begin
      self.__send__(m) == other.__send__(m)
    rescue NameError, ArgumentError
      # An attribute that cannot be read or compared counts as a mismatch
      # (NameError also covers NoMethodError).
      false
    end
  end
end
|
203
|
+
|
182
204
|
end # Location
|
183
205
|
|
184
206
|
# == Description
|
@@ -350,6 +372,23 @@ class Locations
|
|
350
372
|
end
|
351
373
|
end
|
352
374
|
|
375
|
+
# If _other_ is equal to self, returns true.
|
376
|
+
# Otherwise, returns false.
|
377
|
+
# ---
|
378
|
+
# *Arguments*:
|
379
|
+
# * (required) _other_: any object
|
380
|
+
# *Returns*:: true or false
|
381
|
+
def ==(other)
  # Anything the superclass already treats as equal is equal here too.
  return true if super(other)
  # Only an object of exactly the same class can be equal.
  return false unless other.instance_of?(self.class)
  # Both the location list and the operator have to match.
  same = (self.locations == other.locations) &&
         (self.operator == other.operator)
  # Coerce to a strict boolean, as documented.
  same ? true : false
end
|
391
|
+
|
353
392
|
# Iterates on each Bio::Location object.
|
354
393
|
def each
|
355
394
|
@locations.each do |x|
|
data/lib/bio/reference.rb
CHANGED
@@ -150,6 +150,30 @@ module Bio
|
|
150
150
|
@affiliations = hash['affiliations'] || []
|
151
151
|
end
|
152
152
|
|
153
|
+
# If _other_ is equal to self, returns true.
|
154
|
+
# Otherwise, returns false.
|
155
|
+
# ---
|
156
|
+
# *Arguments*:
|
157
|
+
# * (required) _other_: any object
|
158
|
+
# *Returns*:: true or false
|
159
|
+
def ==(other)
  # Anything the superclass already treats as equal is equal here too.
  return true if super(other)
  # Only an object of exactly the same class can be equal.
  return false unless other.instance_of?(self.class)
  # Equal only when every attribute below compares equal. all? stops at
  # the first mismatch and always yields a strict true/false.
  [ :authors, :title, :journal, :volume, :issue, :pages,
    :year, :pubmed, :medline, :doi, :abstract,
    :url, :mesh, :embl_gb_record_number,
    :sequence_position, :comments, :affiliations ].all? do |m|
    begin
      self.__send__(m) == other.__send__(m)
    rescue NameError, ArgumentError
      # An attribute that cannot be read or compared counts as a mismatch
      # (NameError also covers NoMethodError).
      false
    end
  end
end
|
176
|
+
|
153
177
|
# Formats the reference in a given style.
|
154
178
|
#
|
155
179
|
# Styles:
|
data/lib/bio/sequence.rb
CHANGED
@@ -72,8 +72,10 @@ class Sequence
|
|
72
72
|
autoload :Format, 'bio/sequence/format'
|
73
73
|
autoload :Adapter, 'bio/sequence/adapter'
|
74
74
|
autoload :QualityScore, 'bio/sequence/quality_score'
|
75
|
+
autoload :SequenceMasker, 'bio/sequence/sequence_masker'
|
75
76
|
|
76
77
|
include Format
|
78
|
+
include SequenceMasker
|
77
79
|
|
78
80
|
# Create a new Bio::Sequence object
|
79
81
|
#
|
data/lib/bio/sequence/adapter.rb
CHANGED
@@ -22,6 +22,7 @@ module Bio::Sequence::Adapter
|
|
22
22
|
autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
|
23
23
|
autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
|
24
24
|
autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
|
25
|
+
autoload :FastaNumericFormat, 'bio/db/fasta/qual_to_biosequence'
|
25
26
|
autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
|
26
27
|
autoload :SangerChromatogram,
|
27
28
|
'bio/db/sanger_chromatogram/chromatogram_to_biosequence'
|
data/lib/bio/sequence/format.rb
CHANGED
@@ -183,6 +183,20 @@ module Format
|
|
183
183
|
a
|
184
184
|
end
|
185
185
|
|
186
|
+
# The same as output(:fasta, :header=>definition, :width=>width)
|
187
|
+
# This method is intended to replace Bio::Sequence#to_fasta.
|
188
|
+
#
|
189
|
+
# s = Bio::Sequence.new('atgc')
|
190
|
+
# puts s.output_fasta #=> "> \natgc\n"
|
191
|
+
# ---
|
192
|
+
# *Arguments*:
|
193
|
+
# * (optional) _definition_: (String) definition line
|
194
|
+
# * (optional) _width_: (Integer) width (default 70)
|
195
|
+
# *Returns*:: String object
|
196
|
+
def output_fasta(definition = nil, width = 70)
  # Thin wrapper: forwards to #output with the :fasta format, passing the
  # definition line as :header and the line width as :width.
  output(:fasta,
         :header => definition,
         :width => width)
end
|
199
|
+
|
186
200
|
private
|
187
201
|
|
188
202
|
# returns formatter repository modules
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#
|
2
|
+
# = bio/sequence/sequence_masker.rb - Sequence masking helper methods
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2010
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# == Description
|
9
|
+
#
|
10
|
+
# Bio::Sequence::SequenceMasker is a mix-in module to provide helpful
|
11
|
+
# methods for masking a sequence.
|
12
|
+
#
|
13
|
+
# For details, see documentation of Bio::Sequence::SequenceMasker.
|
14
|
+
#
|
15
|
+
|
16
|
+
module Bio
class Sequence

# Bio::Sequence::SequenceMasker is a mix-in module to provide helpful
# methods for masking a sequence.
#
# It is only expected to be included in Bio::Sequence.
# In the future, methods in this module might be moved to
# Bio::Sequence or other module and this module might be removed.
# Please do not depend on this module.
#
# The methods here call seq, seq=, dup, quality_scores and
# error_probabilities on the including object.
#
module SequenceMasker

  # Masks the sequence with each value in the <em>enum</em>.
  # The <em>enum</em> should be an array or enumerator.
  # A block must be given.
  # Each item of <em>enum</em> is yielded together with its position;
  # when the block returns true, the sequence character at that position
  # is replaced with <em>mask_char</em>. Items whose position lies beyond
  # the end of the sequence are ignored. The receiver is not modified.
  # ---
  # *Arguments*:
  # * (required) <em>enum</em> : Enumerator
  # * (required) <em>mask_char</em> : (String) character used for masking
  # *Returns*:: Bio::Sequence object
  def mask_with_enumerator(enum, mask_char)
    # mask_char may not be exactly one character long; offset tracks how
    # far later positions have shifted because of earlier replacements.
    offset = 0
    unit = mask_char.length - 1
    # Work on a copy so that the receiver's sequence stays untouched.
    s = self.seq.class.new(self.seq)
    enum.each_with_index do |item, index|
      next unless yield(item)
      j = index + offset
      if j < s.length then
        s[j, 1] = mask_char
        offset += unit
      end
    end
    newseq = self.dup
    newseq.seq = s
    newseq
  end

  # Masks low quality sequence regions.
  # For each sequence position, if the quality score is smaller than
  # the threshold, the sequence in the position is replaced with
  # <em>mask_char</em>.
  # When quality_scores is nil, nothing is masked.
  #
  # Note: This method does not care about quality_score_type.
  # ---
  # *Arguments*:
  # * (required) <em>threshold</em> : (Numeric) threshold
  # * (required) <em>mask_char</em> : (String) character used for masking
  # *Returns*:: Bio::Sequence object
  def mask_with_quality_score(threshold, mask_char)
    scores = self.quality_scores || []
    mask_with_enumerator(scores, mask_char) do |item|
      item < threshold
    end
  end

  # Masks high error-probability sequence regions.
  # For each sequence position, if the error probability is larger than
  # the threshold, the sequence in the position is replaced with
  # <em>mask_char</em>.
  # When error_probabilities is nil, nothing is masked.
  #
  # ---
  # *Arguments*:
  # * (required) <em>threshold</em> : (Numeric) threshold
  # * (required) <em>mask_char</em> : (String) character used for masking
  # *Returns*:: Bio::Sequence object
  def mask_with_error_probability(threshold, mask_char)
    values = self.error_probabilities || []
    mask_with_enumerator(values, mask_char) do |item|
      item > threshold
    end
  end
end #module SequenceMasker
end #class Sequence
end #module Bio
|
95
|
+
|