bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/aa.rb - amino acid sequence class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2006
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: Ruby's
|
|
7
|
+
#
|
|
8
|
+
# $Id: aa.rb,v 1.2 2006/02/06 14:11:31 k Exp $
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require 'bio/sequence/common'
|
|
12
|
+
|
|
13
|
+
module Bio
|
|
14
|
+
|
|
15
|
+
autoload :AminoAcid, 'bio/data/aa'
|
|
16
|
+
|
|
17
|
+
class Sequence
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Amino Acid sequence
|
|
21
|
+
class AA < String
|
|
22
|
+
|
|
23
|
+
include Bio::Sequence::Common
|
|
24
|
+
|
|
25
|
+
# Generate an amino acid sequence object from a string.
|
|
26
|
+
def initialize(str)
|
|
27
|
+
super
|
|
28
|
+
self.upcase!
|
|
29
|
+
self.tr!(" \t\n\r",'')
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Estimate the weight of this protein.
|
|
34
|
+
def molecular_weight
|
|
35
|
+
Bio::AminoAcid.weight(self)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def to_re
|
|
39
|
+
Bio::AminoAcid.to_re(self)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Generate the list of the names of each residue along the
|
|
43
|
+
# sequence (3-letter codes).
|
|
44
|
+
def codes
|
|
45
|
+
array = []
|
|
46
|
+
self.each_byte do |x|
|
|
47
|
+
array.push(Bio::AminoAcid.names[x.chr])
|
|
48
|
+
end
|
|
49
|
+
return array
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Similar to codes but returns long names.
|
|
53
|
+
def names
|
|
54
|
+
self.codes.map do |x|
|
|
55
|
+
Bio::AminoAcid.names[x]
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end # AA
|
|
60
|
+
|
|
61
|
+
end # Sequence
|
|
62
|
+
|
|
63
|
+
end # Bio
|
|
64
|
+
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/common.rb - common methods for biological sequence
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2006
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: Ruby's
|
|
7
|
+
#
|
|
8
|
+
# $Id: common.rb,v 1.2 2006/02/06 14:16:17 k Exp $
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
module Bio
|
|
12
|
+
|
|
13
|
+
autoload :Locations, 'bio/location'
|
|
14
|
+
|
|
15
|
+
class Sequence
|
|
16
|
+
|
|
17
|
+
# This module provides common methods for biological sequence classes
|
|
18
|
+
# which must inherit String.
|
|
19
|
+
module Common
|
|
20
|
+
|
|
21
|
+
def to_s
|
|
22
|
+
String.new(self)
|
|
23
|
+
end
|
|
24
|
+
alias to_str to_s
|
|
25
|
+
|
|
26
|
+
# Force self to re-initialize for clean up (remove white spaces,
|
|
27
|
+
# case unification).
|
|
28
|
+
def seq
|
|
29
|
+
self.class.new(self)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Similar to the 'seq' method, but changes the self object destructively.
|
|
33
|
+
def normalize!
|
|
34
|
+
initialize(self)
|
|
35
|
+
self
|
|
36
|
+
end
|
|
37
|
+
alias seq! normalize!
|
|
38
|
+
|
|
39
|
+
def <<(*arg)
|
|
40
|
+
super(self.class.new(*arg))
|
|
41
|
+
end
|
|
42
|
+
alias concat <<
|
|
43
|
+
|
|
44
|
+
def +(*arg)
|
|
45
|
+
self.class.new(super(*arg))
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Returns the subsequence of the self string, addressed with 1-based,
# inclusive positions instead of Ruby's 0-based indices.
#
# s:: start position (1-based, defaults to 1)
# e:: end position (1-based, inclusive, defaults to the sequence length)
#
# Returns an instance of the receiver's class, or nil when String#[]
# returns nil (start position more than one past the end of the sequence).
# Raises RuntimeError unless both positions are positive.
def subseq(s = 1, e = self.length)
  raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
  part = self[(s - 1)..(e - 1)]
  # String#[] hands back a plain String for subclass receivers on
  # Ruby 3.0+, so rebuild the result to keep the sequence type.
  part && self.class.new(part)
end
|
|
55
|
+
|
|
56
|
+
# This method iterates on sub strings with the specified length
# 'window_size'. By specifying 'step_size', codon sized shifting or
# splitting a genome sequence with overlapping ends can easily be yielded.
#
# window_size:: length of each yielded sub sequence
# step_size::   distance between the starts of two windows (default 1)
#
# Each window is yielded as an instance of the receiver's class.
# The remainder sequence at the terminal end (everything after the last
# yielded window) will be returned. When the sequence is shorter than
# 'window_size', nothing is yielded and nil is returned.
#
# Example:
#   # prints average GC% on each 100bp
#   seq.window_search(100) do |subseq|
#     puts subseq.gc
#   end
#   # prints every translated peptide (length 5aa) in the same frame
#   seq.window_search(15, 3) do |subseq|
#     puts subseq.translate
#   end
#   # split genome sequence by 10000bp with 1000bp overlap in fasta format
#   i = 1
#   remainder = seq.window_search(10000, 9000) do |subseq|
#     puts subseq.to_fasta("segment #{i}", 60)
#     i += 1
#   end
#   puts remainder.to_fasta("segment #{i}", 60)
#
def window_search(window_size, step_size = 1)
  # Block parameters are block-local on Ruby 1.9+, so the start of the
  # last window has to be recorded explicitly for the remainder below.
  last_start = 0
  0.step(self.length - window_size, step_size) do |start|
    # String#[] returns a plain String for subclass receivers on
    # Ruby 3.0+; rebuild it so the block still gets a sequence object.
    yield self.class.new(self[start, window_size])
    last_start = start
  end
  rest = self[(last_start + window_size)..-1]
  rest && self.class.new(rest)
end
|
|
86
|
+
|
|
87
|
+
# This method receives a hash mapping residues/bases to particular
# values, and sums up the values along the self sequence. Especially
# useful with the window_search method and amino acid indices etc.
#
# hash:: Hash of single-character String keys to numeric values.
#        Characters missing from the hash count as the hash's plain
#        default value if it has one, otherwise as 0.0.
#
# Returns the sum as a Float. The given hash is not modified.
def total(hash)
  # Resolve the fallback once instead of assigning hash.default, which
  # would alter the caller's object (and fail on a frozen hash).
  fallback = hash.default || 0.0
  sum = 0.0
  self.each_byte do |x|
    sum += hash.fetch(x.chr, fallback)
  end
  sum
end
|
|
100
|
+
|
|
101
|
+
# Returns a hash of the occurrence counts for each residue or base.
|
|
102
|
+
def composition
|
|
103
|
+
count = Hash.new(0)
|
|
104
|
+
self.scan(/./) do |x|
|
|
105
|
+
count[x] += 1
|
|
106
|
+
end
|
|
107
|
+
return count
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Returns a randomized sequence keeping its composition by default.
|
|
111
|
+
# The argument is required when generating a random sequence from the empty
|
|
112
|
+
# sequence (used by the class methods NA.randomize, AA.randomize).
|
|
113
|
+
# If the block is given, yields for each random residue/base.
|
|
114
|
+
def randomize(hash = nil)
|
|
115
|
+
length = self.length
|
|
116
|
+
if hash
|
|
117
|
+
count = hash.clone
|
|
118
|
+
count.each_value {|x| length += x}
|
|
119
|
+
else
|
|
120
|
+
count = self.composition
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
seq = ''
|
|
124
|
+
tmp = {}
|
|
125
|
+
length.times do
|
|
126
|
+
count.each do |k, v|
|
|
127
|
+
tmp[k] = v * rand
|
|
128
|
+
end
|
|
129
|
+
max = tmp.max {|a, b| a[1] <=> b[1]}
|
|
130
|
+
count[max.first] -= 1
|
|
131
|
+
|
|
132
|
+
if block_given?
|
|
133
|
+
yield max.first
|
|
134
|
+
else
|
|
135
|
+
seq += max.first
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
return self.class.new(seq)
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Generate a new random sequence with the given frequency of bases
|
|
142
|
+
# or residues. The sequence length is determined by the sum of each
|
|
143
|
+
# base/residue occurrences.
|
|
144
|
+
def self.randomize(*arg, &block)
|
|
145
|
+
self.new('').randomize(*arg, &block)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Receives a GenBank style position string (or a Locations object) and
# splices the sequence itself accordingly. See also: bio/location.rb
#
# position:: a Locations object, or anything Locations.new accepts
#
# For every location, either its own replacement sequence
# (location.sequence) is used, or the region from location.from to
# location.to is cut out with subseq. Regions on the minus strand
# (location.strand < 0) are complemented when the sequence type provides
# complement!; other sequence types are left as they are.
#
# Returns a new instance of the receiver's class.
def splice(position)
  unless position.is_a?(Locations) then
    position = Locations.new(position)
  end
  s = ''
  position.each do |location|
    if location.sequence
      s << location.sequence
    else
      exon = self.subseq(location.from, location.to)
      # Slicing may hand back a plain String (Ruby 3.0+); restore the
      # receiver's class so that complement! is available where it should be.
      exon = self.class.new(exon) if exon and not exon.is_a?(self.class)
      # Explicit capability check instead of rescuing NameError, which
      # also hid unrelated errors raised inside complement!.
      if location.strand < 0 and exon.respond_to?(:complement!)
        exon.complement!
      end
      s << exon
    end
  end
  return self.class.new(s)
end
alias splicing splice
|
|
170
|
+
|
|
171
|
+
end # Common
|
|
172
|
+
|
|
173
|
+
end # Sequence
|
|
174
|
+
|
|
175
|
+
end # Bio
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/compat.rb - methods for backward compatibility
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2006
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: Ruby's
|
|
7
|
+
#
|
|
8
|
+
# $Id: compat.rb,v 1.2 2006/02/06 14:18:03 k Exp $
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
module Bio
|
|
13
|
+
|
|
14
|
+
class Sequence
|
|
15
|
+
|
|
16
|
+
autoload :Common, 'bio/sequence/common'
|
|
17
|
+
autoload :NA, 'bio/sequence/na'
|
|
18
|
+
autoload :AA, 'bio/sequence/aa'
|
|
19
|
+
|
|
20
|
+
def to_s
|
|
21
|
+
String.new(@seq)
|
|
22
|
+
end
|
|
23
|
+
alias to_str to_s
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
module Common
|
|
27
|
+
|
|
28
|
+
# Output the FASTA format string of the sequence. The 1st argument is
|
|
29
|
+
# used as the comment string. If the 2nd option is given, the output
|
|
30
|
+
# sequence will be folded.
|
|
31
|
+
def to_fasta(header = '', width = nil)
|
|
32
|
+
warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
|
|
33
|
+
">#{header}\n" +
|
|
34
|
+
if width
|
|
35
|
+
self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
|
|
36
|
+
else
|
|
37
|
+
self.to_s + "\n"
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
end # Common
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class NA
|
|
45
|
+
|
|
46
|
+
def self.randomize(*arg, &block)
|
|
47
|
+
self.new('').randomize(*arg, &block)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def pikachu
|
|
51
|
+
self.dna.tr("atgc", "pika") # joke, of course :-)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
end # NA
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class AA
|
|
58
|
+
|
|
59
|
+
def self.randomize(*arg, &block)
|
|
60
|
+
self.new('').randomize(*arg, &block)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end # AA
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
end # Sequence
|
|
67
|
+
|
|
68
|
+
end # Bio
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/format.rb - various output format of the biological sequence
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2006
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>,
|
|
6
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
7
|
+
# License:: Ruby's
|
|
8
|
+
#
|
|
9
|
+
# = TODO
|
|
10
|
+
#
|
|
11
|
+
# porting from N. Goto's feature-output.rb on BioRuby list.
|
|
12
|
+
#
|
|
13
|
+
# $Id: format.rb,v 1.2 2006/02/06 14:20:35 k Exp $
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
module Bio
|
|
18
|
+
|
|
19
|
+
autoload :Sequence, 'bio/sequence'
|
|
20
|
+
|
|
21
|
+
class Sequence
|
|
22
|
+
|
|
23
|
+
# Mix-in that renders a sequence entry in several flat-file formats.
#
# The including class supplies the instance variables read below:
# @seq, @entry_id and @definition (format_fasta) and @features
# (format_genbank / format_embl). Each feature must respond to
# +feature+, +position+ and +qualifiers+; each qualifier must respond to
# +qualifier+ and +value+.
module Format

  # Output the FASTA format string of the sequence. The 1st argument is
  # used in the comment line (defaults to "entry_id definition"). If the
  # 2nd argument (integer) is given, the output sequence will be folded
  # at that width.
  def format_fasta(header = nil, width = nil)
    header ||= "#{@entry_id} #{@definition}"

    ">#{header}\n" +
    if width
      @seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
    else
      @seq.to_s + "\n"
    end
  end

  # GFF output is not implemented yet; always raises NotImplementedError.
  def format_gff
    raise NotImplementedError
  end

  # Output the feature table of the entry in GenBank style: the feature
  # key starts after 5 blanks, the location after a 16 character key
  # column, and lines are kept within 79 columns.
  def format_genbank
    prefix = ' ' * 5
    indent = prefix + ' ' * 16
    fwidth = 79 - indent.length

    format_features(prefix, indent, fwidth)
  end

  # Output the feature table of the entry in EMBL style.
  def format_embl
    # EMBL feature lines are "FT" plus three blanks, so that the feature
    # key starts in column 6 and the location in column 22.
    prefix = 'FT   '
    indent = prefix + ' ' * 16
    fwidth = 80 - indent.length

    format_features(prefix, indent, fwidth)
  end


  private

  # Renders every feature in @features: the key line (prefix, key padded
  # to 16 characters, location) followed by its qualifier lines.
  # Continuation lines of a wrapped location start with 'indent'.
  # Returns the whole table as one String, each line newline-terminated.
  def format_features(prefix, indent, width)
    result = ''
    @features.each do |feature|
      result << prefix << sprintf("%-16s", feature.feature)

      position = feature.position
      #position = feature.locations.to_s

      # The first location line follows the key directly; only the
      # continuation lines need the indent.
      result << wrap(position, width).gsub("\n", "\n" + indent) << "\n"

      result << format_qualifiers(feature.qualifiers, indent, width)
    end
    return result
  end

  # Renders all qualifiers of one feature as indented "/name=value" lines.
  #
  # * a value of +true+ marks a flag qualifier and is printed as "/name"
  # * "translation" values are quoted and folded at a fixed width
  # * other values are quoted (embedded quotes doubled) when they contain
  #   a non-digit character, and wrapped at separators
  #
  # Qualifier values are never modified. Returns a String in which every
  # line is newline-terminated ("" when there are no qualifiers).
  def format_qualifiers(qualifiers, indent, width)
    qualifiers.map do |qualifier|
      q = qualifier.qualifier
      value = qualifier.value

      lines =
        if value == true
          wrap('/' + q, width)
        elsif q == 'translation'
          fold("/#{q}=\"#{value}\"", width)
        else
          v = value.to_s
          if v[/\D/]
            #v.delete!("\x00-\x1f\x7f-\xff")
            # non-destructive gsub: to_s may return the value object itself
            v = '"' + v.gsub(/"/, '""') + '"'
          end
          wrap('/' + q + '=' + v, width)
        end

      # fold ends with a newline while wrap does not; normalize both.
      lines.chomp.gsub(/^/, indent) + "\n"
    end.join
  end

  # Breaks 'str' into lines of at most 'width' characters regardless of
  # its content; every resulting line ends with a newline.
  def fold(str, width)
    str.gsub(Regexp.new("(.{1,#{width}})"), "\\1\n")
  end

  # Breaks 'str' into lines of at most 'width' characters, preferring to
  # break at a blank or right after ',' or ';'. Falls back to a hard break
  # when no such position exists. Lines are joined with "\n" without a
  # trailing newline.
  def wrap(str, width)
    result = []
    left = str.dup
    while left and left.length > width
      line = nil
      width.downto(1) do |i|
        if left[i..i] == ' ' or /[,;]/ =~ left[(i-1)..(i-1)] then
          line = left[0..(i-1)].sub(/ +\z/, '')
          left = left[i..-1].sub(/\A +/, '')
          break
        end
      end
      if line.nil? then
        line = left[0..(width-1)]
        left = left[width..-1]
      end
      result << line
    end
    # Skip an empty remainder so the output never ends in a blank line.
    result << left if left and not left.empty?
    return result.join("\n")
  end

end # Format
|
|
130
|
+
|
|
131
|
+
end # Sequence
|
|
132
|
+
|
|
133
|
+
end # Bio
|
|
134
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/generic.rb - generic sequence class to store an intact string
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2006
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: Ruby's
|
|
7
|
+
#
|
|
8
|
+
# $Id: generic.rb,v 1.3 2006/02/06 14:26:04 k Exp $
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require 'bio/sequence/common'
|
|
12
|
+
|
|
13
|
+
module Bio
|
|
14
|
+
class Sequence
|
|
15
|
+
|
|
16
|
+
class Generic < String
|
|
17
|
+
|
|
18
|
+
include Bio::Sequence::Common
|
|
19
|
+
|
|
20
|
+
end # Generic
|
|
21
|
+
|
|
22
|
+
end # Sequence
|
|
23
|
+
end # Bio
|
|
24
|
+
|